]+data-factoryid\s*=\s*["\']([^"\']+)', webpage, 'video id')
- return self.url_result(f'francetv:{video_id}', FranceTVIE, video_id)
+ return self._make_url_result(video_id, url=url)
diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py
index fd9bba8bcb..fa12a6a8df 100644
--- a/yt_dlp/extractor/mainstreaming.py
+++ b/yt_dlp/extractor/mainstreaming.py
@@ -1,14 +1,13 @@
import re
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
js_to_json,
parse_duration,
traverse_obj,
try_get,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/malltv.py b/yt_dlp/extractor/malltv.py
deleted file mode 100644
index e1031d8da4..0000000000
--- a/yt_dlp/extractor/malltv.py
+++ /dev/null
@@ -1,107 +0,0 @@
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- dict_get,
- float_or_none,
- int_or_none,
- merge_dicts,
- parse_duration,
- try_get,
-)
-
-
-class MallTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
- 'md5': 'cd69ce29176f6533b65bff69ed9a5f2a',
- 'info_dict': {
- 'id': 't0zzt0',
- 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
- 'ext': 'mp4',
- 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
- 'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
- 'duration': 216,
- 'timestamp': 1538870400,
- 'upload_date': '20181007',
- 'view_count': int,
- 'comment_count': int,
- 'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnigfq/thumbnails/retina.jpg',
- 'average_rating': 9.060869565217391,
- 'dislike_count': int,
- 'like_count': int,
- }
- }, {
- 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
- 'only_matching': True,
- }, {
- 'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
- 'only_matching': True,
- }, {
- 'url': 'https://www.mall.tv/zivoty-slavnych/nadeje-vychodu-i-zapadu-jak-michail-gorbacov-zmenil-politickou-mapu-sveta-a-ziskal-za-to-nobelovu-cenu-miru',
- 'info_dict': {
- 'id': 'yx010y',
- 'ext': 'mp4',
- 'dislike_count': int,
- 'description': 'md5:aee02bee5a8d072c6a8207b91d1905a9',
- 'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnjdeu/thumbnails/retina.jpg',
- 'comment_count': int,
- 'display_id': 'md5:0ec2afa94d2e2b7091c019cef2a43a9b',
- 'like_count': int,
- 'duration': 752,
- 'timestamp': 1646956800,
- 'title': 'md5:fe79385daaf16d74c12c1ec4a26687af',
- 'view_count': int,
- 'upload_date': '20220311',
- 'average_rating': 9.685714285714285,
- }
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(
- url, display_id, headers=self.geo_verification_headers())
-
- video = self._parse_json(self._search_regex(
- r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
- webpage, 'video object'), display_id)
-
- video_id = self._search_regex(
- r'<input\s*id\s*=\s*player-id-name\s*[^>]+value\s*=\s*(\w+)', webpage, 'video id')
-
- formats = self._extract_m3u8_formats(
- video['VideoSource'], video_id, 'mp4', 'm3u8_native')
-
- subtitles = {}
- for s in (video.get('Subtitles') or {}):
- s_url = s.get('Url')
- if not s_url:
- continue
- subtitles.setdefault(s.get('Language') or 'cz', []).append({
- 'url': s_url,
- })
-
- entity_counts = video.get('EntityCounts') or {}
-
- def get_count(k):
- v = entity_counts.get(k + 's') or {}
- return int_or_none(dict_get(v, ('Count', 'StrCount')))
-
- info = self._search_json_ld(webpage, video_id, default={})
-
- return merge_dicts({
- 'id': str(video_id),
- 'display_id': display_id,
- 'title': video.get('Title'),
- 'description': clean_html(video.get('Description')),
- 'thumbnail': video.get('ThumbnailUrl'),
- 'formats': formats,
- 'subtitles': subtitles,
- 'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
- 'view_count': get_count('View'),
- 'like_count': get_count('Like'),
- 'dislike_count': get_count('Dislike'),
- 'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
- 'comment_count': get_count('Comment'),
- }, info)
diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py
index 2792e6e707..44c321c262 100644
--- a/yt_dlp/extractor/manoto.py
+++ b/yt_dlp/extractor/manoto.py
@@ -1,10 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
- traverse_obj
-)
-
+from ..utils import clean_html, int_or_none, traverse_obj
_API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}'
diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py
index 741745378b..2aa3a3c93b 100644
--- a/yt_dlp/extractor/manyvids.py
+++ b/yt_dlp/extractor/manyvids.py
@@ -12,6 +12,7 @@ from ..utils import (
class ManyVidsIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TESTS = [{
# preview video
diff --git a/yt_dlp/extractor/markiza.py b/yt_dlp/extractor/markiza.py
index 53ed79158f..ca465eae96 100644
--- a/yt_dlp/extractor/markiza.py
+++ b/yt_dlp/extractor/markiza.py
@@ -10,6 +10,7 @@ from ..utils import (
class MarkizaIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
_TESTS = [{
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
@@ -68,6 +69,7 @@ class MarkizaIE(InfoExtractor):
class MarkizaPageIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
_TESTS = [{
'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py
index 716f1c9615..c3c58d7d01 100644
--- a/yt_dlp/extractor/masters.py
+++ b/yt_dlp/extractor/masters.py
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
traverse_obj,
diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py
index eeb5b85f38..d040fb48f1 100644
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -4,12 +4,11 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
- format_field,
float_or_none,
+ format_field,
int_or_none,
str_or_none,
traverse_obj,
- update_url_query,
)
@@ -82,7 +81,7 @@ class MedalTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(update_url_query(url, {'mobilebypass': 'true'}), video_id)
+ webpage = self._download_webpage(url, video_id, query={'mobilebypass': 'true'})
hydration_data = self._search_json(
r'',
- webpage, 'json data'),
- episode_id)['episodes'][show_id][episode_id]
-
- title = episode['title']
-
- show_title = episode.get('showTitle')
- if show_title:
- title = '%s - %s' % (show_title, title)
-
- formats = [{
- 'url': update_url_query(episode['audioURL'], query={'cbr': abr}),
- 'format_id': compat_str(abr),
- 'abr': abr,
- 'vcodec': 'none',
- } for abr in (96, 128, 192, 256)]
- self._check_formats(formats, episode_id)
-
- description = clean_html(episode.get('longTeaser'))
- thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
- duration = int_or_none(episode.get('duration'))
- timestamp = unified_timestamp(episode.get('publishedAt'))
-
- return {
- 'id': episode_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- }
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py
index 79d9c8e31e..9c382e257d 100644
--- a/yt_dlp/extractor/rcti.py
+++ b/yt_dlp/extractor/rcti.py
@@ -5,11 +5,11 @@ import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
strip_or_none,
traverse_obj,
- try_get
+ try_get,
)
@@ -28,7 +28,7 @@ class RCTIPlusBaseIE(InfoExtractor):
class RCTIPlusIE(RCTIPlusBaseIE):
- _VALID_URL = r'https://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _VALID_URL = r'https?://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola',
'md5': '56ed45affad45fa18d5592a1bc199997',
@@ -218,7 +218,7 @@ class RCTIPlusIE(RCTIPlusBaseIE):
class RCTIPlusSeriesIE(RCTIPlusBaseIE):
- _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?'
+ _VALID_URL = r'https?://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?'
_TESTS = [{
'url': 'https://www.rctiplus.com/programs/829/putri-untuk-pangeran',
'playlist_mincount': 1019,
@@ -229,7 +229,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
'age_limit': 2,
'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'],
'display_id': 'putri-untuk-pangeran',
- 'tag': 'count:18',
+ 'tags': 'count:18',
},
}, { # No episodes
'url': 'https://www.rctiplus.com/programs/615/inews-pagi',
@@ -239,7 +239,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
'title': 'iNews Pagi',
'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04',
'age_limit': 2,
- 'tag': 'count:11',
+ 'tags': 'count:11',
'display_id': 'inews-pagi',
}
}]
@@ -327,8 +327,8 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]),
'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'),
expected_type=lambda x: strip_or_none(x) or None),
- 'tag': traverse_obj(series_meta, ('tag', ..., 'name'),
- expected_type=lambda x: strip_or_none(x) or None),
+ 'tags': traverse_obj(series_meta, ('tag', ..., 'name'),
+ expected_type=lambda x: strip_or_none(x) or None),
}
return self.playlist_result(
self._series_entries(series_id, display_id, video_type, metadata), series_id,
@@ -336,7 +336,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
class RCTIPlusTVIE(RCTIPlusBaseIE):
- _VALID_URL = r'https://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))'
+ _VALID_URL = r'https?://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))'
_TESTS = [{
'url': 'https://www.rctiplus.com/tv/rcti',
'info_dict': {
diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py
index 9a2e0d9851..cc76b898ad 100644
--- a/yt_dlp/extractor/rds.py
+++ b/yt_dlp/extractor/rds.py
@@ -1,13 +1,14 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ js_to_json,
parse_duration,
parse_iso8601,
- js_to_json,
)
-from ..compat import compat_str
class RDSIE(InfoExtractor):
+ _WORKING = False
IE_DESC = 'RDS.ca'
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
diff --git a/yt_dlp/extractor/redbee.py b/yt_dlp/extractor/redbee.py
index b59b518b13..4d71133b3c 100644
--- a/yt_dlp/extractor/redbee.py
+++ b/yt_dlp/extractor/redbee.py
@@ -134,6 +134,7 @@ class ParliamentLiveUKIE(RedBeeBaseIE):
class RTBFIE(RedBeeBaseIE):
+ _WORKING = False
_VALID_URL = r'''(?x)
https?://(?:www\.)?rtbf\.be/
(?:
diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py
index d1de2490fc..fac51b9efe 100644
--- a/yt_dlp/extractor/redbulltv.py
+++ b/yt_dlp/extractor/redbulltv.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- float_or_none,
ExtractorError,
+ float_or_none,
)
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 62f669f35d..bc3e5f7eee 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -5,11 +5,13 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ parse_qs,
traverse_obj,
try_get,
unescapeHTML,
- urlencode_postdata,
+ update_url_query,
url_or_none,
+ urlencode_postdata,
)
@@ -76,7 +78,7 @@ class RedditIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'comment_count': int,
- 'age_limit': 0,
+ 'age_limit': 18,
'channel_id': 'u_creepyt0es',
},
'params': {
@@ -150,6 +152,51 @@ class RedditIE(InfoExtractor):
'like_count': int,
},
'skip': 'Requires account that has opted-in to the GenZedong subreddit',
+ }, {
+ # subtitles in HLS manifest
+ 'url': 'https://www.reddit.com/r/Unexpected/comments/1cl9h0u/the_insurance_claim_will_be_interesting/',
+ 'info_dict': {
+ 'id': 'a2mdj5d57qyc1',
+ 'ext': 'mp4',
+ 'display_id': '1cl9h0u',
+ 'title': 'The insurance claim will be interesting',
+ 'uploader': 'darrenpauli',
+ 'channel_id': 'Unexpected',
+ 'duration': 53,
+ 'upload_date': '20240506',
+ 'timestamp': 1714966382,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ 'subtitles': {'en': 'mincount:1'},
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # subtitles from caption-url
+ 'url': 'https://www.reddit.com/r/soccer/comments/1cxwzso/tottenham_1_0_newcastle_united_james_maddison_31/',
+ 'info_dict': {
+ 'id': 'xbmj4t3igy1d1',
+ 'ext': 'mp4',
+ 'display_id': '1cxwzso',
+ 'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'',
+ 'uploader': 'Woodstovia',
+ 'channel_id': 'soccer',
+ 'duration': 30,
+ 'upload_date': '20240522',
+ 'timestamp': 1716373798,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ 'subtitles': {'en': 'mincount:1'},
+ },
+ 'params': {
+ 'skip_download': True,
+ 'writesubtitles': True,
+ },
}, {
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
'only_matching': True,
@@ -197,6 +244,12 @@ class RedditIE(InfoExtractor):
elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
raise ExtractorError('Unable to login, no cookie was returned')
+ def _get_subtitles(self, video_id):
+ # Fallback if there were no subtitles provided by DASH or HLS manifests
+ caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt'
+ if self._is_valid_url(caption_url, video_id, item='subtitles'):
+ return {'en': [{'url': caption_url}]}
+
def _real_extract(self, url):
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
@@ -307,6 +360,10 @@ class RedditIE(InfoExtractor):
dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
+ qs = traverse_obj(parse_qs(hls_playlist_url), {
+ 'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}),
+ })
+ hls_playlist_url = update_url_query(hls_playlist_url, qs)
formats = [{
'url': unescapeHTML(reddit_video['fallback_url']),
@@ -332,7 +389,7 @@ class RedditIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'formats': formats,
- 'subtitles': subtitles,
+ 'subtitles': subtitles or self.extract_subtitles(video_id),
'duration': int_or_none(reddit_video.get('duration')),
}
diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py
index f9453202b7..d0546bbfaf 100644
--- a/yt_dlp/extractor/redgifs.py
+++ b/yt_dlp/extractor/redgifs.py
@@ -5,10 +5,10 @@ from ..compat import compat_parse_qs
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
+ OnDemandPagedList,
int_or_none,
qualities,
try_get,
- OnDemandPagedList,
)
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 965abbee8a..14ed0edab2 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
merge_dicts,
str_to_int,
diff --git a/yt_dlp/extractor/regiotv.py b/yt_dlp/extractor/regiotv.py
deleted file mode 100644
index edb6ae5bce..0000000000
--- a/yt_dlp/extractor/regiotv.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from .common import InfoExtractor
-from ..networking import Request
-from ..utils import xpath_text, xpath_with_ns
-
-
-class RegioTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.regio-tv.de/video/395808.html',
- 'info_dict': {
- 'id': '395808',
- 'ext': 'mp4',
- 'title': 'Wir in Ludwigsburg',
- 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!',
- }
- }, {
- 'url': 'http://www.regio-tv.de/video/395808',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- key = self._search_regex(
- r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key')
- title = self._og_search_title(webpage)
-
- SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>'
-
- request = Request(
- 'http://v.telvi.de/',
- SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8'))
- video_data = self._download_xml(request, video_id, 'Downloading video XML')
-
- NS_MAP = {
- 'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
- 'soap': 'http://schemas.xmlsoap.org/soap/envelope/',
- }
-
- video_url = xpath_text(
- video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True)
- thumbnail = xpath_text(
- video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail')
- description = self._og_search_description(
- webpage) or self._html_search_meta('description', webpage)
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- }
diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py
index fdde31704c..abb537cf3e 100644
--- a/yt_dlp/extractor/rentv.py
+++ b/yt_dlp/extractor/rentv.py
@@ -8,6 +8,7 @@ from ..utils import (
class RENTVIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://ren.tv/video/epizod/118577',
@@ -59,6 +60,7 @@ class RENTVIE(InfoExtractor):
class RENTVArticleIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py
index 6d032564d3..f49262a650 100644
--- a/yt_dlp/extractor/restudy.py
+++ b/yt_dlp/extractor/restudy.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
class RestudyIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.restudy.dk/video/play/id/1637',
diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py
index 6919425f3a..9c9bac6af9 100644
--- a/yt_dlp/extractor/reuters.py
+++ b/yt_dlp/extractor/reuters.py
@@ -2,13 +2,14 @@ import re
from .common import InfoExtractor
from ..utils import (
- js_to_json,
int_or_none,
+ js_to_json,
unescapeHTML,
)
class ReutersIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
diff --git a/yt_dlp/extractor/ridehome.py b/yt_dlp/extractor/ridehome.py
new file mode 100644
index 0000000000..78f838ac15
--- /dev/null
+++ b/yt_dlp/extractor/ridehome.py
@@ -0,0 +1,96 @@
+from .art19 import Art19IE
+from .common import InfoExtractor
+from ..utils import extract_attributes, get_elements_html_by_class
+from ..utils.traversal import traverse_obj
+
+
+class RideHomeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ridehome\.info/show/[\w-]+/(?P<id>[\w-]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.ridehome.info/show/techmeme-ride-home/thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs/',
+ 'info_dict': {
+ 'id': 'thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': 'c84ea3cc96950a9ab86fe540f3edc588',
+ 'info_dict': {
+ 'id': '540e5493-9fe6-4c14-a488-dc508d8794b2',
+ 'ext': 'mp3',
+ 'title': 'Thu. 12/28 – Will 2024 Be The Year Apple Gets Serious About Gaming On Macs?',
+ 'description': 'md5:9dba86ae9b5047a8150eceddeeb629c2',
+ 'series': 'Techmeme Ride Home',
+ 'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b',
+ 'upload_date': '20231228',
+ 'timestamp': 1703780995,
+ 'modified_date': '20231230',
+ 'episode_id': '540e5493-9fe6-4c14-a488-dc508d8794b2',
+ 'modified_timestamp': 1703912404,
+ 'release_date': '20231228',
+ 'release_timestamp': 1703782800,
+ 'duration': 1000.1502,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$',
+ },
+ }],
+ }, {
+ 'url': 'https://www.ridehome.info/show/techmeme-ride-home/portfolio-profile-sensel-with-ilyarosenberg/',
+ 'info_dict': {
+ 'id': 'portfolio-profile-sensel-with-ilyarosenberg',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': 'bf9d6efad221008ce71aea09d5533cf6',
+ 'info_dict': {
+ 'id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac',
+ 'ext': 'mp3',
+ 'title': '(Portfolio Profile) Sensel - With @IlyaRosenberg',
+ 'description': 'md5:e1e4a970bce04290e0ba6f030b0125db',
+ 'series': 'Techmeme Ride Home',
+ 'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b',
+ 'upload_date': '20220108',
+ 'timestamp': 1641656064,
+ 'modified_date': '20230418',
+ 'episode_id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac',
+ 'modified_timestamp': 1681843318,
+ 'release_date': '20220108',
+ 'release_timestamp': 1641672000,
+ 'duration': 2789.38122,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$'
+ },
+ }],
+ }, {
+ 'url': 'https://www.ridehome.info/show/spacecasts/big-tech-news-apples-macbook-pro-event/',
+ 'info_dict': {
+ 'id': 'big-tech-news-apples-macbook-pro-event',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': 'b1428530c6e03904a8271e978007fc05',
+ 'info_dict': {
+ 'id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7',
+ 'ext': 'mp3',
+ 'title': 'md5:e6c05d44d59b6577a4145ac339de5040',
+ 'description': 'md5:14152f7228c8a301a77e3d6bc891b145',
+ 'series': 'SpaceCasts',
+ 'series_id': '8e3e837d-7fe0-4a23-8e11-894917e07e17',
+ 'upload_date': '20211026',
+ 'timestamp': 1635271450,
+ 'modified_date': '20230502',
+ 'episode_id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7',
+ 'modified_timestamp': 1683057500,
+ 'release_date': '20211026',
+ 'release_timestamp': 1635272124,
+ 'duration': 2266.30531,
+ 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$'
+ },
+ }],
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+
+ urls = traverse_obj(
+ get_elements_html_by_class('iframeContainer', webpage),
+ (..., {extract_attributes}, lambda k, v: k == 'data-src' and Art19IE.suitable(v)))
+ return self.playlist_from_matches(urls, article_id, ie=Art19IE)
diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py
index 8d29b302bb..bc59ed07e4 100644
--- a/yt_dlp/extractor/rmcdecouverte.py
+++ b/yt_dlp/extractor/rmcdecouverte.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
+from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py
index c491aaf534..16622430cd 100644
--- a/yt_dlp/extractor/rockstargames.py
+++ b/yt_dlp/extractor/rockstargames.py
@@ -6,6 +6,7 @@ from ..utils import (
class RockstarGamesIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.rockstargames.com/videos/video/11544/',
@@ -37,14 +38,14 @@ class RockstarGamesIE(InfoExtractor):
title = video['title']
formats = []
- for video in video['files_processed']['video/mp4']:
- if not video.get('src'):
+ for v in video['files_processed']['video/mp4']:
+ if not v.get('src'):
continue
- resolution = video.get('resolution')
+ resolution = v.get('resolution')
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', resolution or '', 'height', default=None))
formats.append({
- 'url': self._proto_relative_url(video['src']),
+ 'url': self._proto_relative_url(v['src']),
'format_id': resolution,
'height': height,
})
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index cad76f0c99..3bc5f3cab2 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -1,8 +1,8 @@
+import datetime as dt
import itertools
import json
import re
import urllib.parse
-from datetime import datetime
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
@@ -38,7 +38,7 @@ class RokfinIE(InfoExtractor):
'upload_date': '20211023',
'timestamp': 1634998029,
'channel': 'Jimmy Dore',
- 'channel_id': 65429,
+ 'channel_id': '65429',
'channel_url': 'https://rokfin.com/TheJimmyDoreShow',
'availability': 'public',
'live_status': 'not_live',
@@ -56,7 +56,7 @@ class RokfinIE(InfoExtractor):
'upload_date': '20190412',
'timestamp': 1555052644,
'channel': 'Ron Placone',
- 'channel_id': 10,
+ 'channel_id': '10',
'channel_url': 'https://rokfin.com/RonPlacone',
'availability': 'public',
'live_status': 'not_live',
@@ -73,7 +73,7 @@ class RokfinIE(InfoExtractor):
'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
'description': 'md5:324ce2d3e3b62e659506409e458b9d8e',
'channel': 'TLAVagabond',
- 'channel_id': 53856,
+ 'channel_id': '53856',
'channel_url': 'https://rokfin.com/TLAVagabond',
'availability': 'public',
'is_live': False,
@@ -86,7 +86,6 @@ class RokfinIE(InfoExtractor):
'dislike_count': int,
'like_count': int,
'tags': ['FreeThinkingMedia^'],
- 'duration': None,
}
}, {
'url': 'https://rokfin.com/post/126703/Brave-New-World--Aldous-Huxley-DEEPDIVE--Chpts-13--Quite-Frankly--Jay-Dyer',
@@ -96,7 +95,7 @@ class RokfinIE(InfoExtractor):
'title': 'Brave New World - Aldous Huxley DEEPDIVE! (Chpts 1-3) - Quite Frankly & Jay Dyer',
'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
'channel': 'Jay Dyer',
- 'channel_id': 186881,
+ 'channel_id': '186881',
'channel_url': 'https://rokfin.com/jaydyer',
'availability': 'premium_only',
'live_status': 'not_live',
@@ -116,7 +115,7 @@ class RokfinIE(InfoExtractor):
'title': 'The Grayzone live on Nordstream blame game',
'thumbnail': r're:https://image\.v\.rokfin\.com/.+',
'channel': 'Max Blumenthal',
- 'channel_id': 248902,
+ 'channel_id': '248902',
'channel_url': 'https://rokfin.com/MaxBlumenthal',
'availability': 'premium_only',
'live_status': 'was_live',
@@ -157,7 +156,7 @@ class RokfinIE(InfoExtractor):
self.raise_login_required('This video is only available to premium users', True, method='cookies')
elif scheduled:
self.raise_no_formats(
- f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
+ f'Stream is offline; scheduled for {dt.datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
video_id=video_id, expected=True)
uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
@@ -174,7 +173,7 @@ class RokfinIE(InfoExtractor):
'like_count': int_or_none(metadata.get('likeCount')),
'dislike_count': int_or_none(metadata.get('dislikeCount')),
'channel': str_or_none(traverse_obj(metadata, ('createdBy', 'name'), ('creator', 'name'))),
- 'channel_id': traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id')),
+ 'channel_id': str_or_none(traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id'))),
'channel_url': url_or_none(f'https://rokfin.com/{uploader}') if uploader else None,
'timestamp': timestamp,
'release_timestamp': timestamp if live_status != 'not_live' else None,
diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index 94e673b133..5c622399df 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -2,17 +2,18 @@ from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
+ LazyList,
int_or_none,
join_nonempty,
- LazyList,
+ parse_iso8601,
parse_qs,
+ smuggle_url,
str_or_none,
- traverse_obj,
url_or_none,
urlencode_postdata,
urljoin,
- update_url_query,
)
+from ..utils.traversal import traverse_obj
class RoosterTeethBaseIE(InfoExtractor):
@@ -57,19 +58,27 @@ class RoosterTeethBaseIE(InfoExtractor):
title = traverse_obj(attributes, 'title', 'display_title')
sub_only = attributes.get('is_sponsors_only')
+ episode_id = str_or_none(data.get('uuid'))
+ video_id = str_or_none(data.get('id'))
+ if video_id and 'parent_content_id' in attributes: # parent_content_id is a bonus-only key
+ video_id += '-bonus' # there are collisions with bonus ids and regular ids
+ elif not video_id:
+ video_id = episode_id
+
return {
- 'id': str(data.get('id')),
+ 'id': video_id,
'display_id': attributes.get('slug'),
'title': title,
'description': traverse_obj(attributes, 'description', 'caption'),
- 'series': attributes.get('show_title'),
+ 'series': traverse_obj(attributes, 'show_title', 'parent_content_title'),
'season_number': int_or_none(attributes.get('season_number')),
- 'season_id': attributes.get('season_id'),
+ 'season_id': str_or_none(attributes.get('season_id')),
'episode': title,
'episode_number': int_or_none(attributes.get('number')),
- 'episode_id': str_or_none(data.get('uuid')),
+ 'episode_id': episode_id,
'channel_id': attributes.get('channel_id'),
'duration': int_or_none(attributes.get('length')),
+ 'release_timestamp': parse_iso8601(attributes.get('original_air_date')),
'thumbnails': thumbnails,
'availability': self._availability(
needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only,
@@ -79,7 +88,7 @@ class RoosterTeethBaseIE(InfoExtractor):
class RoosterTeethIE(RoosterTeethBaseIE):
- _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:bonus-feature|episode|watch)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'info_dict': {
@@ -91,6 +100,17 @@ class RoosterTeethIE(RoosterTeethBaseIE):
'thumbnail': r're:^https?://.*\.png$',
'series': 'Million Dollars, But...',
'episode': 'Million Dollars, But... The Game Announcement',
+ 'tags': ['Game Show', 'Sketch'],
+ 'season_number': 2,
+ 'availability': 'public',
+ 'episode_number': 10,
+ 'episode_id': '00374575-464e-11e7-a302-065410f210c4',
+ 'season': 'Season 2',
+ 'season_id': 'ffa27d48-464d-11e7-a302-065410f210c4',
+ 'channel_id': '92b6bb21-91d2-4b1b-bf95-3268fa0d9939',
+ 'duration': 145,
+ 'release_timestamp': 1462982400,
+ 'release_date': '20160511',
},
'params': {'skip_download': True},
}, {
@@ -104,8 +124,90 @@ class RoosterTeethIE(RoosterTeethBaseIE):
'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
'ext': 'mp4',
+ 'availability': 'public',
+ 'episode_id': 'f8117b13-f068-499e-803e-eec9ea2dec8c',
+ 'episode_number': 3,
+ 'tags': ['Animation'],
+ 'season_id': '4b8f0a9e-12c4-41ed-8caa-fed15a85bab8',
+ 'season': 'Season 1',
+ 'series': 'RWBY: World of Remnant',
+ 'season_number': 1,
+ 'duration': 216,
+ 'release_timestamp': 1413489600,
+ 'release_date': '20141016',
},
'params': {'skip_download': True},
+ }, {
+ # bonus feature with /watch/ url
+ 'url': 'https://roosterteeth.com/watch/rwby-bonus-21',
+ 'info_dict': {
+ 'id': '33-bonus',
+ 'display_id': 'rwby-bonus-21',
+ 'title': 'Volume 5 Yang Character Short',
+ 'description': 'md5:8c2440bc763ea90c52cfe0a68093e1f7',
+ 'episode': 'Volume 5 Yang Character Short',
+ 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'ext': 'mp4',
+ 'availability': 'public',
+ 'episode_id': 'f2a9f132-1fe2-44ad-8956-63d7c0267720',
+ 'episode_number': 55,
+ 'series': 'RWBY',
+ 'duration': 255,
+ 'release_timestamp': 1507993200,
+ 'release_date': '20171014',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # only works with video_data['attributes']['url'] m3u8 url
+ 'url': 'https://www.roosterteeth.com/watch/achievement-hunter-achievement-hunter-fatality-walkthrough-deathstroke-lex-luthor-captain-marvel-green-lantern-and-wonder-woman',
+ 'info_dict': {
+ 'id': '25394',
+ 'ext': 'mp4',
+ 'title': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman',
+ 'description': 'md5:91bb934698344fb9647b1c7351f16964',
+ 'availability': 'public',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'episode': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman',
+ 'episode_number': 71,
+ 'episode_id': 'ffaec998-464d-11e7-a302-065410f210c4',
+ 'season': 'Season 2008',
+ 'tags': ['Gaming'],
+ 'series': 'Achievement Hunter',
+ 'display_id': 'md5:4465ce4f001735f9d7a2ae529a543d31',
+ 'season_id': 'ffa13340-464d-11e7-a302-065410f210c4',
+ 'season_number': 2008,
+ 'channel_id': '2cb2a70c-be50-46f5-93d7-84a1baabb4f7',
+ 'duration': 189,
+ 'release_timestamp': 1228317300,
+ 'release_date': '20081203',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # brightcove fallback extraction needed
+ 'url': 'https://roosterteeth.com/watch/lets-play-2013-126',
+ 'info_dict': {
+ 'id': '17845',
+ 'ext': 'mp4',
+ 'title': 'WWE \'13',
+ 'availability': 'public',
+ 'series': 'Let\'s Play',
+ 'episode_number': 10,
+ 'season_id': 'ffa23d9c-464d-11e7-a302-065410f210c4',
+ 'channel_id': '75ba87e8-06fd-4482-bad9-52a4da2c6181',
+ 'episode': 'WWE \'13',
+ 'episode_id': 'ffdbe55e-464d-11e7-a302-065410f210c4',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'tags': ['Gaming', 'Our Favorites'],
+ 'description': 'md5:b4a5226d2bbcf0dafbde11a2ba27262d',
+ 'display_id': 'lets-play-2013-126',
+ 'season_number': 3,
+ 'season': 'Season 3',
+ 'release_timestamp': 1359999840,
+ 'release_date': '20130204',
+ },
+ 'expected_warnings': ['Direct m3u8 URL returned HTTP Error 403'],
+ 'params': {'skip_download': True},
}, {
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
'only_matching': True,
@@ -125,18 +227,29 @@ class RoosterTeethIE(RoosterTeethBaseIE):
}, {
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'only_matching': True,
+ }, {
+ 'url': 'https://roosterteeth.com/bonus-feature/camp-camp-soundtrack-another-rap-song-about-foreign-cars-richie-branson',
+ 'only_matching': True,
}]
+ _BRIGHTCOVE_ACCOUNT_ID = '6203312018001'
+
+ def _extract_brightcove_formats_and_subtitles(self, bc_id, url, m3u8_url):
+ account_id = self._search_regex(
+ r'/accounts/(\d+)/videos/', m3u8_url, 'account id', default=self._BRIGHTCOVE_ACCOUNT_ID)
+ info = self._downloader.get_info_extractor('BrightcoveNew').extract(smuggle_url(
+ f'https://players.brightcove.net/{account_id}/default_default/index.html?videoId={bc_id}',
+ {'referrer': url}))
+ return info['formats'], info['subtitles']
+
def _real_extract(self, url):
display_id = self._match_id(url)
api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}'
try:
video_data = self._download_json(
- api_episode_url + '/videos', display_id,
- 'Downloading video JSON metadata')['data'][0]
- m3u8_url = video_data['attributes']['url']
- # XXX: additional URL at video_data['links']['download']
+ api_episode_url + '/videos', display_id, 'Downloading video JSON metadata',
+ headers={'Client-Type': 'web'})['data'][0] # web client-type yields ad-free streams
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
if self._parse_json(e.cause.response.read().decode(), display_id).get('access') is False:
@@ -144,8 +257,21 @@ class RoosterTeethIE(RoosterTeethBaseIE):
'%s is only available for FIRST members' % display_id)
raise
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ # XXX: additional ad-free URL at video_data['links']['download'] but often gives 403 errors
+ m3u8_url = video_data['attributes']['url']
+ is_brightcove = traverse_obj(video_data, ('attributes', 'encoding_pipeline')) == 'brightcove'
+ bc_id = traverse_obj(video_data, ('attributes', 'uid', {str}))
+
+ try:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ except ExtractorError as e:
+ if is_brightcove and bc_id and isinstance(e.cause, HTTPError) and e.cause.status == 403:
+ self.report_warning(
+ 'Direct m3u8 URL returned HTTP Error 403; retrying with Brightcove extraction')
+ formats, subtitles = self._extract_brightcove_formats_and_subtitles(bc_id, url, m3u8_url)
+ else:
+ raise
episode = self._download_json(
api_episode_url, display_id,
@@ -167,38 +293,53 @@ class RoosterTeethSeriesIE(RoosterTeethBaseIE):
'info_dict': {
'id': 'rwby-7',
'title': 'RWBY - Season 7',
- }
+ },
+ }, {
+ 'url': 'https://roosterteeth.com/series/the-weird-place',
+ 'playlist_count': 7,
+ 'info_dict': {
+ 'id': 'the-weird-place',
+ 'title': 'The Weird Place',
+ },
}, {
'url': 'https://roosterteeth.com/series/role-initiative',
'playlist_mincount': 16,
'info_dict': {
'id': 'role-initiative',
'title': 'Role Initiative',
- }
+ },
}, {
'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9',
'playlist_mincount': 50,
'info_dict': {
'id': 'let-s-play-minecraft-9',
'title': 'Let\'s Play Minecraft - Season 9',
- }
+ },
}]
def _entries(self, series_id, season_number):
display_id = join_nonempty(series_id, season_number)
- # TODO: extract bonus material
- for data in self._download_json(
- f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)['data']:
- idx = traverse_obj(data, ('attributes', 'number'))
- if season_number and idx != season_number:
- continue
- season_url = update_url_query(urljoin(self._API_BASE, data['links']['episodes']), {'per_page': 1000})
- season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data']
- for episode in season:
+
+ def yield_episodes(data):
+ for episode in traverse_obj(data, ('data', lambda _, v: v['canonical_links']['self'])):
yield self.url_result(
- f'https://www.roosterteeth.com{episode["canonical_links"]["self"]}',
- RoosterTeethIE.ie_key(),
- **self._extract_video_info(episode))
+ urljoin('https://www.roosterteeth.com', episode['canonical_links']['self']),
+ RoosterTeethIE, **self._extract_video_info(episode))
+
+ series_data = self._download_json(
+ f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)
+ for season_data in traverse_obj(series_data, ('data', lambda _, v: v['links']['episodes'])):
+ idx = traverse_obj(season_data, ('attributes', 'number'))
+ if season_number is not None and idx != season_number:
+ continue
+ yield from yield_episodes(self._download_json(
+ urljoin(self._API_BASE, season_data['links']['episodes']), display_id,
+ f'Downloading season {idx} JSON metadata', query={'per_page': 1000}))
+
+ if season_number is None: # extract series-level bonus features
+ yield from yield_episodes(self._download_json(
+ f'{self._API_BASE_URL}/shows/{series_id}/bonus_features?order=asc&order_by&per_page=1000',
+ display_id, 'Downloading bonus features JSON metadata', fatal=False))
def _real_extract(self, url):
series_id = self._match_id(url)
diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py
index 63134322dc..411a625192 100644
--- a/yt_dlp/extractor/rozhlas.py
+++ b/yt_dlp/extractor/rozhlas.py
@@ -247,17 +247,17 @@ class MujRozhlasIE(RozhlasBaseIE):
'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
'md5': '6f8fd68663e64936623e67c152a669e0',
'info_dict': {
- 'id': '10739193',
+ 'id': '10787730',
'ext': 'mp3',
'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
'timestamp': 1684915200,
- 'modified_timestamp': 1684922446,
+ 'modified_timestamp': 1687550432,
'series': 'Vykopávky',
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
'channel_id': 'radio-wave',
'upload_date': '20230524',
- 'modified_date': '20230524',
+ 'modified_date': '20230623',
},
}, {
# serial extraction
@@ -277,6 +277,26 @@ class MujRozhlasIE(RozhlasBaseIE):
'title': 'Nespavci',
'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
},
+ }, {
+ # serialPart
+ 'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu',
+ 'info_dict': {
+ 'id': '8889035',
+ 'ext': 'm4a',
+ 'title': 'Gustavo Adolfo Bécquer: Hora duchů',
+ 'description': 'md5:343a15257b376c276e210b78e900ffea',
+ 'chapter': 'Hora duchů a Polibek – dva tajemné příběhy Gustava Adolfa Bécquera',
+ 'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg',
+ 'timestamp': 1708173000,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'series': 'Povídka',
+ 'modified_date': '20240217',
+ 'upload_date': '20240217',
+ 'modified_timestamp': 1708173198,
+ 'channel_id': 'vltava',
+ },
+ 'params': {'skip_download': 'dash'},
}]
def _call_api(self, path, item_id, msg='API JSON'):
@@ -322,7 +342,7 @@ class MujRozhlasIE(RozhlasBaseIE):
entity = info['siteEntityBundle']
- if entity == 'episode':
+ if entity in ('episode', 'serialPart'):
return self._extract_audio_entry(self._call_api(
'episodes', info['contentId'], 'episode info API JSON'))
diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py
index 7ba80d4ba7..729804d23e 100644
--- a/yt_dlp/extractor/rte.py
+++ b/yt_dlp/extractor/rte.py
@@ -3,13 +3,13 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
float_or_none,
parse_iso8601,
str_or_none,
try_get,
unescapeHTML,
url_or_none,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py
index 5928a207ae..ec78d0a669 100644
--- a/yt_dlp/extractor/rtp.py
+++ b/yt_dlp/extractor/rtp.py
@@ -1,9 +1,10 @@
+import base64
+import json
+import re
+import urllib.parse
+
from .common import InfoExtractor
from ..utils import js_to_json
-import re
-import json
-import urllib.parse
-import base64
class RTPIE(InfoExtractor):
diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py
index 9f73d1811f..bce5cba82a 100644
--- a/yt_dlp/extractor/rts.py
+++ b/yt_dlp/extractor/rts.py
@@ -13,6 +13,7 @@ from ..utils import (
class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
IE_DESC = 'RTS.ch'
_VALID_URL = r'rts:(?P\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P[0-9]+)-(?P.+?)\.html'
diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py
index 741c472621..e7dcd5fd61 100644
--- a/yt_dlp/extractor/rtvcplay.py
+++ b/yt_dlp/extractor/rtvcplay.py
@@ -1,16 +1,17 @@
import re
-from .common import InfoExtractor, ExtractorError
+from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- int_or_none,
float_or_none,
+ int_or_none,
js_to_json,
mimetype2ext,
traverse_obj,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
index a84a78da8d..defb8d741f 100644
--- a/yt_dlp/extractor/rtvs.py
+++ b/yt_dlp/extractor/rtvs.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import (
parse_duration,
traverse_obj,
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index 1dc049ac8f..837a324e62 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -90,7 +90,6 @@ class RumbleEmbedIE(InfoExtractor):
'channel_url': 'https://rumble.com/c/LofiGirl',
'channel': 'Lofi Girl',
'thumbnail': r're:https://.+\.jpg',
- 'duration': None,
'uploader': 'Lofi Girl',
'live_status': 'is_live',
},
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
index 08d9b9257d..eb12f32faf 100644
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@@ -5,8 +5,8 @@ from ..compat import (
compat_str,
)
from ..utils import (
- determine_ext,
bool_or_none,
+ determine_ext,
int_or_none,
parse_qs,
try_get,
@@ -46,7 +46,7 @@ class RutubeBaseIE(InfoExtractor):
'uploader': try_get(video, lambda x: x['author']['name']),
'uploader_id': compat_str(uploader_id) if uploader_id else None,
'timestamp': unified_timestamp(video.get('created_ts')),
- 'category': [category] if category else None,
+ 'categories': [category] if category else None,
'age_limit': age_limit,
'view_count': int_or_none(video.get('hits')),
'comment_count': int_or_none(video.get('comments_count')),
@@ -112,7 +112,7 @@ class RutubeIE(RutubeBaseIE):
'age_limit': 0,
'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
- 'category': ['Новости и СМИ'],
+ 'categories': ['Новости и СМИ'],
'chapters': [],
},
'expected_warnings': ['Unable to download f4m'],
@@ -144,7 +144,7 @@ class RutubeIE(RutubeBaseIE):
'age_limit': 0,
'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
- 'category': ['Видеоигры'],
+ 'categories': ['Видеоигры'],
'chapters': [],
},
'expected_warnings': ['Unable to download f4m'],
@@ -154,7 +154,7 @@ class RutubeIE(RutubeBaseIE):
'id': 'c65b465ad0c98c89f3b25cb03dcc87c6',
'ext': 'mp4',
'chapters': 'count:4',
- 'category': ['Бизнес и предпринимательство'],
+ 'categories': ['Бизнес и предпринимательство'],
'description': 'md5:252feac1305257d8c1bab215cedde75d',
'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
'duration': 782,
diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py
index d7f9a73377..726d49111a 100644
--- a/yt_dlp/extractor/rutv.py
+++ b/yt_dlp/extractor/rutv.py
@@ -1,11 +1,7 @@
import re
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- str_to_int
-)
+from ..utils import ExtractorError, int_or_none, str_to_int
class RUTVIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py
index 33f6652df5..dc61387be7 100644
--- a/yt_dlp/extractor/ruutu.py
+++ b/yt_dlp/extractor/ruutu.py
@@ -4,8 +4,8 @@ import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
find_xpath_attr,
int_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py
index 8d322d7105..17dff0afa2 100644
--- a/yt_dlp/extractor/safari.py
+++ b/yt_dlp/extractor/safari.py
@@ -2,7 +2,6 @@ import json
import re
from .common import InfoExtractor
-
from ..compat import (
compat_parse_qs,
compat_urlparse,
diff --git a/yt_dlp/extractor/saitosan.py b/yt_dlp/extractor/saitosan.py
index d2f60e92ff..a5f05e1d05 100644
--- a/yt_dlp/extractor/saitosan.py
+++ b/yt_dlp/extractor/saitosan.py
@@ -3,6 +3,7 @@ from ..utils import ExtractorError, try_get
class SaitosanIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'Saitosan'
_VALID_URL = r'https?://(?:www\.)?saitosan\.net/bview.html\?id=(?P[0-9]+)'
_TESTS = [{
diff --git a/yt_dlp/extractor/savefrom.py b/yt_dlp/extractor/savefrom.py
deleted file mode 100644
index 9c9e74b6dd..0000000000
--- a/yt_dlp/extractor/savefrom.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import os.path
-
-from .common import InfoExtractor
-
-
-class SaveFromIE(InfoExtractor):
- IE_NAME = 'savefrom.net'
- _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P.*)$'
-
- _TEST = {
- 'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
- 'info_dict': {
- 'id': 'UlVRAPW2WJY',
- 'ext': 'mp4',
- 'title': 'About Team Radical MMA | MMA Fighting',
- 'upload_date': '20120816',
- 'uploader': 'Howcast',
- 'uploader_id': 'Howcast',
- 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
- },
- 'params': {
- 'skip_download': True
- }
- }
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = os.path.splitext(url.split('/')[-1])[0]
-
- return self.url_result(mobj.group('url'), video_id=video_id)
diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py
index 7a91150475..8d61e22fce 100644
--- a/yt_dlp/extractor/sbs.py
+++ b/yt_dlp/extractor/sbs.py
@@ -44,8 +44,6 @@ class SBSIE(InfoExtractor):
'timestamp': 1408613220,
'upload_date': '20140821',
'uploader': 'SBSC',
- 'tags': None,
- 'categories': None,
},
'expected_warnings': ['Unable to download JSON metadata'],
}, {
diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py
index 3912f77865..85d51cd59a 100644
--- a/yt_dlp/extractor/scrippsnetworks.py
+++ b/yt_dlp/extractor/scrippsnetworks.py
@@ -1,8 +1,8 @@
-import json
import hashlib
+import json
-from .aws import AWSIE
from .anvato import AnvatoIE
+from .aws import AWSIE
from .common import InfoExtractor
from ..utils import (
smuggle_url,
diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py
index 9c2ca8c518..fc91d60e17 100644
--- a/yt_dlp/extractor/scte.py
+++ b/yt_dlp/extractor/scte.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- decode_packed_codes,
ExtractorError,
+ decode_packed_codes,
urlencode_postdata,
)
diff --git a/yt_dlp/extractor/seeker.py b/yt_dlp/extractor/seeker.py
deleted file mode 100644
index 65eb16a09d..0000000000
--- a/yt_dlp/extractor/seeker.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- get_element_by_class,
- strip_or_none,
-)
-
-
-class SeekerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P.*)-(?P\d+)\.html'
- _TESTS = [{
- 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
- 'md5': '897d44bbe0d8986a2ead96de565a92db',
- 'info_dict': {
- 'id': 'Elrn3gnY',
- 'ext': 'mp4',
- 'title': 'Should Trump Be Required To Release His Tax Returns?',
- 'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
- 'timestamp': 1490090165,
- 'upload_date': '20170321',
- }
- }, {
- 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
- 'playlist': [
- {
- 'md5': '0497b9f20495174be73ae136949707d2',
- 'info_dict': {
- 'id': 'FihYQ8AE',
- 'ext': 'mp4',
- 'title': 'The Pros & Cons Of Zoos',
- 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
- 'timestamp': 1490039133,
- 'upload_date': '20170320',
- },
- }
- ],
- 'info_dict': {
- 'id': '1834116536',
- 'title': 'After Gorilla Killing, Changes Ahead for Zoos',
- 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
- },
- }]
-
- def _real_extract(self, url):
- display_id, article_id = self._match_valid_url(url).groups()
- webpage = self._download_webpage(url, display_id)
- entries = []
- for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
- entries.append(self.url_result(
- 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
- return self.playlist_result(
- entries, article_id,
- self._og_search_title(webpage),
- strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))
diff --git a/yt_dlp/extractor/sejmpl.py b/yt_dlp/extractor/sejmpl.py
index 29cb0152a2..eb433d2ac3 100644
--- a/yt_dlp/extractor/sejmpl.py
+++ b/yt_dlp/extractor/sejmpl.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
from .common import InfoExtractor
from .redge import RedCDNLivxIE
@@ -13,16 +13,16 @@ from ..utils.traversal import traverse_obj
def is_dst(date):
- last_march = datetime.datetime(date.year, 3, 31)
- last_october = datetime.datetime(date.year, 10, 31)
- last_sunday_march = last_march - datetime.timedelta(days=last_march.isoweekday() % 7)
- last_sunday_october = last_october - datetime.timedelta(days=last_october.isoweekday() % 7)
+ last_march = dt.datetime(date.year, 3, 31)
+ last_october = dt.datetime(date.year, 10, 31)
+ last_sunday_march = last_march - dt.timedelta(days=last_march.isoweekday() % 7)
+ last_sunday_october = last_october - dt.timedelta(days=last_october.isoweekday() % 7)
return last_sunday_march.replace(hour=2) <= date <= last_sunday_october.replace(hour=3)
def rfc3339_to_atende(date):
- date = datetime.datetime.fromisoformat(date)
- date = date + datetime.timedelta(hours=1 if is_dst(date) else 0)
+ date = dt.datetime.fromisoformat(date)
+ date = date + dt.timedelta(hours=1 if is_dst(date) else 0)
return int((date.timestamp() - 978307200) * 1000)
diff --git a/yt_dlp/extractor/senalcolombia.py b/yt_dlp/extractor/senalcolombia.py
index f3c066da77..b2f354faef 100644
--- a/yt_dlp/extractor/senalcolombia.py
+++ b/yt_dlp/extractor/senalcolombia.py
@@ -3,6 +3,7 @@ from .rtvcplay import RTVCKalturaIE
class SenalColombiaLiveIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?Psenal-en-vivo)'
_TESTS = [{
diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py
index 3600e2e740..99fcf51f16 100644
--- a/yt_dlp/extractor/sendtonews.py
+++ b/yt_dlp/extractor/sendtonews.py
@@ -2,16 +2,17 @@ import re
from .common import InfoExtractor
from ..utils import (
- float_or_none,
- parse_iso8601,
- update_url_query,
- int_or_none,
determine_protocol,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
unescapeHTML,
+ update_url_query,
)
class SendtoNewsIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P[0-9A-Za-z-]+)'
_TEST = {
diff --git a/yt_dlp/extractor/sexu.py b/yt_dlp/extractor/sexu.py
index 3117f81e38..989b63c721 100644
--- a/yt_dlp/extractor/sexu.py
+++ b/yt_dlp/extractor/sexu.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
class SexuIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P\d+)'
_TEST = {
'url': 'http://sexu.com/961791/',
diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py
index 79e8885835..b31d566dfe 100644
--- a/yt_dlp/extractor/seznamzpravy.py
+++ b/yt_dlp/extractor/seznamzpravy.py
@@ -4,11 +4,11 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
- urljoin,
int_or_none,
parse_codecs,
parse_qs,
try_get,
+ urljoin,
)
diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py
index d509e8879c..89aee27280 100644
--- a/yt_dlp/extractor/shahid.py
+++ b/yt_dlp/extractor/shahid.py
@@ -5,9 +5,9 @@ import re
from .aws import AWSIE
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
InAdvancePagedList,
+ clean_html,
int_or_none,
parse_iso8601,
str_or_none,
diff --git a/yt_dlp/extractor/sharepoint.py b/yt_dlp/extractor/sharepoint.py
new file mode 100644
index 0000000000..d4d5af04f0
--- /dev/null
+++ b/yt_dlp/extractor/sharepoint.py
@@ -0,0 +1,112 @@
+import json
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import determine_ext, int_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class SharePointIE(InfoExtractor):
+ _BASE_URL_RE = r'https?://[\w-]+\.sharepoint\.com/'
+ _VALID_URL = [
+ rf'{_BASE_URL_RE}:v:/[a-z]/(?:[^/?#]+/)*(?P[^/?#]{{46}})/?(?:$|[?#])',
+ rf'{_BASE_URL_RE}(?!:v:)(?:[^/?#]+/)*stream\.aspx\?(?:[^#]+&)?id=(?P[^]+)',
+ ]
+ _TESTS = [{
+ 'url': 'https://lut-my.sharepoint.com/:v:/g/personal/juha_eerola_student_lab_fi/EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw?e=ZpQOOw',
+ 'md5': '2950821d0d4937a0a76373782093b435',
+ 'info_dict': {
+ 'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
+ 'display_id': 'EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw',
+ 'ext': 'mp4',
+ 'title': 'CmvpJST',
+ 'duration': 54.567,
+ 'thumbnail': r're:https://.+/thumbnail',
+ 'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
+ },
+ }, {
+ 'url': 'https://greaternyace.sharepoint.com/:v:/s/acementornydrive/ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg?e=PQUfVb',
+ 'md5': 'c496a01644223273bff12e93e501afd1',
+ 'info_dict': {
+ 'id': '01QI4AVTZ3ESFZPAD42VCKB5CZKAGLFVYB',
+ 'display_id': 'ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg',
+ 'ext': 'mp4',
+ 'title': '930103681233985536',
+ 'duration': 3797.326,
+ 'thumbnail': r're:https://.+/thumbnail',
+ },
+ }, {
+ 'url': 'https://lut-my.sharepoint.com/personal/juha_eerola_student_lab_fi/_layouts/15/stream.aspx?id=%2Fpersonal%2Fjuha_eerola_student_lab_fi%2FDocuments%2FM-DL%2FCmvpJST.mp4&ga=1&referrer=StreamWebApp.Web&referrerScenario=AddressBarCopied.view',
+ 'info_dict': {
+ 'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
+ 'display_id': '/personal/juha_eerola_student_lab_fi/Documents/M-DL/CmvpJST.mp4',
+ 'ext': 'mp4',
+ 'title': 'CmvpJST',
+ 'duration': 54.567,
+ 'thumbnail': r're:https://.+/thumbnail',
+ 'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
+ },
+ 'skip': 'Session cookies needed',
+ }, {
+ 'url': 'https://izoobasisschool.sharepoint.com/:v:/g/Eaqleq8COVBIvIPvod0U27oBypC6aWOkk8ptuDpmJ6arHw',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://uskudaredutr-my.sharepoint.com/:v:/g/personal/songul_turkaydin_uskudar_edu_tr/EbTf-VRUIbtGuIN73tx1MuwBCHBOmNcWNqSLw61Fd2_o0g?e=n5Vkof',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://epam-my.sharepoint.com/:v:/p/dzmitry_tamashevich/Ec4ZOs-rATZHjFYZWVxjczEB649FCoYFKDV_x3RxZiWAGA?e=4hswgA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://microsoft.sharepoint.com/:v:/t/MicrosoftSPARKRecordings-MSFTInternal/EWCyeqByVWBAt8wDvNZdV-UB0BvU5YVbKm0UHgdrUlI6dg?e=QbPck6',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = urllib.parse.unquote(self._match_id(url))
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+ if urllib.parse.urlparse(urlh.url).hostname == 'login.microsoftonline.com':
+ self.raise_login_required(
+ 'Session cookies are required for this URL and can be passed '
+ 'with the --cookies option. The --cookies-from-browser option will not work', method=None)
+
+ video_data = self._search_json(r'g_fileInfo\s*=', webpage, 'player config', display_id)
+ video_id = video_data['VroomItemId']
+
+ parsed_url = urllib.parse.urlparse(video_data['.transformUrl'])
+ base_media_url = urllib.parse.urlunparse(parsed_url._replace(
+ path=urllib.parse.urljoin(f'{parsed_url.path}/', '../videomanifest'),
+ query=urllib.parse.urlencode({
+ **urllib.parse.parse_qs(parsed_url.query),
+ 'cTag': video_data['.ctag'],
+ 'action': 'Access',
+ 'part': 'index',
+ }, doseq=True)))
+
+ # Web player adds more params to the format URLs but we still get all formats without them
+ formats = self._extract_mpd_formats(
+ base_media_url, video_id, mpd_id='dash', query={'format': 'dash'}, fatal=False)
+ for hls_type in ('hls', 'hls-vnext'):
+ formats.extend(self._extract_m3u8_formats(
+ base_media_url, video_id, 'mp4', m3u8_id=hls_type,
+ query={'format': hls_type}, fatal=False, quality=-2))
+
+ if video_url := traverse_obj(video_data, ('downloadUrl', {url_or_none})):
+ formats.append({
+ 'url': video_url,
+ 'ext': determine_ext(video_data.get('extension') or video_data.get('name')),
+ 'quality': 1,
+ 'format_id': 'source',
+ 'filesize': int_or_none(video_data.get('size')),
+ 'vcodec': 'none' if video_data.get('isAudio') is True else None,
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_data.get('title') or video_data.get('displayName'),
+ 'display_id': display_id,
+ 'uploader_id': video_data.get('authorId'),
+ 'duration': traverse_obj(video_data, (
+ 'MediaServiceFastMetadata', {json.loads}, 'media', 'duration', {lambda x: x / 10000000})),
+ 'thumbnail': url_or_none(video_data.get('thumbnailUrl')),
+ }
diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py
index ec9938b8cb..cca86ed6c0 100644
--- a/yt_dlp/extractor/shemaroome.py
+++ b/yt_dlp/extractor/shemaroome.py
@@ -4,8 +4,8 @@ from ..compat import (
compat_b64decode,
)
from ..utils import (
- bytes_to_intlist,
ExtractorError,
+ bytes_to_intlist,
intlist_to_bytes,
unified_strdate,
)
diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py
index ef93b92768..44619a16c6 100644
--- a/yt_dlp/extractor/sixplay.py
+++ b/yt_dlp/extractor/sixplay.py
@@ -6,8 +6,8 @@ from ..utils import (
determine_ext,
int_or_none,
parse_qs,
- try_get,
qualities,
+ try_get,
)
diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py
index e02f8cef0e..54dfdc441d 100644
--- a/yt_dlp/extractor/skeb.py
+++ b/yt_dlp/extractor/skeb.py
@@ -10,7 +10,7 @@ class SkebIE(InfoExtractor):
'info_dict': {
'id': '466853',
'title': '内容はおまかせします! by 姫ノ森りぃる@一周年',
- 'descripion': 'md5:1ec50901efc3437cfbfe3790468d532d',
+ 'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
'uploader': '姫ノ森りぃる@一周年',
'uploader_id': 'riiru_wm',
'age_limit': 0,
@@ -34,7 +34,7 @@ class SkebIE(InfoExtractor):
'info_dict': {
'id': '489408',
'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
- 'descripion': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
+ 'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
'uploader': '古川ノブ@音楽とVlogのVtuber',
'uploader_id': 'furukawa_nob',
'age_limit': 0,
@@ -61,12 +61,12 @@ class SkebIE(InfoExtractor):
'info_dict': {
'id': '6',
'title': 'ヒロ。\n\n私のキャラク... by 諸々',
- 'descripion': 'md5:aa6cbf2ba320b50bce219632de195f07',
+ 'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
'_type': 'playlist',
'entries': [{
'id': '486430',
'title': 'ヒロ。\n\n私のキャラク... by 諸々',
- 'descripion': 'md5:aa6cbf2ba320b50bce219632de195f07',
+ 'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
}, {
'id': '486431',
'title': 'ヒロ。\n\n私のキャラク... by 諸々',
@@ -81,7 +81,7 @@ class SkebIE(InfoExtractor):
parent = {
'id': video_id,
'title': nuxt_data.get('title'),
- 'descripion': nuxt_data.get('description'),
+ 'description': nuxt_data.get('description'),
'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
'age_limit': 18 if nuxt_data.get('nsfw') else 0,
diff --git a/yt_dlp/extractor/skylinewebcams.py b/yt_dlp/extractor/skylinewebcams.py
index 4292bb2ae5..197407c18d 100644
--- a/yt_dlp/extractor/skylinewebcams.py
+++ b/yt_dlp/extractor/skylinewebcams.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
class SkylineWebcamsIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P[^/]+)\.html'
_TEST = {
'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html',
diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py
index 6264b04bb3..234703cf70 100644
--- a/yt_dlp/extractor/skynewsarabia.py
+++ b/yt_dlp/extractor/skynewsarabia.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- parse_iso8601,
parse_duration,
+ parse_iso8601,
)
@@ -38,6 +38,7 @@ class SkyNewsArabiaBaseIE(InfoExtractor):
class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
+ _WORKING = False
IE_NAME = 'skynewsarabia:video'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P[0-9]+)'
_TEST = {
@@ -64,6 +65,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
+ _WORKING = False
IE_NAME = 'skynewsarabia:article'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P[0-9]+)'
_TESTS = [{
diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py
index df2af3b35d..a1328dee2d 100644
--- a/yt_dlp/extractor/slideslive.py
+++ b/yt_dlp/extractor/slideslive.py
@@ -25,8 +25,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38902413',
'ext': 'mp4',
'title': 'GCC IA16 backend',
- 'timestamp': 1648189972,
- 'upload_date': '20220325',
+ 'timestamp': 1697793372,
+ 'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:42',
'chapters': 'count:41',
@@ -42,8 +42,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38935785',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
- 'upload_date': '20211115',
- 'timestamp': 1636996003,
+ 'upload_date': '20231020',
+ 'timestamp': 1697807002,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:640',
'chapters': 'count:639',
@@ -59,9 +59,9 @@ class SlidesLiveIE(InfoExtractor):
'id': '38973182',
'ext': 'mp4',
'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
- 'upload_date': '20220201',
+ 'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1643728135,
+ 'timestamp': 1697822521,
'thumbnails': 'count:3',
'chapters': 'count:2',
'duration': 5889,
@@ -70,37 +70,22 @@ class SlidesLiveIE(InfoExtractor):
'skip_download': 'm3u8',
},
}, {
- # service_name = youtube, only XML slides info
+ # formerly youtube, converted to native
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
'info_dict': {
- 'id': 'jmg02wCJD5M',
- 'display_id': '38897546',
+ 'id': '38897546',
'ext': 'mp4',
'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
- 'description': 'Watch full version of this video at https://slideslive.com/38897546.',
- 'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
- 'channel': 'SlidesLive Videos - G1',
- 'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
- 'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
- 'uploader': 'SlidesLive Videos - G1',
- 'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
- 'live_status': 'not_live',
- 'upload_date': '20160710',
- 'timestamp': 1618786715,
- 'duration': 6827,
- 'like_count': int,
- 'view_count': int,
- 'comment_count': int,
- 'channel_follower_count': int,
- 'age_limit': 0,
- 'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20231029',
+ 'timestamp': 1698588144,
'thumbnails': 'count:169',
- 'playable_in_embed': True,
- 'availability': 'unlisted',
- 'tags': [],
- 'categories': ['People & Blogs'],
'chapters': 'count:168',
+ 'duration': 6827,
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
},
}, {
# embed-only presentation, only XML slides info
@@ -111,8 +96,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8',
- 'timestamp': 1629671508,
- 'upload_date': '20210822',
+ 'timestamp': 1697803109,
+ 'upload_date': '20231020',
'chapters': 'count:7',
'duration': 326,
},
@@ -128,8 +113,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'MoReL: Multi-omics Relational Learning',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:7',
- 'timestamp': 1654714970,
- 'upload_date': '20220608',
+ 'timestamp': 1697824939,
+ 'upload_date': '20231020',
'chapters': 'count:6',
'duration': 171,
},
@@ -145,8 +130,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Decentralized Attribution of Generative Models',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:16',
- 'timestamp': 1622806321,
- 'upload_date': '20210604',
+ 'timestamp': 1697814901,
+ 'upload_date': '20231020',
'chapters': 'count:15',
'duration': 306,
},
@@ -162,8 +147,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Efficient Active Search for Combinatorial Optimization Problems',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:9',
- 'timestamp': 1654714896,
- 'upload_date': '20220608',
+ 'timestamp': 1697824757,
+ 'upload_date': '20231020',
'chapters': 'count:8',
'duration': 295,
},
@@ -177,10 +162,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979880',
'ext': 'mp4',
'title': 'The Representation Power of Neural Networks',
- 'timestamp': 1654714962,
+ 'timestamp': 1697824919,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:22',
- 'upload_date': '20220608',
+ 'upload_date': '20231020',
'chapters': 'count:21',
'duration': 294,
},
@@ -200,10 +185,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979682',
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
- 'timestamp': 1654714920,
+ 'timestamp': 1697824815,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:30',
- 'upload_date': '20220608',
+ 'upload_date': '20231020',
'chapters': 'count:31',
'duration': 272,
},
@@ -213,8 +198,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
'duration': 3,
- 'timestamp': 1654714920,
- 'upload_date': '20220608',
+ 'timestamp': 1697824815,
+ 'upload_date': '20231020',
},
}, {
'info_dict': {
@@ -222,8 +207,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
'duration': 4,
- 'timestamp': 1654714920,
- 'upload_date': '20220608',
+ 'timestamp': 1697824815,
+ 'upload_date': '20231020',
},
}],
'params': {
@@ -242,10 +227,10 @@ class SlidesLiveIE(InfoExtractor):
'id': '38979481',
'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
- 'timestamp': 1654714877,
+ 'timestamp': 1697824716,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:43',
- 'upload_date': '20220608',
+ 'upload_date': '20231020',
'chapters': 'count:43',
'duration': 315,
},
@@ -255,8 +240,8 @@ class SlidesLiveIE(InfoExtractor):
'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
'duration': 3,
- 'timestamp': 1654714877,
- 'upload_date': '20220608',
+ 'timestamp': 1697824716,
+ 'upload_date': '20231020',
},
}],
'params': {
@@ -275,10 +260,10 @@ class SlidesLiveIE(InfoExtractor):
'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
'uploader': 'SlidesLive Videos - A',
- 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
- 'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
+ 'uploader_id': '@slideslivevideos-a6075',
+ 'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
'upload_date': '20200903',
- 'timestamp': 1602599092,
+ 'timestamp': 1697805922,
'duration': 942,
'age_limit': 0,
'live_status': 'not_live',
@@ -303,8 +288,8 @@ class SlidesLiveIE(InfoExtractor):
'id': '38983994',
'ext': 'mp4',
'title': 'Zero-Shot AutoML with Pretrained Models',
- 'timestamp': 1662384834,
- 'upload_date': '20220905',
+ 'timestamp': 1697826708,
+ 'upload_date': '20231020',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:23',
'chapters': 'count:22',
@@ -336,8 +321,8 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8',
- 'timestamp': 1629671508,
- 'upload_date': '20210822',
+ 'timestamp': 1697803109,
+ 'upload_date': '20231020',
'chapters': 'count:7',
'duration': 326,
},
@@ -386,7 +371,7 @@ class SlidesLiveIE(InfoExtractor):
if not line.startswith('#EXT-SL-'):
continue
tag, _, value = line.partition(':')
- key = lookup.get(tag.lstrip('#EXT-SL-'))
+ key = lookup.get(tag[8:])
if not key:
continue
m3u8_dict[key] = value
diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py
index c0ff4f9aa8..a41ad303a5 100644
--- a/yt_dlp/extractor/sohu.py
+++ b/yt_dlp/extractor/sohu.py
@@ -8,13 +8,13 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
- url_or_none,
- unified_timestamp,
- try_get,
- urljoin,
+ int_or_none,
traverse_obj,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py
index 4379572592..7c914acbed 100644
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@@ -1,4 +1,5 @@
-import datetime
+import datetime as dt
+import itertools
import json
import math
import random
@@ -12,8 +13,8 @@ from ..utils import (
int_or_none,
jwt_decode_hs256,
try_call,
- try_get,
)
+from ..utils.traversal import traverse_obj
class SonyLIVIE(InfoExtractor):
@@ -93,7 +94,7 @@ class SonyLIVIE(InfoExtractor):
'mobileNumber': username,
'channelPartnerID': 'MSMIND',
'country': 'IN',
- 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
+ 'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
'otpSize': 6,
'loginType': 'REGISTERORSIGNIN',
'isMobileMandatory': True,
@@ -110,7 +111,7 @@ class SonyLIVIE(InfoExtractor):
'otp': self._get_tfa_info('OTP'),
'dmaId': 'IN',
'ageConfirmation': True,
- 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
+ 'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
'isMobileMandatory': True,
}).encode())
if otp_verify_json['resultCode'] == 'KO':
@@ -183,17 +184,21 @@ class SonyLIVIE(InfoExtractor):
class SonyLIVSeriesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P\d{10})$'
+ _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P\d{10})/?(?:$|[?#])'
_TESTS = [{
'url': 'https://www.sonyliv.com/shows/adaalat-1700000091',
- 'playlist_mincount': 456,
+ 'playlist_mincount': 452,
'info_dict': {
'id': '1700000091',
},
+ }, {
+ 'url': 'https://www.sonyliv.com/shows/beyhadh-1700000007/',
+ 'playlist_mincount': 358,
+ 'info_dict': {
+ 'id': '1700000007',
+ },
}]
- _API_SHOW_URL = "https://apiv2.sonyliv.com/AGL/1.9/R/ENG/WEB/IN/DL/DETAIL/{}?kids_safe=false&from=0&to=49"
- _API_EPISODES_URL = "https://apiv2.sonyliv.com/AGL/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{}?from=0&to=1000&orderBy=episodeNumber&sortOrder=asc"
- _API_SECURITY_URL = 'https://apiv2.sonyliv.com/AGL/1.4/A/ENG/WEB/ALL/GETTOKEN'
+ _API_BASE = 'https://apiv2.sonyliv.com/AGL'
def _entries(self, show_id):
headers = {
@@ -201,19 +206,34 @@ class SonyLIVSeriesIE(InfoExtractor):
'Referer': 'https://www.sonyliv.com',
}
headers['security_token'] = self._download_json(
- self._API_SECURITY_URL, video_id=show_id, headers=headers,
- note='Downloading security token')['resultObj']
- seasons = try_get(
- self._download_json(self._API_SHOW_URL.format(show_id), video_id=show_id, headers=headers),
- lambda x: x['resultObj']['containers'][0]['containers'], list)
- for season in seasons or []:
- season_id = season['id']
- episodes = try_get(
- self._download_json(self._API_EPISODES_URL.format(season_id), video_id=season_id, headers=headers),
- lambda x: x['resultObj']['containers'][0]['containers'], list)
- for episode in episodes or []:
- video_id = episode.get('id')
- yield self.url_result('sonyliv:%s' % video_id, ie=SonyLIVIE.ie_key(), video_id=video_id)
+ f'{self._API_BASE}/1.4/A/ENG/WEB/ALL/GETTOKEN', show_id,
+ 'Downloading security token', headers=headers)['resultObj']
+ seasons = traverse_obj(self._download_json(
+ f'{self._API_BASE}/1.9/R/ENG/WEB/IN/DL/DETAIL/{show_id}', show_id,
+ 'Downloading series JSON', headers=headers, query={
+ 'kids_safe': 'false',
+ 'from': '0',
+ 'to': '49',
+ }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
+ for season in seasons:
+ season_id = str(season['id'])
+ note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
+ cursor = 0
+ for page_num in itertools.count(1):
+ episodes = traverse_obj(self._download_json(
+ f'{self._API_BASE}/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{season_id}',
+ season_id, f'Downloading {note} page {page_num} JSON', headers=headers, query={
+ 'from': str(cursor),
+ 'to': str(cursor + 99),
+ 'orderBy': 'episodeNumber',
+ 'sortOrder': 'asc',
+ }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
+ if not episodes:
+ break
+ for episode in episodes:
+ video_id = str(episode['id'])
+ yield self.url_result(f'sonyliv:{video_id}', SonyLIVIE, video_id)
+ cursor += 100
def _real_extract(self, url):
show_id = self._match_id(url)
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index a7c2afd497..358146171f 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -1,30 +1,29 @@
+import functools
import itertools
-import re
import json
-# import random
+import re
-from .common import (
- InfoExtractor,
- SearchInfoExtractor
-)
+from .common import InfoExtractor, SearchInfoExtractor
from ..compat import compat_str
-from ..networking import HEADRequest, Request
+from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
- error_to_compat_str,
+ KNOWN_EXTENSIONS,
ExtractorError,
+ error_to_compat_str,
float_or_none,
int_or_none,
- KNOWN_EXTENSIONS,
+ join_nonempty,
mimetype2ext,
parse_qs,
str_or_none,
- try_get,
+ try_call,
unified_timestamp,
update_url_query,
url_or_none,
urlhandle_detect_ext,
)
+from ..utils.traversal import traverse_obj
class SoundcloudEmbedIE(InfoExtractor):
@@ -54,7 +53,6 @@ class SoundcloudBaseIE(InfoExtractor):
_API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
_API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
_API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
- _access_token = None
_HEADERS = {}
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
@@ -72,6 +70,16 @@ class SoundcloudBaseIE(InfoExtractor):
'original': 0,
}
+ _DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3']
+
+ @functools.cached_property
+ def _is_requested(self):
+ return re.compile(r'|'.join(set(
+ re.escape(pattern).replace(r'\*', r'.*') if pattern != 'default'
+ else '|'.join(map(re.escape, self._DEFAULT_FORMATS))
+ for pattern in self._configuration_arg('formats', ['default'], ie_key=SoundcloudIE)
+ ))).fullmatch
+
def _store_client_id(self, client_id):
self.cache.store('soundcloud', 'client_id', client_id)
@@ -112,21 +120,31 @@ class SoundcloudBaseIE(InfoExtractor):
def _initialize_pre_login(self):
self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
- def _perform_login(self, username, password):
- if username != 'oauth':
- self.report_warning(
- 'Login using username and password is not currently supported. '
- 'Use "--username oauth --password " to login using an oauth token')
- self._access_token = password
- query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
- payload = {'session': {'access_token': self._access_token}}
- token_verification = Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
- response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
- if response is not False:
- self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ def _verify_oauth_token(self, token):
+ if self._request_webpage(
+ self._API_VERIFY_AUTH_TOKEN % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
+ None, note='Verifying login token...', fatal=False,
+ data=json.dumps({'session': {'access_token': token}}).encode()):
+ self._HEADERS['Authorization'] = f'OAuth {token}'
self.report_login()
else:
- self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
+ self.report_warning('Provided authorization token is invalid. Continuing as guest')
+
+ def _real_initialize(self):
+ if self._HEADERS:
+ return
+ if token := try_call(lambda: self._get_cookies(self._BASE_URL)['oauth_token'].value):
+ self._verify_oauth_token(token)
+
+ def _perform_login(self, username, password):
+ if username != 'oauth':
+ raise ExtractorError(
+ 'Login using username and password is not currently supported. '
+ 'Use "--username oauth --password " to login using an oauth token, '
+ f'or else {self._login_hint(method="cookies")}', expected=True)
+ if self._HEADERS:
+ return
+ self._verify_oauth_token(password)
r'''
def genDevId():
@@ -147,14 +165,17 @@ class SoundcloudBaseIE(InfoExtractor):
'user_agent': self._USER_AGENT
}
- query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
- login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
- response = self._download_json(login, None)
- self._access_token = response.get('session').get('access_token')
- if not self._access_token:
- self.report_warning('Unable to get access token, login may has failed')
- else:
- self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ response = self._download_json(
+ self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
+ None, note='Verifying login token...', fatal=False,
+ data=json.dumps(payload).encode())
+
+ if token := traverse_obj(response, ('session', 'access_token', {str})):
+ self._HEADERS['Authorization'] = f'OAuth {token}'
+ self.report_login()
+ return
+
+ raise ExtractorError('Unable to get access token, login may have failed', expected=True)
'''
# signature generation
@@ -207,7 +228,7 @@ class SoundcloudBaseIE(InfoExtractor):
redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
if redirect_url:
urlh = self._request_webpage(
- HEADRequest(redirect_url), track_id, fatal=False)
+ HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False)
if urlh:
format_url = urlh.url
format_urls.add(format_url)
@@ -217,6 +238,7 @@ class SoundcloudBaseIE(InfoExtractor):
'filesize': int_or_none(urlh.headers.get('Content-Length')),
'url': format_url,
'quality': 10,
+ 'format_note': 'Original',
})
def invalid_url(url):
@@ -233,9 +255,13 @@ class SoundcloudBaseIE(InfoExtractor):
format_id_list.append(protocol)
ext = f.get('ext')
if ext == 'aac':
- f['abr'] = '256'
+ f.update({
+ 'abr': 256,
+ 'quality': 5,
+ 'format_note': 'Premium',
+ })
for k in ('ext', 'abr'):
- v = f.get(k)
+ v = str_or_none(f.get(k))
if v:
format_id_list.append(v)
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
@@ -244,7 +270,7 @@ class SoundcloudBaseIE(InfoExtractor):
abr = f.get('abr')
if abr:
f['abr'] = int(abr)
- if protocol == 'hls':
+ if protocol in ('hls', 'hls-aes'):
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
else:
protocol = 'http'
@@ -256,37 +282,54 @@ class SoundcloudBaseIE(InfoExtractor):
formats.append(f)
# New API
- transcodings = try_get(
- info, lambda x: x['media']['transcodings'], list) or []
- for t in transcodings:
- if not isinstance(t, dict):
+ for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']))):
+ if extract_flat:
+ break
+ format_url = t['url']
+
+ protocol = traverse_obj(t, ('format', 'protocol', {str}))
+ if protocol == 'progressive':
+ protocol = 'http'
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url:
+ protocol = 'hls-aes'
+
+ ext = None
+ if preset := traverse_obj(t, ('preset', {str_or_none})):
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str})))
+
+ identifier = join_nonempty(protocol, ext, delim='_')
+ if not self._is_requested(identifier):
+ self.write_debug(f'"{identifier}" is not a requested format, skipping')
continue
- format_url = url_or_none(t.get('url'))
- if not format_url:
- continue
- stream = None if extract_flat else self._download_json(
- format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
- if not isinstance(stream, dict):
- continue
- stream_url = url_or_none(stream.get('url'))
+
+ stream = None
+ for retry in self.RetryManager(fatal=False):
+ try:
+ stream = self._download_json(
+ format_url, track_id, f'Downloading {identifier} format info JSON',
+ query=query, headers=self._HEADERS)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 429:
+ self.report_warning(
+ 'You have reached the API rate limit, which is ~600 requests per '
+ '10 minutes. Use the --extractor-retries and --retry-sleep options '
+ 'to configure an appropriate retry count and wait time', only_once=True)
+ retry.error = e.cause
+ else:
+ self.report_warning(e.msg)
+
+ stream_url = traverse_obj(stream, ('url', {url_or_none}))
if invalid_url(stream_url):
continue
format_urls.add(stream_url)
- stream_format = t.get('format') or {}
- protocol = stream_format.get('protocol')
- if protocol != 'hls' and '/hls' in format_url:
- protocol = 'hls'
- ext = None
- preset = str_or_none(t.get('preset'))
- if preset:
- ext = preset.split('_')[0]
- if ext not in KNOWN_EXTENSIONS:
- ext = mimetype2ext(stream_format.get('mime_type'))
add_format({
'url': stream_url,
'ext': ext,
- }, 'http' if protocol == 'progressive' else protocol,
- t.get('snipped') or '/preview/' in format_url)
+ }, protocol, t.get('snipped') or '/preview/' in format_url)
for f in formats:
f['vcodec'] = 'none'
@@ -338,7 +381,7 @@ class SoundcloudBaseIE(InfoExtractor):
'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
- 'genre': info.get('genre'),
+ 'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
'formats': formats if not extract_flat else None
}
@@ -372,10 +415,10 @@ class SoundcloudIE(SoundcloudBaseIE):
_TESTS = [
{
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
- 'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+ 'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
'info_dict': {
'id': '62986583',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music',
@@ -388,6 +431,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
+ 'uploader_url': 'https://soundcloud.com/ethmusic',
+ 'genres': [],
}
},
# geo-restricted
@@ -395,7 +441,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
'id': '47127627',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
@@ -408,6 +454,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/the-concept-band',
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
+ 'genres': ['Alternative'],
},
},
# private link
@@ -429,6 +478,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/jaimemf',
+ 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+ 'genres': ['youtubedl'],
},
},
# private link (alt format)
@@ -450,6 +502,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/jaimemf',
+ 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+ 'genres': ['youtubedl'],
},
},
# downloadable song
@@ -459,6 +514,21 @@ class SoundcloudIE(SoundcloudBaseIE):
'info_dict': {
'id': '343609555',
'ext': 'wav',
+ 'title': 'The Following',
+ 'description': '',
+ 'uploader': '80M',
+ 'uploader_id': '312384765',
+ 'uploader_url': 'https://soundcloud.com/the80m',
+ 'upload_date': '20170922',
+ 'timestamp': 1506120436,
+ 'duration': 397.228,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
+ 'license': 'all-rights-reserved',
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'view_count': int,
+ 'genres': ['Dance & EDM'],
},
},
# private link, downloadable format
@@ -480,6 +550,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
+ 'uploader_url': 'https://soundcloud.com/oriuplift',
+ 'genres': ['Trance'],
},
},
# no album art, use avatar pic for thumbnail
@@ -502,6 +575,8 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/garyvee',
+ 'genres': [],
},
'params': {
'skip_download': True,
@@ -509,13 +584,13 @@ class SoundcloudIE(SoundcloudBaseIE):
},
{
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
- 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+ 'md5': '8227c3473a4264df6b02ad7e5b7527ac',
'info_dict': {
'id': '583011102',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Mezzo Valzer',
- 'description': 'md5:4138d582f81866a530317bae316e8b61',
- 'uploader': 'Micronie',
+ 'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
+ 'uploader': 'Giovanni Sarani',
'uploader_id': '3352531',
'timestamp': 1551394171,
'upload_date': '20190228',
@@ -526,6 +601,8 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'genres': ['Piano'],
+ 'uploader_url': 'https://soundcloud.com/giovannisarani',
},
},
{
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 493eea2a69..773ddd3445 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- try_get,
- unified_timestamp
-)
+from ..utils import try_get, unified_timestamp
class SovietsClosetBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py
index 43da34a325..c73f7971d0 100644
--- a/yt_dlp/extractor/spankbang.py
+++ b/yt_dlp/extractor/spankbang.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
merge_dicts,
parse_duration,
parse_resolution,
diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py
index a98584a27d..bdb8ef4968 100644
--- a/yt_dlp/extractor/springboardplatform.py
+++ b/yt_dlp/extractor/springboardplatform.py
@@ -4,11 +4,11 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
- xpath_attr,
- xpath_text,
- xpath_element,
unescapeHTML,
unified_timestamp,
+ xpath_attr,
+ xpath_element,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/stacommu.py b/yt_dlp/extractor/stacommu.py
index 1308c595da..d2f207fcc5 100644
--- a/yt_dlp/extractor/stacommu.py
+++ b/yt_dlp/extractor/stacommu.py
@@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):
class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
- _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P\w+)'
+ _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P\w+)'
IE_NAME = 'theatercomplextown:vod'
_TESTS = [{
'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
@@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
}, {
'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
'only_matching': True,
+ }, {
+ 'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
+ 'only_matching': True,
}]
_API_PATH = 'videoEpisodes'
@@ -204,7 +207,7 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
- _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P\w+)'
+ _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P\w+)'
IE_NAME = 'theatercomplextown:ppv'
_TESTS = [{
'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
@@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
}, {
'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
'only_matching': True,
+ }, {
+ 'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
+ 'only_matching': True,
}]
_API_PATH = 'events'
diff --git a/yt_dlp/extractor/stageplus.py b/yt_dlp/extractor/stageplus.py
index 4bed4d646a..77e4362fc6 100644
--- a/yt_dlp/extractor/stageplus.py
+++ b/yt_dlp/extractor/stageplus.py
@@ -21,7 +21,7 @@ class StagePlusVODConcertIE(InfoExtractor):
'id': 'vod_concert_APNM8GRFDPHMASJKBSPJACG',
'title': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
'description': 'md5:50f78ec180518c9bdb876bac550996fc',
- 'artist': ['Yuja Wang', 'Lorenzo Viotti'],
+ 'artists': ['Yuja Wang', 'Lorenzo Viotti'],
'upload_date': '20230331',
'timestamp': 1680249600,
'release_date': '20210709',
@@ -40,10 +40,10 @@ class StagePlusVODConcertIE(InfoExtractor):
'release_timestamp': 1625788800,
'duration': 2207,
'chapters': 'count:5',
- 'artist': ['Yuja Wang'],
- 'composer': ['Sergei Rachmaninoff'],
+ 'artists': ['Yuja Wang'],
+ 'composers': ['Sergei Rachmaninoff'],
'album': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
- 'album_artist': ['Yuja Wang', 'Lorenzo Viotti'],
+ 'album_artists': ['Yuja Wang', 'Lorenzo Viotti'],
'track': 'Piano Concerto No. 2 in C Minor, Op. 18',
'track_number': 1,
'genre': 'Instrumental Concerto',
@@ -474,7 +474,7 @@ fragment BannerFields on Banner {
metadata = traverse_obj(data, {
'title': 'title',
'description': ('shortDescription', {str}),
- 'artist': ('artists', 'edges', ..., 'node', 'name'),
+ 'artists': ('artists', 'edges', ..., 'node', 'name'),
'timestamp': ('archiveReleaseDate', {unified_timestamp}),
'release_timestamp': ('productionDate', {unified_timestamp}),
})
@@ -494,7 +494,7 @@ fragment BannerFields on Banner {
'formats': formats,
'subtitles': subtitles,
'album': metadata.get('title'),
- 'album_artist': metadata.get('artist'),
+ 'album_artists': metadata.get('artist'),
'track_number': idx,
**metadata,
**traverse_obj(video, {
@@ -506,8 +506,8 @@ fragment BannerFields on Banner {
'title': 'title',
'start_time': ('mark', {float_or_none}),
}),
- 'artist': ('artists', 'edges', ..., 'node', 'name'),
- 'composer': ('work', 'composers', ..., 'name'),
+ 'artists': ('artists', 'edges', ..., 'node', 'name'),
+ 'composers': ('work', 'composers', ..., 'name'),
'genre': ('work', 'genre', 'title'),
}),
})
diff --git a/yt_dlp/extractor/startrek.py b/yt_dlp/extractor/startrek.py
index e92122f9b7..94efb589c6 100644
--- a/yt_dlp/extractor/startrek.py
+++ b/yt_dlp/extractor/startrek.py
@@ -3,6 +3,7 @@ from ..utils import int_or_none, urljoin
class StarTrekIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'(?Phttps?://(?:intl|www)\.startrek\.com)/videos/(?P[^/]+)'
_TESTS = [{
'url': 'https://intl.startrek.com/videos/watch-welcoming-jess-bush-to-the-ready-room',
diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py
index bb6e8f1ea5..312a4fde08 100644
--- a/yt_dlp/extractor/startv.py
+++ b/yt_dlp/extractor/startv.py
@@ -3,10 +3,10 @@ from ..compat import (
compat_str,
)
from ..utils import (
- clean_html,
ExtractorError,
- traverse_obj,
+ clean_html,
int_or_none,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
index 7daee2fe03..63da9662ad 100644
--- a/yt_dlp/extractor/steam.py
+++ b/yt_dlp/extractor/steam.py
@@ -2,9 +2,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- extract_attributes,
ExtractorError,
+ extract_attributes,
get_element_by_class,
+ str_or_none,
)
@@ -30,7 +31,6 @@ class SteamIE(InfoExtractor):
'ext': 'mp4',
'title': 'Terraria video 256785003',
'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
- 'n_entries': 2,
}
},
{
@@ -39,9 +39,7 @@ class SteamIE(InfoExtractor):
'id': '2040428',
'ext': 'mp4',
'title': 'Terraria video 2040428',
- 'playlist_index': 2,
'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
- 'n_entries': 2,
}
}
],
@@ -55,12 +53,10 @@ class SteamIE(InfoExtractor):
}, {
'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
'info_dict': {
- 'id': '256757115',
- 'title': 'Grand Theft Auto V video 256757115',
- 'ext': 'mp4',
- 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
- 'n_entries': 20,
+ 'id': '271590',
+ 'title': 'Grand Theft Auto V',
},
+ 'playlist_count': 23,
}]
def _real_extract(self, url):
@@ -136,7 +132,7 @@ class SteamCommunityBroadcastIE(InfoExtractor):
'id': '76561199073851486',
'title': r're:Steam Community :: pepperm!nt :: Broadcast 2022-06-26 \d{2}:\d{2}',
'ext': 'mp4',
- 'uploader_id': 1113585758,
+ 'uploader_id': '1113585758',
'uploader': 'pepperm!nt',
'live_status': 'is_live',
},
@@ -169,6 +165,6 @@ class SteamCommunityBroadcastIE(InfoExtractor):
'live_status': 'is_live',
'view_count': json_data.get('num_view'),
'uploader': uploader_json.get('persona_name'),
- 'uploader_id': uploader_json.get('accountid'),
+ 'uploader_id': str_or_none(uploader_json.get('accountid')),
'subtitles': subs,
}
diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py
index 2fd200f87a..46a15e6a18 100644
--- a/yt_dlp/extractor/stitcher.py
+++ b/yt_dlp/extractor/stitcher.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ExtractorError,
clean_html,
clean_podcast_url,
- ExtractorError,
int_or_none,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py
index 566f777827..20a70a7bcd 100644
--- a/yt_dlp/extractor/storyfire.py
+++ b/yt_dlp/extractor/storyfire.py
@@ -2,9 +2,9 @@ import functools
from .common import InfoExtractor
from ..utils import (
+ OnDemandPagedList,
format_field,
int_or_none,
- OnDemandPagedList,
smuggle_url,
)
diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py
index 462861e0e0..c303ac53ac 100644
--- a/yt_dlp/extractor/streamable.py
+++ b/yt_dlp/extractor/streamable.py
@@ -3,8 +3,8 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
- try_get,
parse_codecs,
+ try_get,
)
diff --git a/yt_dlp/extractor/streamff.py b/yt_dlp/extractor/streamff.py
deleted file mode 100644
index 93c42942c3..0000000000
--- a/yt_dlp/extractor/streamff.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from .common import InfoExtractor
-from ..utils import int_or_none, parse_iso8601
-
-
-class StreamFFIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?streamff\.com/v/(?P[a-zA-Z0-9]+)'
-
- _TESTS = [{
- 'url': 'https://streamff.com/v/55cc94',
- 'md5': '8745a67bb5e5c570738efe7983826370',
- 'info_dict': {
- 'id': '55cc94',
- 'ext': 'mp4',
- 'title': '55cc94',
- 'timestamp': 1634764643,
- 'upload_date': '20211020',
- 'view_count': int,
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- json_data = self._download_json(f'https://streamff.com/api/videos/{video_id}', video_id)
- return {
- 'id': video_id,
- 'title': json_data.get('name') or video_id,
- 'url': 'https://streamff.com/%s' % json_data['videoLink'],
- 'view_count': int_or_none(json_data.get('views')),
- 'timestamp': parse_iso8601(json_data.get('date')),
- }
diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py
index b9523c8654..a847925e47 100644
--- a/yt_dlp/extractor/stripchat.py
+++ b/yt_dlp/extractor/stripchat.py
@@ -3,7 +3,7 @@ from ..utils import (
ExtractorError,
UserNotLive,
lowercase_escape,
- traverse_obj
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py
index 8b3e63538c..0ab7801004 100644
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@@ -41,7 +41,7 @@ class STVPlayerIE(InfoExtractor):
ptype, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id, fatal=False) or ''
- props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
+ props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
player_api_cache = try_get(
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py
index 708873a956..501156e513 100644
--- a/yt_dlp/extractor/sunporno.py
+++ b/yt_dlp/extractor/sunporno.py
@@ -2,10 +2,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- parse_duration,
- int_or_none,
- qualities,
determine_ext,
+ int_or_none,
+ parse_duration,
+ qualities,
)
diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py
index afcdbf7804..29e5e573fe 100644
--- a/yt_dlp/extractor/syfy.py
+++ b/yt_dlp/extractor/syfy.py
@@ -1,11 +1,12 @@
from .adobepass import AdobePassIE
from ..utils import (
- update_url_query,
smuggle_url,
+ update_url_query,
)
class SyfyIE(AdobePassIE):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P[^/?#]+)'
_TESTS = [{
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py
index e23b490b00..c69c13d0bb 100644
--- a/yt_dlp/extractor/tagesschau.py
+++ b/yt_dlp/extractor/tagesschau.py
@@ -12,6 +12,7 @@ from ..utils import (
class TagesschauIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P[^/]+/(?:[^/]+/)*?(?P[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
_TESTS = [{
diff --git a/yt_dlp/extractor/taptap.py b/yt_dlp/extractor/taptap.py
new file mode 100644
index 0000000000..56f2f0ef4b
--- /dev/null
+++ b/yt_dlp/extractor/taptap.py
@@ -0,0 +1,275 @@
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ join_nonempty,
+ str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class TapTapBaseIE(InfoExtractor):
+ _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC'
+ _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get'
+ _INFO_API = None
+ _INFO_QUERY_KEY = 'id'
+ _DATA_PATH = None
+ _ID_PATH = None
+ _META_PATH = None
+
+ def _get_api(self, url, video_id, query, **kwargs):
+ query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())}
+ return self._download_json(url, video_id, query=query, **kwargs)['data']
+
+ def _extract_video(self, video_id):
+ video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0]
+
+ # h265 playlist contains both h265 and h264 formats
+ video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any))
+ formats = self._extract_m3u8_formats(video_url, video_id, fatal=False)
+ for format in formats:
+ if re.search(r'^(hev|hvc|hvt)\d', format.get('vcodec', '')):
+ format['format_id'] = join_nonempty(format.get('format_id'), 'h265', delim='_')
+
+ return {
+ 'id': str(video_id),
+ 'formats': formats,
+ **traverse_obj(video_data, ({
+ 'duration': ('info', 'duration', {int_or_none}),
+ 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}),
+ }), get_all=False)
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ query = {self._INFO_QUERY_KEY: video_id}
+
+ data = traverse_obj(
+ self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH)
+
+ metainfo = traverse_obj(data, self._META_PATH)
+ entries = [{
+ **metainfo,
+ **self._extract_video(id)
+ } for id in set(traverse_obj(data, self._ID_PATH))]
+
+ return self.playlist_result(entries, **metainfo, id=video_id)
+
+
+class TapTapMomentIE(TapTapBaseIE):
+ _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P\d+)'
+ _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail'
+ _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id')
+ _META_PATH = ('moment', {
+ 'timestamp': ('created_time', {int_or_none}),
+ 'modified_timestamp': ('edited_time', {int_or_none}),
+ 'uploader': ('author', 'user', 'name', {str}),
+ 'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}),
+ 'title': ('topic', 'title', {str}),
+ 'description': ('topic', 'summary', {str}),
+ })
+ _TESTS = [{
+ 'url': 'https://www.taptap.cn/moment/194618230982052443',
+ 'info_dict': {
+ 'id': '194618230982052443',
+ 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
+ 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
+ 'timestamp': 1633453402,
+ 'upload_date': '20211005',
+ 'modified_timestamp': 1633453402,
+ 'modified_date': '20211005',
+ 'uploader': '乌酱',
+ 'uploader_id': '532896',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2202584',
+ 'ext': 'mp4',
+ 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
+ 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
+ 'duration': 66,
+ 'timestamp': 1633453402,
+ 'upload_date': '20211005',
+ 'modified_timestamp': 1633453402,
+ 'modified_date': '20211005',
+ 'uploader': '乌酱',
+ 'uploader_id': '532896',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.taptap.cn/moment/521630629209573493',
+ 'info_dict': {
+ 'id': '521630629209573493',
+ 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
+ 'description': 'md5:2c81245da864428c904d53ae4ad2182b',
+ 'timestamp': 1711425600,
+ 'upload_date': '20240326',
+ 'modified_timestamp': 1711425600,
+ 'modified_date': '20240326',
+ 'uploader': '崩坏:星穹铁道',
+ 'uploader_id': '414732580',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '4006511',
+ 'ext': 'mp4',
+ 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
+ 'description': 'md5:2c81245da864428c904d53ae4ad2182b',
+ 'duration': 173,
+ 'timestamp': 1711425600,
+ 'upload_date': '20240326',
+ 'modified_timestamp': 1711425600,
+ 'modified_date': '20240326',
+ 'uploader': '崩坏:星穹铁道',
+ 'uploader_id': '414732580',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.taptap.cn/moment/540493587511511299',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': '540493587511511299',
+ 'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!',
+ 'description': 'md5:d60842350e686ddb242291ddfb8e39c9',
+ 'timestamp': 1715920200,
+ 'upload_date': '20240517',
+ 'modified_timestamp': 1715942225,
+ 'modified_date': '20240517',
+ 'uploader': 'TapTap 编辑',
+ 'uploader_id': '7159244',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapAppIE(TapTapBaseIE):
+ _VALID_URL = r'https?://www\.taptap\.cn/app/(?P\d+)'
+ _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail'
+ _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
+ _META_PATH = {
+ 'title': ('title', {str}),
+ 'description': ('description', 'text', {str}, {clean_html}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.cn/app/168332',
+ 'info_dict': {
+ 'id': '168332',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ },
+ 'playlist_count': 2,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '4058443',
+ 'ext': 'mp4',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ 'duration': 26,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '4058462',
+ 'ext': 'mp4',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ 'duration': 295,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapIntlBase(TapTapBaseIE):
+ _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0'
+ _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get'
+
+
+class TapTapAppIntlIE(TapTapIntlBase):
+ _VALID_URL = r'https?://www\.taptap\.io/app/(?P\d+)'
+ _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail'
+ _DATA_PATH = 'app'
+ _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
+ _META_PATH = {
+ 'title': ('title', {str}),
+ 'description': ('description', 'text', {str}, {clean_html}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.io/app/233287',
+ 'info_dict': {
+ 'id': '233287',
+ 'title': '《虹彩六號 M》',
+ 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2149708997',
+ 'ext': 'mp4',
+ 'title': '《虹彩六號 M》',
+ 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
+ 'duration': 78,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapPostIntlIE(TapTapIntlBase):
+ _VALID_URL = r'https?://www\.taptap\.io/post/(?P\d+)'
+ _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail'
+ _INFO_QUERY_KEY = 'id_str'
+ _DATA_PATH = 'post'
+ _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id')
+ _META_PATH = {
+ 'timestamp': ('published_time', {int_or_none}),
+ 'modified_timestamp': ('edited_time', {int_or_none}),
+ 'uploader': ('user', 'name', {str}),
+ 'uploader_id': ('user', 'id', {int}, {str_or_none}),
+ 'title': ('title', {str}),
+ 'description': ('list_fields', 'summary', {str}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.io/post/571785',
+ 'info_dict': {
+ 'id': '571785',
+ 'title': 'Arknights x Rainbow Six Siege | Event PV',
+ 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
+ 'timestamp': 1614664951,
+ 'upload_date': '20210302',
+ 'modified_timestamp': 1614664951,
+ 'modified_date': '20210302',
+ 'uploader': 'TapTap Editor',
+ 'uploader_id': '80224473',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2149491903',
+ 'ext': 'mp4',
+ 'title': 'Arknights x Rainbow Six Siege | Event PV',
+ 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
+ 'duration': 122,
+ 'timestamp': 1614664951,
+ 'upload_date': '20210302',
+ 'modified_timestamp': 1614664951,
+ 'modified_date': '20210302',
+ 'uploader': 'TapTap Editor',
+ 'uploader_id': '80224473',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
diff --git a/yt_dlp/extractor/tass.py b/yt_dlp/extractor/tass.py
index 67e544a6a3..d4c5b41a71 100644
--- a/yt_dlp/extractor/tass.py
+++ b/yt_dlp/extractor/tass.py
@@ -8,6 +8,7 @@ from ..utils import (
class TassIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P\d+)'
_TESTS = [
{
diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py
index 808c6c73d3..4e178593f4 100644
--- a/yt_dlp/extractor/tbs.py
+++ b/yt_dlp/extractor/tbs.py
@@ -2,8 +2,8 @@ import re
from .turner import TurnerBaseIE
from ..compat import (
- compat_urllib_parse_urlparse,
compat_parse_qs,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
float_or_none,
diff --git a/yt_dlp/extractor/tdslifeway.py b/yt_dlp/extractor/tdslifeway.py
deleted file mode 100644
index 3623a68c8c..0000000000
--- a/yt_dlp/extractor/tdslifeway.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from .common import InfoExtractor
-
-
-class TDSLifewayIE(InfoExtractor):
- _VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P\d+)/index\.html'
-
- _TEST = {
- # From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers
- 'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb®istration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F',
- 'info_dict': {
- 'id': '3453494717001',
- 'ext': 'mp4',
- 'title': 'The Gospel by Numbers',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'upload_date': '20140410',
- 'description': 'Coming soon from T4G 2014!',
- 'uploader_id': '2034960640001',
- 'timestamp': 1397145591,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['BrightcoveNew'],
- }
-
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s'
-
- def _real_extract(self, url):
- brightcove_id = self._match_id(url)
- return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py
index 01906bda9d..778fa1263d 100644
--- a/yt_dlp/extractor/teachable.py
+++ b/yt_dlp/extractor/teachable.py
@@ -3,10 +3,10 @@ import re
from .common import InfoExtractor
from .wistia import WistiaIE
from ..utils import (
- clean_html,
ExtractorError,
- int_or_none,
+ clean_html,
get_element_by_class,
+ int_or_none,
strip_or_none,
urlencode_postdata,
urljoin,
@@ -99,6 +99,7 @@ class TeachableBaseIE(InfoExtractor):
class TeachableIE(TeachableBaseIE):
+ _WORKING = False
_VALID_URL = r'''(?x)
(?:
%shttps?://(?P[^/]+)|
diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py
index c3eec2784f..7402409936 100644
--- a/yt_dlp/extractor/teachertube.py
+++ b/yt_dlp/extractor/teachertube.py
@@ -2,13 +2,14 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
qualities,
)
class TeacherTubeIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'teachertube'
IE_DESC = 'teachertube.com videos'
@@ -87,6 +88,7 @@ class TeacherTubeIE(InfoExtractor):
class TeacherTubeUserIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'teachertube:user:collection'
IE_DESC = 'teachertube.com user and collection videos'
diff --git a/yt_dlp/extractor/teachingchannel.py b/yt_dlp/extractor/teachingchannel.py
index 275f6d1f92..5791292a95 100644
--- a/yt_dlp/extractor/teachingchannel.py
+++ b/yt_dlp/extractor/teachingchannel.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
class TeachingChannelIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P[^/?]+)'
_TEST = {
diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py
index d32f81262a..3fb899cac5 100644
--- a/yt_dlp/extractor/teamcoco.py
+++ b/yt_dlp/extractor/teamcoco.py
@@ -13,8 +13,8 @@ from ..utils import (
parse_qs,
traverse_obj,
unified_timestamp,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py
index dd802db5b7..ba25cdcf65 100644
--- a/yt_dlp/extractor/teamtreehouse.py
+++ b/yt_dlp/extractor/teamtreehouse.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
float_or_none,
get_element_by_class,
get_element_by_id,
diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py
index c28a154987..0969bbb036 100644
--- a/yt_dlp/extractor/ted.py
+++ b/yt_dlp/extractor/ted.py
@@ -2,14 +2,13 @@ import itertools
import re
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
+ parse_duration,
str_to_int,
try_get,
- url_or_none,
unified_strdate,
- parse_duration,
+ url_or_none,
)
diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py
index 212af37850..1705c2d556 100644
--- a/yt_dlp/extractor/tele13.py
+++ b/yt_dlp/extractor/tele13.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
+ determine_ext,
js_to_json,
qualities,
- determine_ext,
)
diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py
index 9260db2b44..a455375415 100644
--- a/yt_dlp/extractor/tele5.py
+++ b/yt_dlp/extractor/tele5.py
@@ -1,88 +1,77 @@
-from .dplay import DPlayIE
-from ..compat import compat_urlparse
-from ..utils import (
- ExtractorError,
- extract_attributes,
-)
+import functools
+
+from .dplay import DiscoveryPlusBaseIE
+from ..utils import join_nonempty
+from ..utils.traversal import traverse_obj
-class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
- _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P[^/?#&]+)'
- _GEO_COUNTRIES = ['DE']
+class Tele5IE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P[\w-]+)/(?P[\w-]+)(?:/(?P[\w-]+))?'
_TESTS = [{
- 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
+ # slug_a and slug_b
+ 'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane',
'info_dict': {
- 'id': '1549416',
+ 'id': '6852024',
'ext': 'mp4',
- 'upload_date': '20180814',
- 'timestamp': 1534290623,
- 'title': 'Pandorum',
+ 'title': 'Quarantäne',
+ 'description': 'md5:6af0373bd0fcc4f13e5d47701903d675',
+ 'episode': 'Episode 73',
+ 'episode_number': 73,
+ 'season': 'Season 4',
+ 'season_number': 4,
+ 'series': 'Stargate Atlantis',
+ 'upload_date': '20240525',
+ 'timestamp': 1716643200,
+ 'duration': 2503.2,
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg',
+ 'creators': ['Tele5'],
+ 'tags': [],
},
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'No longer available: "404 Seite nicht gefunden"',
}, {
- # jwplatform, nexx unavailable
- 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
+ # only slug_a
+ 'url': 'https://tele5.de/mediathek/inside-out',
'info_dict': {
- 'id': 'WJuiOlUp',
+ 'id': '6819502',
'ext': 'mp4',
- 'upload_date': '20200603',
- 'timestamp': 1591214400,
- 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters',
- 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97',
+ 'title': 'Inside out',
+ 'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd',
+ 'series': 'Inside out',
+ 'upload_date': '20240523',
+ 'timestamp': 1716494400,
+ 'duration': 5343.4,
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg',
+ 'creators': ['Tele5'],
+ 'tags': [],
},
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'No longer available, redirects to Filme page',
}, {
- 'url': 'https://tele5.de/mediathek/angel-of-mine/',
+ # playlist
+ 'url': 'https://tele5.de/mediathek/schlefaz',
'info_dict': {
- 'id': '1252360',
- 'ext': 'mp4',
- 'upload_date': '20220109',
- 'timestamp': 1641762000,
- 'title': 'Angel of Mine',
- 'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
+ 'id': 'mediathek-schlefaz',
},
- 'params': {
- 'format': 'bestvideo',
- },
- }, {
- 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/anders-ist-sevda/',
- 'only_matching': True,
+ 'playlist_mincount': 3,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- player_element = self._search_regex(r'(]+?>)', webpage, 'video player')
- player_info = extract_attributes(player_element)
- asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
- endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
- source_type = player_info.get('sourcetype')
- if source_type:
- endpoint = '%s-%s' % (source_type, endpoint)
- try:
- return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
- except ExtractorError as e:
- if getattr(e, 'message', '') == 'Missing deviceId in context':
- self.report_drm(video_id)
- raise
+ parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b')
+ playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-')
+
+ query = {'environment': 'tele5', 'v': '2'}
+ if not slug_b:
+ endpoint = f'page/{slug_a}'
+ query['parent_slug'] = parent_slug
+ else:
+ endpoint = f'videos/{slug_b}'
+ query['filter[show.slug]'] = slug_a
+ cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query)
+
+ return self.playlist_result(map(
+ functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'),
+ traverse_obj(cms_data, ('blocks', ..., 'videoId', {str}))), playlist_id)
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py
index 3d29dace30..a71b14c27c 100644
--- a/yt_dlp/extractor/telemb.py
+++ b/yt_dlp/extractor/telemb.py
@@ -5,6 +5,7 @@ from ..utils import remove_start
class TeleMBIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P.+?)_d_(?P\d+)\.html'
_TESTS = [
{
diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py
index 54e74a6c02..84b24dead8 100644
--- a/yt_dlp/extractor/telemundo.py
+++ b/yt_dlp/extractor/telemundo.py
@@ -4,7 +4,7 @@ from ..utils import try_get, unified_timestamp
class TelemundoIE(InfoExtractor):
-
+ _WORKING = False
_VALID_URL = r'https?:\/\/(?:www\.)?telemundo\.com\/.+?video\/[^\/]+(?Ptmvo\d{7})'
_TESTS = [{
'url': 'https://www.telemundo.com/noticias/noticias-telemundo-en-la-noche/empleo/video/esta-aplicacion-gratuita-esta-ayudando-los-latinos-encontrar-trabajo-en-estados-unidos-tmvo9829325',
diff --git a/yt_dlp/extractor/telequebec.py b/yt_dlp/extractor/telequebec.py
index e89137269a..08a083714e 100644
--- a/yt_dlp/extractor/telequebec.py
+++ b/yt_dlp/extractor/telequebec.py
@@ -83,7 +83,7 @@ class TeleQuebecIE(TeleQuebecBaseIE):
class TeleQuebecSquatIE(InfoExtractor):
- _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P\d+)'
+ _VALID_URL = r'https?://squat\.telequebec\.tv/videos/(?P\d+)'
_TESTS = [{
'url': 'https://squat.telequebec.tv/videos/9314',
'info_dict': {
diff --git a/yt_dlp/extractor/teletask.py b/yt_dlp/extractor/teletask.py
index a73dd68fba..fd831f580a 100644
--- a/yt_dlp/extractor/teletask.py
+++ b/yt_dlp/extractor/teletask.py
@@ -5,6 +5,7 @@ from ..utils import unified_strdate
class TeleTaskIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P[0-9]+)'
_TEST = {
'url': 'http://www.tele-task.de/archive/video/html5/26168/',
diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py
index 9378ed0214..380c84d98b 100644
--- a/yt_dlp/extractor/telewebion.py
+++ b/yt_dlp/extractor/telewebion.py
@@ -1,8 +1,8 @@
from __future__ import annotations
+import functools
import json
-from functools import partial
-from textwrap import dedent
+import textwrap
from .common import InfoExtractor
from ..utils import ExtractorError, format_field, int_or_none, parse_iso8601
@@ -10,7 +10,7 @@ from ..utils.traversal import traverse_obj
def _fmt_url(url):
- return partial(format_field, template=url, default=None)
+ return functools.partial(format_field, template=url, default=None)
class TelewebionIE(InfoExtractor):
@@ -88,7 +88,7 @@ class TelewebionIE(InfoExtractor):
if not video_id.startswith('0x'):
video_id = hex(int(video_id))
- episode_data = self._call_graphql_api('getEpisodeDetail', video_id, dedent('''
+ episode_data = self._call_graphql_api('getEpisodeDetail', video_id, textwrap.dedent('''
queryEpisode(filter: {EpisodeID: $EpisodeId}, first: 1) {
title
program {
@@ -127,7 +127,7 @@ class TelewebionIE(InfoExtractor):
'formats': (
'channel', 'descriptor', {str},
{_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')},
- {partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
+ {functools.partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
}))
info_dict['id'] = video_id
return info_dict
diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py
index 9318d6f9ad..71e54eb0cf 100644
--- a/yt_dlp/extractor/tempo.py
+++ b/yt_dlp/extractor/tempo.py
@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
traverse_obj,
- try_call
+ try_call,
)
diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py
index 6618ea4e6e..ae2cb483f7 100644
--- a/yt_dlp/extractor/tencent.py
+++ b/yt_dlp/extractor/tencent.py
@@ -8,8 +8,8 @@ from .common import InfoExtractor
from ..aes import aes_cbc_encrypt_bytes
from ..utils import (
ExtractorError,
- float_or_none,
determine_ext,
+ float_or_none,
int_or_none,
js_to_json,
traverse_obj,
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
index 7ce7cbf849..11cc5705e9 100644
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -1,7 +1,7 @@
import base64
+import datetime as dt
import functools
import itertools
-from datetime import datetime
from .common import InfoExtractor
from ..networking import HEADRequest
@@ -20,7 +20,8 @@ class TenPlayIE(InfoExtractor):
'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
'duration': 186,
- 'season': 39,
+ 'season': 'Season 39',
+ 'season_number': 39,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'uploader': 'Channel 10',
@@ -69,7 +70,7 @@ class TenPlayIE(InfoExtractor):
username, password = self._get_login_info()
if username is None or password is None:
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
- _timestamp = datetime.now().strftime('%Y%m%d000000')
+ _timestamp = dt.datetime.now().strftime('%Y%m%d000000')
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
'X-Network-Ten-Auth': _auth_header,
@@ -108,7 +109,7 @@ class TenPlayIE(InfoExtractor):
'description': data.get('description'),
'age_limit': self._AUS_AGES.get(data.get('classification')),
'series': data.get('tvShow'),
- 'season': int_or_none(data.get('season')),
+ 'season_number': int_or_none(data.get('season')),
'episode_number': int_or_none(data.get('episode')),
'timestamp': data.get('published'),
'thumbnail': data.get('imageUrl'),
diff --git a/yt_dlp/extractor/theguardian.py b/yt_dlp/extractor/theguardian.py
index a231eccf4b..fb6407715c 100644
--- a/yt_dlp/extractor/theguardian.py
+++ b/yt_dlp/extractor/theguardian.py
@@ -10,7 +10,7 @@ from ..utils import (
parse_qs,
traverse_obj,
unified_strdate,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py
index a991a4dfd0..99f0d42ef5 100644
--- a/yt_dlp/extractor/theintercept.py
+++ b/yt_dlp/extractor/theintercept.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- parse_iso8601,
- int_or_none,
ExtractorError,
+ int_or_none,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py
index 9160f5ec6b..eeb33a6606 100644
--- a/yt_dlp/extractor/theplatform.py
+++ b/yt_dlp/extractor/theplatform.py
@@ -1,29 +1,27 @@
-import re
-import time
-import hmac
import binascii
import hashlib
+import hmac
+import re
+import time
-
-from .once import OnceIE
from .adobepass import AdobePassIE
-from ..networking import Request
+from .once import OnceIE
+from ..networking import HEADRequest, Request
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
+ find_xpath_attr,
float_or_none,
int_or_none,
- parse_qs,
- unsmuggle_url,
- update_url_query,
- xpath_with_ns,
mimetype2ext,
- find_xpath_attr,
+ parse_qs,
traverse_obj,
+ unsmuggle_url,
update_url,
+ update_url_query,
urlhandle_detect_ext,
+ xpath_with_ns,
)
-from ..networking import HEADRequest
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
diff --git a/yt_dlp/extractor/thisoldhouse.py b/yt_dlp/extractor/thisoldhouse.py
index 15f8380d36..fbc12d55d9 100644
--- a/yt_dlp/extractor/thisoldhouse.py
+++ b/yt_dlp/extractor/thisoldhouse.py
@@ -1,5 +1,6 @@
import json
+from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from .zype import ZypeIE
from ..networking import HEADRequest
@@ -8,6 +9,7 @@ from ..utils import (
ExtractorError,
filter_dict,
parse_qs,
+ smuggle_url,
try_call,
urlencode_postdata,
)
@@ -17,23 +19,43 @@ class ThisOldHouseIE(InfoExtractor):
_NETRC_MACHINE = 'thisoldhouse'
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P[^/?#]+)'
_TESTS = [{
+ # Unresolved Brightcove URL embed (formerly Zype), free
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
'info_dict': {
- 'id': '5dcdddf673c3f956ef5db202',
+ 'id': '6325298523112',
'ext': 'mp4',
'title': 'How to Build a Storage Bench',
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
- 'timestamp': 1442548800,
- 'upload_date': '20150918',
- 'duration': 674,
- 'view_count': int,
- 'average_rating': 0,
- 'thumbnail': r're:^https?://.*\.jpg\?\d+$',
- 'display_id': 'how-to-build-a-storage-bench',
+ 'timestamp': 1681793639,
+ 'upload_date': '20230418',
+ 'duration': 674.54,
+ 'tags': 'count:11',
+ 'uploader_id': '6314471934001',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
'skip_download': True,
},
+ }, {
+ # Brightcove embed, authwalled
+ 'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
+ 'info_dict': {
+ 'id': '6349675446112',
+ 'ext': 'mp4',
+ 'title': 'E17 | Glen Ridge Generational | Multi-Generational',
+ 'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
+ 'timestamp': 1711382202,
+ 'upload_date': '20240325',
+ 'duration': 1422.229,
+ 'tags': 'count:13',
+ 'uploader_id': '6314471934001',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'expected_warnings': ['Login with password is not supported for this website'],
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires subscription',
}, {
# Page no longer has video
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
@@ -98,7 +120,15 @@ class ThisOldHouseIE(InfoExtractor):
video_url, video_id = self._search_regex(
r'