mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-18 23:03:05 +01:00
Update to ytdl-commit-cf2dbec
cf2dbec630
Except: [kakao] improve info extraction and detect geo restriction (d8085580f6)
This commit is contained in:
parent
5e41dca334
commit
bc2ca1bb75
19 changed files with 1013 additions and 395 deletions
|
@ -12,6 +12,7 @@ from test.helper import FakeYDL
|
||||||
|
|
||||||
from youtube_dlc.extractor import (
|
from youtube_dlc.extractor import (
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
|
YoutubeTabIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -57,14 +58,22 @@ class TestYoutubeLists(unittest.TestCase):
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(len(entries), 100)
|
self.assertEqual(len(entries), 100)
|
||||||
|
|
||||||
def test_youtube_flat_playlist_titles(self):
|
def test_youtube_flat_playlist_extraction(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
dl.params['extract_flat'] = True
|
dl.params['extract_flat'] = True
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubeTabIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
|
result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
for entry in result['entries']:
|
entries = list(result['entries'])
|
||||||
self.assertTrue(entry.get('title'))
|
self.assertTrue(len(entries) == 1)
|
||||||
|
video = entries[0]
|
||||||
|
self.assertEqual(video['_type'], 'url_transparent')
|
||||||
|
self.assertEqual(video['ie_key'], 'Youtube')
|
||||||
|
self.assertEqual(video['id'], 'BaW_jenozKc')
|
||||||
|
self.assertEqual(video['url'], 'BaW_jenozKc')
|
||||||
|
self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
|
||||||
|
self.assertEqual(video['duration'], 10)
|
||||||
|
self.assertEqual(video['uploader'], 'Philipp Hagemeister')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -324,20 +324,42 @@ class ARDIE(InfoExtractor):
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for a in video_node.findall('.//asset'):
|
for a in video_node.findall('.//asset'):
|
||||||
|
file_name = xpath_text(a, './fileName', default=None)
|
||||||
|
if not file_name:
|
||||||
|
continue
|
||||||
|
format_type = a.attrib.get('type')
|
||||||
|
format_url = url_or_none(file_name)
|
||||||
|
if format_url:
|
||||||
|
ext = determine_ext(file_name)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_type or 'hls', fatal=False))
|
||||||
|
continue
|
||||||
|
elif ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
update_url_query(format_url, {'hdcore': '3.7.0'}),
|
||||||
|
display_id, f4m_id=format_type or 'hds', fatal=False))
|
||||||
|
continue
|
||||||
f = {
|
f = {
|
||||||
'format_id': a.attrib['type'],
|
'format_id': format_type,
|
||||||
'width': int_or_none(a.find('./frameWidth').text),
|
'width': int_or_none(xpath_text(a, './frameWidth')),
|
||||||
'height': int_or_none(a.find('./frameHeight').text),
|
'height': int_or_none(xpath_text(a, './frameHeight')),
|
||||||
'vbr': int_or_none(a.find('./bitrateVideo').text),
|
'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
|
||||||
'abr': int_or_none(a.find('./bitrateAudio').text),
|
'abr': int_or_none(xpath_text(a, './bitrateAudio')),
|
||||||
'vcodec': a.find('./codecVideo').text,
|
'vcodec': xpath_text(a, './codecVideo'),
|
||||||
'tbr': int_or_none(a.find('./totalBitrate').text),
|
'tbr': int_or_none(xpath_text(a, './totalBitrate')),
|
||||||
}
|
}
|
||||||
if a.find('./serverPrefix').text:
|
server_prefix = xpath_text(a, './serverPrefix', default=None)
|
||||||
f['url'] = a.find('./serverPrefix').text
|
if server_prefix:
|
||||||
f['playpath'] = a.find('./fileName').text
|
f.update({
|
||||||
|
'url': server_prefix,
|
||||||
|
'playpath': file_name,
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
f['url'] = a.find('./fileName').text
|
if not format_url:
|
||||||
|
continue
|
||||||
|
f['url'] = format_url
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|
|
@ -7,19 +7,21 @@ from .common import InfoExtractor
|
||||||
from .gigya import GigyaBaseIE
|
from .gigya import GigyaBaseIE
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
extract_attributes,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
strip_or_none,
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
strip_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CanvasIE(InfoExtractor):
|
class CanvasIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||||
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||||
|
@ -332,3 +334,51 @@ class VrtNUIE(GigyaBaseIE):
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'season_number': int_or_none(page.get('episode_season')),
|
'season_number': int_or_none(page.get('episode_season')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
class DagelijkseKostIE(InfoExtractor):
|
||||||
|
IE_DESC = 'dagelijksekost.een.be'
|
||||||
|
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
|
||||||
|
'md5': '30bfffc323009a3e5f689bef6efa2365',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
|
||||||
|
'display_id': 'hachis-parmentier-met-witloof',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Hachis parmentier met witloof',
|
||||||
|
'description': 'md5:9960478392d87f63567b5b117688cdc5',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 283.02,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['is not a supported codec'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = strip_or_none(get_element_by_class(
|
||||||
|
'dish-metadata__title', webpage
|
||||||
|
) or self._html_search_meta(
|
||||||
|
'twitter:title', webpage))
|
||||||
|
|
||||||
|
description = clean_html(get_element_by_class(
|
||||||
|
'dish-description', webpage)
|
||||||
|
) or self._html_search_meta(
|
||||||
|
('description', 'twitter:description', 'og:description'),
|
||||||
|
webpage)
|
||||||
|
|
||||||
|
video_id = self._html_search_regex(
|
||||||
|
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||||
|
group='id')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
|
||||||
|
'ie_key': CanvasIE.ie_key(),
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
}
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import calendar
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
extract_timezone,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
|
@ -97,8 +99,9 @@ class CCMAIE(InfoExtractor):
|
||||||
timestamp = None
|
timestamp = None
|
||||||
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
|
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
|
||||||
try:
|
try:
|
||||||
timestamp = datetime.datetime.strptime(
|
timezone, data_utc = extract_timezone(data_utc)
|
||||||
data_utc, '%Y-%d-%mT%H:%M:%S%z').timestamp()
|
timestamp = calendar.timegm((datetime.datetime.strptime(
|
||||||
|
data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
|
||||||
except TypeError:
|
except TypeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -10,11 +11,13 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DPlayIE(InfoExtractor):
|
class DPlayIE(InfoExtractor):
|
||||||
|
_PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?P<domain>
|
(?P<domain>
|
||||||
(?:www\.)?(?P<host>d
|
(?:www\.)?(?P<host>d
|
||||||
|
@ -24,7 +27,7 @@ class DPlayIE(InfoExtractor):
|
||||||
)
|
)
|
||||||
)|
|
)|
|
||||||
(?P<subdomain_country>es|it)\.dplay\.com
|
(?P<subdomain_country>es|it)\.dplay\.com
|
||||||
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
)/[^/]+''' + _PATH_REGEX
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# non geo restricted, via secure api, unsigned download hls URL
|
# non geo restricted, via secure api, unsigned download hls URL
|
||||||
|
@ -151,56 +154,79 @@ class DPlayIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _process_errors(self, e, geo_countries):
|
||||||
|
info = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||||
|
error = info['errors'][0]
|
||||||
|
error_code = error.get('code')
|
||||||
|
if error_code == 'access.denied.geoblocked':
|
||||||
|
self.raise_geo_restricted(countries=geo_countries)
|
||||||
|
elif error_code in ('access.denied.missingpackage', 'invalid.token'):
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video is only available for registered users. You may want to use --cookies.', expected=True)
|
||||||
|
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||||
|
|
||||||
|
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||||
|
headers['Authorization'] = 'Bearer ' + self._download_json(
|
||||||
|
disco_base + 'token', display_id, 'Downloading token',
|
||||||
|
query={
|
||||||
|
'realm': realm,
|
||||||
|
})['data']['attributes']['token']
|
||||||
|
|
||||||
|
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||||
|
streaming = self._download_json(
|
||||||
|
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||||
|
video_id, headers=headers)['data']['attributes']['streaming']
|
||||||
|
streaming_list = []
|
||||||
|
for format_id, format_dict in streaming.items():
|
||||||
|
streaming_list.append({
|
||||||
|
'type': format_id,
|
||||||
|
'url': format_dict.get('url'),
|
||||||
|
})
|
||||||
|
return streaming_list
|
||||||
|
|
||||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||||
geo_countries = [country.upper()]
|
geo_countries = [country.upper()]
|
||||||
self._initialize_geo_bypass({
|
self._initialize_geo_bypass({
|
||||||
'countries': geo_countries,
|
'countries': geo_countries,
|
||||||
})
|
})
|
||||||
disco_base = 'https://%s/' % disco_host
|
disco_base = 'https://%s/' % disco_host
|
||||||
token = self._download_json(
|
|
||||||
disco_base + 'token', display_id, 'Downloading token',
|
|
||||||
query={
|
|
||||||
'realm': realm,
|
|
||||||
})['data']['attributes']['token']
|
|
||||||
headers = {
|
headers = {
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
'Authorization': 'Bearer ' + token,
|
|
||||||
}
|
}
|
||||||
video = self._download_json(
|
self._update_disco_api_headers(headers, disco_base, display_id, realm)
|
||||||
disco_base + 'content/videos/' + display_id, display_id,
|
try:
|
||||||
headers=headers, query={
|
video = self._download_json(
|
||||||
'fields[channel]': 'name',
|
disco_base + 'content/videos/' + display_id, display_id,
|
||||||
'fields[image]': 'height,src,width',
|
headers=headers, query={
|
||||||
'fields[show]': 'name',
|
'fields[channel]': 'name',
|
||||||
'fields[tag]': 'name',
|
'fields[image]': 'height,src,width',
|
||||||
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
'fields[show]': 'name',
|
||||||
'include': 'images,primaryChannel,show,tags'
|
'fields[tag]': 'name',
|
||||||
})
|
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||||
|
'include': 'images,primaryChannel,show,tags'
|
||||||
|
})
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||||
|
self._process_errors(e, geo_countries)
|
||||||
|
raise
|
||||||
video_id = video['data']['id']
|
video_id = video['data']['id']
|
||||||
info = video['data']['attributes']
|
info = video['data']['attributes']
|
||||||
title = info['name'].strip()
|
title = info['name'].strip()
|
||||||
formats = []
|
formats = []
|
||||||
try:
|
try:
|
||||||
streaming = self._download_json(
|
streaming = self._download_video_playback_info(
|
||||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
disco_base, video_id, headers)
|
||||||
display_id, headers=headers)['data']['attributes']['streaming']
|
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
self._process_errors(e, geo_countries)
|
||||||
error = info['errors'][0]
|
|
||||||
error_code = error.get('code')
|
|
||||||
if error_code == 'access.denied.geoblocked':
|
|
||||||
self.raise_geo_restricted(countries=geo_countries)
|
|
||||||
elif error_code == 'access.denied.missingpackage':
|
|
||||||
self.raise_login_required()
|
|
||||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
|
||||||
raise
|
raise
|
||||||
for format_id, format_dict in streaming.items():
|
for format_dict in streaming:
|
||||||
if not isinstance(format_dict, dict):
|
if not isinstance(format_dict, dict):
|
||||||
continue
|
continue
|
||||||
format_url = format_dict.get('url')
|
format_url = format_dict.get('url')
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
|
format_id = format_dict.get('type')
|
||||||
ext = determine_ext(format_url)
|
ext = determine_ext(format_url)
|
||||||
if format_id == 'dash' or ext == 'mpd':
|
if format_id == 'dash' or ext == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
@ -248,7 +274,7 @@ class DPlayIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': info.get('description'),
|
'description': strip_or_none(info.get('description')),
|
||||||
'duration': float_or_none(info.get('videoDuration'), 1000),
|
'duration': float_or_none(info.get('videoDuration'), 1000),
|
||||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||||
'series': series,
|
'series': series,
|
||||||
|
@ -268,3 +294,75 @@ class DPlayIE(InfoExtractor):
|
||||||
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
|
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
|
||||||
return self._get_disco_api_info(
|
return self._get_disco_api_info(
|
||||||
url, display_id, host, 'dplay' + country, country)
|
url, display_id, host, 'dplay' + country, country)
|
||||||
|
|
||||||
|
|
||||||
|
class DiscoveryPlusIE(DPlayIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1140794',
|
||||||
|
'display_id': 'property-brothers-forever-home/food-and-family',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Food and Family',
|
||||||
|
'description': 'The brothers help a Richmond family expand their single-level home.',
|
||||||
|
'duration': 2583.113,
|
||||||
|
'timestamp': 1609304400,
|
||||||
|
'upload_date': '20201230',
|
||||||
|
'creator': 'HGTV',
|
||||||
|
'series': 'Property Brothers: Forever Home',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode_number': 1,
|
||||||
|
},
|
||||||
|
'skip': 'Available for Premium users',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||||
|
headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
|
||||||
|
|
||||||
|
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||||
|
return self._download_json(
|
||||||
|
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||||
|
video_id, headers=headers, data=json.dumps({
|
||||||
|
'deviceInfo': {
|
||||||
|
'adBlocker': False,
|
||||||
|
},
|
||||||
|
'videoId': video_id,
|
||||||
|
'wisteriaProperties': {
|
||||||
|
'platform': 'desktop',
|
||||||
|
},
|
||||||
|
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
return self._get_disco_api_info(
|
||||||
|
url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
|
||||||
|
|
||||||
|
|
||||||
|
class HGTVDeIE(DPlayIE):
|
||||||
|
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '151205',
|
||||||
|
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Wer braucht schon eine Toilette',
|
||||||
|
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||||
|
'duration': 1177.024,
|
||||||
|
'timestamp': 1595705400,
|
||||||
|
'upload_date': '20200725',
|
||||||
|
'creator': 'HGTV',
|
||||||
|
'series': 'Tiny House - klein, aber oho',
|
||||||
|
'season_number': 3,
|
||||||
|
'episode_number': 3,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
return self._get_disco_api_info(
|
||||||
|
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||||
|
|
youtube_dlc/extractor/dreisat.py (new file, 193 lines)
|
@ -0,0 +1,193 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
xpath_text,
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DreiSatIE(InfoExtractor):
|
||||||
|
IE_NAME = '3sat'
|
||||||
|
_GEO_COUNTRIES = ['DE']
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
||||||
|
'md5': 'be37228896d30a88f315b638900a026e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '45918',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Waidmannsheil',
|
||||||
|
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||||
|
'uploader': 'SCHWEIZWEIT',
|
||||||
|
'uploader_id': '100000210',
|
||||||
|
'upload_date': '20140913'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # m3u8 downloads
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||||
|
param_groups = {}
|
||||||
|
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
|
||||||
|
group_id = param_group.get(self._xpath_ns(
|
||||||
|
'id', 'http://www.w3.org/XML/1998/namespace'))
|
||||||
|
params = {}
|
||||||
|
for param in param_group:
|
||||||
|
params[param.get('name')] = param.get('value')
|
||||||
|
param_groups[group_id] = params
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for video in smil.findall(self._xpath_ns('.//video', namespace)):
|
||||||
|
src = video.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||||
|
group_id = video.get('paramGroup')
|
||||||
|
param_group = param_groups[group_id]
|
||||||
|
for proto in param_group['protocols'].split(','):
|
||||||
|
formats.append({
|
||||||
|
'url': '%s://%s' % (proto, param_group['host']),
|
||||||
|
'app': param_group['app'],
|
||||||
|
'play_path': src,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': '%s-%d' % (proto, bitrate),
|
||||||
|
'tbr': bitrate,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def extract_from_xml_url(self, video_id, xml_url):
|
||||||
|
doc = self._download_xml(
|
||||||
|
xml_url, video_id,
|
||||||
|
note='Downloading video info',
|
||||||
|
errnote='Failed to download video info')
|
||||||
|
|
||||||
|
status_code = xpath_text(doc, './status/statuscode')
|
||||||
|
if status_code and status_code != 'ok':
|
||||||
|
if status_code == 'notVisibleAnymore':
|
||||||
|
message = 'Video %s is not available' % video_id
|
||||||
|
else:
|
||||||
|
message = '%s returned error: %s' % (self.IE_NAME, status_code)
|
||||||
|
raise ExtractorError(message, expected=True)
|
||||||
|
|
||||||
|
title = xpath_text(doc, './/information/title', 'title', True)
|
||||||
|
|
||||||
|
urls = []
|
||||||
|
formats = []
|
||||||
|
for fnode in doc.findall('.//formitaeten/formitaet'):
|
||||||
|
video_url = xpath_text(fnode, 'url')
|
||||||
|
if not video_url or video_url in urls:
|
||||||
|
continue
|
||||||
|
urls.append(video_url)
|
||||||
|
|
||||||
|
is_available = 'http://www.metafilegenerator' not in video_url
|
||||||
|
geoloced = 'static_geoloced_online' in video_url
|
||||||
|
if not is_available or geoloced:
|
||||||
|
continue
|
||||||
|
|
||||||
|
format_id = fnode.attrib['basetype']
|
||||||
|
format_m = re.match(r'''(?x)
|
||||||
|
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
|
||||||
|
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
|
||||||
|
''', format_id)
|
||||||
|
|
||||||
|
ext = determine_ext(video_url, None) or format_m.group('container')
|
||||||
|
|
||||||
|
if ext == 'meta':
|
||||||
|
continue
|
||||||
|
elif ext == 'smil':
|
||||||
|
formats.extend(self._extract_smil_formats(
|
||||||
|
video_url, video_id, fatal=False))
|
||||||
|
elif ext == 'm3u8':
|
||||||
|
# the certificates are misconfigured (see
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/8665)
|
||||||
|
if video_url.startswith('https://'):
|
||||||
|
continue
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
|
elif ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
video_url, video_id, f4m_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
quality = xpath_text(fnode, './quality')
|
||||||
|
if quality:
|
||||||
|
format_id += '-' + quality
|
||||||
|
|
||||||
|
abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
|
||||||
|
vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
|
||||||
|
|
||||||
|
tbr = int_or_none(self._search_regex(
|
||||||
|
r'_(\d+)k', video_url, 'bitrate', None))
|
||||||
|
if tbr and vbr and not abr:
|
||||||
|
abr = tbr - vbr
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': ext,
|
||||||
|
'acodec': format_m.group('acodec'),
|
||||||
|
'vcodec': format_m.group('vcodec'),
|
||||||
|
'abr': abr,
|
||||||
|
'vbr': vbr,
|
||||||
|
'tbr': tbr,
|
||||||
|
'width': int_or_none(xpath_text(fnode, './width')),
|
||||||
|
'height': int_or_none(xpath_text(fnode, './height')),
|
||||||
|
'filesize': int_or_none(xpath_text(fnode, './filesize')),
|
||||||
|
'protocol': format_m.group('proto').lower(),
|
||||||
|
})
|
||||||
|
|
||||||
|
geolocation = xpath_text(doc, './/details/geolocation')
|
||||||
|
if not formats and geolocation and geolocation != 'none':
|
||||||
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for node in doc.findall('.//teaserimages/teaserimage'):
|
||||||
|
thumbnail_url = node.text
|
||||||
|
if not thumbnail_url:
|
||||||
|
continue
|
||||||
|
thumbnail = {
|
||||||
|
'url': thumbnail_url,
|
||||||
|
}
|
||||||
|
thumbnail_key = node.get('key')
|
||||||
|
if thumbnail_key:
|
||||||
|
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||||
|
if m:
|
||||||
|
thumbnail['width'] = int(m.group(1))
|
||||||
|
thumbnail['height'] = int(m.group(2))
|
||||||
|
thumbnails.append(thumbnail)
|
||||||
|
|
||||||
|
upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': xpath_text(doc, './/information/detail'),
|
||||||
|
'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'uploader': xpath_text(doc, './/details/originChannelTitle'),
|
||||||
|
'uploader_id': xpath_text(doc, './/details/originChannelId'),
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
|
||||||
|
return self.extract_from_xml_url(video_id, details_url)
|
|
@ -182,6 +182,7 @@ from .canvas import (
|
||||||
CanvasIE,
|
CanvasIE,
|
||||||
CanvasEenIE,
|
CanvasEenIE,
|
||||||
VrtNUIE,
|
VrtNUIE,
|
||||||
|
DagelijkseKostIE,
|
||||||
)
|
)
|
||||||
from .carambatv import (
|
from .carambatv import (
|
||||||
CarambaTVIE,
|
CarambaTVIE,
|
||||||
|
@ -309,7 +310,12 @@ from .douyutv import (
|
||||||
DouyuShowIE,
|
DouyuShowIE,
|
||||||
DouyuTVIE,
|
DouyuTVIE,
|
||||||
)
|
)
|
||||||
from .dplay import DPlayIE
|
from .dplay import (
|
||||||
|
DPlayIE,
|
||||||
|
DiscoveryPlusIE,
|
||||||
|
HGTVDeIE,
|
||||||
|
)
|
||||||
|
from .dreisat import DreiSatIE
|
||||||
from .drbonanza import DRBonanzaIE
|
from .drbonanza import DRBonanzaIE
|
||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
from .drtv import (
|
from .drtv import (
|
||||||
|
@ -1107,6 +1113,11 @@ from .shared import (
|
||||||
VivoIE,
|
VivoIE,
|
||||||
)
|
)
|
||||||
from .showroomlive import ShowRoomLiveIE
|
from .showroomlive import ShowRoomLiveIE
|
||||||
|
from .simplecast import (
|
||||||
|
SimplecastIE,
|
||||||
|
SimplecastEpisodeIE,
|
||||||
|
SimplecastPodcastIE,
|
||||||
|
)
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .sixplay import SixPlayIE
|
from .sixplay import SixPlayIE
|
||||||
from .skyit import (
|
from .skyit import (
|
||||||
|
@ -1165,11 +1176,6 @@ from .spike import (
|
||||||
BellatorIE,
|
BellatorIE,
|
||||||
ParamountNetworkIE,
|
ParamountNetworkIE,
|
||||||
)
|
)
|
||||||
from .storyfire import (
|
|
||||||
StoryFireIE,
|
|
||||||
StoryFireUserIE,
|
|
||||||
StoryFireSeriesIE,
|
|
||||||
)
|
|
||||||
from .stitcher import StitcherIE
|
from .stitcher import StitcherIE
|
||||||
from .sport5 import Sport5IE
|
from .sport5 import Sport5IE
|
||||||
from .sportbox import SportBoxIE
|
from .sportbox import SportBoxIE
|
||||||
|
@ -1193,6 +1199,11 @@ from .srgssr import (
|
||||||
from .srmediathek import SRMediathekIE
|
from .srmediathek import SRMediathekIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
|
from .storyfire import (
|
||||||
|
StoryFireIE,
|
||||||
|
StoryFireUserIE,
|
||||||
|
StoryFireSeriesIE,
|
||||||
|
)
|
||||||
from .streamable import StreamableIE
|
from .streamable import StreamableIE
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
from .streamcz import StreamCZIE
|
from .streamcz import StreamCZIE
|
||||||
|
@ -1652,6 +1663,7 @@ from .zattoo import (
|
||||||
ZattooLiveIE,
|
ZattooLiveIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE, ZDFChannelIE
|
from .zdf import ZDFIE, ZDFChannelIE
|
||||||
|
from .zhihu import ZhihuIE
|
||||||
from .zingmp3 import ZingMp3IE
|
from .zingmp3 import ZingMp3IE
|
||||||
from .zoom import ZoomIE
|
from .zoom import ZoomIE
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
|
|
|
@ -133,6 +133,7 @@ from .bitchute import BitChuteIE
|
||||||
from .rumble import RumbleEmbedIE
|
from .rumble import RumbleEmbedIE
|
||||||
from .arcpublishing import ArcPublishingIE
|
from .arcpublishing import ArcPublishingIE
|
||||||
from .medialaan import MedialaanIE
|
from .medialaan import MedialaanIE
|
||||||
|
from .simplecast import SimplecastIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
|
@ -2240,6 +2241,15 @@ class GenericIE(InfoExtractor):
|
||||||
'duration': 159,
|
'duration': 159,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Simplecast player embed
|
||||||
|
'url': 'https://www.bio.org/podcast',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'podcast',
|
||||||
|
'title': 'I AM BIO Podcast | BIO',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 52,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
|
@ -2794,6 +2804,12 @@ class GenericIE(InfoExtractor):
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
|
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
|
||||||
|
|
||||||
|
# Look for Simplecast embeds
|
||||||
|
simplecast_urls = SimplecastIE._extract_urls(webpage)
|
||||||
|
if simplecast_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
simplecast_urls, video_id, video_title)
|
||||||
|
|
||||||
# Look for BBC iPlayer embed
|
# Look for BBC iPlayer embed
|
||||||
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
|
|
|
@ -2,10 +2,11 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
|
unescapeHTML,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -14,7 +15,7 @@ class NineGagIE(InfoExtractor):
|
||||||
IE_NAME = '9gag'
|
IE_NAME = '9gag'
|
||||||
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://9gag.com/gag/ae5Ag7B',
|
'url': 'https://9gag.com/gag/ae5Ag7B',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ae5Ag7B',
|
'id': 'ae5Ag7B',
|
||||||
|
@ -29,7 +30,11 @@ class NineGagIE(InfoExtractor):
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# HTML escaped title
|
||||||
|
'url': 'https://9gag.com/gag/av5nvyb',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
post_id = self._match_id(url)
|
post_id = self._match_id(url)
|
||||||
|
@ -43,7 +48,7 @@ class NineGagIE(InfoExtractor):
|
||||||
'The given url does not contain a video',
|
'The given url does not contain a video',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
title = post['title']
|
title = unescapeHTML(post['title'])
|
||||||
|
|
||||||
duration = None
|
duration = None
|
||||||
formats = []
|
formats = []
|
||||||
|
|
160
youtube_dlc/extractor/simplecast.py
Normal file
160
youtube_dlc/extractor/simplecast.py
Normal file
|
@ -0,0 +1,160 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_podcast_url,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SimplecastBaseIE(InfoExtractor):
    """Shared API helpers and episode parsing for the Simplecast extractors."""

    # Lower-case hexadecimal UUID (8-4-4-4-12), used for episode/season ids.
    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _API_BASE = 'https://api.simplecast.com/'

    def _call_api(self, path_tmpl, video_id):
        """Download JSON from the API path made by filling ``path_tmpl``.

        ``path_tmpl`` is a %-style template with a single placeholder that
        is filled with ``video_id`` and appended to ``_API_BASE``.
        """
        return self._download_json(
            self._API_BASE + path_tmpl % video_id, video_id)

    def _call_search_api(self, resource, resource_id, resource_url):
        """Query the ``<resource>s/search`` endpoint for ``resource_url``.

        ``resource_url`` is sent as form-encoded request data under the
        ``url`` key; ``resource_id`` is only passed through to
        ``_download_json`` as the identifier of the download.
        """
        # Build the endpoint from _API_BASE so that both API helpers share
        # a single definition of the API host.
        return self._download_json(
            self._API_BASE + '%ss/search' % resource, resource_id,
            data=urlencode_postdata({'url': resource_url}))

    def _parse_episode(self, episode):
        """Convert an episode JSON object into an info dict.

        ``episode['id']`` and ``episode['title']`` are mandatory. The audio
        URL is taken from ``audio_file.url``, then ``audio_file_url``, and
        finally from the mandatory ``enclosure_url`` key. All remaining
        fields are optional and end up as ``None`` when absent.
        """
        episode_id = episode['id']
        title = episode['title'].strip()
        audio_file = episode.get('audio_file') or {}
        audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']

        # The season id is not a field of its own; it is the UUID embedded
        # in the season's API href.
        season = episode.get('season') or {}
        season_href = season.get('href')
        season_id = None
        if season_href:
            season_id = self._search_regex(
                r'https?://api\.simplecast\.com/seasons/(%s)' % self._UUID_REGEX,
                season_href, 'season id', default=None)

        # The channel URL is the scheme + host part of the episode page URL
        # (a *.simplecast.com subdomain).
        webpage_url = episode.get('episode_url')
        channel_url = None
        if webpage_url:
            channel_url = self._search_regex(
                r'(https?://[^/]+\.simplecast\.com)',
                webpage_url, 'channel url', default=None)

        return {
            'id': episode_id,
            'display_id': episode.get('slug'),
            'title': title,
            'url': clean_podcast_url(audio_file_url),
            'webpage_url': webpage_url,
            'channel_url': channel_url,
            'series': try_get(episode, lambda x: x['podcast']['title']),
            'season_number': int_or_none(season.get('number')),
            'season_id': season_id,
            'thumbnail': episode.get('image_url'),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode.get('number')),
            'description': strip_or_none(episode.get('description')),
            'timestamp': parse_iso8601(episode.get('published_at')),
            'duration': int_or_none(episode.get('duration')),
            'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class SimplecastIE(SimplecastBaseIE):
    """Extractor for a single episode addressed by its UUID.

    Handles ``api.simplecast.com/episodes/<uuid>`` and
    ``player.simplecast.com/<uuid>`` URLs.
    """

    IE_NAME = 'simplecast'
    _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
    # Expected metadata of the reference episode; referenced by the tests
    # below through the ``info_dict`` key.
    _COMMON_TEST_INFO = {
        'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
        'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'ext': 'mp3',
        'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
        'episode_number': 1,
        'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
        'season_number': 1,
        'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
        'series': 'The RE:BIND.io Podcast',
        'duration': 5343,
        'timestamp': 1580979475,
        'upload_date': '20200206',
        'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
    }
    _TESTS = [{
        'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': _COMMON_TEST_INFO,
    }, {
        'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the Simplecast player/embed iframe URLs found in ``webpage``."""
        # Verbose-mode pattern: the whitespace inside the literal is ignored.
        iframe_src_re = r'''(?x)<iframe[^>]+src=["\']
            (
                https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
                player\.simplecast\.com/%s
            ))''' % SimplecastBaseIE._UUID_REGEX
        return re.findall(iframe_src_re, webpage)

    def _real_extract(self, url):
        """Fetch the episode JSON for the UUID in ``url`` and parse it."""
        uuid = self._match_id(url)
        return self._parse_episode(self._call_api('episodes/%s', uuid))
|
||||||
|
|
||||||
|
|
||||||
|
class SimplecastEpisodeIE(SimplecastBaseIE):
    """Extractor for episode pages on a podcast's ``*.simplecast.com`` site."""

    IE_NAME = 'simplecast:episode'
    _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': SimplecastIE._COMMON_TEST_INFO,
    }

    def _real_extract(self, url):
        """Resolve the episode page URL via the search API and parse the result."""
        match = re.match(self._VALID_URL, url)
        # Only the part of the URL covered by _VALID_URL is sent to the
        # search API; the slug (the sole capture group) identifies the download.
        matched_url = match.group(0)
        slug = match.group('id')
        return self._parse_episode(
            self._call_search_api('episode', slug, matched_url))
|
||||||
|
|
||||||
|
|
||||||
|
class SimplecastPodcastIE(SimplecastBaseIE):
    """Playlist extractor for a whole podcast hosted on ``<name>.simplecast.com``."""

    IE_NAME = 'simplecast:podcast'
    _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
    _TESTS = [{
        'url': 'https://the-re-bind-io-podcast.simplecast.com',
        'playlist_mincount': 33,
        'info_dict': {
            'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
            'title': 'The RE:BIND.io Podcast',
        },
    }, {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
        'only_matching': True,
    }]

    def _episode_entries(self, podcast_id, podcast_title):
        """Lazily yield one info dict per episode of the podcast.

        The episode list is downloaded only once iteration starts.
        """
        listing = self._call_api('podcasts/%s/episodes', podcast_id)
        for item in (listing.get('collection') or []):
            # Override the series with the podcast title from the site lookup.
            yield dict(self._parse_episode(item), series=podcast_title)

    def _real_extract(self, url):
        """Look up the podcast behind the site URL and return it as a playlist."""
        site_name = self._match_id(url)
        podcast = self._call_search_api('site', site_name, url)['podcast']
        podcast_id = podcast['id']
        podcast_title = podcast.get('title')
        return self.playlist_result(
            self._episode_entries(podcast_id, podcast_title),
            podcast_id, podcast_title)
|
|
@ -1,255 +1,151 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import functools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
# HEADRequest,
|
||||||
|
int_or_none,
|
||||||
|
OnDemandPagedList,
|
||||||
|
smuggle_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class StoryFireIE(InfoExtractor):
|
class StoryFireBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:(?:https?://(?:www\.)?storyfire\.com/video-details)|(?:https://storyfire.app.link))/(?P<id>[^/\s]+)'
|
_VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/'
|
||||||
_TESTS = [{
|
|
||||||
|
def _call_api(self, path, video_id, resource, query=None):
|
||||||
|
return self._download_json(
|
||||||
|
'https://storyfire.com/app/%s/%s' % (path, video_id), video_id,
|
||||||
|
'Downloading %s JSON metadata' % resource, query=query)
|
||||||
|
|
||||||
|
def _parse_video(self, video):
|
||||||
|
title = video['title']
|
||||||
|
vimeo_id = self._search_regex(
|
||||||
|
r'https?://player\.vimeo\.com/external/(\d+)',
|
||||||
|
video['vimeoVideoURL'], 'vimeo id')
|
||||||
|
|
||||||
|
# video_url = self._request_webpage(
|
||||||
|
# HEADRequest(video['vimeoVideoURL']), video_id).geturl()
|
||||||
|
# formats = []
|
||||||
|
# for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]:
|
||||||
|
# formats.extend(self._extract_m3u8_formats(
|
||||||
|
# v_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
# m3u8_id='hls' + suffix, fatal=False))
|
||||||
|
# formats.extend(self._extract_mpd_formats(
|
||||||
|
# v_url.replace('.m3u8', '.mpd'), video_id,
|
||||||
|
# mpd_id='dash' + suffix, fatal=False))
|
||||||
|
# self._sort_formats(formats)
|
||||||
|
|
||||||
|
uploader_id = video.get('hostID')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'id': vimeo_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video.get('description'),
|
||||||
|
'url': smuggle_url(
|
||||||
|
'https://player.vimeo.com/video/' + vimeo_id, {
|
||||||
|
'http_headers': {
|
||||||
|
'Referer': 'https://storyfire.com/',
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
# 'formats': formats,
|
||||||
|
'thumbnail': video.get('storyImage'),
|
||||||
|
'view_count': int_or_none(video.get('views')),
|
||||||
|
'like_count': int_or_none(video.get('likesCount')),
|
||||||
|
'comment_count': int_or_none(video.get('commentsCount')),
|
||||||
|
'duration': int_or_none(video.get('videoDuration')),
|
||||||
|
'timestamp': int_or_none(video.get('publishDate')),
|
||||||
|
'uploader': video.get('username'),
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'uploader_url': 'https://storyfire.com/user/%s/video' % uploader_id if uploader_id else None,
|
||||||
|
'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class StoryFireIE(StoryFireBaseIE):
|
||||||
|
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})'
|
||||||
|
_TEST = {
|
||||||
'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
|
'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
|
||||||
'md5': '560953bfca81a69003cfa5e53ac8a920',
|
'md5': 'caec54b9e4621186d6079c7ec100c1eb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5df1d132b6378700117f9181',
|
'id': '378954662',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Buzzfeed Teaches You About Memes',
|
'title': 'Buzzfeed Teaches You About Memes',
|
||||||
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
||||||
'timestamp': 1576129028,
|
'timestamp': 1576129028,
|
||||||
'description': 'Mocking Buzzfeed\'s meme lesson. Reuploaded from YouTube because of their new policies',
|
'description': 'md5:0b4e28021548e144bed69bb7539e62ea',
|
||||||
'uploader': 'whang!',
|
'uploader': 'whang!',
|
||||||
'upload_date': '20191212',
|
'upload_date': '20191212',
|
||||||
|
'duration': 418,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
'params': {'format': 'bestvideo'} # There are no merged formats in the playlist.
|
'params': {
|
||||||
}, {
|
'skip_download': True,
|
||||||
'url': 'https://storyfire.app.link/5GxAvWOQr8', # Alternate URL format, with unrelated short ID
|
|
||||||
'md5': '7a2dc6d60c4889edfed459c620fe690d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5f1e11ecd78a57b6c702001d',
|
|
||||||
'ext': 'm4a',
|
|
||||||
'title': 'Weird Nintendo Prototype Leaks',
|
|
||||||
'description': 'A stream taking a look at some weird Nintendo Prototypes with Luigi in Mario 64 and weird Yoshis',
|
|
||||||
'timestamp': 1595808576,
|
|
||||||
'upload_date': '20200727',
|
|
||||||
'uploader': 'whang!',
|
|
||||||
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
|
||||||
},
|
},
|
||||||
'params': {'format': 'bestaudio'} # Verifying audio extraction
|
'expected_warnings': ['Unable to download JSON metadata']
|
||||||
|
|
||||||
}]
|
|
||||||
|
|
||||||
_aformats = {
|
|
||||||
'audio-medium-audio': {'acodec': 'aac', 'abr': 125, 'preference': -10},
|
|
||||||
'audio-high-audio': {'acodec': 'aac', 'abr': 254, 'preference': -1},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
video = self._call_api(
|
||||||
|
'generic/video-detail', video_id, 'video')['video']
|
||||||
# Extracting the json blob is mandatory to proceed with extraction.
|
return self._parse_video(video)
|
||||||
jsontext = self._html_search_regex(
|
|
||||||
r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>',
|
|
||||||
webpage, 'json_data')
|
|
||||||
|
|
||||||
json = self._parse_json(jsontext, video_id)
|
|
||||||
|
|
||||||
# The currentVideo field in the json is mandatory
|
|
||||||
# because it contains the only link to the m3u playlist
|
|
||||||
video = json['props']['initialState']['video']['currentVideo']
|
|
||||||
videourl = video['vimeoVideoURL'] # Video URL is mandatory
|
|
||||||
|
|
||||||
# Extract other fields from the json in an error tolerant fashion
|
|
||||||
# ID may be incorrect (on short URL format), correct it.
|
|
||||||
parsed_id = video.get('_id')
|
|
||||||
if parsed_id:
|
|
||||||
video_id = parsed_id
|
|
||||||
|
|
||||||
title = video.get('title')
|
|
||||||
description = video.get('description')
|
|
||||||
|
|
||||||
thumbnail = video.get('storyImage')
|
|
||||||
views = video.get('views')
|
|
||||||
likes = video.get('likesCount')
|
|
||||||
comments = video.get('commentsCount')
|
|
||||||
duration = video.get('videoDuration')
|
|
||||||
publishdate = video.get('publishDate') # Apparently epoch time, day only
|
|
||||||
|
|
||||||
uploader = video.get('username')
|
|
||||||
uploader_id = video.get('hostID')
|
|
||||||
# Construct an uploader URL
|
|
||||||
uploader_url = None
|
|
||||||
if uploader_id:
|
|
||||||
uploader_url = "https://storyfire.com/user/%s/video" % uploader_id
|
|
||||||
|
|
||||||
# Collect root playlist to determine formats
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
videourl, video_id, 'mp4', 'm3u8_native')
|
|
||||||
|
|
||||||
# Modify formats to fill in missing information about audio codecs
|
|
||||||
for format in formats:
|
|
||||||
aformat = self._aformats.get(format['format_id'])
|
|
||||||
if aformat:
|
|
||||||
format['acodec'] = aformat['acodec']
|
|
||||||
format['abr'] = aformat['abr']
|
|
||||||
format['quality'] = aformat['preference']
|
|
||||||
format['ext'] = 'm4a'
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'ext': "mp4",
|
|
||||||
'url': videourl,
|
|
||||||
'formats': formats,
|
|
||||||
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'view_count': views,
|
|
||||||
'like_count': likes,
|
|
||||||
'comment_count': comments,
|
|
||||||
'duration': duration,
|
|
||||||
'timestamp': publishdate,
|
|
||||||
|
|
||||||
'uploader': uploader,
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'uploader_url': uploader_url,
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class StoryFireUserIE(InfoExtractor):
|
class StoryFireUserIE(StoryFireBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?storyfire\.com/user/(?P<id>[^/\s]+)/video'
|
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video'
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'https://storyfire.com/user/ntZAJFECERSgqHSxzonV5K2E89s1/video',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
|
||||||
'title': 'whang!',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 18
|
|
||||||
}, {
|
|
||||||
'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
|
'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
|
'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
|
||||||
'title': 'McJuggerNuggets',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 143
|
'playlist_mincount': 151,
|
||||||
|
}
|
||||||
|
_PAGE_SIZE = 20
|
||||||
|
|
||||||
}]
|
def _fetch_page(self, user_id, page):
|
||||||
|
videos = self._call_api(
|
||||||
# Generator for fetching playlist items
|
'publicVideos', user_id, 'page %d' % (page + 1), {
|
||||||
def _enum_videos(self, baseurl, user_id, firstjson):
|
'skip': page * self._PAGE_SIZE,
|
||||||
totalVideos = int(firstjson['videosCount'])
|
})['videos']
|
||||||
haveVideos = 0
|
for video in videos:
|
||||||
json = firstjson
|
yield self._parse_video(video)
|
||||||
|
|
||||||
for page in itertools.count(1):
|
|
||||||
for video in json['videos']:
|
|
||||||
id = video['_id']
|
|
||||||
url = "https://storyfire.com/video-details/%s" % id
|
|
||||||
haveVideos += 1
|
|
||||||
yield {
|
|
||||||
'_type': 'url',
|
|
||||||
'id': id,
|
|
||||||
'url': url,
|
|
||||||
'ie_key': 'StoryFire',
|
|
||||||
|
|
||||||
'title': video.get('title'),
|
|
||||||
'description': video.get('description'),
|
|
||||||
'view_count': video.get('views'),
|
|
||||||
'comment_count': video.get('commentsCount'),
|
|
||||||
'duration': video.get('videoDuration'),
|
|
||||||
'timestamp': video.get('publishDate'),
|
|
||||||
}
|
|
||||||
# Are there more pages we could fetch?
|
|
||||||
if haveVideos < totalVideos:
|
|
||||||
pageurl = baseurl + ("%i" % haveVideos)
|
|
||||||
json = self._download_json(pageurl, user_id,
|
|
||||||
note='Downloading page %s' % page)
|
|
||||||
|
|
||||||
# Are there any videos in the new json?
|
|
||||||
videos = json.get('videos')
|
|
||||||
if not videos or len(videos) == 0:
|
|
||||||
break # no videos
|
|
||||||
|
|
||||||
else:
|
|
||||||
break # We have fetched all the videos, stop
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
user_id = self._match_id(url)
|
user_id = self._match_id(url)
|
||||||
|
entries = OnDemandPagedList(functools.partial(
|
||||||
baseurl = "https://storyfire.com/app/publicVideos/%s?skip=" % user_id
|
self._fetch_page, user_id), self._PAGE_SIZE)
|
||||||
|
return self.playlist_result(entries, user_id)
|
||||||
# Download first page to ensure it can be downloaded, and get user information if available.
|
|
||||||
firstpage = baseurl + "0"
|
|
||||||
firstjson = self._download_json(firstpage, user_id)
|
|
||||||
|
|
||||||
title = None
|
|
||||||
videos = firstjson.get('videos')
|
|
||||||
if videos and len(videos):
|
|
||||||
title = videos[1].get('username')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'entries': self._enum_videos(baseurl, user_id, firstjson),
|
|
||||||
'id': user_id,
|
|
||||||
'title': title,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class StoryFireSeriesIE(InfoExtractor):
|
class StoryFireSeriesIE(StoryFireBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?storyfire\.com/write/series/stories/(?P<id>[^/\s]+)'
|
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
|
'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '-Lq6MsuIHLODO6d2dDkr',
|
'id': '-Lq6MsuIHLODO6d2dDkr',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 13
|
'playlist_mincount': 13,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
|
'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'the_mortal_one',
|
'id': 'the_mortal_one',
|
||||||
},
|
},
|
||||||
'playlist_count': 0 # This playlist has entries, but no videos.
|
'playlist_count': 0,
|
||||||
}, {
|
|
||||||
'url': 'https://storyfire.com/write/series/stories/story_time',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'story_time',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 10
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
# Generator for returning playlist items
|
def _extract_videos(self, stories):
|
||||||
# This object is substantially different than the one in the user videos page above
|
for story in stories.values():
|
||||||
def _enum_videos(self, jsonlist):
|
if story.get('hasVideo'):
|
||||||
for video in jsonlist:
|
yield self._parse_video(story)
|
||||||
id = video['_id']
|
|
||||||
if video.get('hasVideo'): # Boolean element
|
|
||||||
url = "https://storyfire.com/video-details/%s" % id
|
|
||||||
yield {
|
|
||||||
'_type': 'url',
|
|
||||||
'id': id,
|
|
||||||
'url': url,
|
|
||||||
'ie_key': 'StoryFire',
|
|
||||||
|
|
||||||
'title': video.get('title'),
|
|
||||||
'description': video.get('description'),
|
|
||||||
'view_count': video.get('views'),
|
|
||||||
'likes_count': video.get('likesCount'),
|
|
||||||
'comment_count': video.get('commentsCount'),
|
|
||||||
'duration': video.get('videoDuration'),
|
|
||||||
'timestamp': video.get('publishDate'),
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
series_id = self._match_id(url)
|
||||||
|
stories = self._call_api(
|
||||||
listurl = "https://storyfire.com/app/seriesStories/%s/list" % list_id
|
'seriesStories', series_id, 'series stories')
|
||||||
json = self._download_json(listurl, list_id)
|
return self.playlist_result(self._extract_videos(stories), series_id)
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'entries': self._enum_videos(json),
|
|
||||||
'id': list_id
|
|
||||||
}
|
|
||||||
|
|
|
@ -4,21 +4,22 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
qualities,
|
qualities,
|
||||||
random_birthday,
|
random_birthday,
|
||||||
try_get,
|
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class VideoPressIE(InfoExtractor):
|
class VideoPressIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
|
_ID_REGEX = r'[\da-zA-Z]{8}'
|
||||||
|
_PATH_REGEX = r'video(?:\.word)?press\.com/embed/'
|
||||||
|
_VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX)
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://videopress.com/embed/kUJmAcSf',
|
'url': 'https://videopress.com/embed/kUJmAcSf',
|
||||||
'md5': '706956a6c875873d51010921310e4bc6',
|
'md5': '706956a6c875873d51010921310e4bc6',
|
||||||
|
@ -36,35 +37,36 @@ class VideoPressIE(InfoExtractor):
|
||||||
# 17+, requires birth_* params
|
# 17+, requires birth_* params
|
||||||
'url': 'https://videopress.com/embed/iH3gstfZ',
|
'url': 'https://videopress.com/embed/iH3gstfZ',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.wordpress.com/embed/kUJmAcSf',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
|
r'<iframe[^>]+src=["\']((?:https?://)?%s%s)' % (VideoPressIE._PATH_REGEX, VideoPressIE._ID_REGEX),
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
query = random_birthday('birth_year', 'birth_month', 'birth_day')
|
query = random_birthday('birth_year', 'birth_month', 'birth_day')
|
||||||
|
query['fields'] = 'description,duration,file_url_base,files,height,original,poster,rating,title,upload_date,width'
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
||||||
video_id, query=query)
|
video_id, query=query)
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
||||||
def base_url(scheme):
|
file_url_base = video.get('file_url_base') or {}
|
||||||
return try_get(
|
base_url = file_url_base.get('https') or file_url_base.get('http')
|
||||||
video, lambda x: x['file_url_base'][scheme], compat_str)
|
|
||||||
|
|
||||||
base_url = base_url('https') or base_url('http')
|
|
||||||
|
|
||||||
QUALITIES = ('std', 'dvd', 'hd')
|
QUALITIES = ('std', 'dvd', 'hd')
|
||||||
quality = qualities(QUALITIES)
|
quality = qualities(QUALITIES)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, f in video['files'].items():
|
for format_id, f in (video.get('files') or {}).items():
|
||||||
if not isinstance(f, dict):
|
if not isinstance(f, dict):
|
||||||
continue
|
continue
|
||||||
for ext, path in f.items():
|
for ext, path in f.items():
|
||||||
|
@ -75,12 +77,14 @@ class VideoPressIE(InfoExtractor):
|
||||||
'ext': determine_ext(path, ext),
|
'ext': determine_ext(path, ext),
|
||||||
'quality': quality(format_id),
|
'quality': quality(format_id),
|
||||||
})
|
})
|
||||||
original_url = try_get(video, lambda x: x['original'], compat_str)
|
original_url = video.get('original')
|
||||||
if original_url:
|
if original_url:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': original_url,
|
'url': original_url,
|
||||||
'format_id': 'original',
|
'format_id': 'original',
|
||||||
'quality': len(QUALITIES),
|
'quality': len(QUALITIES),
|
||||||
|
'width': int_or_none(video.get('width')),
|
||||||
|
'height': int_or_none(video.get('height')),
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
std_headers,
|
std_headers,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -42,7 +43,7 @@ class VikiBaseIE(InfoExtractor):
|
||||||
_ERRORS = {
|
_ERRORS = {
|
||||||
'geo': 'Sorry, this content is not available in your region.',
|
'geo': 'Sorry, this content is not available in your region.',
|
||||||
'upcoming': 'Sorry, this content is not yet available.',
|
'upcoming': 'Sorry, this content is not yet available.',
|
||||||
# 'paywall': 'paywall',
|
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _prepare_call(self, path, timestamp=None, post_data=None):
|
def _prepare_call(self, path, timestamp=None, post_data=None):
|
||||||
|
@ -94,11 +95,13 @@ class VikiBaseIE(InfoExtractor):
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
def _check_errors(self, data):
|
def _check_errors(self, data):
|
||||||
for reason, status in data.get('blocking', {}).items():
|
for reason, status in (data.get('blocking') or {}).items():
|
||||||
if status and reason in self._ERRORS:
|
if status and reason in self._ERRORS:
|
||||||
message = self._ERRORS[reason]
|
message = self._ERRORS[reason]
|
||||||
if reason == 'geo':
|
if reason == 'geo':
|
||||||
self.raise_geo_restricted(msg=message)
|
self.raise_geo_restricted(msg=message)
|
||||||
|
elif reason == 'paywall':
|
||||||
|
self.raise_login_required(message)
|
||||||
raise ExtractorError('%s said: %s' % (
|
raise ExtractorError('%s said: %s' % (
|
||||||
self.IE_NAME, message), expected=True)
|
self.IE_NAME, message), expected=True)
|
||||||
|
|
||||||
|
@ -143,13 +146,19 @@ class VikiIE(VikiBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1023585v',
|
'id': '1023585v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Heirs Episode 14',
|
'title': 'Heirs - Episode 14',
|
||||||
'uploader': 'SBS',
|
'uploader': 'SBS Contents Hub',
|
||||||
'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
'timestamp': 1385047627,
|
||||||
'upload_date': '20131121',
|
'upload_date': '20131121',
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
|
'duration': 3570,
|
||||||
|
'episode_number': 14,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
},
|
},
|
||||||
'skip': 'Blocked in the US',
|
'skip': 'Blocked in the US',
|
||||||
|
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||||
}, {
|
}, {
|
||||||
# clip
|
# clip
|
||||||
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
|
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
|
||||||
|
@ -165,7 +174,8 @@ class VikiIE(VikiBaseIE):
|
||||||
'uploader': 'Arirang TV',
|
'uploader': 'Arirang TV',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
}
|
},
|
||||||
|
'skip': 'Sorry. There was an error loading this video',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
|
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -183,7 +193,7 @@ class VikiIE(VikiBaseIE):
|
||||||
}, {
|
}, {
|
||||||
# episode
|
# episode
|
||||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||||
'md5': '94e0e34fd58f169f40c184f232356cfe',
|
'md5': '0a53dc252e6e690feccd756861495a8c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '44699v',
|
'id': '44699v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -195,6 +205,10 @@ class VikiIE(VikiBaseIE):
|
||||||
'uploader': 'group8',
|
'uploader': 'group8',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
|
'episode_number': 1,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||||
}, {
|
}, {
|
||||||
|
@ -221,7 +235,7 @@ class VikiIE(VikiBaseIE):
|
||||||
}, {
|
}, {
|
||||||
# non-English description
|
# non-English description
|
||||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||||
'md5': 'adf9e321a0ae5d0aace349efaaff7691',
|
'md5': '41faaba0de90483fb4848952af7c7d0d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '158036v',
|
'id': '158036v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -232,6 +246,10 @@ class VikiIE(VikiBaseIE):
|
||||||
'title': 'Love In Magic',
|
'title': 'Love In Magic',
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -249,22 +267,19 @@ class VikiIE(VikiBaseIE):
|
||||||
self._check_errors(video)
|
self._check_errors(video)
|
||||||
|
|
||||||
title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
|
title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
|
||||||
|
episode_number = int_or_none(video.get('number'))
|
||||||
if not title:
|
if not title:
|
||||||
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
|
title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
|
||||||
container_titles = video.get('container', {}).get('titles', {})
|
container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
|
||||||
container_title = self.dict_selection(container_titles, 'en')
|
container_title = self.dict_selection(container_titles, 'en')
|
||||||
title = '%s - %s' % (container_title, title)
|
title = '%s - %s' % (container_title, title)
|
||||||
|
|
||||||
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
||||||
|
|
||||||
duration = int_or_none(video.get('duration'))
|
like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
|
||||||
timestamp = parse_iso8601(video.get('created_at'))
|
|
||||||
uploader = video.get('author')
|
|
||||||
like_count = int_or_none(video.get('likes', {}).get('count'))
|
|
||||||
age_limit = parse_age_limit(video.get('rating'))
|
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for thumbnail_id, thumbnail in video.get('images', {}).items():
|
for thumbnail_id, thumbnail in (video.get('images') or {}).items():
|
||||||
thumbnails.append({
|
thumbnails.append({
|
||||||
'id': thumbnail_id,
|
'id': thumbnail_id,
|
||||||
'url': thumbnail.get('url'),
|
'url': thumbnail.get('url'),
|
||||||
|
@ -289,7 +304,7 @@ class VikiIE(VikiBaseIE):
|
||||||
}]
|
}]
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
# fall-back to the old way if there isn't a streamSubtitles attribute
|
# fall-back to the old way if there isn't a streamSubtitles attribute
|
||||||
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
|
for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
|
||||||
subtitles[subtitle_lang] = [{
|
subtitles[subtitle_lang] = [{
|
||||||
'ext': subtitles_format,
|
'ext': subtitles_format,
|
||||||
'url': self._prepare_call(
|
'url': self._prepare_call(
|
||||||
|
@ -300,13 +315,15 @@ class VikiIE(VikiBaseIE):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': duration,
|
'duration': int_or_none(video.get('duration')),
|
||||||
'timestamp': timestamp,
|
'timestamp': parse_iso8601(video.get('created_at')),
|
||||||
'uploader': uploader,
|
'uploader': video.get('author'),
|
||||||
|
'uploader_url': video.get('author_url'),
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'age_limit': age_limit,
|
'age_limit': parse_age_limit(video.get('rating')),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'episode_number': episode_number,
|
||||||
}
|
}
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -400,7 +417,7 @@ class VikiChannelIE(VikiBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '50c',
|
'id': '50c',
|
||||||
'title': 'Boys Over Flowers',
|
'title': 'Boys Over Flowers',
|
||||||
'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
|
'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 71,
|
'playlist_mincount': 71,
|
||||||
}, {
|
}, {
|
||||||
|
@ -411,6 +428,7 @@ class VikiChannelIE(VikiBaseIE):
|
||||||
'description': 'md5:05bf5471385aa8b21c18ad450e350525',
|
'description': 'md5:05bf5471385aa8b21c18ad450e350525',
|
||||||
},
|
},
|
||||||
'playlist_count': 127,
|
'playlist_count': 127,
|
||||||
|
'skip': 'Page not found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viki.com/news/24569c-showbiz-korea',
|
'url': 'http://www.viki.com/news/24569c-showbiz-korea',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -221,10 +221,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_original_format(self, url, video_id):
|
def _extract_original_format(self, url, video_id, unlisted_hash=None):
|
||||||
|
query = {'action': 'load_download_config'}
|
||||||
|
if unlisted_hash:
|
||||||
|
query['unlisted_hash'] = unlisted_hash
|
||||||
download_data = self._download_json(
|
download_data = self._download_json(
|
||||||
url, video_id, fatal=False,
|
url, video_id, fatal=False, query=query,
|
||||||
query={'action': 'load_download_config'},
|
|
||||||
headers={'X-Requested-With': 'XMLHttpRequest'})
|
headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||||
if download_data:
|
if download_data:
|
||||||
source_file = download_data.get('source_file')
|
source_file = download_data.get('source_file')
|
||||||
|
@ -504,6 +506,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
{
|
{
|
||||||
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
||||||
|
'url': 'https://vimeo.com/392479337/a52724358e',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
# https://gettingthingsdone.com/workflowmap/
|
# https://gettingthingsdone.com/workflowmap/
|
||||||
# vimeo embed with check-password page protected by Referer header
|
# vimeo embed with check-password page protected by Referer header
|
||||||
|
@ -668,7 +675,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
if config.get('view') == 4:
|
if config.get('view') == 4:
|
||||||
config = self._verify_player_video_password(redirect_url, video_id, headers)
|
config = self._verify_player_video_password(redirect_url, video_id, headers)
|
||||||
|
|
||||||
vod = config.get('video', {}).get('vod', {})
|
video = config.get('video') or {}
|
||||||
|
vod = video.get('vod') or {}
|
||||||
|
|
||||||
def is_rented():
|
def is_rented():
|
||||||
if '>You rented this title.<' in webpage:
|
if '>You rented this title.<' in webpage:
|
||||||
|
@ -728,7 +736,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
source_format = self._extract_original_format(
|
source_format = self._extract_original_format(
|
||||||
'https://vimeo.com/' + video_id, video_id)
|
'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
|
||||||
if source_format:
|
if source_format:
|
||||||
formats.append(source_format)
|
formats.append(source_format)
|
||||||
|
|
||||||
|
|
|
@ -1,40 +1,55 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
month_by_abbreviation,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
unified_strdate,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class XboxClipsIE(InfoExtractor):
|
class XboxClipsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
|
_VALID_URL = r'https?://(?:www\.)?(?:xboxclips\.com|gameclips\.io)/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
|
'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||||
'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
|
'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
|
'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Iabdulelah playing Titanfall',
|
'title': 'iAbdulElah playing Titanfall',
|
||||||
'filesize_approx': 26800000,
|
'filesize_approx': 26800000,
|
||||||
'upload_date': '20140807',
|
'upload_date': '20140807',
|
||||||
'duration': 56,
|
'duration': 56,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://gameclips.io/iAbdulElah/074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
if '/video.php' in url:
|
||||||
|
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0])
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
webpage = self._download_webpage(url, video_id)
|
||||||
r'>(?:Link|Download): <a[^>]+href="([^"]+)"', webpage, 'video URL')
|
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
|
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = None
|
||||||
r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
|
mobj = re.search(
|
||||||
|
r'>Recorded: (\d{2})-(Jan|Feb|Mar|Apr|May|Ju[nl]|Aug|Sep|Oct|Nov|Dec)-(\d{4})',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
upload_date = '%s%.2d%s' % (mobj.group(3), month_by_abbreviation(mobj.group(2)), mobj.group(1))
|
||||||
filesize = parse_filesize(self._html_search_regex(
|
filesize = parse_filesize(self._html_search_regex(
|
||||||
r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
|
r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
|
||||||
duration = int_or_none(self._html_search_regex(
|
duration = int_or_none(self._html_search_regex(
|
||||||
|
@ -42,12 +57,12 @@ class XboxClipsIE(InfoExtractor):
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'>Views: (\d+)<', webpage, 'view count', fatal=False))
|
r'>Views: (\d+)<', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
return {
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'filesize_approx': filesize,
|
'filesize_approx': filesize,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
}
|
})
|
||||||
|
return info
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import itertools
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
|
@ -209,17 +210,27 @@ class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
|
||||||
missing_track_ids = [
|
missing_track_ids = [
|
||||||
track_id for track_id in track_ids
|
track_id for track_id in track_ids
|
||||||
if track_id not in present_track_ids]
|
if track_id not in present_track_ids]
|
||||||
missing_tracks = self._call_api(
|
# Request missing tracks in chunks to avoid exceeding max HTTP header size,
|
||||||
'track-entries', tld, url, item_id,
|
# see https://github.com/ytdl-org/youtube-dl/issues/27355
|
||||||
'Downloading missing tracks JSON', {
|
_TRACKS_PER_CHUNK = 250
|
||||||
'entries': ','.join(missing_track_ids),
|
for chunk_num in itertools.count(0):
|
||||||
'lang': tld,
|
start = chunk_num * _TRACKS_PER_CHUNK
|
||||||
'external-domain': 'music.yandex.%s' % tld,
|
end = start + _TRACKS_PER_CHUNK
|
||||||
'overembed': 'false',
|
missing_track_ids_req = missing_track_ids[start:end]
|
||||||
'strict': 'true',
|
assert missing_track_ids_req
|
||||||
})
|
missing_tracks = self._call_api(
|
||||||
if missing_tracks:
|
'track-entries', tld, url, item_id,
|
||||||
tracks.extend(missing_tracks)
|
'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
|
||||||
|
'entries': ','.join(missing_track_ids_req),
|
||||||
|
'lang': tld,
|
||||||
|
'external-domain': 'music.yandex.%s' % tld,
|
||||||
|
'overembed': 'false',
|
||||||
|
'strict': 'true',
|
||||||
|
})
|
||||||
|
if missing_tracks:
|
||||||
|
tracks.extend(missing_tracks)
|
||||||
|
if end >= len(missing_track_ids):
|
||||||
|
break
|
||||||
|
|
||||||
return tracks
|
return tracks
|
||||||
|
|
||||||
|
|
|
@ -324,7 +324,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
||||||
'view count', default=None))
|
'view count', default=None))
|
||||||
uploader = try_get(
|
uploader = try_get(
|
||||||
renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
renderer,
|
||||||
|
(lambda x: x['ownerText']['runs'][0]['text'],
|
||||||
|
lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': YoutubeIE.ie_key(),
|
'ie_key': YoutubeIE.ie_key(),
|
||||||
|
@ -340,64 +342,70 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
|
|
||||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com'
|
IE_DESC = 'YouTube.com'
|
||||||
|
_INVIDIOUS_SITES = (
|
||||||
|
# invidious-redirect websites
|
||||||
|
r'(?:www\.)?redirect\.invidious\.io',
|
||||||
|
r'(?:(?:www|dev)\.)?invidio\.us',
|
||||||
|
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
|
||||||
|
r'(?:www\.)?invidious\.pussthecat\.org',
|
||||||
|
r'(?:www\.)?invidious\.048596\.xyz',
|
||||||
|
r'(?:www\.)?invidious\.zee\.li',
|
||||||
|
r'(?:www\.)?vid\.puffyan\.us',
|
||||||
|
r'(?:(?:www|au)\.)?ytprivate\.com',
|
||||||
|
r'(?:www\.)?invidious\.namazso\.eu',
|
||||||
|
r'(?:www\.)?invidious\.ethibox\.fr',
|
||||||
|
r'(?:www\.)?inv\.skyn3t\.in',
|
||||||
|
r'(?:www\.)?invidious\.himiko\.cloud',
|
||||||
|
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
|
||||||
|
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
|
||||||
|
r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
|
||||||
|
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
|
||||||
|
# youtube-dl invidious instances list
|
||||||
|
r'(?:(?:www|no)\.)?invidiou\.sh',
|
||||||
|
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
|
||||||
|
r'(?:www\.)?invidious\.kabi\.tk',
|
||||||
|
r'(?:www\.)?invidious\.13ad\.de',
|
||||||
|
r'(?:www\.)?invidious\.mastodon\.host',
|
||||||
|
r'(?:www\.)?invidious\.zapashcanon\.fr',
|
||||||
|
r'(?:www\.)?invidious\.kavin\.rocks',
|
||||||
|
r'(?:www\.)?invidious\.tube',
|
||||||
|
r'(?:www\.)?invidiou\.site',
|
||||||
|
r'(?:www\.)?invidious\.site',
|
||||||
|
r'(?:www\.)?invidious\.xyz',
|
||||||
|
r'(?:www\.)?invidious\.nixnet\.xyz',
|
||||||
|
r'(?:www\.)?invidious\.drycat\.fr',
|
||||||
|
r'(?:www\.)?tube\.poal\.co',
|
||||||
|
r'(?:www\.)?tube\.connect\.cafe',
|
||||||
|
r'(?:www\.)?vid\.wxzm\.sx',
|
||||||
|
r'(?:www\.)?vid\.mint\.lgbt',
|
||||||
|
r'(?:www\.)?yewtu\.be',
|
||||||
|
r'(?:www\.)?yt\.elukerio\.org',
|
||||||
|
r'(?:www\.)?yt\.lelux\.fi',
|
||||||
|
r'(?:www\.)?invidious\.ggc-project\.de',
|
||||||
|
r'(?:www\.)?yt\.maisputain\.ovh',
|
||||||
|
r'(?:www\.)?invidious\.toot\.koeln',
|
||||||
|
r'(?:www\.)?invidious\.fdn\.fr',
|
||||||
|
r'(?:www\.)?watch\.nettohikari\.com',
|
||||||
|
r'(?:www\.)?kgg2m7yk5aybusll\.onion',
|
||||||
|
r'(?:www\.)?qklhadlycap4cnod\.onion',
|
||||||
|
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
|
||||||
|
r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
|
||||||
|
r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
|
||||||
|
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
|
||||||
|
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
|
||||||
|
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
|
||||||
|
)
|
||||||
_VALID_URL = r"""(?x)^
|
_VALID_URL = r"""(?x)^
|
||||||
(
|
(
|
||||||
(?:https?://|//) # http(s):// or protocol-independent URL
|
(?:https?://|//) # http(s):// or protocol-independent URL
|
||||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
|
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
|
||||||
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
(?:www\.)?deturl\.com/www\.youtube\.com|
|
||||||
(?:www\.)?pwnyoutube\.com/|
|
(?:www\.)?pwnyoutube\.com|
|
||||||
(?:www\.)?hooktube\.com/|
|
(?:www\.)?hooktube\.com|
|
||||||
(?:www\.)?yourepeat\.com/|
|
(?:www\.)?yourepeat\.com|
|
||||||
tube\.majestyc\.net/|
|
tube\.majestyc\.net|
|
||||||
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
|
%(invidious)s|
|
||||||
(?:www\.)?invidious\.pussthecat\.org/|
|
youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
|
||||||
(?:www\.)?invidious\.048596\.xyz/|
|
|
||||||
(?:www\.)?invidious\.zee\.li/|
|
|
||||||
(?:www\.)?vid\.puffyan\.us/|
|
|
||||||
(?:(?:www|au)\.)?ytprivate\.com/|
|
|
||||||
(?:www\.)?invidious\.namazso\.eu/|
|
|
||||||
(?:www\.)?invidious\.ethibox\.fr/|
|
|
||||||
(?:www\.)?inv\.skyn3t\.in/|
|
|
||||||
(?:www\.)?invidious\.himiko\.cloud/|
|
|
||||||
(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion/|
|
|
||||||
(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion/|
|
|
||||||
(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion/|
|
|
||||||
(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion/|
|
|
||||||
(?:(?:www|dev)\.)?invidio\.us/|
|
|
||||||
(?:(?:www|no)\.)?invidiou\.sh/|
|
|
||||||
(?:(?:www|fi)\.)?invidious\.snopyta\.org/|
|
|
||||||
(?:www\.)?invidious\.kabi\.tk/|
|
|
||||||
(?:www\.)?invidious\.13ad\.de/|
|
|
||||||
(?:www\.)?invidious\.mastodon\.host/|
|
|
||||||
(?:www\.)?invidious\.zapashcanon\.fr/|
|
|
||||||
(?:www\.)?invidious\.kavin\.rocks/|
|
|
||||||
(?:www\.)?invidious\.tube/|
|
|
||||||
(?:www\.)?invidiou\.site/|
|
|
||||||
(?:www\.)?invidious\.site/|
|
|
||||||
(?:www\.)?invidious\.xyz/|
|
|
||||||
(?:www\.)?invidious\.nixnet\.xyz/|
|
|
||||||
(?:www\.)?invidious\.drycat\.fr/|
|
|
||||||
(?:www\.)?tube\.poal\.co/|
|
|
||||||
(?:www\.)?tube\.connect\.cafe/|
|
|
||||||
(?:www\.)?vid\.wxzm\.sx/|
|
|
||||||
(?:www\.)?vid\.mint\.lgbt/|
|
|
||||||
(?:www\.)?yewtu\.be/|
|
|
||||||
(?:www\.)?yt\.elukerio\.org/|
|
|
||||||
(?:www\.)?yt\.lelux\.fi/|
|
|
||||||
(?:www\.)?invidious\.ggc-project\.de/|
|
|
||||||
(?:www\.)?yt\.maisputain\.ovh/|
|
|
||||||
(?:www\.)?invidious\.toot\.koeln/|
|
|
||||||
(?:www\.)?invidious\.fdn\.fr/|
|
|
||||||
(?:www\.)?watch\.nettohikari\.com/|
|
|
||||||
(?:www\.)?kgg2m7yk5aybusll\.onion/|
|
|
||||||
(?:www\.)?qklhadlycap4cnod\.onion/|
|
|
||||||
(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
|
|
||||||
(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
|
|
||||||
(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
|
|
||||||
(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
|
|
||||||
(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
|
|
||||||
(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
|
|
||||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
(?: # the various things that can precede the ID:
|
(?: # the various things that can precede the ID:
|
||||||
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|
||||||
|
@ -412,6 +420,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
youtu\.be| # just youtu.be/xxxx
|
youtu\.be| # just youtu.be/xxxx
|
||||||
vid\.plus| # or vid.plus/xxxx
|
vid\.plus| # or vid.plus/xxxx
|
||||||
zwearz\.com/watch| # or zwearz.com/watch/xxxx
|
zwearz\.com/watch| # or zwearz.com/watch/xxxx
|
||||||
|
%(invidious)s
|
||||||
)/
|
)/
|
||||||
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
||||||
)
|
)
|
||||||
|
@ -424,7 +433,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
(?(1).+)? # if we found the ID, everything can follow
|
(?(1).+)? # if we found the ID, everything can follow
|
||||||
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
$""" % {
|
||||||
|
'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
|
||||||
|
'invidious': '|'.join(_INVIDIOUS_SITES),
|
||||||
|
}
|
||||||
_PLAYER_INFO_RE = (
|
_PLAYER_INFO_RE = (
|
||||||
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
|
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
|
||||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||||
|
@ -1031,6 +1043,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
|
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
|
||||||
|
'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# DRM protected
|
# DRM protected
|
||||||
'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
|
'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
|
||||||
|
@ -1169,6 +1190,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# controversial video, only works with bpctr when authenticated with cookies
|
||||||
|
'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
@ -1426,7 +1452,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
base_url = self.http_scheme() + '//www.youtube.com/'
|
base_url = self.http_scheme() + '//www.youtube.com/'
|
||||||
webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1'
|
webpage_url = base_url + 'watch?v=' + video_id + '&has_verified=1&bpctr=9999999999'
|
||||||
webpage = self._download_webpage(webpage_url, video_id, fatal=False)
|
webpage = self._download_webpage(webpage_url, video_id, fatal=False)
|
||||||
|
|
||||||
player_response = None
|
player_response = None
|
||||||
|
|
69
youtube_dlc/extractor/zhihu.py
Normal file
69
youtube_dlc/extractor/zhihu.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import float_or_none, int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class ZhihuIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?zhihu\.com/zvideo/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.zhihu.com/zvideo/1342930761977176064',
|
||||||
|
'md5': 'c8d4c9cd72dd58e6f9bc9c2c84266464',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1342930761977176064',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '写春联也太难了吧!',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'uploader': '桥半舫',
|
||||||
|
'timestamp': 1612959715,
|
||||||
|
'upload_date': '20210210',
|
||||||
|
'uploader_id': '244ecb13b0fd7daf92235288c8ca3365',
|
||||||
|
'duration': 146.333,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
zvideo = self._download_json(
|
||||||
|
'https://www.zhihu.com/api/v4/zvideos/' + video_id, video_id)
|
||||||
|
title = zvideo['title']
|
||||||
|
video = zvideo.get('video') or {}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id, q in (video.get('playlist') or {}).items():
|
||||||
|
play_url = q.get('url') or q.get('play_url')
|
||||||
|
if not play_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'asr': int_or_none(q.get('sample_rate')),
|
||||||
|
'filesize': int_or_none(q.get('size')),
|
||||||
|
'format_id': format_id,
|
||||||
|
'fps': int_or_none(q.get('fps')),
|
||||||
|
'height': int_or_none(q.get('height')),
|
||||||
|
'tbr': float_or_none(q.get('bitrate')),
|
||||||
|
'url': play_url,
|
||||||
|
'width': int_or_none(q.get('width')),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
author = zvideo.get('author') or {}
|
||||||
|
url_token = author.get('url_token')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': video.get('thumbnail') or zvideo.get('image_url'),
|
||||||
|
'uploader': author.get('name'),
|
||||||
|
'timestamp': int_or_none(zvideo.get('published_at')),
|
||||||
|
'uploader_id': author.get('id'),
|
||||||
|
'uploader_url': 'https://www.zhihu.com/people/' + url_token if url_token else None,
|
||||||
|
'duration': float_or_none(video.get('duration')),
|
||||||
|
'view_count': int_or_none(zvideo.get('play_count')),
|
||||||
|
'like_count': int_or_none(zvideo.get('liked_count')),
|
||||||
|
'comment_count': int_or_none(zvideo.get('comment_count')),
|
||||||
|
}
|
|
@ -127,10 +127,13 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||||
|
|
||||||
except PostProcessingError as err:
|
except PostProcessingError as err:
|
||||||
self.report_warning('unable to embed using ffprobe & ffmpeg; %s' % error_to_compat_str(err))
|
self.report_warning('unable to embed using ffprobe & ffmpeg; %s' % error_to_compat_str(err))
|
||||||
if not check_executable('AtomicParsley', ['-v']):
|
atomicparsley = next((
|
||||||
|
x for x in ['AtomicParsley', 'atomicparsley']
|
||||||
|
if check_executable(x, ['-v'])), None)
|
||||||
|
if atomicparsley is None:
|
||||||
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
|
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
|
||||||
|
|
||||||
cmd = [encodeFilename('AtomicParsley', True),
|
cmd = [encodeFilename(atomicparsley, True),
|
||||||
encodeFilename(filename, True),
|
encodeFilename(filename, True),
|
||||||
encodeArgument('--artwork'),
|
encodeArgument('--artwork'),
|
||||||
encodeFilename(thumbnail_filename, True),
|
encodeFilename(thumbnail_filename, True),
|
||||||
|
|
Loading…
Add table
Reference in a new issue