mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-18 23:03:05 +01:00
parent
717297545b
commit
41d1cca328
17 changed files with 249 additions and 105 deletions
9
.github/workflows/core.yml
vendored
9
.github/workflows/core.yml
vendored
|
@ -41,11 +41,18 @@ jobs:
|
||||||
- name: Install Jython
|
- name: Install Jython
|
||||||
if: ${{ matrix.python-impl == 'jython' }}
|
if: ${{ matrix.python-impl == 'jython' }}
|
||||||
run: |
|
run: |
|
||||||
wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
||||||
java -jar jython-installer.jar -s -d "$HOME/jython"
|
java -jar jython-installer.jar -s -d "$HOME/jython"
|
||||||
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
||||||
- name: Install nose
|
- name: Install nose
|
||||||
|
if: ${{ matrix.python-impl != 'jython' }}
|
||||||
run: pip install nose
|
run: pip install nose
|
||||||
|
- name: Install nose (Jython)
|
||||||
|
if: ${{ matrix.python-impl == 'jython' }}
|
||||||
|
# Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
||||||
|
run: |
|
||||||
|
wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
|
||||||
|
pip install nose-1.3.7-py2-none-any.whl
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
||||||
env:
|
env:
|
||||||
|
|
9
.github/workflows/download.yml
vendored
9
.github/workflows/download.yml
vendored
|
@ -41,11 +41,18 @@ jobs:
|
||||||
- name: Install Jython
|
- name: Install Jython
|
||||||
if: ${{ matrix.python-impl == 'jython' }}
|
if: ${{ matrix.python-impl == 'jython' }}
|
||||||
run: |
|
run: |
|
||||||
wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
||||||
java -jar jython-installer.jar -s -d "$HOME/jython"
|
java -jar jython-installer.jar -s -d "$HOME/jython"
|
||||||
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
||||||
- name: Install nose
|
- name: Install nose
|
||||||
|
if: ${{ matrix.python-impl != 'jython' }}
|
||||||
run: pip install nose
|
run: pip install nose
|
||||||
|
- name: Install nose (Jython)
|
||||||
|
if: ${{ matrix.python-impl == 'jython' }}
|
||||||
|
# Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
||||||
|
run: |
|
||||||
|
wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
|
||||||
|
pip install nose-1.3.7-py2-none-any.whl
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
||||||
env:
|
env:
|
||||||
|
|
|
@ -1056,11 +1056,20 @@ class YoutubeDL(object):
|
||||||
|
|
||||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||||
process=True, force_generic_extractor=False):
|
process=True, force_generic_extractor=False):
|
||||||
'''
|
"""
|
||||||
Returns a list with a dictionary for each video we find.
|
Return a list with a dictionary for each video extracted.
|
||||||
If 'download', also downloads the videos.
|
|
||||||
extra_info is a dict containing the extra values to add to each result
|
Arguments:
|
||||||
'''
|
url -- URL to extract
|
||||||
|
|
||||||
|
Keyword arguments:
|
||||||
|
download -- whether to download videos during extraction
|
||||||
|
ie_key -- extractor key hint
|
||||||
|
extra_info -- dictionary containing the extra values to add to each result
|
||||||
|
process -- whether to resolve all unresolved references (URLs, playlist items),
|
||||||
|
must be True for download to work.
|
||||||
|
force_generic_extractor -- force using the generic extractor
|
||||||
|
"""
|
||||||
|
|
||||||
if not ie_key and force_generic_extractor:
|
if not ie_key and force_generic_extractor:
|
||||||
ie_key = 'Generic'
|
ie_key = 'Generic'
|
||||||
|
|
|
@ -133,6 +133,8 @@ class CDAIE(InfoExtractor):
|
||||||
'age_limit': 18 if need_confirm_age else 0,
|
'age_limit': 18 if need_confirm_age else 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
# Source: https://www.cda.pl/js/player.js?t=1606154898
|
# Source: https://www.cda.pl/js/player.js?t=1606154898
|
||||||
def decrypt_file(a):
|
def decrypt_file(a):
|
||||||
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
|
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
|
||||||
|
@ -197,7 +199,7 @@ class CDAIE(InfoExtractor):
|
||||||
handler = self._download_webpage
|
handler = self._download_webpage
|
||||||
|
|
||||||
webpage = handler(
|
webpage = handler(
|
||||||
self._BASE_URL + href, video_id,
|
urljoin(self._BASE_URL, href), video_id,
|
||||||
'Downloading %s version information' % resolution, fatal=False)
|
'Downloading %s version information' % resolution, fatal=False)
|
||||||
if not webpage:
|
if not webpage:
|
||||||
# Manually report warning because empty page is returned when
|
# Manually report warning because empty page is returned when
|
||||||
|
@ -209,6 +211,4 @@ class CDAIE(InfoExtractor):
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
|
||||||
|
|
||||||
return merge_dicts(info_dict, info)
|
return merge_dicts(info_dict, info)
|
||||||
|
|
|
@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor):
|
||||||
# From http://www.gdcvault.com/play/1013700/Advanced-Material
|
# From http://www.gdcvault.com/play/1013700/Advanced-Material
|
||||||
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
|
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# From https://gdcvault.com/play/1016624, empty speakerVideo
|
||||||
|
'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '201210-822101_1349794556671DDDD',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Pre-launch - Preparing to Take the Plunge',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
|
||||||
|
'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _parse_mp4(self, metadata):
|
def _parse_mp4(self, metadata):
|
||||||
|
@ -85,25 +97,19 @@ class DigitallySpeakingIE(InfoExtractor):
|
||||||
'quality': 1,
|
'quality': 1,
|
||||||
'format_id': audio.get('code'),
|
'format_id': audio.get('code'),
|
||||||
})
|
})
|
||||||
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
|
for video_key, format_id, preference in (
|
||||||
formats.append({
|
('slide', 'slides', -2), ('speaker', 'speaker', -1)):
|
||||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
video_path = xpath_text(metadata, './%sVideo' % video_key)
|
||||||
'play_path': remove_end(slide_video_path, '.flv'),
|
if not video_path:
|
||||||
'ext': 'flv',
|
continue
|
||||||
'format_note': 'slide deck video',
|
formats.append({
|
||||||
'quality': -2,
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||||
'format_id': 'slides',
|
'play_path': remove_end(video_path, '.flv'),
|
||||||
'acodec': 'none',
|
'ext': 'flv',
|
||||||
})
|
'format_note': '%s video' % video_key,
|
||||||
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
|
'quality': preference,
|
||||||
formats.append({
|
'format_id': format_id,
|
||||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
})
|
||||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_note': 'speaker video',
|
|
||||||
'quality': -1,
|
|
||||||
'format_id': 'speaker',
|
|
||||||
})
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -151,7 +151,6 @@ from .bleacherreport import (
|
||||||
BleacherReportIE,
|
BleacherReportIE,
|
||||||
BleacherReportCMSIE,
|
BleacherReportCMSIE,
|
||||||
)
|
)
|
||||||
from .blinkx import BlinkxIE
|
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
from .bongacams import BongaCamsIE
|
from .bongacams import BongaCamsIE
|
||||||
|
|
|
@ -402,6 +402,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# "<figure id=" pattern (#28792)
|
||||||
|
'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -419,8 +423,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||||
r'id-video=([^@]+@[^"]+)',
|
r'id-video=([^@]+@[^"]+)',
|
||||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||||
r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||||
r'<figure[^>]+id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
return self._make_url_result(video_id)
|
return self._make_url_result(video_id)
|
||||||
|
|
|
@ -16,7 +16,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class FunimationIE(InfoExtractor):
|
class FunimationIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_NETRC_MACHINE = 'funimation'
|
_NETRC_MACHINE = 'funimation'
|
||||||
_TOKEN = None
|
_TOKEN = None
|
||||||
|
@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# with lang code
|
||||||
|
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
|
|
|
@ -5,7 +5,10 @@ import re
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
HEADRequest,
|
||||||
|
remove_start,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
smuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -100,6 +103,26 @@ class GDCVaultIE(InfoExtractor):
|
||||||
'format': 'mp4-408',
|
'format': 'mp4-408',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Kaltura embed, whitespace between quote and embedded URL in iframe's src
|
||||||
|
'url': 'https://www.gdcvault.com/play/1025699',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0_zagynv0a',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tech Toolbox',
|
||||||
|
'upload_date': '20190408',
|
||||||
|
'uploader_id': 'joe@blazestreaming.com',
|
||||||
|
'timestamp': 1554764629,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# HTML5 video
|
||||||
|
'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _login(self, webpage_url, display_id):
|
def _login(self, webpage_url, display_id):
|
||||||
|
@ -120,38 +143,78 @@ class GDCVaultIE(InfoExtractor):
|
||||||
request = sanitized_Request(login_url, urlencode_postdata(login_form))
|
request = sanitized_Request(login_url, urlencode_postdata(login_form))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
self._download_webpage(request, display_id, 'Logging in')
|
self._download_webpage(request, display_id, 'Logging in')
|
||||||
webpage = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
|
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
|
||||||
self._download_webpage(logout_url, display_id, 'Logging out')
|
self._download_webpage(logout_url, display_id, 'Logging out')
|
||||||
|
|
||||||
return webpage
|
return start_page
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, name = re.match(self._VALID_URL, url).groups()
|
video_id, name = re.match(self._VALID_URL, url).groups()
|
||||||
display_id = name or video_id
|
display_id = name or video_id
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage_url = 'http://www.gdcvault.com/play/' + video_id
|
||||||
|
start_page = self._download_webpage(webpage_url, display_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
direct_url = self._search_regex(
|
||||||
r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
|
r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
|
||||||
webpage, 'title')
|
start_page, 'url', default=None)
|
||||||
|
if direct_url:
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
|
||||||
|
start_page, 'title')
|
||||||
|
video_url = 'http://www.gdcvault.com' + direct_url
|
||||||
|
# resolve the url so that we can detect the correct extension
|
||||||
|
video_url = self._request_webpage(
|
||||||
|
HEADRequest(video_url), video_id).geturl()
|
||||||
|
|
||||||
PLAYER_REGEX = r'<iframe src=\"(?P<manifest_url>.*?)\".*?</iframe>'
|
return {
|
||||||
manifest_url = self._html_search_regex(
|
'id': video_id,
|
||||||
PLAYER_REGEX, webpage, 'manifest_url')
|
'display_id': display_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
}
|
||||||
|
|
||||||
partner_id = self._search_regex(
|
embed_url = KalturaIE._extract_url(start_page)
|
||||||
r'/p(?:artner_id)?/(\d+)', manifest_url, 'partner id',
|
if embed_url:
|
||||||
default='1670711')
|
embed_url = smuggle_url(embed_url, {'source_url': url})
|
||||||
|
ie_key = 'Kaltura'
|
||||||
|
else:
|
||||||
|
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
|
||||||
|
|
||||||
kaltura_id = self._search_regex(
|
xml_root = self._html_search_regex(
|
||||||
r'entry_id=(?P<id>(?:[^&])+)', manifest_url,
|
PLAYER_REGEX, start_page, 'xml root', default=None)
|
||||||
'kaltura id', group='id')
|
if xml_root is None:
|
||||||
|
# Probably need to authenticate
|
||||||
|
login_res = self._login(webpage_url, display_id)
|
||||||
|
if login_res is None:
|
||||||
|
self.report_warning('Could not login.')
|
||||||
|
else:
|
||||||
|
start_page = login_res
|
||||||
|
# Grab the url from the authenticated page
|
||||||
|
xml_root = self._html_search_regex(
|
||||||
|
PLAYER_REGEX, start_page, 'xml root')
|
||||||
|
|
||||||
|
xml_name = self._html_search_regex(
|
||||||
|
r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
|
||||||
|
start_page, 'xml filename', default=None)
|
||||||
|
if not xml_name:
|
||||||
|
info = self._parse_html5_media_entries(url, start_page, video_id)[0]
|
||||||
|
info.update({
|
||||||
|
'title': remove_start(self._search_regex(
|
||||||
|
r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
|
||||||
|
'title', default=None) or self._og_search_title(
|
||||||
|
start_page, default=None), 'GDC Vault - '),
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
embed_url = '%s/xml/%s' % (xml_root, xml_name)
|
||||||
|
ie_key = 'DigitallySpeaking'
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
|
||||||
'ie_key': KalturaIE.ie_key(),
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'url': embed_url,
|
||||||
|
'ie_key': ie_key,
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,7 +120,7 @@ class KalturaIE(InfoExtractor):
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||||
finditer = (
|
finditer = (
|
||||||
re.finditer(
|
list(re.finditer(
|
||||||
r"""(?xs)
|
r"""(?xs)
|
||||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||||
\{.*?
|
\{.*?
|
||||||
|
@ -128,8 +128,8 @@ class KalturaIE(InfoExtractor):
|
||||||
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||||
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||||
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||||
""", webpage)
|
""", webpage))
|
||||||
or re.finditer(
|
or list(re.finditer(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
(?P<q1>["'])
|
(?P<q1>["'])
|
||||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||||
|
@ -142,16 +142,16 @@ class KalturaIE(InfoExtractor):
|
||||||
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
|
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
|
||||||
)
|
)
|
||||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||||
''', webpage)
|
''', webpage))
|
||||||
or re.finditer(
|
or list(re.finditer(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
|
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
|
||||||
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
||||||
(?:(?!(?P=q1)).)*
|
(?:(?!(?P=q1)).)*
|
||||||
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
|
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
|
||||||
(?:(?!(?P=q1)).)*
|
(?:(?!(?P=q1)).)*
|
||||||
(?P=q1)
|
(?P=q1)
|
||||||
''', webpage)
|
''', webpage))
|
||||||
)
|
)
|
||||||
urls = []
|
urls = []
|
||||||
for mobj in finditer:
|
for mobj in finditer:
|
||||||
|
|
|
@ -15,33 +15,39 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class MedalTVIE(InfoExtractor):
|
class MedalTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
|
'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
|
||||||
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34934644',
|
'id': '2mA60jWAGQCBH',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Quad Cold',
|
'title': 'Quad Cold',
|
||||||
'description': 'Medal,https://medal.tv/desktop/',
|
'description': 'Medal,https://medal.tv/desktop/',
|
||||||
'uploader': 'MowgliSB',
|
'uploader': 'MowgliSB',
|
||||||
'timestamp': 1603165266,
|
'timestamp': 1603165266,
|
||||||
'upload_date': '20201020',
|
'upload_date': '20201020',
|
||||||
'uploader_id': 10619174,
|
'uploader_id': '10619174',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://medal.tv/clips/36787208',
|
'url': 'https://medal.tv/clips/2um24TWdty0NA',
|
||||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '36787208',
|
'id': '2um24TWdty0NA',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'u tk me i tk u bigger',
|
'title': 'u tk me i tk u bigger',
|
||||||
'description': 'Medal,https://medal.tv/desktop/',
|
'description': 'Medal,https://medal.tv/desktop/',
|
||||||
'uploader': 'Mimicc',
|
'uploader': 'Mimicc',
|
||||||
'timestamp': 1605580939,
|
'timestamp': 1605580939,
|
||||||
'upload_date': '20201117',
|
'upload_date': '20201117',
|
||||||
'uploader_id': 5156321,
|
'uploader_id': '5156321',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -146,7 +146,7 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||||
)
|
)
|
||||||
(?P<svt_id>[^/?#&]+)|
|
(?P<svt_id>[^/?#&]+)|
|
||||||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
||||||
(?:.*?modalId=(?P<modal_id>[\da-zA-Z-]+))?
|
(?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))?
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -177,6 +177,9 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
|
'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.svtplay.se/video/30684086/rapport/rapport-24-apr-18-00-7?id=e72gVpa',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# geo restricted to Sweden
|
# geo restricted to Sweden
|
||||||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
||||||
|
@ -259,7 +262,7 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||||
if not svt_id:
|
if not svt_id:
|
||||||
svt_id = self._search_regex(
|
svt_id = self._search_regex(
|
||||||
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
||||||
r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\bmodalId=([\da-zA-Z-]+)' % re.escape(video_id),
|
r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\b(?:modalId|id)=([\da-zA-Z-]+)' % re.escape(video_id),
|
||||||
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
|
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
|
||||||
r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
|
r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
|
||||||
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
|
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
|
||||||
|
|
|
@ -74,6 +74,12 @@ class TV2DKIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
|
def add_entry(partner_id, kaltura_id):
|
||||||
|
entries.append(self.url_result(
|
||||||
|
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
|
||||||
|
video_id=kaltura_id))
|
||||||
|
|
||||||
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
|
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
|
||||||
video = extract_attributes(video_el)
|
video = extract_attributes(video_el)
|
||||||
kaltura_id = video.get('data-entryid')
|
kaltura_id = video.get('data-entryid')
|
||||||
|
@ -82,9 +88,14 @@ class TV2DKIE(InfoExtractor):
|
||||||
partner_id = video.get('data-partnerid')
|
partner_id = video.get('data-partnerid')
|
||||||
if not partner_id:
|
if not partner_id:
|
||||||
continue
|
continue
|
||||||
entries.append(self.url_result(
|
add_entry(partner_id, kaltura_id)
|
||||||
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
|
if not entries:
|
||||||
video_id=kaltura_id))
|
kaltura_id = self._search_regex(
|
||||||
|
r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
||||||
|
'partner id')
|
||||||
|
add_entry(partner_id, kaltura_id)
|
||||||
return self.playlist_result(entries)
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,6 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
remove_start,
|
remove_start,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
strip_or_none,
|
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -45,32 +44,18 @@ class TVerIE(InfoExtractor):
|
||||||
query={'token': self._TOKEN})['main']
|
query={'token': self._TOKEN})['main']
|
||||||
p_id = main['publisher_id']
|
p_id = main['publisher_id']
|
||||||
service = remove_start(main['service'], 'ts_')
|
service = remove_start(main['service'], 'ts_')
|
||||||
info = {
|
|
||||||
|
r_id = main['reference_id']
|
||||||
|
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
|
||||||
|
r_id = 'ref:' + r_id
|
||||||
|
bc_url = smuggle_url(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
|
||||||
|
{'geo_countries': ['JP']})
|
||||||
|
|
||||||
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
|
'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
|
||||||
'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
|
'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
|
||||||
|
'url': bc_url,
|
||||||
|
'ie_key': 'BrightcoveNew',
|
||||||
}
|
}
|
||||||
|
|
||||||
if service == 'cx':
|
|
||||||
title = main['title']
|
|
||||||
subtitle = strip_or_none(main.get('subtitle'))
|
|
||||||
if subtitle:
|
|
||||||
title += ' - ' + subtitle
|
|
||||||
info.update({
|
|
||||||
'title': title,
|
|
||||||
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
|
|
||||||
'ie_key': 'FujiTVFODPlus7',
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
r_id = main['reference_id']
|
|
||||||
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
|
|
||||||
r_id = 'ref:' + r_id
|
|
||||||
bc_url = smuggle_url(
|
|
||||||
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
|
|
||||||
{'geo_countries': ['JP']})
|
|
||||||
info.update({
|
|
||||||
'url': bc_url,
|
|
||||||
'ie_key': 'BrightcoveNew',
|
|
||||||
})
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ from ..utils import (
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -52,6 +53,9 @@ class TwitterBaseIE(InfoExtractor):
|
||||||
return [f], {}
|
return [f], {}
|
||||||
|
|
||||||
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
|
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
|
||||||
|
vmap_url = url_or_none(vmap_url)
|
||||||
|
if not vmap_url:
|
||||||
|
return []
|
||||||
vmap_data = self._download_xml(vmap_url, video_id)
|
vmap_data = self._download_xml(vmap_url, video_id)
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
|
|
@ -58,6 +58,7 @@ class XFileShareIE(InfoExtractor):
|
||||||
(r'vidlocker\.xyz', 'VidLocker'),
|
(r'vidlocker\.xyz', 'VidLocker'),
|
||||||
(r'vidshare\.tv', 'VidShare'),
|
(r'vidshare\.tv', 'VidShare'),
|
||||||
(r'vup\.to', 'VUp'),
|
(r'vup\.to', 'VUp'),
|
||||||
|
(r'wolfstream\.tv', 'WolfStream'),
|
||||||
(r'xvideosharing\.com', 'XVideoSharing'),
|
(r'xvideosharing\.com', 'XVideoSharing'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -82,6 +83,9 @@ class XFileShareIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://aparat.cam/n4d6dh0wvlpr',
|
'url': 'https://aparat.cam/n4d6dh0wvlpr',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://wolfstream.tv/nthme29v9u2x',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -71,10 +72,10 @@ class XTubeIE(InfoExtractor):
|
||||||
'Cookie': 'age_verified=1; cookiesAccepted=1',
|
'Cookie': 'age_verified=1; cookiesAccepted=1',
|
||||||
})
|
})
|
||||||
|
|
||||||
title, thumbnail, duration = [None] * 3
|
title, thumbnail, duration, sources, media_definition = [None] * 5
|
||||||
|
|
||||||
config = self._parse_json(self._search_regex(
|
config = self._parse_json(self._search_regex(
|
||||||
r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config',
|
r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config',
|
||||||
default='{}'), video_id, transform_source=js_to_json, fatal=False)
|
default='{}'), video_id, transform_source=js_to_json, fatal=False)
|
||||||
if config:
|
if config:
|
||||||
config = config.get('mainRoll')
|
config = config.get('mainRoll')
|
||||||
|
@ -83,20 +84,52 @@ class XTubeIE(InfoExtractor):
|
||||||
thumbnail = config.get('poster')
|
thumbnail = config.get('poster')
|
||||||
duration = int_or_none(config.get('duration'))
|
duration = int_or_none(config.get('duration'))
|
||||||
sources = config.get('sources') or config.get('format')
|
sources = config.get('sources') or config.get('format')
|
||||||
|
media_definition = config.get('mediaDefinition')
|
||||||
|
|
||||||
if not isinstance(sources, dict):
|
if not isinstance(sources, dict) and not media_definition:
|
||||||
sources = self._parse_json(self._search_regex(
|
sources = self._parse_json(self._search_regex(
|
||||||
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
|
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
|
||||||
webpage, 'sources', group='sources'), video_id,
|
webpage, 'sources', group='sources'), video_id,
|
||||||
transform_source=js_to_json)
|
transform_source=js_to_json)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in sources.items():
|
format_urls = set()
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
if isinstance(sources, dict):
|
||||||
'format_id': format_id,
|
for format_id, format_url in sources.items():
|
||||||
'height': int_or_none(format_id),
|
format_url = url_or_none(format_url)
|
||||||
})
|
if not format_url:
|
||||||
|
continue
|
||||||
|
if format_url in format_urls:
|
||||||
|
continue
|
||||||
|
format_urls.add(format_url)
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': int_or_none(format_id),
|
||||||
|
})
|
||||||
|
|
||||||
|
if isinstance(media_definition, list):
|
||||||
|
for media in media_definition:
|
||||||
|
video_url = url_or_none(media.get('videoUrl'))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
if video_url in format_urls:
|
||||||
|
continue
|
||||||
|
format_urls.add(video_url)
|
||||||
|
format_id = media.get('format')
|
||||||
|
if format_id == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif format_id == 'mp4':
|
||||||
|
height = int_or_none(media.get('quality'))
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': '%s-%d' % (format_id, height) if height else format_id,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
|
||||||
self._remove_duplicate_formats(formats)
|
self._remove_duplicate_formats(formats)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue