mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-18 23:03:05 +01:00
[ie/digiteka] Fix testing and some formatting errors
This commit is contained in:
parent
685f96a052
commit
434f6ff6b9
1 changed file with 44 additions and 28 deletions
|
@ -1,6 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class DigitekaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
|
||||
|
@ -22,31 +23,52 @@ class DigitekaIE(InfoExtractor):
|
|||
)
|
||||
/id
|
||||
)/(?P<id>[\d+a-z]+)'''
|
||||
_EMBED_REGEX = [r'<(?:iframe|script)(?:(?!>)[\s\S])*(?:data-)?src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?P<embed_type>generic|musique)(?:/[^/]+)*/(?:src|article)/(?P<id>[\d+a-z]+))']
|
||||
_EMBED_REGEX = [
|
||||
r'<(?:iframe|script)(?:(?!>)[\s\S])*(?:data-)?src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?P<embed_type>generic|musique)(?:/[^/]+)*/(?:src|article)/(?P<id>[\d+a-z]+))',
|
||||
]
|
||||
_TESTS = [
|
||||
{'url': 'https://www.ultimedia.com/deliver/generic/iframe/mdtk/01747256/zone/60/src/x8smpxf'}, # direct url
|
||||
{'url': 'https://www.boursorama.com/bourse/actualites/le-retour-des-taux-negatifs-est-il-possible-169e3e0cf337df132285b41e124dc98e'} # from an embed
|
||||
{
|
||||
'url': 'https://www.ultimedia.com/deliver/generic/iframe/mdtk/01747256/zone/60/src/x8smpxf',
|
||||
'info_dict': {
|
||||
'id': 'x8smpxf',
|
||||
'title': 'B. Bazin (Saint-Gobain) \'Notre cours de bourse a doublé depuis 2 ans et il a encore du potentiel !\'',
|
||||
'thumbnail': 'https://vod.digiteka.com/x8smpxf/thumbnails/e7c0403e5ff43ef78ee7baa8e27d3c26fb1deaa4-858x480.jpg',
|
||||
'url': 'https://assets.digiteka.com/encoded/04ddd4e10a9bb92f2a6e15d5adf40c9154db532a/mp4/d2da1c9e12f03d3f_480.mp4',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
},
|
||||
]
|
||||
_WEBPAGE_TESTS = [
|
||||
{
|
||||
'url': 'https://www.boursorama.com/bourse/actualites/le-retour-des-taux-negatifs-est-il-possible-169e3e0cf337df132285b41e124dc98e',
|
||||
'info_dict': {
|
||||
'id': 'xvussq5',
|
||||
'title': 'Le retour des taux négatifs est-il possible ? ',
|
||||
'thumbnail': 'https://vod.digiteka.com/xvussq5/thumbnails/9a4df121fc0532ab4d0befbece630fd7725d91a7-858x480.jpg',
|
||||
'url': 'https://assets.digiteka.com/encoded/0308c71b8ba91157ae76f0ca21c58f80e63ccf7a/mp4/0dde8b5bc0a8f240_480.mp4',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _fallback_to_iframe_content(self, url, video_id):
|
||||
iframe_content = self._download_webpage(url, video_id)
|
||||
VIDEO_URL_REGEX = '<meta property="og:video" content="(?P<url>.*)"/>'
|
||||
VIDEO_TITLE_REGEX = '<meta property="og:title" content="(?P<title>.*)"/>'
|
||||
VIDEO_THUMBNAIL_REGEX = '<meta property="og:image" content="(?P<thumbnail>.*)"/>'
|
||||
|
||||
video_url = self._search_regex(VIDEO_URL_REGEX, iframe_content, 'url')
|
||||
video_url = self._og_search_video_url(iframe_content)
|
||||
video_format = video_url.split('.')[-1]
|
||||
video_title = self._search_regex(VIDEO_TITLE_REGEX, iframe_content, 'title')
|
||||
video_thumbnail = self._search_regex(VIDEO_THUMBNAIL_REGEX, iframe_content, 'thumbnail')
|
||||
video_title = self._og_search_title(iframe_content)
|
||||
video_thumbnail = self._og_search_thumbnail(iframe_content)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
'ext': video_format,
|
||||
}]
|
||||
'formats': [
|
||||
{
|
||||
'url': video_url,
|
||||
'ext': video_format,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -58,9 +80,9 @@ class DigitekaIE(InfoExtractor):
|
|||
|
||||
deliver_info = self._download_json(
|
||||
f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
|
||||
video_id)
|
||||
video_id,
|
||||
)
|
||||
if not deliver_info:
|
||||
# Apparently the deliver_info of some videos is no longer accessible this way
|
||||
return self._fallback_to_iframe_content(url, video_id)
|
||||
yt_id = deliver_info.get('yt_id')
|
||||
if yt_id:
|
||||
|
@ -68,25 +90,19 @@ class DigitekaIE(InfoExtractor):
|
|||
|
||||
jwconf = deliver_info['jwconf']
|
||||
|
||||
|
||||
formats = []
|
||||
|
||||
for source in jwconf['playlist'][0]['sources']:
|
||||
if source['file'] is not False:
|
||||
formats.append({
|
||||
'url': source['file'],
|
||||
'format_id': source.get('label'),
|
||||
})
|
||||
if len(formats) == 0:
|
||||
# the file urls are not available from the json directly anymore, but
|
||||
# can be found in the iframe content
|
||||
formats.append(
|
||||
{
|
||||
'url': source['file'],
|
||||
'format_id': source.get('label'),
|
||||
},
|
||||
)
|
||||
if not formats:
|
||||
return self._fallback_to_iframe_content(url, video_id)
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': video_format,
|
||||
})
|
||||
|
||||
title = deliver_info['title']
|
||||
thumbnail = jwconf.get('image')
|
||||
duration = int_or_none(deliver_info.get('duration'))
|
||||
|
|
Loading…
Add table
Reference in a new issue