1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp synced 2025-01-18 14:53:04 +01:00

[cleanup] Use _html_extract_title

This commit is contained in:
pukkandan 2022-04-04 13:57:35 +05:30
parent 85e801a9db
commit 04f3fd2c89
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
38 changed files with 51 additions and 80 deletions

View file

@ -534,13 +534,13 @@ Extracting variables is acceptable for reducing code duplication and improving r
Correct: Correct:
```python ```python
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title') title = self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'title')
``` ```
Incorrect: Incorrect:
```python ```python
TITLE_RE = r'<title>([^<]+)</title>' TITLE_RE = r'<h1>([^<]+)</h1>'
# ...some lines of code... # ...some lines of code...
title = self._html_search_regex(TITLE_RE, webpage, 'title') title = self._html_search_regex(TITLE_RE, webpage, 'title')
``` ```

View file

@ -14,7 +14,7 @@ class AdobeConnectIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') title = self._html_extract_title(webpage)
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
is_live = qs.get('isLive', ['false'])[0] == 'true' is_live = qs.get('isLive', ['false'])[0] == 'true'
formats = [] formats = []

View file

@ -7,6 +7,7 @@ from ..utils import (
int_or_none, int_or_none,
qualities, qualities,
remove_end, remove_end,
strip_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_basename, url_basename,
@ -102,10 +103,7 @@ class AllocineIE(InfoExtractor):
video_id = display_id video_id = display_id
media_data = self._download_json( media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
title = remove_end( title = remove_end(strip_or_none(self._html_extract_title(webpage)), ' - AlloCiné')
self._html_search_regex(
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
' - AlloCiné')
for key, value in media_data['video'].items(): for key, value in media_data['video'].items():
if not key.endswith('Path'): if not key.endswith('Path'):
continue continue

View file

@ -483,8 +483,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
regex), webpage, name, default='{}'), video_id, fatal=False) regex), webpage, name, default='{}'), video_id, fatal=False)
def _extract_webpage_title(self, webpage): def _extract_webpage_title(self, webpage):
page_title = self._html_search_regex( page_title = self._html_extract_title(webpage, default='')
r'<title>([^<]*)</title>', webpage, 'title', default='')
# YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix. # YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
return self._html_search_regex( return self._html_search_regex(
r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)', r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',

View file

@ -181,8 +181,7 @@ class AsianCrushPlaylistIE(AsianCrushBaseIE):
'title', default=None) or self._og_search_title( 'title', default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta( webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title', 'twitter:title', webpage, 'title',
default=None) or self._search_regex( default=None) or self._html_extract_title(webpage)
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
if title: if title:
title = re.sub(r'\s*\|\s*.+?$', '', title) title = re.sub(r'\s*\|\s*.+?$', '', title)

View file

@ -906,9 +906,8 @@ class BBCIE(BBCCoUkIE):
playlist_title = json_ld_info.get('title') playlist_title = json_ld_info.get('title')
if not playlist_title: if not playlist_title:
playlist_title = self._og_search_title( playlist_title = (self._og_search_title(webpage, default=None)
webpage, default=None) or self._html_search_regex( or self._html_extract_title(webpage, 'playlist title', default=None))
r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
if playlist_title: if playlist_title:
playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip() playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()

View file

@ -29,9 +29,8 @@ class BreitBartIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title( 'title': (self._og_search_title(webpage, default=None)
webpage, default=None) or self._html_search_regex( or self._html_extract_title(webpage, 'video title')),
r'(?s)<title>(.*?)</title>', webpage, 'video title'),
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'age_limit': self._rta_search(webpage), 'age_limit': self._rta_search(webpage),

View file

@ -54,7 +54,7 @@ class CallinIE(InfoExtractor):
id = episode['id'] id = episode['id']
title = (episode.get('title') title = (episode.get('title')
or self._og_search_title(webpage, fatal=False) or self._og_search_title(webpage, fatal=False)
or self._html_search_regex('<title>(.*?)</title>', webpage, 'title')) or self._html_extract_title(webpage))
url = episode['m3u8'] url = episode['m3u8']
formats = self._extract_m3u8_formats(url, display_id, ext='ts') formats = self._extract_m3u8_formats(url, display_id, ext='ts')
self._sort_formats(formats) self._sort_formats(formats)

View file

@ -127,9 +127,9 @@ class CBCIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage, default=None) or self._html_search_meta( title = (self._og_search_title(webpage, default=None)
'twitter:title', webpage, 'title', default=None) or self._html_search_regex( or self._html_search_meta('twitter:title', webpage, 'title', default=None)
r'<title>([^<]+)</title>', webpage, 'title', fatal=False) or self._html_extract_title(webpage))
entries = [ entries = [
self._extract_player_init(player_init, display_id) self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]

View file

@ -54,8 +54,7 @@ class CloserToTruthIE(InfoExtractor):
r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)', r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
webpage, 'kaltura partner_id') webpage, 'kaltura partner_id')
title = self._search_regex( title = self._html_extract_title(webpage, 'video title')
r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
select = self._search_regex( select = self._search_regex(
r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>', r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',

View file

@ -1329,9 +1329,8 @@ class InfoExtractor(object):
def _og_search_description(self, html, **kargs): def _og_search_description(self, html, **kargs):
return self._og_search_property('description', html, fatal=False, **kargs) return self._og_search_property('description', html, fatal=False, **kargs)
def _og_search_title(self, html, **kargs): def _og_search_title(self, html, *, fatal=False, **kargs):
kargs.setdefault('fatal', False) return self._og_search_property('title', html, fatal=fatal, **kargs)
return self._og_search_property('title', html, **kargs)
def _og_search_video_url(self, html, name='video url', secure=True, **kargs): def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
regexes = self._og_regexes('video') + self._og_regexes('video:url') regexes = self._og_regexes('video') + self._og_regexes('video:url')
@ -1342,9 +1341,8 @@ class InfoExtractor(object):
def _og_search_url(self, html, **kargs): def _og_search_url(self, html, **kargs):
return self._og_search_property('url', html, **kargs) return self._og_search_property('url', html, **kargs)
def _html_extract_title(self, html, name, **kwargs): def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
return self._html_search_regex( return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
r'(?s)<title>(.*?)</title>', html, name, **kwargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
name = variadic(name) name = variadic(name)

View file

@ -278,7 +278,7 @@ class CSpanCongressIE(InfoExtractor):
video_id, transform_source=js_to_json) video_id, transform_source=js_to_json)
title = (self._og_search_title(webpage, default=None) title = (self._og_search_title(webpage, default=None)
or self._html_search_regex(r'(?s)<title>(.*?)</title>', webpage, 'video title')) or self._html_extract_title(webpage, 'video title'))
description = (self._og_search_description(webpage, default=None) description = (self._og_search_description(webpage, default=None)
or self._html_search_meta('description', webpage, 'description', default=None)) or self._html_search_meta('description', webpage, 'description', default=None))

View file

@ -75,8 +75,7 @@ class FiveTVIE(InfoExtractor):
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'], r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
webpage, 'video url') webpage, 'video url')
title = self._og_search_title(webpage, default=None) or self._search_regex( title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
r'<title>([^<]+)</title>', webpage, 'title')
duration = int_or_none(self._og_search_property( duration = int_or_none(self._og_search_property(
'video:duration', webpage, 'duration', default=None)) 'video:duration', webpage, 'duration', default=None))

View file

@ -29,8 +29,7 @@ class FoxgayIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = remove_end(self._html_search_regex( title = remove_end(self._html_extract_title(webpage), ' - Foxgay.com')
r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com')
description = get_element_by_id('inf_tit', webpage) description = get_element_by_id('inf_tit', webpage)
# The default user-agent with foxgay cookies leads to pages without videos # The default user-agent with foxgay cookies leads to pages without videos

View file

@ -2873,10 +2873,8 @@ class GenericIE(InfoExtractor):
# Site Name | Video Title # Site Name | Video Title
# Video Title - Tagline | Site Name # Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical # and so on and so forth; it's just not practical
video_title = self._og_search_title( video_title = (self._og_search_title(webpage, default=None)
webpage, default=None) or self._html_search_regex( or self._html_extract_title(webpage, 'video title', default='video'))
r'(?s)<title>(.*?)</title>', webpage, 'video title',
default='video')
# Try to detect age limit automatically # Try to detect age limit automatically
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)

View file

@ -23,9 +23,7 @@ class GlideIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex( title = self._html_extract_title(webpage, default=None) or self._og_search_title(webpage)
r'<title>(.+?)</title>', webpage,
'title', default=None) or self._og_search_title(webpage)
video_url = self._proto_relative_url(self._search_regex( video_url = self._proto_relative_url(self._search_regex(
r'<source[^>]+src=(["\'])(?P<url>.+?)\1', r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'video URL', default=None, webpage, 'video URL', default=None,

View file

@ -38,8 +38,7 @@ class HellPornoIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
title = remove_end(self._html_search_regex( title = remove_end(self._html_extract_title(webpage), ' - Hell Porno')
r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
info = self._parse_html5_media_entries(url, webpage, display_id)[0] info = self._parse_html5_media_entries(url, webpage, display_id)[0]
self._sort_formats(info['formats']) self._sort_formats(info['formats'])

View file

@ -66,8 +66,7 @@ class HuyaLiveIE(InfoExtractor):
room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo']) room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
if not room_info: if not room_info:
raise ExtractorError('Can not extract the room info', expected=True) raise ExtractorError('Can not extract the room info', expected=True)
title = room_info.get('roomName') or room_info.get('introduction') or self._html_search_regex( title = room_info.get('roomName') or room_info.get('introduction') or self._html_extract_title(webpage)
r'<title>([^<]+)</title>', webpage, 'title')
screen_type = room_info.get('screenType') screen_type = room_info.get('screenType')
live_source_type = room_info.get('liveSourceType') live_source_type = room_info.get('liveSourceType')
stream_info_list = stream_data['data'][0]['gameStreamInfoList'] stream_info_list = stream_data['data'][0]['gameStreamInfoList']

View file

@ -68,7 +68,7 @@ class ImdbIE(InfoExtractor):
video_info = traverse_obj(info, ('props', 'pageProps', 'videoPlaybackData', 'video'), default={}) video_info = traverse_obj(info, ('props', 'pageProps', 'videoPlaybackData', 'video'), default={})
title = (traverse_obj(video_info, ('name', 'value'), ('primaryTitle', 'titleText', 'text')) title = (traverse_obj(video_info, ('name', 'value'), ('primaryTitle', 'titleText', 'text'))
or self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None) or self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None)
or self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')) or self._html_extract_title(webpage))
data = video_info.get('playbackURLs') or try_get(self._download_json( data = video_info.get('playbackURLs') or try_get(self._download_json(
'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id, 'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
query={ query={

View file

@ -115,7 +115,7 @@ class InfoQIE(BokeCCBaseIE):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title') video_title = self._html_extract_title(webpage)
video_description = self._html_search_meta('description', webpage, 'description') video_description = self._html_search_meta('description', webpage, 'description')
if '/cn/' in url: if '/cn/' in url:

View file

@ -76,8 +76,7 @@ class IwaraIE(InfoExtractor):
'age_limit': age_limit, 'age_limit': age_limit,
} }
title = remove_end(self._html_search_regex( title = remove_end(self._html_extract_title(webpage), ' | Iwara')
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None) r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)

View file

@ -102,7 +102,7 @@ class LinkedInIE(LinkedInBaseIE):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title') title = self._html_extract_title(webpage)
description = clean_html(get_element_by_class('share-update-card__update-text', webpage)) description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage)) like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage))) creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))

View file

@ -24,8 +24,7 @@ class MiaoPaiIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD}) url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD})
title = self._html_search_regex( title = self._html_extract_title(webpage)
r'<title>([^<]+)</title>', webpage, 'title')
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)', r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
webpage, 'thumbnail', fatal=False, group='url') webpage, 'thumbnail', fatal=False, group='url')

View file

@ -38,8 +38,7 @@ class MojvideoIE(InfoExtractor):
r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False) r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)
title = self._html_search_regex( title = self._html_extract_title(playerapi)
r'<title>([^<]+)</title>', playerapi, 'title')
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'<file>([^<]+)</file>', playerapi, 'video URL') r'<file>([^<]+)</file>', playerapi, 'video URL')
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(

View file

@ -106,8 +106,7 @@ class NewgroundsIE(InfoExtractor):
uploader = None uploader = None
webpage = self._download_webpage(url, media_id) webpage = self._download_webpage(url, media_id)
title = self._html_search_regex( title = self._html_extract_title(webpage)
r'<title>(.+?)</title>', webpage, 'title')
media_url_string = self._search_regex( media_url_string = self._search_regex(
r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None) r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)
@ -219,8 +218,7 @@ class NewgroundsPlaylistIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
title = self._search_regex( title = self._html_extract_title(webpage, default=None)
r'<title>([^>]+)</title>', webpage, 'title', default=None)
# cut left menu # cut left menu
webpage = self._search_regex( webpage = self._search_regex(

View file

@ -309,7 +309,9 @@ class NhkForSchoolProgramListIE(InfoExtractor):
webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id) webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)
title = self._og_search_title(webpage, fatal=False) or self._html_extract_title(webpage, fatal=False) or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False) title = (self._og_search_title(webpage)
or self._html_extract_title(webpage)
or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False))
title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
description = self._html_search_regex( description = self._html_search_regex(
r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>', r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>',

View file

@ -85,8 +85,7 @@ class PlayvidIE(InfoExtractor):
# Extract title - should be in the flashvars; if not, look elsewhere # Extract title - should be in the flashvars; if not, look elsewhere
if video_title is None: if video_title is None:
video_title = self._html_search_regex( video_title = self._html_extract_title(webpage)
r'<title>(.*?)</title', webpage, 'title')
return { return {
'id': video_id, 'id': video_id,

View file

@ -49,7 +49,7 @@ class Rule34VideoIE(InfoExtractor):
'quality': quality, 'quality': quality,
}) })
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title') title = self._html_extract_title(webpage)
thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None) thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None) duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)

View file

@ -112,7 +112,7 @@ class SenateISVPIE(InfoExtractor):
if smuggled_data.get('force_title'): if smuggled_data.get('force_title'):
title = smuggled_data['force_title'] title = smuggled_data['force_title']
else: else:
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id) title = self._html_extract_title(webpage)
poster = qs.get('poster') poster = qs.get('poster')
thumbnail = poster[0] if poster else None thumbnail = poster[0] if poster else None

View file

@ -36,8 +36,7 @@ class SunPornoIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.sunporno.com/videos/%s' % video_id, video_id) 'http://www.sunporno.com/videos/%s' % video_id, video_id)
title = self._html_search_regex( title = self._html_extract_title(webpage)
r'<title>([^<]+)</title>', webpage, 'title')
description = self._html_search_meta( description = self._html_search_meta(
'description', webpage, 'description') 'description', webpage, 'description')
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(

View file

@ -37,9 +37,7 @@ class ThisAVIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = remove_end(self._html_search_regex( title = remove_end(self._html_extract_title(webpage), ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')
r'<title>([^<]+)</title>', webpage, 'title'),
' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')
video_url = self._html_search_regex( video_url = self._html_search_regex(
r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None)
if video_url: if video_url:

View file

@ -24,8 +24,7 @@ class TrailerAddictIE(InfoExtractor):
name = mobj.group('movie') + '/' + mobj.group('trailer_name') name = mobj.group('movie') + '/' + mobj.group('trailer_name')
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
title = self._search_regex(r'<title>(.+?)</title>', title = self._html_extract_title(webpage, 'video title').replace(' - Trailer Addict', '')
webpage, 'video title').replace(' - Trailer Addict', '')
view_count_str = self._search_regex( view_count_str = self._search_regex(
r'<span class="views_n">([0-9,.]+)</span>', r'<span class="views_n">([0-9,.]+)</span>',
webpage, 'view count', fatal=False) webpage, 'view count', fatal=False)

View file

@ -42,8 +42,7 @@ class Varzesh3IE(InfoExtractor):
video_url = self._search_regex( video_url = self._search_regex(
r'<source[^>]+src="([^"]+)"', webpage, 'video url') r'<source[^>]+src="([^"]+)"', webpage, 'video url')
title = remove_start(self._html_search_regex( title = remove_start(self._html_extract_title(webpage), 'ویدیو ورزش 3 | ')
r'<title>([^<]+)</title>', webpage, 'title'), 'ویدیو ورزش 3 | ')
description = self._html_search_regex( description = self._html_search_regex(
r'(?s)<div class="matn">(.+?)</div>', r'(?s)<div class="matn">(.+?)</div>',

View file

@ -50,8 +50,7 @@ class VShareIE(InfoExtractor):
'https://vshare.io/v/%s/width-650/height-430/1' % video_id, 'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
video_id, headers={'Referer': url}) video_id, headers={'Referer': url})
title = self._html_search_regex( title = self._html_extract_title(webpage)
r'<title>([^<]+)</title>', webpage, 'title')
title = title.split(' - ')[0] title = title.split(' - ')[0]
error = self._html_search_regex( error = self._html_search_regex(

View file

@ -28,7 +28,7 @@ class VuploadIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') title = self._html_extract_title(webpage)
video_json = self._parse_json(self._html_search_regex(r'sources:\s*(.+?]),', webpage, 'video'), video_id, transform_source=js_to_json) video_json = self._parse_json(self._html_search_regex(r'sources:\s*(.+?]),', webpage, 'video'), video_id, transform_source=js_to_json)
formats = [] formats = []
for source in video_json: for source in video_json:

View file

@ -73,8 +73,7 @@ class WeiboIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
url, video_id, note='Revisiting webpage') url, video_id, note='Revisiting webpage')
title = self._html_search_regex( title = self._html_extract_title(webpage)
r'<title>(.+?)</title>', webpage, 'title')
video_formats = compat_parse_qs(self._search_regex( video_formats = compat_parse_qs(self._search_regex(
r'video-sources=\\\"(.+?)\"', webpage, 'video_sources')) r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))

View file

@ -533,7 +533,7 @@ class YahooJapanNewsIE(InfoExtractor):
title = self._html_search_meta( title = self._html_search_meta(
['og:title', 'twitter:title'], webpage, 'title', default=None ['og:title', 'twitter:title'], webpage, 'title', default=None
) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title') ) or self._html_extract_title(webpage)
if display_id == host: if display_id == host:
# Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...) # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)

View file

@ -36,8 +36,7 @@ class YouJizzIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex( title = self._html_extract_title(webpage)
r'<title>(.+?)</title>', webpage, 'title')
formats = [] formats = []