mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-31 12:32:27 +01:00
rewrite code to use json api
This commit is contained in:
parent
75fbe5dc83
commit
9174bc2a81
1 changed files with 119 additions and 123 deletions
|
@ -1,123 +1,53 @@
|
|||
import re
|
||||
import secrets
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class EggsBaseIE(InfoExtractor):
|
||||
def _parse_artist_name(self, webpage):
|
||||
artist = self._search_regex(
|
||||
r'<div[^>]+class=(["\'])artist_name\1[^>]*>([^<]+)</div>',
|
||||
webpage, 'artist name', fatal=False, default=None, group=2)
|
||||
if artist:
|
||||
return artist.strip()
|
||||
_API_HEADERS = {
|
||||
'Accept': '*/*',
|
||||
'apVersion': '8.2.00',
|
||||
'deviceName': 'Android',
|
||||
}
|
||||
|
||||
og_title = self._html_search_meta(['og:title'], webpage, 'og:title', default=None)
|
||||
if og_title:
|
||||
artist_match = re.search(r'(?P<artist>[^()]+)(?:\([^)]*\))?のEggsページ', og_title)
|
||||
if artist_match:
|
||||
return artist_match.group('artist').strip()
|
||||
@staticmethod
|
||||
def _generate_random_device_id():
|
||||
return secrets.token_hex(8)
|
||||
|
||||
return 'Unknown Artist'
|
||||
def _download_eggs_json(self, url, music_id):
|
||||
headers = self._API_HEADERS.copy()
|
||||
headers['deviceId'] = self._generate_random_device_id()
|
||||
return self._download_json(url, video_id=music_id, headers=headers)
|
||||
|
||||
def _parse_single_song(self, url, webpage, song_id, default_artist='Unknown Artist'):
|
||||
track_title = self._search_regex(
|
||||
r'<div[^>]+class=(["\'])product_name\1[^>]*>\s*<p>([^<]+)</p>',
|
||||
webpage, 'track title', fatal=False, default=None, group=2)
|
||||
def _extract_music_info(self, data, song_id):
|
||||
music_info = traverse_obj(data, {
|
||||
'id': ('musicId', {str_or_none}, {lambda x: x or song_id}),
|
||||
'title': ('musicTitle', {str}, {lambda x: x or 'Unknown Title'}),
|
||||
'url': ('musicDataPath', {url_or_none}),
|
||||
'uploader': ('artist', 'displayName', {str}, {lambda x: x or 'Unknown Artist'}),
|
||||
'thumbnail': ('imageDataPath', {url_or_none}),
|
||||
'youtube_url': ('youtubeUrl', {url_or_none}),
|
||||
'youtube_id': ('youtubeVideoId', {str_or_none}),
|
||||
'source_type': ('sourceType', {int}),
|
||||
'vcodec': (None, {lambda x: 'none'}),
|
||||
}, get_all=False)
|
||||
|
||||
if not track_title:
|
||||
page_title = self._search_regex(
|
||||
r'<title>(?P<title>[^<]+)</title>',
|
||||
webpage, 'page title', fatal=False, default=None, group='title')
|
||||
if page_title:
|
||||
inner_match = re.search(r'「(?P<inner>[^」]+)」', page_title)
|
||||
if inner_match:
|
||||
track_title = inner_match.group('inner').strip()
|
||||
if not music_info.get('url') and not (music_info.get('source_type') == 2 and music_info.get('youtube_url')):
|
||||
raise ExtractorError('Audio URL not found (possibly an unsupported sourceType)', expected=True)
|
||||
|
||||
if not track_title:
|
||||
track_title = 'Unknown Title'
|
||||
|
||||
artist = default_artist
|
||||
if not artist or artist == 'Unknown Artist':
|
||||
artist_regex = r'<span[^>]+class=(["\'])artist_name\1[^>]*>\s*<a[^>]*>([^<]+)</a>'
|
||||
fallback_artist = self._search_regex(
|
||||
artist_regex, webpage, 'artist name',
|
||||
fatal=False, default=None, group=2)
|
||||
if fallback_artist:
|
||||
artist = fallback_artist.strip()
|
||||
|
||||
audio_url = self._search_regex(
|
||||
r'<div[^>]+class=(["\'])[^"\']*player[^"\']*\1[^>]+data-src=(["\'])(?P<audio_url>[^"\']+)\2',
|
||||
webpage, 'audio url', fatal=True, group='audio_url')
|
||||
audio_url = url_or_none(unescapeHTML(audio_url))
|
||||
if not audio_url:
|
||||
raise ExtractorError('Invalid audio URL.', expected=True)
|
||||
|
||||
thumbnail = (
|
||||
self._html_search_meta(['og:image'], webpage, 'thumbnail', default=None)
|
||||
or self._search_regex(
|
||||
r'<span[^>]*>\s*<img[^>]+src=(["\'])(?P<thumb>[^"\']+)\1',
|
||||
webpage, 'thumbnail', fatal=False, default=None, group='thumb')
|
||||
)
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': audio_url,
|
||||
'title': track_title,
|
||||
'uploader': artist,
|
||||
'vcodec': 'none',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
def _parse_artist_page(self, webpage, artist_id, artist_name):
|
||||
song_blocks = re.findall(r'(?s)<li[^>]+id="songs\d+"[^>]*>.*?</li>', webpage)
|
||||
entries = []
|
||||
|
||||
for block in song_blocks:
|
||||
audio_url = self._search_regex(
|
||||
r'data-src=(["\'])(?P<url>https?://.*?\.(?:mp3|m4a).*?)\1',
|
||||
block, 'audio url', fatal=False, default=None, group='url')
|
||||
audio_url = url_or_none(unescapeHTML(audio_url))
|
||||
if not audio_url:
|
||||
continue
|
||||
|
||||
track_id = self._search_regex(
|
||||
r'data-srcid=(["\'])(?P<id>[^"\'<>]+)\1',
|
||||
block, 'track id', fatal=False, default=None, group='id')
|
||||
if not track_id:
|
||||
continue
|
||||
|
||||
title = self._search_regex(
|
||||
r'data-srcname=(["\'])(?P<title>[^"\']+)\1',
|
||||
block, 'track title', fatal=False, default=None, group='title')
|
||||
if not title:
|
||||
title = 'Unknown Title'
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'<img[^>]+src=(["\'])(?P<th>[^"\']+)\1',
|
||||
block, 'thumbnail', fatal=False, default=None, group='th')
|
||||
|
||||
entries.append({
|
||||
'id': track_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
'uploader': artist_name,
|
||||
'vcodec': 'none',
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
|
||||
return entries
|
||||
return music_info
|
||||
|
||||
|
||||
class EggsIE(EggsBaseIE):
|
||||
IE_NAME = 'eggs:single'
|
||||
_VALID_URL = (
|
||||
r'https?://(?:www\.)?eggs\.mu/artist/[^/]+/song/(?P<song_id>[^/]+)'
|
||||
)
|
||||
_VALID_URL = r'https?://eggs\.mu/artist/[^/]+/song/(?P<song_id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
|
||||
'info_dict': {
|
||||
|
@ -125,39 +55,105 @@ class EggsIE(EggsBaseIE):
|
|||
'ext': 'm4a',
|
||||
'title': 'シネマと信号',
|
||||
'uploader': 'Sunny Girl',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
|
||||
'source_type': 1,
|
||||
'thumbnail': r're:https?://.*\.jpg(?:\?.*)?$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://eggs.mu/artist/KAMO_3pband/song/1d4bc45f-1af6-47a9-8b30-a70cae350b4f',
|
||||
'info_dict': {
|
||||
'id': '80cLKA2wnoA',
|
||||
'ext': 'mp4',
|
||||
'title': 'KAMO「いい女だから」Audio',
|
||||
'uploader': 'KAMO',
|
||||
'live_status': 'not_live',
|
||||
'channel_id': 'UCsHLBw2__5Q9y55skXPotOg',
|
||||
'channel_follower_count': int,
|
||||
'description': 'md5:d260da711ecbec3e720293dc11401b87',
|
||||
'availability': 'public',
|
||||
'uploader_id': '@KAMO_band',
|
||||
'upload_date': '20240925',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/80cLKA2wnoA/maxresdefault.jpg',
|
||||
'comment_count': int,
|
||||
'channel_url': 'https://www.youtube.com/channel/UCsHLBw2__5Q9y55skXPotOg',
|
||||
'view_count': int,
|
||||
'duration': 151,
|
||||
'like_count': int,
|
||||
'channel': 'KAMO',
|
||||
'playable_in_embed': True,
|
||||
'uploader_url': 'https://www.youtube.com/@KAMO_band',
|
||||
'tags': [],
|
||||
'timestamp': 1727271121,
|
||||
'age_limit': 0,
|
||||
'categories': ['People & Blogs'],
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'params': {'skip_download': 'Youtube'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
song_id = mobj.group('song_id')
|
||||
webpage = self._download_webpage(url, song_id)
|
||||
artist_name = self._parse_artist_name(webpage)
|
||||
return self._parse_single_song(url, webpage, song_id, artist_name)
|
||||
song_id = self._match_valid_url(url).group('song_id')
|
||||
json_data = self._download_eggs_json(
|
||||
f'https://app-front-api.eggs.mu/v1/musics/{song_id}', music_id=song_id)
|
||||
music_info = self._extract_music_info(json_data, song_id)
|
||||
|
||||
if music_info['source_type'] == 2 and music_info['youtube_url']:
|
||||
return self.url_result(
|
||||
music_info['youtube_url'], ie='Youtube', video_id=music_info['youtube_id'])
|
||||
|
||||
return music_info
|
||||
|
||||
|
||||
class EggsArtistIE(EggsBaseIE):
|
||||
IE_NAME = 'eggs:artist'
|
||||
_VALID_URL = (
|
||||
r'https?://(?:www\.)?eggs\.mu/artist/(?P<artist_id>[^/]+)$'
|
||||
)
|
||||
_TESTS = [{
|
||||
'url': 'https://eggs.mu/artist/32_sunny_girl',
|
||||
'info_dict': {
|
||||
'id': '32_sunny_girl',
|
||||
'title': 'Sunny Girl',
|
||||
_VALID_URL = r'https?://eggs\.mu/artist/(?P<artist_id>[^/]+)$'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://eggs.mu/artist/32_sunny_girl',
|
||||
'info_dict': {
|
||||
'id': '32_sunny_girl',
|
||||
'title': 'Sunny Girl',
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}]
|
||||
{
|
||||
'url': 'https://eggs.mu/artist/KAMO_3pband',
|
||||
'info_dict': {
|
||||
'id': 'KAMO_3pband',
|
||||
'title': 'KAMO',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
artist_id = self._match_valid_url(url).group('artist_id')
|
||||
webpage = self._download_webpage(url, artist_id)
|
||||
artist_name = self._parse_artist_name(webpage)
|
||||
entries = self._parse_artist_page(webpage, artist_id, artist_name)
|
||||
json_data = self._download_eggs_json(
|
||||
f'https://app-front-api.eggs.mu/v1/artists/{artist_id}/musics', music_id=artist_id)
|
||||
items = traverse_obj(json_data, 'data', default=[])
|
||||
entries = []
|
||||
display_name = None
|
||||
|
||||
for item in items:
|
||||
music_info = self._extract_music_info(item, '')
|
||||
if not music_info['id']:
|
||||
continue
|
||||
|
||||
if not display_name:
|
||||
display_name = music_info['uploader']
|
||||
|
||||
if music_info['source_type'] == 2 and music_info['youtube_url']:
|
||||
entries.append(
|
||||
self.url_result(
|
||||
music_info['youtube_url'], ie='Youtube', video_id=music_info['youtube_id']))
|
||||
continue
|
||||
|
||||
if not music_info.get('url'):
|
||||
continue
|
||||
|
||||
entries.append(music_info)
|
||||
|
||||
return self.playlist_result(
|
||||
entries,
|
||||
playlist_id=artist_id,
|
||||
playlist_title=artist_name,
|
||||
)
|
||||
playlist_title=display_name or artist_id)
|
||||
|
|
Loading…
Add table
Reference in a new issue