mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-18 23:03:05 +01:00
[ie/Bandcamp:user] Fix extraction (#10328)
Authored by: quad, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
This commit is contained in:
parent
409f8e9e3b
commit
5d0176547f
1 changed files with 17 additions and 5 deletions
|
@ -1,3 +1,5 @@
|
||||||
|
import functools
|
||||||
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
@ -6,7 +8,9 @@ from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
extract_attributes,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
get_element_html_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@ -17,6 +21,7 @@ from ..utils import (
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
|
@ -459,7 +464,7 @@ class BandcampUserIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://coldworldofficial.bandcamp.com/music',
|
'url': 'https://coldworldofficial.bandcamp.com/music',
|
||||||
'playlist_mincount': 10,
|
'playlist_mincount': 7,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'coldworldofficial',
|
'id': 'coldworldofficial',
|
||||||
'title': 'Discography of coldworldofficial',
|
'title': 'Discography of coldworldofficial',
|
||||||
|
@ -473,12 +478,19 @@ class BandcampUserIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _yield_items(self, webpage):
|
||||||
|
yield from (
|
||||||
|
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
||||||
|
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
||||||
|
|
||||||
|
yield from traverse_obj(webpage, (
|
||||||
|
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
|
||||||
|
'data-client-items', {json.loads}, ..., 'page_url', {str}))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
uploader = self._match_id(url)
|
uploader = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, uploader)
|
webpage = self._download_webpage(url, uploader)
|
||||||
|
|
||||||
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
|
|
||||||
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
|
|
||||||
|
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
|
self._yield_items(webpage), uploader, f'Discography of {uploader}',
|
||||||
|
getter=functools.partial(urljoin, url))
|
||||||
|
|
Loading…
Add table
Reference in a new issue