From 998965b78a8bc84cf369da99b9a2a633ea5b598a Mon Sep 17 00:00:00 2001
From: Ihor Stuzhuk
Date: Fri, 10 Jan 2025 15:49:59 +0200
Subject: [PATCH] Instagram user reels extractor

---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/instagram.py   | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 967010826e..0c24dbfe5b 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -874,6 +874,7 @@ from .instagram import (
     InstagramStoryIE,
     InstagramTagIE,
     InstagramUserIE,
+    InstagramUserReelsIE,
 )
 from .internazionale import InternazionaleIE
 from .internetvideoarchive import InternetVideoArchiveIE
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 55086d0b29..1496b95bf0 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -653,6 +653,63 @@ class InstagramUserIE(InstagramPlaylistBaseIE):
         }
 
 
+class InstagramUserReelsIE(InstagramBaseIE):
+    # Playlist extractor for a user's /reels/ page; the <id> group is the username
+    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/reels/?'
+    IE_DESC = 'Instagram user reels'
+    IE_NAME = 'instagram:user:reels'
+
+    def _real_extract(self, url):
+        username = self._match_valid_url(url).group('id')
+
+        webpage = self._download_webpage(url, username)
+        user_info = self._search_json(r'"props":', webpage, 'user info', username)
+
+        user_id = user_info['id']
+
+        # Request headers are identical for every page, so build them once
+        headers = {
+            **self._API_HEADERS,
+            'X-Requested-With': 'XMLHttpRequest',
+            'Referer': url,
+        }
+        # The cookie jar may not hold a csrftoken; only read `.value` when the
+        # cookie exists instead of failing with AttributeError on None
+        csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
+        if csrf_token is not None:
+            headers['X-CSRFToken'] = csrf_token.value
+        else:
+            self.report_warning('No csrf token set by Instagram API', username)
+
+        def reels():
+            # Lazily yield one entry per reel, following the max_id cursor
+            max_id = None
+            for page in itertools.count(1):
+                resp = self._download_json(
+                    f'{self._API_BASE_URL}/clips/user/',
+                    video_id=username, note=f'Downloading page {page}',
+                    data=urlencode_postdata({
+                        'include_feed_video': 'true',
+                        'page_size': 12,
+                        'target_user_id': user_id,
+                        **({'max_id': max_id} if max_id else {}),
+                    }),
+                    headers=headers,
+                )
+
+                for item in resp['items']:
+                    yield self._extract_product(item['media'])
+
+                paging_info = resp.get('paging_info') or {}
+                max_id = paging_info.get('max_id')
+                # Stop on the last page, and also when no cursor is returned:
+                # without max_id the next request would fetch the first page again
+                if not paging_info.get('more_available') or not max_id:
+                    break
+
+        return self.playlist_result(reels(), playlist_id=username, playlist_title=f'Reels of {username}')
+
+
 class InstagramTagIE(InstagramPlaylistBaseIE):
     _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
     IE_DESC = 'Instagram hashtag search URLs'