mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-18 23:03:05 +01:00
[cookies] Move YoutubeDLCookieJar
to cookies module (#7091)
Authored by: coletdjnz
This commit is contained in:
parent
08916a49c7
commit
b87e01c123
6 changed files with 157 additions and 137 deletions
|
@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
import re
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from yt_dlp.utils import YoutubeDLCookieJar
|
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||||
|
|
||||||
|
|
||||||
class TestYoutubeDLCookieJar(unittest.TestCase):
|
class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||||
|
@ -47,6 +47,12 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||||
# will be ignored
|
# will be ignored
|
||||||
self.assertFalse(cookiejar._cookies)
|
self.assertFalse(cookiejar._cookies)
|
||||||
|
|
||||||
|
def test_get_cookie_header(self):
    """Check that an HttpOnly cookie loaded from file ends up in the Cookie header."""
    # The fixture path is relative to the working directory of the test run.
    cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
    # Keep session and expired entries so the fixture's cookies are not filtered out on load.
    cookiejar.load(ignore_discard=True, ignore_expires=True)
    header = cookiejar.get_cookie_header('https://www.foobar.foobar')
    self.assertIn('HTTPONLY_COOKIE', header)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -2404,7 +2404,7 @@ class YoutubeDL:
|
||||||
if 'Youtubedl-No-Compression' in res: # deprecated
|
if 'Youtubedl-No-Compression' in res: # deprecated
|
||||||
res.pop('Youtubedl-No-Compression', None)
|
res.pop('Youtubedl-No-Compression', None)
|
||||||
res['Accept-Encoding'] = 'identity'
|
res['Accept-Encoding'] = 'identity'
|
||||||
cookies = self._calc_cookies(info_dict['url'])
|
cookies = self.cookiejar.get_cookie_header(info_dict['url'])
|
||||||
if cookies:
|
if cookies:
|
||||||
res['Cookie'] = cookies
|
res['Cookie'] = cookies
|
||||||
|
|
||||||
|
@ -2416,9 +2416,8 @@ class YoutubeDL:
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _calc_cookies(self, url):
|
def _calc_cookies(self, url):
|
||||||
pr = sanitized_Request(url)
|
self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
|
||||||
self.cookiejar.add_cookie_header(pr)
|
return self.cookiejar.get_cookie_header(url)
|
||||||
return pr.get_header('Cookie')
|
|
||||||
|
|
||||||
def _sort_thumbnails(self, thumbnails):
|
def _sort_thumbnails(self, thumbnails):
|
||||||
thumbnails.sort(key=lambda t: (
|
thumbnails.sort(key=lambda t: (
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
import base64
|
import base64
|
||||||
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import http.cookies
|
import http.cookies
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -11,6 +13,7 @@ import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
import urllib.request
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from hashlib import pbkdf2_hmac
|
from hashlib import pbkdf2_hmac
|
||||||
|
@ -29,11 +32,14 @@ from .dependencies import (
|
||||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||||
from .utils import (
|
from .utils import (
|
||||||
Popen,
|
Popen,
|
||||||
YoutubeDLCookieJar,
|
|
||||||
error_to_str,
|
error_to_str,
|
||||||
|
escape_url,
|
||||||
expand_path,
|
expand_path,
|
||||||
is_path_like,
|
is_path_like,
|
||||||
|
sanitize_url,
|
||||||
|
str_or_none,
|
||||||
try_call,
|
try_call,
|
||||||
|
write_string,
|
||||||
)
|
)
|
||||||
|
|
||||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||||
|
@ -1091,3 +1097,139 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||||
|
|
||||||
else:
|
else:
|
||||||
morsel = None
|
morsel = None
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar subclass for reading and writing Netscape-format cookie files.

    Compared to the base class it accepts path-like objects as well as
    already-open file objects, strips the '#HttpOnly_' prefix from entries,
    skips malformed lines with a warning and treats `expires=0` entries as
    session cookies.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in one cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """
        @param filename  Path-like object or file object the jar is bound to (optional).
                         Path-like values are normalized with os.fspath; anything
                         else is stored as given.
        """
        # The base class is initialized without a filename; it is set below instead
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie file spelling ('TRUE'/'FALSE') of a truthy/falsy value"""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a file object for `file`.

        Path-like values are opened as UTF-8 text (and closed on exit).
        Any other object is assumed to be an open file object and is yielded
        as-is without being closed; when `write` is set it is emptied first.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position, so rewind first.
                # Otherwise a stream that was read before (e.g. by load()) would
                # be written at its old offset and padded with NUL characters.
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
        """
        Write one tab-separated line per cookie to the file object `f`.

        Cookies marked as discard are skipped unless `ignore_discard` is set,
        expired cookies are skipped unless `ignore_expires` is set.
        """
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('%s\n' % '\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        @param filename  Target path or file object; defaults to `self.filename`.
                         Remaining arguments are passed on to `_really_save`.
        Raises ValueError if neither `filename` nor `self.filename` is set.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        session_cookies = [cookie for cookie in self if cookie.expires is None]
        for cookie in session_cookies:
            cookie.expires = 0

        try:
            with self.open(filename, write=True) as f:
                f.write(self._HEADER)
                self._really_save(f, *args, **kwargs)
        finally:
            # Undo the temporary marker: a cookie left with `expires=0` counts as
            # expired, so the jar would stop sending it and eventually purge it
            for cookie in session_cookies:
                cookie.expires = None

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Load cookies from a file.

        @param filename  Source path or file object; defaults to `self.filename`.
        Malformed entries are skipped with a warning written via `write_string`.
        Raises ValueError if no filename is available, and
        http.cookiejar.LoadError if a malformed line looks like JSON.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Return the line in a form the base class parser accepts,
            # or raise LoadError if the entry is malformed
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Sanitized copy of the input that is handed to the base class parser
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    # The appended space keeps the index valid for blank lines
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        # Let the jar fill in the header on a throwaway request, then read it
        # back; get_header() gives None when the jar added no Cookie header
        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')
|
||||||
|
|
|
@ -3444,7 +3444,7 @@ class InfoExtractor:
|
||||||
|
|
||||||
def _get_cookies(self, url):
    """ Return a http.cookies.SimpleCookie with the cookies for the url """
    # The downloader's cookie jar builds the Cookie header for the url;
    # LenientSimpleCookie then parses that header string
    return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
|
||||||
|
|
||||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -10,6 +10,9 @@ from ._utils import decode_base_n, preferredencoding
|
||||||
from .traversal import traverse_obj
|
from .traversal import traverse_obj
|
||||||
from ..dependencies import certifi, websockets
|
from ..dependencies import certifi, websockets
|
||||||
|
|
||||||
|
# isort: split
|
||||||
|
from ..cookies import YoutubeDLCookieJar # noqa: F401
|
||||||
|
|
||||||
has_certifi = bool(certifi)
|
has_certifi = bool(certifi)
|
||||||
has_websockets = bool(websockets)
|
has_websockets = bool(websockets)
|
||||||
|
|
||||||
|
|
|
@ -1518,136 +1518,6 @@ def is_path_like(f):
|
||||||
return isinstance(f, (str, bytes, os.PathLike))
|
return isinstance(f, (str, bytes, os.PathLike))
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
|
||||||
"""
|
|
||||||
See [1] for cookie file format.
|
|
||||||
|
|
||||||
1. https://curl.haxx.se/docs/http-cookies.html
|
|
||||||
"""
|
|
||||||
_HTTPONLY_PREFIX = '#HttpOnly_'
|
|
||||||
_ENTRY_LEN = 7
|
|
||||||
_HEADER = '''# Netscape HTTP Cookie File
|
|
||||||
# This file is generated by yt-dlp. Do not edit.
|
|
||||||
|
|
||||||
'''
|
|
||||||
_CookieFileEntry = collections.namedtuple(
|
|
||||||
'CookieFileEntry',
|
|
||||||
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
|
|
||||||
|
|
||||||
def __init__(self, filename=None, *args, **kwargs):
|
|
||||||
super().__init__(None, *args, **kwargs)
|
|
||||||
if is_path_like(filename):
|
|
||||||
filename = os.fspath(filename)
|
|
||||||
self.filename = filename
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _true_or_false(cndn):
|
|
||||||
return 'TRUE' if cndn else 'FALSE'
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
|
||||||
def open(self, file, *, write=False):
|
|
||||||
if is_path_like(file):
|
|
||||||
with open(file, 'w' if write else 'r', encoding='utf-8') as f:
|
|
||||||
yield f
|
|
||||||
else:
|
|
||||||
if write:
|
|
||||||
file.truncate(0)
|
|
||||||
yield file
|
|
||||||
|
|
||||||
def _really_save(self, f, ignore_discard=False, ignore_expires=False):
|
|
||||||
now = time.time()
|
|
||||||
for cookie in self:
|
|
||||||
if (not ignore_discard and cookie.discard
|
|
||||||
or not ignore_expires and cookie.is_expired(now)):
|
|
||||||
continue
|
|
||||||
name, value = cookie.name, cookie.value
|
|
||||||
if value is None:
|
|
||||||
# cookies.txt regards 'Set-Cookie: foo' as a cookie
|
|
||||||
# with no name, whereas http.cookiejar regards it as a
|
|
||||||
# cookie with no value.
|
|
||||||
name, value = '', name
|
|
||||||
f.write('%s\n' % '\t'.join((
|
|
||||||
cookie.domain,
|
|
||||||
self._true_or_false(cookie.domain.startswith('.')),
|
|
||||||
cookie.path,
|
|
||||||
self._true_or_false(cookie.secure),
|
|
||||||
str_or_none(cookie.expires, default=''),
|
|
||||||
name, value
|
|
||||||
)))
|
|
||||||
|
|
||||||
def save(self, filename=None, *args, **kwargs):
|
|
||||||
"""
|
|
||||||
Save cookies to a file.
|
|
||||||
Code is taken from CPython 3.6
|
|
||||||
https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
|
|
||||||
|
|
||||||
if filename is None:
|
|
||||||
if self.filename is not None:
|
|
||||||
filename = self.filename
|
|
||||||
else:
|
|
||||||
raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
|
|
||||||
|
|
||||||
# Store session cookies with `expires` set to 0 instead of an empty string
|
|
||||||
for cookie in self:
|
|
||||||
if cookie.expires is None:
|
|
||||||
cookie.expires = 0
|
|
||||||
|
|
||||||
with self.open(filename, write=True) as f:
|
|
||||||
f.write(self._HEADER)
|
|
||||||
self._really_save(f, *args, **kwargs)
|
|
||||||
|
|
||||||
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
|
|
||||||
"""Load cookies from a file."""
|
|
||||||
if filename is None:
|
|
||||||
if self.filename is not None:
|
|
||||||
filename = self.filename
|
|
||||||
else:
|
|
||||||
raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
|
|
||||||
|
|
||||||
def prepare_line(line):
|
|
||||||
if line.startswith(self._HTTPONLY_PREFIX):
|
|
||||||
line = line[len(self._HTTPONLY_PREFIX):]
|
|
||||||
# comments and empty lines are fine
|
|
||||||
if line.startswith('#') or not line.strip():
|
|
||||||
return line
|
|
||||||
cookie_list = line.split('\t')
|
|
||||||
if len(cookie_list) != self._ENTRY_LEN:
|
|
||||||
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
|
|
||||||
cookie = self._CookieFileEntry(*cookie_list)
|
|
||||||
if cookie.expires_at and not cookie.expires_at.isdigit():
|
|
||||||
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
|
|
||||||
return line
|
|
||||||
|
|
||||||
cf = io.StringIO()
|
|
||||||
with self.open(filename) as f:
|
|
||||||
for line in f:
|
|
||||||
try:
|
|
||||||
cf.write(prepare_line(line))
|
|
||||||
except http.cookiejar.LoadError as e:
|
|
||||||
if f'{line.strip()} '[0] in '[{"':
|
|
||||||
raise http.cookiejar.LoadError(
|
|
||||||
'Cookies file must be Netscape formatted, not JSON. See '
|
|
||||||
'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
|
|
||||||
write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
|
|
||||||
continue
|
|
||||||
cf.seek(0)
|
|
||||||
self._really_load(cf, filename, ignore_discard, ignore_expires)
|
|
||||||
# Session cookies are denoted by either `expires` field set to
|
|
||||||
# an empty string or 0. MozillaCookieJar only recognizes the former
|
|
||||||
# (see [1]). So we need force the latter to be recognized as session
|
|
||||||
# cookies on our own.
|
|
||||||
# Session cookies may be important for cookies-based authentication,
|
|
||||||
# e.g. usually, when user does not check 'Remember me' check box while
|
|
||||||
# logging in on a site, some important cookies are stored as session
|
|
||||||
# cookies so that not recognizing them will result in failed login.
|
|
||||||
# 1. https://bugs.python.org/issue17164
|
|
||||||
for cookie in self:
|
|
||||||
# Treat `expires=0` cookies as session cookies
|
|
||||||
if cookie.expires == 0:
|
|
||||||
cookie.expires = None
|
|
||||||
cookie.discard = True
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
|
class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
|
||||||
def __init__(self, cookiejar=None):
|
def __init__(self, cookiejar=None):
|
||||||
urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
|
urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
|
||||||
|
|
Loading…
Add table
Reference in a new issue