1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp synced 2025-01-18 23:03:05 +01:00

Instead of replacing accented characters with an underscore when sanitizing file names in restricted mode, replace them with their non-accented equivalents fixes #9347

This commit is contained in:
Adam Thalhammer 2016-05-02 13:21:39 +10:00
parent 686cc89634
commit 79a2e94e79
2 changed files with 14 additions and 4 deletions

View file

@ -139,8 +139,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
tests = 'a\xe4b\u4e2d\u56fd\u7684c'
self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
tests = 'aäb\u4e2d\u56fd\u7684c'
self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
@ -155,6 +155,11 @@ class TestUtil(unittest.TestCase):
self.assertTrue(sanitize_filename('-', restricted=True) != '')
self.assertTrue(sanitize_filename(':', restricted=True) != '')
self.assertEqual(sanitize_filename(
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True),
'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy')
pass
def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')

View file

@ -14,8 +14,8 @@ import email.utils
import errno
import functools
import gzip
import itertools
import io
import itertools
import json
import locale
import math
@ -24,8 +24,8 @@ import os
import pipes
import platform
import re
import ssl
import socket
import ssl
import struct
import subprocess
import sys
@ -365,6 +365,11 @@ def sanitize_filename(s, restricted=False, is_id=False):
Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
"""
def replace_insane(char):
accents = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy')))
if restricted and char in accents:
return accents[char]
if char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':