New option --restrict-filenames

This commit is contained in:
Philipp Hagemeister 2012-11-26 23:58:46 +01:00
parent 71f36332dd
commit 1c469a9480
7 changed files with 77 additions and 41 deletions

View File

@ -47,6 +47,8 @@ ## Filesystem Options:
%(extractor)s for the provider (youtube, metacafe, %(extractor)s for the provider (youtube, metacafe,
etc), %(id)s for the video id and %% for a literal etc), %(id)s for the video id and %% for a literal
percent. Use - to output to stdout. percent. Use - to output to stdout.
--restrict-filenames Avoid some characters such as "&" and spaces in
filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin) -a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files -w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files -c, --continue resume partially downloaded files

View File

@ -30,11 +30,34 @@ def test_sanitize_filename(self):
self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
self.assertEqual(u'this - that', sanitize_filename(u'this: that')) self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T')
self.assertEqual(sanitize_filename(u'ä'), u'ä') self.assertEqual(sanitize_filename(u'ä'), u'ä')
self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')
for forbidden in u'"\0\\/': forbidden = u'"\0\\/'
self.assertTrue(forbidden not in sanitize_filename(forbidden)) for fc in forbidden:
for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc))
def test_sanitize_filename_restricted(self):
    """Check the output of sanitize_filename() when restricted=True."""
    # Letters, digits, '_' and '-' are expected to come back unchanged.
    self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc')
    self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e')
    self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')

    # Path separators and shell-hostile punctuation: a run of them is
    # expected to yield a single '-' inside the name and nothing at the end.
    self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
    self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
    self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
    self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))

    # In restricted mode spaces are expected to become underscores.
    self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
    self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))

    # Sanitizing any single forbidden character must not produce any
    # forbidden character (itself or another one) in the result.
    # No debug printing here: the inputs include NUL, tab and newline,
    # which would only garble the test runner's output.
    forbidden = u'"\0\\/&: \'\t\n'
    for fc in forbidden:
        for fbc in forbidden:
            self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
def test_ordered_set(self): def test_ordered_set(self):
self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7])

View File

@ -59,6 +59,8 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
@ -210,7 +212,7 @@ Please note that Python 2.5 is not supported anymore.
.PP .PP
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on
some systems) or clone the git repo to see the code. some systems) or clone the git repository, as laid out above.
If you modify the code, you can run it by executing the If you modify the code, you can run it by executing the
\f[C]__main__.py\f[] file. \f[C]__main__.py\f[] file.
To recompile the executable, run \f[C]make\ youtube-dl\f[]. To recompile the executable, run \f[C]make\ youtube-dl\f[].

View File

@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts local cur prev opts
COMPREPLY=() COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}" cur="${COMP_WORDS[COMP_CWORD]}"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )

View File

@ -44,37 +44,38 @@ class FileDownloader(object):
Available options: Available options:
username: Username for authentication purposes. username: Username for authentication purposes.
password: Password for authentication purposes. password: Password for authentication purposes.
usenetrc: Use netrc for authentication instead. usenetrc: Use netrc for authentication instead.
quiet: Do not print messages to stdout. quiet: Do not print messages to stdout.
forceurl: Force printing final URL. forceurl: Force printing final URL.
forcetitle: Force printing title. forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL. forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description. forcedescription: Force printing description.
forcefilename: Force printing final filename. forcefilename: Force printing final filename.
simulate: Do not download the video files. simulate: Do not download the video files.
format: Video format code. format: Video format code.
format_limit: Highest quality format to try. format_limit: Highest quality format to try.
outtmpl: Template for output names. outtmpl: Template for output names.
ignoreerrors: Do not stop on download errors. restrictfilenames: Do not allow "&" and spaces in file names
ratelimit: Download speed limit, in bytes/sec. ignoreerrors: Do not stop on download errors.
nooverwrites: Prevent overwriting files. ratelimit: Download speed limit, in bytes/sec.
retries: Number of times to retry for HTTP error 5xx nooverwrites: Prevent overwriting files.
continuedl: Try to continue downloads if possible. retries: Number of times to retry for HTTP error 5xx
noprogress: Do not print the progress bar. continuedl: Try to continue downloads if possible.
playliststart: Playlist item to start at. noprogress: Do not print the progress bar.
playlistend: Playlist item to end at. playliststart: Playlist item to start at.
matchtitle: Download only matching titles. playlistend: Playlist item to end at.
rejecttitle: Reject downloads for matching titles. matchtitle: Download only matching titles.
logtostderr: Log messages to stderr instead of stdout. rejecttitle: Reject downloads for matching titles.
consoletitle: Display progress in console window's titlebar. logtostderr: Log messages to stderr instead of stdout.
nopart: Do not use temporary .part files. consoletitle: Display progress in console window's titlebar.
updatetime: Use the Last-modified header to set output file timestamps. nopart: Do not use temporary .part files.
writedescription: Write the video description to a .description file updatetime: Use the Last-modified header to set output file timestamps.
writeinfojson: Write the video description to a .info.json file writedescription: Write the video description to a .description file
writesubtitles: Write the video subtitles to a .srt file writeinfojson: Write the video description to a .info.json file
subtitleslang: Language of the subtitles to download writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
""" """
params = None params = None
@ -349,7 +350,7 @@ def _match_entry(self, info_dict):
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor.""" """Process a single dictionary returned by an InfoExtractor."""
info_dict['stitle'] = sanitize_filename(info_dict['title']) info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
reason = self._match_entry(info_dict) reason = self._match_entry(info_dict)
if reason is not None: if reason is not None:

View File

@ -272,6 +272,9 @@ def _find_term_columns():
help='number downloaded files starting from 00000', default=False) help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output', filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Avoid some characters such as "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file', filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites', filesystem.add_option('-w', '--no-overwrites',
@ -485,6 +488,7 @@ def _real_main():
or (opts.useid and u'%(id)s.%(ext)s') or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s'), or u'%(id)s.%(ext)s'),
'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors, 'ignoreerrors': opts.ignoreerrors,
'ratelimit': opts.ratelimit, 'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites, 'nooverwrites': opts.nooverwrites,

View File

@ -194,18 +194,22 @@ def timeconvert(timestr):
if timetuple is not None: if timetuple is not None:
timestamp = email.utils.mktime_tz(timetuple) timestamp = email.utils.mktime_tz(timetuple)
return timestamp return timestamp
def sanitize_filename(s): def sanitize_filename(s, restricted=False):
"""Sanitizes a string so it could be used as part of a filename.""" """Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
"""
def replace_insane(char): def replace_insane(char):
if char == '?' or ord(char) < 32 or ord(char) == 127: if char == '?' or ord(char) < 32 or ord(char) == 127:
return '' return ''
elif char == '"': elif char == '"':
return '\'' return '' if restricted else 'FOO\''
elif char == ':': elif char == ':':
return ' -' return '_-' if restricted else ' -'
elif char in '\\/|*<>': elif char in '\\/|*<>':
return '-' return '-'
if restricted and (char in '&\'' or char.isspace()):
return '_'
return char return char
result = u''.join(map(replace_insane, s)) result = u''.join(map(replace_insane, s))