Add basic --download-archive option
Often, users want to be able to download only videos they haven't seen before, even though the video files have been deleted or moved in the meantime. When --download-archive FILE is given, the extractor and ID of every downloaded video are recorded in the specified file. If a video is already present in that file, it is skipped. pull/1562/head
parent
226113c880
commit
c1c9a79c49
|
@ -3,6 +3,7 @@
|
|||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import errno
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
|
@ -84,6 +85,9 @@ class YoutubeDL(object):
|
|||
cachedir: Location of the cache files in the filesystem.
|
||||
None to disable filesystem cache.
|
||||
noplaylist: Download single video instead of a playlist if in doubt.
|
||||
download_archive: File name of a file where all downloads are recorded.
|
||||
Videos already present in the file are not downloaded
|
||||
again.
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
|
@ -309,6 +313,9 @@ class YoutubeDL(object):
|
|||
dateRange = self.params.get('daterange', DateRange())
|
||||
if date not in dateRange:
|
||||
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||
if self.in_download_archive(info_dict):
|
||||
return (u'%(title)s) has already been recorded in archive'
|
||||
% info_dict)
|
||||
return None
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
||||
|
@ -578,6 +585,8 @@ class YoutubeDL(object):
|
|||
self.report_error(u'postprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
self.record_download_archive(info_dict)
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
if len(url_list) > 1 and self.fixed_template():
|
||||
|
@ -617,3 +626,26 @@ class YoutubeDL(object):
|
|||
os.remove(encodeFilename(filename))
|
||||
except (IOError, OSError):
|
||||
self.report_warning(u'Unable to remove downloaded video file')
|
||||
|
||||
def in_download_archive(self, info_dict):
    """Tell whether the video described by info_dict is listed in the archive.

    Returns False when the 'download_archive' param is unset or when the
    archive file does not exist; any other IOError raised while opening or
    reading the file is propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    # Archive entries are stored one per line as "<extractor> <id>".
    wanted = info_dict['extractor'] + u' ' + info_dict['id']
    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive file is treated as "not recorded".
        if ioe.errno != errno.ENOENT:
            raise
    return False
|
||||
|
||||
def record_download_archive(self, info_dict):
    """Append the "<extractor> <id>" entry for info_dict to the archive file.

    Does nothing when the 'download_archive' param is unset.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        # Same "<extractor> <id>" form that in_download_archive compares against.
        entry = info_dict['extractor'] + u' ' + info_dict['id'] + u'\n'
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry)
|
||||
|
|
|
@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):
|
|||
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
||||
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
||||
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
|
||||
selection.add_option('--download-archive', metavar='FILE',
|
||||
dest='download_archive',
|
||||
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
|
||||
|
||||
|
||||
authentication.add_option('-u', '--username',
|
||||
|
@ -631,6 +634,7 @@ def _real_main(argv=None):
|
|||
'daterange': date,
|
||||
'cachedir': opts.cachedir,
|
||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||
'download_archive': opts.download_archive,
|
||||
})
|
||||
|
||||
if opts.verbose:
|
||||
|
|
|
@ -830,3 +830,99 @@ def get_cachedir(params={}):
|
|||
cache_root = os.environ.get('XDG_CACHE_HOME',
|
||||
os.path.expanduser('~/.cache'))
|
||||
return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
|
||||
|
||||
|
||||
# Cross-platform file locking
|
||||
if sys.platform != 'win32':
    import fcntl

    def _lock_file(f, exclusive):
        """Lock the open file f with fcntl.lockf (exclusive or shared)."""
        operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
        fcntl.lockf(f, operation)

    def _unlock_file(f):
        """Release the fcntl.lockf lock held on the open file f."""
        fcntl.lockf(f, fcntl.LOCK_UN)

else:
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the structure handed to LockFileEx / UnlockFileEx.
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32

    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL

    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL

    # Low / high DWORDs of the byte-range length passed to both calls.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f through LockFileEx; raise OSError on failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so _unlock_file can pass the same pointer.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 requests an exclusive lock; 0x0 a shared one.
        flags = 0x2 if exclusive else 0x0
        acquired = LockFileEx(handle, flags, 0,
                              whole_low, whole_high,
                              f._lock_file_overlapped_p)
        if not acquired:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Unlock f through UnlockFileEx; raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        released = UnlockFileEx(handle, 0,
                                whole_low, whole_high,
                                f._lock_file_overlapped_p)
        if not released:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
|
||||
|
||||
|
||||
class locked_file(object):
    """Context manager wrapping a file that is locked while the context is active.

    The file is opened in the constructor. Entering the context locks it
    via _lock_file (shared for mode 'r', exclusive for 'a' and 'w'); leaving
    the context unlocks it and then closes it, even if unlocking fails.

    Iteration, write() and read() are forwarded to the underlying file
    object, which is also available as the ``f`` attribute.
    """

    def __init__(self, filename, mode, encoding=None):
        """Open filename with io.open; mode must be 'r', 'a' or 'w'."""
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        """Acquire the lock and return self; close the file if locking fails."""
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The win32 _lock_file raises OSError, which is not an IOError
            # on Python 2; catch both so a failed lock never leaks the
            # already opened file handle.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        """Release the lock, then close the file regardless of the outcome."""
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        """Iterate over the underlying file object."""
        return iter(self.f)

    def write(self, *args):
        """Forward to the underlying file's write()."""
        return self.f.write(*args)

    def read(self, *args):
        """Forward to the underlying file's read()."""
        return self.f.read(*args)
|
||||
|
|
Loading…
Reference in New Issue