cleanups and version inc

parent 9e714732fb
commit d2186f1f26

1 changed file with 43 additions and 46 deletions
@@ -57,12 +57,11 @@ Options:
 from __future__ import print_function

 __author__ = 'Darko Poljak <darko.poljak@gmail.com>'
-__version__ = '0.4.1'
+__version__ = '0.5.0'
 __license__ = 'GPLv3'

 __all__ = [
-    'file_dups', 'rm_file_dups', 'mv_file_dups', 'iter_file_dups',
-    'file_dups_immediate'
+    'file_dups', 'rm_file_dups', 'mv_file_dups', 'iter_file_dups'
 ]

 import sys
@@ -80,6 +79,9 @@ if sys.version_info[0] == 3:
     def _dict_iter_keys(d):
         return d.keys()

+    def _dict_iter_values(d):
+        return d.values()
+
 else:
     def _dict_iter_items(d):
         return d.iteritems()
@@ -87,6 +89,9 @@ else:
     def _dict_iter_keys(d):
         return d.iterkeys()

+    def _dict_iter_values(d):
+        return d.itervalues()
+
     range = xrange

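Context for these two hunks: the `_dict_iter_*` helpers are the usual Python 2/3 compatibility shim, and this commit rounds them out with `_dict_iter_values`. A minimal self-contained sketch of the pattern as a call site sees it (the sample dict is illustrative):

from __future__ import print_function
import sys

if sys.version_info[0] == 3:
    def _dict_iter_items(d):
        return d.items()        # items() view is lazy on Python 3
else:
    def _dict_iter_items(d):
        return d.iteritems()    # iteritems() is the lazy Python 2 spelling

# call sites stay identical on both interpreters:
dups = {('d41d8cd9',): ['/tmp/a', '/tmp/b']}    # illustrative data
for hexmd, paths in _dict_iter_items(dups):
    print(hexmd, paths)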
@@ -221,6 +226,7 @@ def file_dups(topdirs=['./'], hashalgs=['md5'], block_size=4096, verbose=False,

     if verbose:
         print('')
+    # make result dict with unique file paths list
     result = {}
     for k, v in _dict_iter_items(dups):
         uniq_v = _uniq_list(v)
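The new comment documents the dedup pass that follows. `_uniq_list` itself is outside this hunk; judging from the call site it only needs to drop repeated paths while preserving order, as in this assumed sketch:

def _uniq_list(seq):
    # keep the first occurrence of each path, preserve original order
    seen = set()
    return [x for x in seq if not (x in seen or seen.add(x))]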
@@ -229,14 +235,15 @@ def file_dups(topdirs=['./'], hashalgs=['md5'], block_size=4096, verbose=False,
     return result


-def file_dups_immediate(topdirs=['./'], hashalgs=['md5'], block_size=4096,
+def iter_file_dups(topdirs=['./'], hashalgs=['md5'], block_size=4096,
                    safe_mode=False):
-    """Find duplicate files in directory list iterator.
+    """Find duplicate files in directory list.
     Yield tuple of file path, hash tuple and list of duplicate files
-    as soon as duplicate file is found (newly found file is
-    included in the list).
-    This means that not all duplicate files are returned.
-    Same hash value and sublist could be returned later
+    as soon as duplicate file is found.
+    Newly found file is not included in the list at the yield time,
+    but is appended later before next yield.
+    This means that not all duplicate files are returned with any
+    return value. Same hash value and sublist could be returned later
     if file with same content is found.
     If safe_mode is true then you want to play safe: do byte
     by byte comparison for hash duplicate files.
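Per the revised docstring, the renamed generator yields a (file path, hash tuple, duplicate list) triple as soon as a duplicate turns up, and appends the new path to the list only after yielding. A usage sketch, assuming the module is importable as `sweeper` and with an illustrative scan path:

from sweeper import iter_file_dups

for fpath, hexmd, dup_files in iter_file_dups(topdirs=['/some/dir'],
                                              safe_mode=True):
    # fpath duplicates every entry already in dup_files; fpath itself
    # joins the list only after this yield
    print('{0} duplicates {1}'.format(fpath, dup_files))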
@@ -253,29 +260,25 @@ def file_dups_immediate(topdirs=['./'], hashalgs=['md5'], block_size=4096,
             hexmd = tuple(hexmds)
             dup_files = dups[hexmd]
             # there were dup list elements (used for yield)
-            had_dup_list = True if dup_files else False
-            files_equals = False
-            if safe_mode:
-                if dup_files:
-                    for f in dup_files:
-                        if _fbequal(f, fpath):
-                            files_equals = True
-                            break
-                else:  # when list is empty in safe mode
-                    files_equals = True
-            else:
-                files_equals = True  # when safe mode is off
+            if safe_mode and dup_files:
+                # compare only with first file in dup_files
+                # all files in dup_files list are already content equal
+                files_equals = _fbequal(dup_files[0], fpath)
+            else:  # when list is empty in safe mode or when safe mode is off
+                files_equals = True
             if files_equals:
-                dups[hexmd].append(fpath)
-            if files_equals and had_dup_list:
-                yield (fpath, hexmd, dups[hexmd])
+                # yield only if current dup files list isn't empty
+                if dup_files:
+                    yield (fpath, hexmd, dups[hexmd])
+                # finally append newly found file to dup list
+                dups[hexmd].append(fpath)


 def _extract_files_for_action(topdirs, hashalgs, block_size, keep_prefix,
                               verbose, safe_mode):
-    for files in iter_file_dups(topdirs=topdirs, hashalgs=hashalgs,
-                                block_size=block_size, verbose=verbose,
-                                safe_mode=safe_mode):
+    for files in _dict_iter_values(file_dups(topdirs=topdirs,
+                                   hashalgs=hashalgs, block_size=block_size,
+                                   verbose=verbose, safe_mode=safe_mode)):
         found = False
         if keep_prefix:
             result = []
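The rewrite above leans on content equality being transitive: every file already in `dup_files` was verified against the first entry when it was added, so comparing a new file with `dup_files[0]` alone is sufficient and the old inner loop was redundant. `_fbequal` is not part of this diff; a minimal sketch of the byte-by-byte comparison it presumably performs (only the name comes from the call site, the block size is an assumption):

def _fbequal(fpath1, fpath2, block_size=4096):
    '''Return True only if both files' contents are byte-for-byte equal.'''
    with open(fpath1, 'rb') as f1, open(fpath2, 'rb') as f2:
        while True:
            b1 = f1.read(block_size)
            b2 = f2.read(block_size)
            if b1 != b2:
                return False
            if not b1:  # both files exhausted at the same offset: equal
                return True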
@@ -325,11 +328,19 @@ def mv_file_dups(topdirs=['./'], hashalgs=['md5'], block_size=4096,
     If safe_mode is true then do byte by byte comparison for
     hash duplicate files.
     """
-    if not os.path.exists(dest_dir):
-        os.mkdir(dest_dir)
-    if not os.path.isdir(dest_dir):
-        raise OSError('{} is not a directory'.format(dest_dir))
     import shutil
+
+    if not os.path.exists(dest_dir):
+        if simulate:
+            print('mkdir {}'.format(dest_dir))
+        else:
+            os.mkdir(dest_dir)
+    elif not os.path.isdir(dest_dir):
+        errmsg = '{} is not a directory'.format(dest_dir)
+        if simulate:
+            print('would raise:', errmsg)
+        else:
+            raise OSError(errmsg)
     for dups, extracted in _extract_files_for_action(topdirs, hashalgs,
                                                      block_size, keep_prefix,
                                                      verbose, safe_mode):
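With the check moved below `import shutil` and made simulate-aware, a dry run now reports the mkdir (or the would-be OSError) instead of mutating the filesystem before printing anything. A usage sketch; the `dest_dir` and `simulate` keyword names appear in the hunk, while the paths and module name are assumptions:

from sweeper import mv_file_dups

# prints planned actions such as 'mkdir /some/dir/dups' without moving files
mv_file_dups(topdirs=['/some/dir'], dest_dir='/some/dir/dups',
             simulate=True)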
@@ -342,20 +353,6 @@ def mv_file_dups(topdirs=['./'], hashalgs=['md5'], block_size=4096,
             shutil.move(f, dest_dir)


-def iter_file_dups(topdirs=['./'], rethash=False, hashalgs=['md5'],
-                   block_size=4096, verbose=False, safe_mode=False):
-    """Yield duplicate files when found in specified directory list.
-    If rethash is True then tuple hash value and duplicate paths list is
-    returned, otherwise duplicate paths list is returned.
-    """
-    dups = file_dups(topdirs, hashalgs, block_size, verbose, safe_mode)
-    for hash_, fpaths in _dict_iter_items(dups):
-        if rethash:
-            yield (hash_, fpaths)
-        else:
-            yield fpaths
-
-
 def _remap_keys_to_str(d):
     '''Iterator that remaps dictionary keys to string in case keys are tuple
     or list. Leave it unchanged otherwise.
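The deleted wrapper (its name now taken by the renamed generator above) added little over `file_dups`; a caller wanting the old `rethash` behavior can inline it, as in this sketch against the public `file_dups` result (scan path illustrative):

from sweeper import file_dups

dups = file_dups(topdirs=['/some/dir'])
for hash_, fpaths in dups.items():   # old rethash=True behavior
    print(hash_, fpaths)
for fpaths in dups.values():         # old rethash=False behavior
    print(fpaths)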
@@ -432,7 +429,7 @@ def main():
         rm_file_dups(topdirs=topdirs, hashalgs=hashalgs,
                      block_size=block_size,
                      simulate=simulate,
-                     keep_prefix=-keep_prefix,
+                     keep_prefix=keep_prefix,
                      verbose=verbose,
                      safe_mode=safe_mode)
     else:
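The dropped unary minus here is a genuine bug fix, not a cleanup: assuming `keep_prefix` carries a path-prefix string (or None) rather than a number, negating it raises at call time, so this call would have crashed as soon as a prefix was supplied:

>>> -'some/prefix'
Traceback (most recent call last):
  ...
TypeError: bad operand type for unary -: 'str'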