diff --git a/README.rst b/README.rst index cfc344e..e8f05fb 100644 --- a/README.rst +++ b/README.rst @@ -18,17 +18,17 @@ Remove duplicate files .. code:: python - from sweeper import file_dups + from sweeper import rm_file_dups rm_file_dups(['images']) Perform custom action .. code:: python - from sweeper import file_dups + from sweeper import iter_file_dups for files in iter_file_dups(['images']): for fname in files: - print('found duplicate file with name: %s' % fname) + print('found duplicate file with name: {}'.format(fname)) As script:: diff --git a/setup.py b/setup.py index a558e2e..19c5681 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='sweeper', - version='0.3.0', + version='0.4.0', author='Darko Poljak', author_email='darko.poljak@gmail.com', description='Find duplicate files and perform action.', diff --git a/sweeper/__init__.py b/sweeper/__init__.py index 6816de0..3c38b69 100644 --- a/sweeper/__init__.py +++ b/sweeper/__init__.py @@ -1,4 +1,4 @@ from __future__ import absolute_import -from .sweeper import file_dups +from .sweeper import file_dups, mv_file_dups, rm_file_dups, iter_file_dups __all__ = ['file_dups', 'mv_file_dups', 'rm_file_dups', 'iter_file_dups'] diff --git a/sweeper/sweeper.py b/sweeper/sweeper.py index e3d5aa2..7da22a3 100644 --- a/sweeper/sweeper.py +++ b/sweeper/sweeper.py @@ -2,7 +2,7 @@ # Author: Darko Poljak # License: GPLv3 -"""Sweeper. +"""sweeper 0.4.0 Usage: sweeper.py [options] [...] 
@@ -11,18 +11,31 @@ Arguments: Options: -h, --help show this screen +-v, --version show version and exit -b , --block-size= size of block used when reading file's content [default: 4096] -d , --digest-alg= secure hash algorithm [default: md5] --a , --action= action on duplicate files (print, - remove, move) [default: print] +-a , --action= action on duplicate files (pprint, + print, remove, move) + [default: pprint] + -remove removes duplicate files + except first found + -move moves duplicate files to + duplicates directory, except first + found + -print prints result dictionary where + keys are hash values and values are + list of duplicate file paths + -pprint prints sets of duplicate file + paths each in its own line where sets + are separated by a blank line -m , --move= move duplicate files to directory (used with move action) [default: ./dups] """ __author__ = 'Darko Poljak ' -__version__ = '0.3.0' +__version__ = '0.4.0' __license__ = 'GPLv3' __all__ = [ @@ -95,7 +108,7 @@ def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, if not os.path.exists(dest_dir): os.mkdir(dest_dir) if not os.path.isdir(dest_dir): - raise OSError('%s is not a directory' % dest_dir) + raise OSError('{} is not a directory'.format(dest_dir)) import shutil for files in do_with_file_dups(topdirs, hashalg, block_size): for i, f in enumerate(files): @@ -103,12 +116,18 @@ def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, shutil.move(f, dest_dir) -def iter_file_dups(topdirs=['./'], hashalg='md5', block_size=4096): - """Yield list of duplicate files when found in specified directory list. +def iter_file_dups(topdirs=['./'], rethash=False, hashalg='md5', + block_size=4096): + """Yield duplicate files when found in specified directory list. + If rethash is True then tuple hash value and duplicate paths list is + returned, otherwise duplicate paths list is returned. 
""" dups = file_dups(topdirs, hashalg, block_size) - for fpaths in dups.itervalues(): - yield fpaths + for hash_, fpaths in _dict_iter_items(dups): + if rethash: + yield (hash_, fpaths) + else: + yield fpaths def main(): @@ -128,21 +147,31 @@ def main(): bs = int(args['--block-size']) args['--block-size'] = bs except ValueError: - print('Invalid block size "%s"' % args['--block-size']) + print('Invalid block size "{}"'.format(args['--block-size'])) sys.exit(1) - if action == 'print': + if args['--version']: + print("sweeper {}".format(__version__)) + return + + if action == 'print' or action == 'pprint': dups = file_dups(topdirs, args['--digest-alg'], args['--block-size']) spam = dict(dups) if spam: - print(json.dumps(spam, indent=4)) + if action == 'pprint': + for h, fpaths in _dict_iter_items(spam): + for path in fpaths: + print(path) + print('') + else: + print(json.dumps(spam, indent=4)) elif action == 'move': mv_file_dups(topdirs, args['--digest-alg'], args['--block-size'], args['--move']) elif action == 'remove': rm_file_dups(topdirs, args['--digest-alg'], args['--block-size']) else: - print('Invalid action "%s"' % action) + print('Invalid action "{}"'.format(action)) # if used as script call main function diff --git a/test/test_sweeper.py b/test/test_sweeper.py index c4aced3..d75797a 100644 --- a/test/test_sweeper.py +++ b/test/test_sweeper.py @@ -3,7 +3,7 @@ # License: GPLv3 import unittest -from sweeper import file_dups +from sweeper import file_dups, iter_file_dups import os mydir = os.path.dirname(os.path.realpath(__file__)) @@ -23,6 +23,15 @@ class TestSweeper(unittest.TestCase): for h, flist in dups.items(): self.assertTrue(len(flist) == 1) + def test_iter_fule_dups_rethash(self): + for item in iter_file_dups([os.path.join(mydir, 'testfiles_dups')], + rethash=True): + self.assertTrue(type(item).__name__ == 'tuple') + + def test_iter_fule_dups_norethash(self): + for item in iter_file_dups([os.path.join(mydir, 'testfiles_dups')]): + 
self.assertTrue(type(item).__name__ == 'list') + if __name__ == '__main__': unittest.main()