From 609418975e8eacb4c270467c70a6368dde718634 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Mon, 27 Jan 2014 13:33:04 +0100 Subject: [PATCH 01/11] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index cfc344e..e3aaaf3 100644 --- a/README.rst +++ b/README.rst @@ -18,14 +18,14 @@ Remove duplicate files .. code:: python - from sweeper import file_dups + from sweeper import rm_file_dups rm_file_dups(['images']) Perform custom action .. code:: python - from sweeper import file_dups + from sweeper import iter_file_dups for files in iter_file_dups(['images']): for fname in files: print('found duplicate file with name: %s' % fname) From a60ede31fbafc8dcdf8b2b72ba09055f23bfad00 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 07:29:23 +0100 Subject: [PATCH 02/11] added rethash parameter to iter_file_dups --- sweeper/sweeper.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sweeper/sweeper.py b/sweeper/sweeper.py index e3d5aa2..e4a9c6b 100644 --- a/sweeper/sweeper.py +++ b/sweeper/sweeper.py @@ -103,12 +103,17 @@ def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, shutil.move(f, dest_dir) -def iter_file_dups(topdirs=['./'], hashalg='md5', block_size=4096): - """Yield list of duplicate files when found in specified directory list. +def iter_file_dups(topdirs=['./'], rethash=False, hashalg='md5', block_size=4096): + """Yield duplicate files when found in specified directory list. + If rethash is True then tuple hash value and duplicate paths list is + returned, otherwise duplicate paths list is returned. 
""" dups = file_dups(topdirs, hashalg, block_size) - for fpaths in dups.itervalues(): - yield fpaths + for hash, fpaths in _dict_iter_items: + if rethash: + yield (hash, fpaths) + else: + yield fpaths def main(): From 4b8a7d51beabbfeb75e1cb1a18f244712639c5ba Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 07:30:26 +0100 Subject: [PATCH 03/11] added rethash parameter to iter_file_dups --- sweeper/sweeper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sweeper/sweeper.py b/sweeper/sweeper.py index e4a9c6b..d6efeca 100644 --- a/sweeper/sweeper.py +++ b/sweeper/sweeper.py @@ -109,9 +109,9 @@ def iter_file_dups(topdirs=['./'], rethash=False, hashalg='md5', block_size=4096 returned, otherwise duplicate paths list is returned. """ dups = file_dups(topdirs, hashalg, block_size) - for hash, fpaths in _dict_iter_items: + for hash_, fpaths in _dict_iter_items(dups): if rethash: - yield (hash, fpaths) + yield (hash_, fpaths) else: yield fpaths From 63bb9779dd3ffa98e49f552b60231c72369537cf Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 08:35:17 +0100 Subject: [PATCH 04/11] fixed missing imports --- sweeper/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sweeper/__init__.py b/sweeper/__init__.py index 6816de0..3c38b69 100644 --- a/sweeper/__init__.py +++ b/sweeper/__init__.py @@ -1,4 +1,4 @@ from __future__ import absolute_import -from .sweeper import file_dups +from .sweeper import file_dups, mv_file_dups, rm_file_dups, iter_file_dups __all__ = ['file_dups', 'mv_file_dups', 'rm_file_dups', 'iter_file_dups'] From 3ddd76fcce33f5c3c0f0c261eaef947c37dd7dec Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 08:36:14 +0100 Subject: [PATCH 05/11] added tests for iter_file_dups --- test/test_sweeper.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test_sweeper.py b/test/test_sweeper.py index c4aced3..d75797a 100644 --- a/test/test_sweeper.py 
+++ b/test/test_sweeper.py @@ -3,7 +3,7 @@ # License: GPLv3 import unittest -from sweeper import file_dups +from sweeper import file_dups, iter_file_dups import os mydir = os.path.dirname(os.path.realpath(__file__)) @@ -23,6 +23,15 @@ class TestSweeper(unittest.TestCase): for h, flist in dups.items(): self.assertTrue(len(flist) == 1) + def test_iter_fule_dups_rethash(self): + for item in iter_file_dups([os.path.join(mydir, 'testfiles_dups')], + rethash=True): + self.assertTrue(type(item).__name__ == 'tuple') + + def test_iter_fule_dups_norethash(self): + for item in iter_file_dups([os.path.join(mydir, 'testfiles_dups')]): + self.assertTrue(type(item).__name__ == 'list') + if __name__ == '__main__': unittest.main() From 8cb421c6e4a666fdcbb0e1735836c0f7b1c8294b Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 08:50:58 +0100 Subject: [PATCH 06/11] added pprint action --- sweeper/sweeper.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/sweeper/sweeper.py b/sweeper/sweeper.py index d6efeca..1c55179 100644 --- a/sweeper/sweeper.py +++ b/sweeper/sweeper.py @@ -14,8 +14,19 @@ Options: -b , --block-size= size of block used when reading file's content [default: 4096] -d , --digest-alg= secure hash algorithm [default: md5] --a , --action= action on duplicate files (print, - remove, move) [default: print] +-a , --action= action on duplicate files (pprint, + print, remove, move) [default: pprint] + -remove removes duplicate files except + first found + -move moves duplicate files to + duplicates directory, except first + found + -print prints result dictionary where + keys are hash values and values are + list of duplicate file paths + -pprint prints sets of duplicate file + paths each on its own line where sets + are separated by blank newline -m , --move= move duplicate files to directory (used with move action) [default: ./dups] @@ -136,11 +147,17 @@ def main(): print('Invalid block size "%s"' % 
args['--block-size']) sys.exit(1) - if action == 'print': + if action == 'print' or action == 'pprint': dups = file_dups(topdirs, args['--digest-alg'], args['--block-size']) spam = dict(dups) if spam: - print(json.dumps(spam, indent=4)) + if action == 'pprint': + for h, fpaths in _dict_iter_items(spam): + for path in fpaths: + print(path) + print('') + else: + print(json.dumps(spam, indent=4)) elif action == 'move': mv_file_dups(topdirs, args['--digest-alg'], args['--block-size'], args['--move']) From cf0ff95b36ecbb9375fb9e3e04f0cf07899ce768 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 08:53:51 +0100 Subject: [PATCH 07/11] pep8 tuning --- sweeper/sweeper.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sweeper/sweeper.py b/sweeper/sweeper.py index 1c55179..5ca8dba 100644 --- a/sweeper/sweeper.py +++ b/sweeper/sweeper.py @@ -15,16 +15,17 @@ Options: file's content [default: 4096] -d , --digest-alg= secure hash algorithm [default: md5] -a , --action= action on duplicate files (pprint, - print, remove, move) [default: pprint] - -remove removes duplicate files except - first found - -move moves duplicate files to + print, remove, move) + [default: pprint] + -remove removes duplicate files + except first found + -move moves duplicate files to duplicates directory, except first found -print prints result dictionary where keys are hash values and values are list of duplicate file paths - -pprint prints sets of duplicate file + -pprint prints sets of duplicate file paths each on its own line where sets are separated by blank newline -m , --move= move duplicate files to directory @@ -114,9 +115,10 @@ def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, shutil.move(f, dest_dir) -def iter_file_dups(topdirs=['./'], rethash=False, hashalg='md5', block_size=4096): +def iter_file_dups(topdirs=['./'], rethash=False, hashalg='md5', + block_size=4096): """Yield duplicate files when found in specified directory list. 
- If rethash is True then tuple hash value and duplicate paths list is + If rethash is True then tuple hash value and duplicate paths list is returned, otherwise duplicate paths list is returned. """ dups = file_dups(topdirs, hashalg, block_size) From 55279e1aee068f69f13de7ae7a9037988fe424c6 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 09:08:38 +0100 Subject: [PATCH 08/11] switch to version 0.4.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a558e2e..19c5681 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ def read(fname): setup( name='sweeper', - version='0.3.0', + version='0.4.0', author='Darko Poljak', author_email='darko.poljak@gmail.com', description='Find duplicate files and perform action.', From c7f34614ddfc54a91f807e12aff803b033090113 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 09:09:31 +0100 Subject: [PATCH 09/11] switch to version 0.4.0, added --version parameter --- sweeper/sweeper.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sweeper/sweeper.py b/sweeper/sweeper.py index 5ca8dba..add9f17 100644 --- a/sweeper/sweeper.py +++ b/sweeper/sweeper.py @@ -2,7 +2,7 @@ # Author: Darko Poljak # License: GPLv3 -"""Sweeper. +"""sweeper 0.4.0 Usage: sweeper.py [options] [...] 
@@ -11,6 +11,7 @@ Arguments: Options: -h, --help show this screen +-v, --version show version and exit -b , --block-size= size of block used when reading file's content [default: 4096] -d , --digest-alg= secure hash algorithm [default: md5] @@ -34,7 +35,7 @@ Options: """ __author__ = 'Darko Poljak ' -__version__ = '0.3.0' +__version__ = '0.4.0' __license__ = 'GPLv3' __all__ = [ @@ -149,6 +150,10 @@ def main(): print('Invalid block size "%s"' % args['--block-size']) sys.exit(1) + if args['--version']: + print("sweeper %s" % __version__) + return + if action == 'print' or action == 'pprint': dups = file_dups(topdirs, args['--digest-alg'], args['--block-size']) spam = dict(dups) From ef57c4a04d68ba2f4f29732e0e3bd812d40ea756 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 12:51:29 +0100 Subject: [PATCH 10/11] replaced % operator with .format() --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index e3aaaf3..e8f05fb 100644 --- a/README.rst +++ b/README.rst @@ -28,7 +28,7 @@ Perform custom action from sweeper import iter_file_dups for files in iter_file_dups(['images']): for fname in files: - print('found duplicate file with name: %s' % fname) + print('found duplicate file with name: {}'.format(fname)) As script:: From a3acd60556e41d760158b819bd1feed228c76389 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Tue, 28 Jan 2014 12:51:51 +0100 Subject: [PATCH 11/11] replaced % operator with .format() --- sweeper/sweeper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sweeper/sweeper.py b/sweeper/sweeper.py index add9f17..7da22a3 100644 --- a/sweeper/sweeper.py +++ b/sweeper/sweeper.py @@ -108,7 +108,7 @@ def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, if not os.path.exists(dest_dir): os.mkdir(dest_dir) if not os.path.isdir(dest_dir): - raise OSError('%s is not a directory' % dest_dir) + raise OSError('{} is not a directory'.format(dest_dir)) import shutil 
for files in do_with_file_dups(topdirs, hashalg, block_size): for i, f in enumerate(files): @@ -147,11 +147,11 @@ def main(): bs = int(args['--block-size']) args['--block-size'] = bs except ValueError: - print('Invalid block size "%s"' % args['--block-size']) + print('Invalid block size "{}"'.format(args['--block-size'])) sys.exit(1) if args['--version']: - print("sweeper %s" % __version__) + print("sweeper {}".format(__version__)) return if action == 'print' or action == 'pprint': @@ -171,7 +171,7 @@ def main(): elif action == 'remove': rm_file_dups(topdirs, args['--digest-alg'], args['--block-size']) else: - print('Invalid action "%s"' % action) + print('Invalid action "{}"'.format(action)) # if used as script call main function