synchronization
This commit is contained in:
parent
30620884c9
commit
1f4cb3e177
5 changed files with 57 additions and 19 deletions
|
@ -18,17 +18,17 @@ Remove duplicate files
|
||||||
|
|
||||||
.. code:: python
|
.. code:: python
|
||||||
|
|
||||||
from sweeper import file_dups
|
from sweeper import rm_file_dups
|
||||||
rm_file_dups(['images'])
|
rm_file_dups(['images'])
|
||||||
|
|
||||||
Perform custom action
|
Perform custom action
|
||||||
|
|
||||||
.. code:: python
|
.. code:: python
|
||||||
|
|
||||||
from sweeper import file_dups
|
from sweeper import iter_file_dups
|
||||||
for files in iter_file_dups(['images']):
|
for files in iter_file_dups(['images']):
|
||||||
for fname in files:
|
for fname in files:
|
||||||
print('found duplicate file with name: %s' % fname)
|
print('found duplicate file with name: {}'.format(fname))
|
||||||
|
|
||||||
As script::
|
As script::
|
||||||
|
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -6,7 +6,7 @@ def read(fname):
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='sweeper',
|
name='sweeper',
|
||||||
version='0.3.0',
|
version='0.4.0',
|
||||||
author='Darko Poljak',
|
author='Darko Poljak',
|
||||||
author_email='darko.poljak@gmail.com',
|
author_email='darko.poljak@gmail.com',
|
||||||
description='Find duplicate files and perform action.',
|
description='Find duplicate files and perform action.',
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from .sweeper import file_dups
|
from .sweeper import file_dups, mv_file_dups, rm_file_dups, iter_file_dups
|
||||||
|
|
||||||
__all__ = ['file_dups', 'mv_file_dups', 'rm_file_dups', 'iter_file_dups']
|
__all__ = ['file_dups', 'mv_file_dups', 'rm_file_dups', 'iter_file_dups']
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
# Author: Darko Poljak <darko.poljak@gmail.com>
|
# Author: Darko Poljak <darko.poljak@gmail.com>
|
||||||
# License: GPLv3
|
# License: GPLv3
|
||||||
|
|
||||||
"""Sweeper.
|
"""sweeper 0.4.0
|
||||||
|
|
||||||
Usage: sweeper.py [options] [<directory>...]
|
Usage: sweeper.py [options] [<directory>...]
|
||||||
|
|
||||||
|
@ -11,18 +11,31 @@ Arguments:
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
-h, --help show this screen
|
-h, --help show this screen
|
||||||
|
-v, --version show version and exit
|
||||||
-b <blocksize>, --block-size=<blocksize> size of block used when reading
|
-b <blocksize>, --block-size=<blocksize> size of block used when reading
|
||||||
file's content [default: 4096]
|
file's content [default: 4096]
|
||||||
-d <hashalg>, --digest-alg=<hashalg> secure hash algorithm [default: md5]
|
-d <hashalg>, --digest-alg=<hashalg> secure hash algorithm [default: md5]
|
||||||
-a <action>, --action=<action> action on duplicate files (print,
|
-a <action>, --action=<action> action on duplicate files (pprint,
|
||||||
remove, move) [default: print]
|
print, remove, move)
|
||||||
|
[default: pprint]
|
||||||
|
-remove removes duplicate files
|
||||||
|
except first found
|
||||||
|
-move moves duplicate files to
|
||||||
|
duplicates directory, except first
|
||||||
|
found
|
||||||
|
-print prints result dictionary where
|
||||||
|
keys are hash values and values are
|
||||||
|
list of duplicate file paths
|
||||||
|
-pprint prints sets of duplicate file
|
||||||
|
paths each in its line where sets
|
||||||
|
are separated by a blank line
|
||||||
-m <directory>, --move=<directory> move duplicate files to directory
|
-m <directory>, --move=<directory> move duplicate files to directory
|
||||||
(used with move action)
|
(used with move action)
|
||||||
[default: ./dups]
|
[default: ./dups]
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__author__ = 'Darko Poljak <darko.poljak@gmail.com>'
|
__author__ = 'Darko Poljak <darko.poljak@gmail.com>'
|
||||||
__version__ = '0.3.0'
|
__version__ = '0.4.0'
|
||||||
__license__ = 'GPLv3'
|
__license__ = 'GPLv3'
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
@ -95,7 +108,7 @@ def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096,
|
||||||
if not os.path.exists(dest_dir):
|
if not os.path.exists(dest_dir):
|
||||||
os.mkdir(dest_dir)
|
os.mkdir(dest_dir)
|
||||||
if not os.path.isdir(dest_dir):
|
if not os.path.isdir(dest_dir):
|
||||||
raise OSError('%s is not a directory' % dest_dir)
|
raise OSError('{} is not a directory'.format(dest_dir))
|
||||||
import shutil
|
import shutil
|
||||||
for files in do_with_file_dups(topdirs, hashalg, block_size):
|
for files in do_with_file_dups(topdirs, hashalg, block_size):
|
||||||
for i, f in enumerate(files):
|
for i, f in enumerate(files):
|
||||||
|
@ -103,12 +116,18 @@ def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096,
|
||||||
shutil.move(f, dest_dir)
|
shutil.move(f, dest_dir)
|
||||||
|
|
||||||
|
|
||||||
def iter_file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
|
def iter_file_dups(topdirs=['./'], rethash=False, hashalg='md5',
|
||||||
"""Yield list of duplicate files when found in specified directory list.
|
block_size=4096):
|
||||||
|
"""Yield duplicate files when found in specified directory list.
|
||||||
|
If rethash is True then tuple hash value and duplicate paths list is
|
||||||
|
returned, otherwise duplicate paths list is returned.
|
||||||
"""
|
"""
|
||||||
dups = file_dups(topdirs, hashalg, block_size)
|
dups = file_dups(topdirs, hashalg, block_size)
|
||||||
for fpaths in dups.itervalues():
|
for hash_, fpaths in _dict_iter_items(dups):
|
||||||
yield fpaths
|
if rethash:
|
||||||
|
yield (hash_, fpaths)
|
||||||
|
else:
|
||||||
|
yield fpaths
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -128,21 +147,31 @@ def main():
|
||||||
bs = int(args['--block-size'])
|
bs = int(args['--block-size'])
|
||||||
args['--block-size'] = bs
|
args['--block-size'] = bs
|
||||||
except ValueError:
|
except ValueError:
|
||||||
print('Invalid block size "%s"' % args['--block-size'])
|
print('Invalid block size "{}"'.format(args['--block-size']))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if action == 'print':
|
if args['--version']:
|
||||||
|
print("sweeper {}".format(__version__))
|
||||||
|
return
|
||||||
|
|
||||||
|
if action == 'print' or action == 'pprint':
|
||||||
dups = file_dups(topdirs, args['--digest-alg'], args['--block-size'])
|
dups = file_dups(topdirs, args['--digest-alg'], args['--block-size'])
|
||||||
spam = dict(dups)
|
spam = dict(dups)
|
||||||
if spam:
|
if spam:
|
||||||
print(json.dumps(spam, indent=4))
|
if action == 'pprint':
|
||||||
|
for h, fpaths in _dict_iter_items(spam):
|
||||||
|
for path in fpaths:
|
||||||
|
print(path)
|
||||||
|
print('')
|
||||||
|
else:
|
||||||
|
print(json.dumps(spam, indent=4))
|
||||||
elif action == 'move':
|
elif action == 'move':
|
||||||
mv_file_dups(topdirs, args['--digest-alg'], args['--block-size'],
|
mv_file_dups(topdirs, args['--digest-alg'], args['--block-size'],
|
||||||
args['--move'])
|
args['--move'])
|
||||||
elif action == 'remove':
|
elif action == 'remove':
|
||||||
rm_file_dups(topdirs, args['--digest-alg'], args['--block-size'])
|
rm_file_dups(topdirs, args['--digest-alg'], args['--block-size'])
|
||||||
else:
|
else:
|
||||||
print('Invalid action "%s"' % action)
|
print('Invalid action "{}"'.format(action))
|
||||||
|
|
||||||
|
|
||||||
# if used as script call main function
|
# if used as script call main function
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
# License: GPLv3
|
# License: GPLv3
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
from sweeper import file_dups
|
from sweeper import file_dups, iter_file_dups
|
||||||
import os
|
import os
|
||||||
|
|
||||||
mydir = os.path.dirname(os.path.realpath(__file__))
|
mydir = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
@ -23,6 +23,15 @@ class TestSweeper(unittest.TestCase):
|
||||||
for h, flist in dups.items():
|
for h, flist in dups.items():
|
||||||
self.assertTrue(len(flist) == 1)
|
self.assertTrue(len(flist) == 1)
|
||||||
|
|
||||||
|
def test_iter_fule_dups_rethash(self):
|
||||||
|
for item in iter_file_dups([os.path.join(mydir, 'testfiles_dups')],
|
||||||
|
rethash=True):
|
||||||
|
self.assertTrue(type(item).__name__ == 'tuple')
|
||||||
|
|
||||||
|
def test_iter_fule_dups_norethash(self):
|
||||||
|
for item in iter_file_dups([os.path.join(mydir, 'testfiles_dups')]):
|
||||||
|
self.assertTrue(type(item).__name__ == 'list')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
Loading…
Add table
Reference in a new issue