Initial commit. First working version.
This commit is contained in:
parent
49518efcba
commit
a074cfb914
21 changed files with 220 additions and 0 deletions
29
README.md
29
README.md
|
@ -2,3 +2,32 @@ sweeper
|
|||
=======
|
||||
|
||||
Find duplicate files and perform an action on them.
|
||||
|
||||
Constructor
|
||||
~~~~~~~~~~~
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
from source::
|
||||
|
||||
python setup.py install
|
||||
|
||||
or from PyPI::
|
||||
|
||||
pip install sweeper
|
||||
|
||||
Documentation
|
||||
=============
|
||||
|
||||
this README.rst, code itself, docstrings
|
||||
|
||||
sweeper can be found on github.com at:
|
||||
|
||||
https://github.com/darko-poljak/sweeper
|
||||
|
||||
Tested With
|
||||
===========
|
||||
|
||||
Python 2.7.6, Python 3.3.3
|
||||
|
||||
|
|
28
setup.py
Normal file
28
setup.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
import os
|
||||
from setuptools import setup
|
||||
|
||||
def read(fname):
    """Return the content of a file located next to this script.

    ``fname`` is joined onto the directory that contains this file, so
    the result does not depend on the current working directory.

    The file is opened inside a ``with`` block so the handle is closed
    as soon as the content has been read, instead of being left open
    until the garbage collector reclaims it.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path) as f:
        return f.read()
|
||||
|
||||
# Trove classifiers published with the package metadata.
classifiers = [
    "Development Status :: 4 - Beta",
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    "Operating System :: OS Independent",
    "Topic :: Software Development :: Libraries :: Python Modules",
]

# Package metadata, gathered in one mapping and handed to setuptools below.
# NOTE(review): the long description is read from README.rst, while this
# commit's file list shows README.md -- confirm README.rst exists.
metadata = {
    'name': 'sweeper',
    'version': '0.1.0',
    'author': 'Darko Poljak',
    'author_email': 'darko.poljak@gmail.com',
    'description': 'Find duplicate files in directory tree.',
    'license': 'GPLv3',
    'keywords': ['find duplicate files'],
    'url': 'https://github.com/darko-poljak/sweeper',
    'download_url': 'https://github.com/darko-poljak/sweeper',
    'packages': ['sweeper'],
    'long_description': read('README.rst'),
    'platforms': 'OS Independent',
    'install_requires': ['docopt'],
    'classifiers': classifiers,
}

setup(**metadata)
|
||||
|
4
sweeper/__init__.py
Normal file
4
sweeper/__init__.py
Normal file
|
@ -0,0 +1,4 @@
|
|||
from __future__ import absolute_import

# Import every name advertised in __all__; listing a name there without
# binding it in this namespace makes ``from sweeper import *`` fail.
from .sweeper import (
    file_dups,
    mv_file_dups,
    rm_file_dups,
    do_with_file_dups,
)

__all__ = ['file_dups', 'mv_file_dups', 'rm_file_dups', 'do_with_file_dups']
|
BIN
sweeper/__init__.pyc
Normal file
BIN
sweeper/__init__.pyc
Normal file
Binary file not shown.
BIN
sweeper/__pycache__/__init__.cpython-33.pyc
Normal file
BIN
sweeper/__pycache__/__init__.cpython-33.pyc
Normal file
Binary file not shown.
BIN
sweeper/__pycache__/sweeper.cpython-33.pyc
Normal file
BIN
sweeper/__pycache__/sweeper.cpython-33.pyc
Normal file
Binary file not shown.
125
sweeper/sweeper.py
Normal file
125
sweeper/sweeper.py
Normal file
|
@ -0,0 +1,125 @@
|
|||
#!/usr/bin/env python
|
||||
# Author: Darko Poljak <darko.poljak@gmail.com>
|
||||
# License: GPLv3
|
||||
|
||||
"""Sweeper.
|
||||
|
||||
Usage: sweeper.py [options] [<directory>...]
|
||||
|
||||
Arguments:
|
||||
<directory> directory path to scan for files
|
||||
|
||||
Options:
|
||||
-h, --help show this screen
|
||||
-b <blocksize>, --block-size=<blocksize> size of block used when reading file's
|
||||
content [default: 4096]
|
||||
-d <hashalg>, --digest-alg=<hashalg> secure hash algorithm [default: md5]
|
||||
-a <action>, --action=<action> action on duplicate files
|
||||
(print, remove, move) [default: print]
|
||||
-m <directory>, --move=<directory> move duplicate files to directory
|
||||
(used with move action) [default: ./dups]
|
||||
"""
|
||||
|
||||
__author__ = 'Darko Poljak <darko.poljak@gmail.com>'
|
||||
__version__ = '0.1.0'
|
||||
__license__ = 'GPLv3'
|
||||
|
||||
__all__ = [
|
||||
'file_dups', 'rm_file_dups', 'mv_file_dups', 'do_with_file_dups'
|
||||
]
|
||||
|
||||
import sys
|
||||
import hashlib
|
||||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
def _do_encode(buf):
    """Return ``buf`` unchanged; the same on every Python version."""
    return buf


# Only the dict iteration helper differs between Python 3 and older versions.
if sys.version_info[0] == 3:
    def _dict_iter_items(d):
        """Return the (key, value) pairs of ``d`` via ``d.items()``."""
        return d.items()
else:
    def _dict_iter_items(d):
        """Return the (key, value) pairs of ``d`` via ``d.iteritems()``."""
        return d.iteritems()
|
||||
|
||||
|
||||
def _filehash(filepath, hashalg, block_size):
    """Return the hex digest of the content of the file at ``filepath``.

    The file is opened in binary mode and fed to the hash object in
    chunks of ``block_size`` bytes, so the whole file is never held in
    memory at once.

    Arguments:
        filepath: path of the file to hash.
        hashalg: algorithm name passed to ``hashlib.new``.
        block_size: number of bytes requested per read.

    Raises:
        ValueError: if ``block_size`` is 0. A zero-sized read returns
            ``b''`` immediately, which would end the loop before any
            data is hashed and give every file the same digest.
    """
    if block_size == 0:
        raise ValueError('block_size must not be 0')
    md = hashlib.new(hashalg)
    with open(filepath, "rb") as f:
        # iter() with a sentinel keeps reading until read() returns b''.
        # The chunks are already bytes, so they go to the hash as-is.
        for buf in iter(lambda: f.read(block_size), b''):
            md.update(buf)
    return md.hexdigest()
|
||||
|
||||
|
||||
def file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
    """Find files with identical content under the given directories.

    Every file reached by ``os.walk`` from each entry of ``topdirs`` is
    hashed with ``hashalg`` (read in chunks of ``block_size`` bytes), and
    files are grouped by digest.

    The same physical file can be reached more than once, for example
    when ``topdirs`` contains overlapping or repeated directories, or
    through a symbolic link to a scanned file. Such a file is counted
    only once, so it is never reported as a duplicate of itself.

    Returns:
        dict mapping hex digest to the list of paths sharing that digest;
        only digests with more than one path are included.
    """
    # The list default is only read, never mutated.
    dups = defaultdict(list)
    seen = set()
    for topdir in topdirs:
        for dirpath, dirnames, filenames in os.walk(topdir):
            for fname in filenames:
                fpath = os.path.join(dirpath, fname)
                # Identify the file by its resolved path so one file
                # visited twice is hashed and listed only once.
                real = os.path.realpath(fpath)
                if real in seen:
                    continue
                seen.add(real)
                hexmd = _filehash(fpath, hashalg, block_size)
                dups[hexmd].append(fpath)
    return {k: v for k, v in _dict_iter_items(dups) if len(v) > 1}
|
||||
|
||||
|
||||
def rm_file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
    """Remove duplicate files found under ``topdirs``.

    For each group of files with identical content, the first file is
    kept and all the others are deleted with ``os.remove``. Keeping the
    first file matches what ``mv_file_dups`` does and ensures one copy
    of the content always survives.
    """
    for files in do_with_file_dups(topdirs, hashalg, block_size):
        # Skip the first path: it is the copy that is kept.
        for f in files[1:]:
            os.remove(f)
|
||||
|
||||
|
||||
def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096,
                 dest_dir='dups'):
    """Move duplicate files found under ``topdirs`` into ``dest_dir``.

    ``dest_dir`` is created with ``os.mkdir`` when it does not exist.
    For each group of identical files, the first file stays where it is
    and the remaining ones are moved with ``shutil.move``.

    Raises:
        OSError: if ``dest_dir`` exists but is not a directory.
    """
    import shutil

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir)
    if not os.path.isdir(dest_dir):
        raise OSError('%s is not a directory' % dest_dir)

    for group in do_with_file_dups(topdirs, hashalg, block_size):
        # Everything after the first entry is a redundant copy.
        for extra in group[1:]:
            shutil.move(extra, dest_dir)
|
||||
|
||||
|
||||
def do_with_file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
    """Yield each group of duplicate files found under ``topdirs``.

    This is a generator: the scan done by ``file_dups`` starts on the
    first iteration. Each yielded item is the list of paths that share
    one digest.
    """
    dups = file_dups(topdirs, hashalg, block_size)
    # values() exists on both Python 2 and 3 dicts; itervalues() is
    # Python 2 only and raises AttributeError on Python 3.
    for fpaths in dups.values():
        yield fpaths
|
||||
|
||||
|
||||
def main(args):
    """Run the command-line action described by ``args``.

    ``args`` is the mapping produced by docopt from the module usage
    text. The keys read are ``<directory>``, ``--action``,
    ``--block-size``, ``--digest-alg`` and ``--move``. The parsed block
    size is written back into ``args['--block-size']`` as an int.

    Actions:
        print: dump the duplicate groups as indented JSON.
        move: move redundant copies into the ``--move`` directory.
        remove: delete redundant copies.

    Exits with status 1 when the block size is not a positive integer
    or when the action is unknown.
    """
    import json

    topdirs = args['<directory>']
    if not topdirs:
        topdirs = ['./']

    action = args['--action']
    try:
        bs = int(args['--block-size'])
        # A block size of 0 would make every read return b'' at once,
        # so all files would hash alike and be treated as duplicates.
        if bs < 1:
            raise ValueError('block size must be positive')
    except ValueError:
        print('Invalid block size "%s"' % args['--block-size'])
        sys.exit(1)
    args['--block-size'] = bs

    if action == 'print':
        dups = file_dups(topdirs, args['--digest-alg'], args['--block-size'])
        print(json.dumps(dict(dups), indent=4))
    elif action == 'move':
        mv_file_dups(topdirs, args['--digest-alg'], args['--block-size'],
                     args['--move'])
    elif action == 'remove':
        rm_file_dups(topdirs, args['--digest-alg'], args['--block-size'])
    else:
        print('Invalid action "%s"' % action)
        # Report the failure through the exit status as well, matching
        # the invalid block size case.
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # docopt is imported only when the module is run as a script.
    from docopt import docopt

    # Parse the command line against the module usage text and dispatch.
    main(docopt(__doc__))
|
BIN
sweeper/sweeper.pyc
Normal file
BIN
sweeper/sweeper.pyc
Normal file
Binary file not shown.
0
test/__init__.py
Normal file
0
test/__init__.py
Normal file
BIN
test/__init__.pyc
Normal file
BIN
test/__init__.pyc
Normal file
Binary file not shown.
BIN
test/__pycache__/__init__.cpython-33.pyc
Normal file
BIN
test/__pycache__/__init__.cpython-33.pyc
Normal file
Binary file not shown.
BIN
test/__pycache__/test_sweeper.cpython-33.pyc
Normal file
BIN
test/__pycache__/test_sweeper.cpython-33.pyc
Normal file
Binary file not shown.
28
test/test_sweeper.py
Normal file
28
test/test_sweeper.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
#!/usr/bin/env python
|
||||
# Author: Darko Poljak <darko.poljak@gmail.com>
|
||||
# License: GPLv3
|
||||
|
||||
import unittest
|
||||
from sweeper import file_dups
|
||||
import os
|
||||
|
||||
mydir = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
||||
class TestSweeper(unittest.TestCase):
    """Checks for ``file_dups`` against the bundled fixture directories."""

    def test_file_dups_dups(self):
        """At least one group in ``testfiles_dups`` has several files."""
        fixture = os.path.join(mydir, 'testfiles_dups')
        found = file_dups([fixture], 'md5')
        has_group = any(len(paths) > 1 for paths in found.values())
        self.assertTrue(has_group)

    def test_file_dups_nodups(self):
        """No group in ``testfiles_nodups`` has more than one file."""
        fixture = os.path.join(mydir, 'testfiles_nodups')
        found = file_dups([fixture], 'md5')
        self.assertTrue(all(len(paths) == 1 for paths in found.values()))
|
||||
|
||||
|
||||
# Run the test cases when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
BIN
test/testfiles_dups/img1.jpg
Executable file
BIN
test/testfiles_dups/img1.jpg
Executable file
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
BIN
test/testfiles_dups/img2.jpg
Executable file
BIN
test/testfiles_dups/img2.jpg
Executable file
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
1
test/testfiles_dups/test1
Normal file
1
test/testfiles_dups/test1
Normal file
|
@ -0,0 +1 @@
|
|||
test1
|
1
test/testfiles_dups/test2
Normal file
1
test/testfiles_dups/test2
Normal file
|
@ -0,0 +1 @@
|
|||
test2
|
1
test/testfiles_dups/test3
Normal file
1
test/testfiles_dups/test3
Normal file
|
@ -0,0 +1 @@
|
|||
test2
|
1
test/testfiles_nodups/test1
Normal file
1
test/testfiles_nodups/test1
Normal file
|
@ -0,0 +1 @@
|
|||
test1
|
1
test/testfiles_nodups/test2
Normal file
1
test/testfiles_nodups/test2
Normal file
|
@ -0,0 +1 @@
|
|||
test2
|
1
test/testfiles_nodups/test3
Normal file
1
test/testfiles_nodups/test3
Normal file
|
@ -0,0 +1 @@
|
|||
test3
|
Loading…
Reference in a new issue