Rewritten as class Sweeper with code improvements and optimizations.

Small fixes.
Small changes.
2014-08-10 00:06:32 +02:00 · 2014-08-09 20:59:23 +02:00 · 2014-08-09 20:56:09 +02:00 · 2014-08-09 20:54:02 +02:00 · 2014-08-09 20:52:53 +02:00 · 2014-02-08 20:27:30 +01:00
9 changed files with 400 additions and 88 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,3 @@
 *.pyc
 dist/
 *.egg-info/
--- a/5
+++ b/5
@ -0,0 +1,5 @@
 Author:
    Darko Poljak <darko.poljak@gmail.com>
 Contributors:
    matthewi
--- a/README.rst
+++ b/README.rst
@ -10,29 +10,31 @@ Print duplicates
 .. code:: python
-    from sweeper import file_dups
+    from sweeper import Sweeper
-    dups = file_dups(['images1', 'images2'])
+    swp = Sweeper(['images1', 'images2'])
    dups = swp.file_dups()
    print(dups)
 Remove duplicate files
 .. code:: python
-    from sweeper import file_dups
+    from sweeper import Sweeper
-    rm_file_dups(['images'])
+    swp = Sweeper(['images1', 'images2'])
    swp.rm()
 Perform custom action
 .. code:: python
-    from sweeper import file_dups
+    from sweeper import Sweeper
-    for files in iter_file_dups(['images']):
+    swp = Sweeper(['images'])
-        for fname in files:
+    for f, h, dups in swp:
-            print('found duplicate file with name: %s' % fname)
+        print('encountered {} which duplicates with already found duplicate files {} with hash {}'.format(f, dups, h))
 As script::
-    python sweeper.py --help
+    python -m sweeper.sweeper --help
 As installed console script::
@ -61,5 +63,5 @@ https://github.com/darko-poljak/sweeper
 Tested With
 ===========
-Python2.7.6, Python3.3.3
+Python2.7, Python3
--- a/0
+++ b/0
--- a/setup.py
+++ b/setup.py
@ -1,12 +1,17 @@
 import os
 from setuptools import setup
 def read(fname):
     """Return the content of fname, resolved relative to the directory
        of this file.
        The file is opened in a with block so the handle is closed as soon
        as the content is read instead of being left open.
     """
     path = os.path.join(os.path.dirname(__file__), fname)
     with open(path) as f:
         return f.read()
 import sweeper.sweeper as sw
 setup(
    name='sweeper',
-    version='0.2.0',
+    version=sw.__version__,
    author='Darko Poljak',
    author_email='darko.poljak@gmail.com',
    description='Find duplicate files and perform action.',
@ -30,4 +35,3 @@ setup(
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
 )
--- a/sweeper/init.py
+++ b/sweeper/init.py
@ -1,4 +1,4 @@
 from __future__ import absolute_import
-from .sweeper import file_dups
+from .sweeper import Sweeper
-__all__ = ['file_dups', 'mv_file_dups', 'rm_file_dups', 'iter_file_dups']
+__all__ = ['Sweeper']
--- a/sweeper/sweeper.py
+++ b/sweeper/sweeper.py
@ -2,52 +2,96 @@
 # Author: Darko Poljak <darko.poljak@gmail.com>
 # License: GPLv3
-"""Sweeper.
+"""{0} {1}
-Usage: sweeper.py [options] [<directory>...]
+Usage: {0} [options] [<directory>...]
 Arguments:
    <directory> directory path to scan for files
 Options:
 -h, --help                                show this screen
-b <blocksize>, --block-size=<blocksize>  size of block used when reading file's
+-v, --version                             show version and exit
-                                          content [default: 4096]
+-b <blocksize>, --block-size=<blocksize>  size of block used when reading
-d <hashalg>, --digest-alg=<hashalg>      secure hash algorithm [default: md5]
+                                          file's content [default: 4096]
-a <action>, --action=<action>            action on duplicate files 
+-d <hashalgs>, --digest-algs=<hashalgs>   secure hash algorithm comma separated
-                                          (print, remove, move) [default: print]
+                                          list [default: sha1]
                                          note that multiple hashes will slow
                                          down sweeper
 -a <action>, --action=<action>            action on duplicate files (pprint,
                                          print, remove, move)
                                          [default: pprint]
                                          -remove removes duplicate files
                                           except first or first with specified
                                           directory prefix found
                                          -move moves duplicate files to
                                           duplicates driectory, except first
                                           or first with specified directory
                                           prefix found
                                          -print prints result directory where
                                           keys are hash values and values are
                                           list of duplicate file paths
                                          -pprint prints sets of duplicate file
                                           paths each in it's line where sets
                                           are separated by blank newline
 -m <directory>, --move=<directory>        move duplicate files to directory
-                                          (used with move action) [default: ./dups]
+                                          (used with move action)
                                          [default: ./dups]
 -k <dirprefix>, --keep=<dirprefix>        directory prefix for remove and move
                                          actions
 -s, --simulate                            if action is remove or move just
                                          simulate action by printing, do not
                                          actually perform the action
 -V, --verbose                             print more info
                                          note that verbosity will slow down
                                          sweeper due to text printing and
                                          gathering additional information
 -S, --safe-mode                           enable safe mode: compare hash
                                          duplicate files byte by byte too
                                          note that it will further slow down
                                          sweeper but will overcome hash
                                          collisions (although this is
                                          unlikely)
 """
 from __future__ import print_function
 __author__ = 'Darko Poljak <darko.poljak@gmail.com>'
-__version__ = '0.2.0'
+__version__ = '0.9.0'
 __license__ = 'GPLv3'
-__all__ = [
+__all__ = ['Sweeper']
    'file_dups', 'rm_file_dups', 'mv_file_dups', 'iter_file_dups'
 ]
 import sys
 import hashlib
 import os
 from collections import defaultdict
 from functools import partial
 import hashlib
 # some differences in python versions
 # we prefer iter methods
 if sys.version_info[0] == 3:
    def _do_encode(buf):
        return buf
    def _dict_iter_items(d):
        return d.items()
 else:
    def _do_encode(buf):
        return buf
    def _dict_iter_keys(d):
        return d.keys()
    def _dict_iter_values(d):
        return d.values()
 else:
    def _dict_iter_items(d):
        return d.iteritems()
    def _dict_iter_keys(d):
        return d.iterkeys()
    def _dict_iter_values(d):
        return d.itervalues()
    range = xrange
 def _filehash(filepath, hashalg, block_size):
    """Calculate secure hash for given file content using
@ -57,56 +101,254 @@ def _filehash(filepath, hashalg, block_size):
    md = hashlib.new(hashalg)
    with open(filepath, "rb") as f:
        for buf in iter(lambda: f.read(block_size), b''):
-            md.update(_do_encode(buf))
+            md.update(buf)
    return md.hexdigest()
-def file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
+def _uniq_list(list_):
-    """Find duplicate files in directory list. Return directory
+    return list(set(list_))
-       with keys equal to file hash value and value as list of
+
-       file paths whose content is the same.
+
-    """
+def _fbequal(fpath1, fpath2):
-    dups = defaultdict(list)
+    '''Compare files byte by byte. If files are equal return True,
-    for topdir in topdirs:
+       False otherwise.
-        for dirpath, dirnames, filenames in os.walk(topdir):
+       fpath1 and fpath2 are file paths.
-            for fname in filenames:
+    '''
-                fpath = os.path.join(dirpath, fname)
+    # nested to work with 2.6
-                hexmd = _filehash(fpath, hashalg, block_size)
+    with open(fpath1, "rb") as f1:
        with open(fpath2, "rb") as f2:
            while True:
                b1 = f1.read(1)
                b2 = f2.read(1)
                if b1 != b2:  # different bytes
                    return False
                if not b1 or not b2:  # end in one or both files
                    break
    if not b1 and not b2:  # end in both files, files are equal
        return True
    # end in one file but not in the other, files aren't equal
    return False
 def _remap_keys_to_str(d):
     '''Generator over (key, value) pairs of dictionary d.
        A key that is tuple or list is replaced with its items joined
        by comma into one string. Any other key is yielded as it is.
     '''
     for orig_key in _dict_iter_keys(d):
         joinable = isinstance(orig_key, (tuple, list))
         yield (','.join(orig_key) if joinable else orig_key, d[orig_key])
 def _gather_file_list(dirs):
    '''Gather file paths in directory list dirs.
       Return tuple (count, files) where count is files
       list length and files is list of file paths in
       specified directories.
    '''
    files = []
    for dir_ in dirs:
        for dirpath, dirnames, filenames in os.walk(dir_):
            # replace fpath with realpath value (eliminate symbolic links)
            files.extend([os.path.realpath(os.path.join(dirpath, fname))
                          for fname in filenames])
    return files
 class Sweeper(object):
    DEF_HASHALGS = ['sha1']
    def __init__(self, topdirs=['./'], hashalgs=DEF_HASHALGS,
                 block_size=4096, verbose=False, safe_mode=False):
        # replace dir paths with realpath value (eliminate symbolic links)
        self.topdirs = []
        for i in range(len(topdirs)):
            self.topdirs.append(os.path.realpath(topdirs[i]))
        self.hashalgs = hashalgs
        self.block_size = block_size
        self.verbose = verbose
        self.safe_mode = safe_mode
    # iter through file paths in files list
    def _files_iter_from_list(self, files):
        return (fpath for fpath in files)
    # iter through file paths by os.walking
    def _files_iter_from_disk(self):
        for topdir in self.topdirs:
            for dirpath, dirnames, filenames in os.walk(topdir):
                for fname in filenames:
                    # replace fpath with realpath value
                    # (eliminate symbolic links)
                    fpath = os.path.realpath(os.path.join(dirpath, fname))
                    yield fpath
    def file_dups(self):
        """Find duplicate files in directory list. Return dictionary
           with keys equal to file hash value (tuple holding one hex
           digest per algorithm in self.hashalgs) and value as list of
           unique file paths whose content is the same. Only lists with
           more than one path are included.
           If safe_mode is true then you want to play safe: do byte
           by byte comparison for hash duplicate files.
        """
        dups = defaultdict(list)
        if self.verbose:
            if self.safe_mode:
                print('safe mode is on')
            print('gathering and counting files...', end='')
            sys.stdout.flush()
            # file list is gathered up front only to know the total count
            # for progress output, it is then reused for the iteration
            files = _gather_file_list(self.topdirs)
            count = len(files)
            current = 1
            print(count)
            _files_iter = partial(self._files_iter_from_list, files)
        else:
            _files_iter = self._files_iter_from_disk

        for fpath in _files_iter():
            if self.verbose:
                print('\rprocessing file {0}/{1}: calc hash'.format(current,
                                                                    count),
                      end='')
                sys.stdout.flush()
            hexmd = tuple(_filehash(fpath, h, self.block_size)
                          for h in self.hashalgs)
            dup_files = dups[hexmd]
            # when safe mode is off, or when this is the first file with
            # this hash, the file is accepted on hash value alone
            files_equals = True
            if self.safe_mode and dup_files:
                if self.verbose:
                    print('\rprocessing file {0}/{1}: byte cmp'.format(
                        current, count), end='')
                    sys.stdout.flush()
                files_equals = any(_fbequal(f, fpath) for f in dup_files)
                if self.verbose and not files_equals:
                    print('\nsame hash value {} but not same bytes for'
                          ' file {} with files {}'.format(
                              hexmd, fpath, dup_files))
            if self.verbose:
                current += 1
            if files_equals:
                dup_files.append(fpath)

        if self.verbose:
            print('')
        # make result dict with unique file paths list
        result = {}
        for k, v in _dict_iter_items(dups):
            uniq_v = _uniq_list(v)
            if len(uniq_v) > 1:
                result[k] = uniq_v
        return result
-def rm_file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
+    def __iter__(self):
-    """Remove duplicate files found in specified directory list.
+        """Find duplicate files in directory list.
-       First file in list is kept.
+           Yield tuple of file path, hash tuple and list of duplicate files
-    """
+           as soon as duplicate file is found.
-    for files in do_with_file_dups(topdirs, hashalg, block_size):
+           Newly found file is not included in the list at the yield time,
-        for f in files:
+           but is appended later before next yield.
-            os.remove(f)
+           This means that not all duplicate files are returned with any
           return value. Same hash value and sublist could be returned later
           if file with same content is found.
           If safe_mode is true then you want to play safe: do byte
           by byte comparison for hash duplicate files.
        """
        # internaly, file dups dict is still maintained
        dups = defaultdict(list)
        _files_iter = self._files_iter_from_disk
        for fpath in _files_iter():
            hexmds = [_filehash(fpath, h, self.block_size)
                      for h in self.hashalgs]
            hexmd = tuple(hexmds)
            dup_files = dups[hexmd]
            # there were dup list elements (used for yield)
            if self.safe_mode and dup_files:
                # compare only with first file in dup_files
                # all files in dup_files list are already content equal
                files_equals = _fbequal(dup_files[0], fpath)
            else:  # when list is emtpy in safe mode or when safe mode is off
                files_equals = True
            if files_equals:
                # yield only if current dup files list isn't empty
                if dup_files:
                    yield (fpath, hexmd, dups[hexmd])
                # finally append newly found file to dup list
                dups[hexmd].append(fpath)
-def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, dest_dir='dups'):
+    def _extract_files_for_action(self, keep_prefix):
-    """Move duplicate files found in specified directory list.
+        dups = self.file_dups()
-       First file in list is kept in the original directory.
+        for files in _dict_iter_values(dups):
-    """
+            found = False
-    if not os.path.exists(dest_dir):
+            if keep_prefix:
-        os.mkdir(dest_dir)
+                result = []
-    if not os.path.isdir(dest_dir):
+                for f in files:
-        raise OSError('%s is not a directory' % dest_dir)
+                    if f.startswith(keep_prefix) and not found:
-    import shutil
+                        found = True
-    for files in do_with_file_dups(topdirs, hashalg, block_size):
+                    else:
-        for i, f in enumerate(files):
+                        result.append(f)
-            if i > 0:
+            if not found:
-                shutil.move(f, dest_dir)
+                result = list(files)[1:]
            yield (files, result)
    def _do_action(self, simulate, keep_prefix, action, action_str):
        for dups, extracted in self._extract_files_for_action(keep_prefix):
            if simulate or self.verbose:
                print('found duplicates: \n{}'.format(dups))
            for f in extracted:
                if simulate or self.verbose:
                    print(action_str.format(f))
                if not simulate:
                    action(f)
-def iter_file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
+    def rm(self, simulate=False, keep_prefix=None):
-    """Yield list of duplicate files when found in specified directory list.
+        """Remove duplicate files found in specified directory list.
-    """
+           If keep_prefix is specified then first file with that path
-    dups = file_dups(topdirs, hashalg, block_size)
+           prefix found is kept in the original directory.
-    for fpaths in dups.itervalues():
+           Otherwise first file in list is kept in the original directory.
-        yield fpaths
+           If simulate is True then only print the action, do not actually
           perform it.
           If safe_mode is true then do byte by byte comparison for
           hash duplicate files.
        """
        self._do_action(simulate, keep_prefix, os.remove, 'rm {}')
    def mv(self, dest_dir='dups', simulate=False, keep_prefix=None):
        """Move duplicate files found in specified directory list.
           If keep_prefix is specified then first file with that path
           prefix found is kept in the original directory.
           Otherwise first file in list is kept in the original directory.
           If simulate is True then only print the action, do not actually
           perform it.
           If safe_mode is true then do byte by byte comparison for
           hash duplicate files.
        """
        import shutil
        if not os.path.exists(dest_dir):
            if simulate:
                print('mkdir {}'.format(dest_dir))
            else:
                os.mkdir(dest_dir)
        elif not os.path.isdir(dest_dir):
            errmsg = '{} is not a directory'.format(dest_dir)
            if simulate:
                print('would raise:', errmsg)
            else:
                raise OSError(errmsg)
        self._do_action(simulate, keep_prefix,
                        partial(shutil.move, dst=dest_dir),
                        'mv {0} to ' + dest_dir)
 def main():
@ -115,32 +357,58 @@ def main():
    import json
    from docopt import docopt
-    args = docopt(__doc__)
+    args = docopt(__doc__.format(sys.argv[0], __version__),
- 
+                  version=" ".join(('sweeper', __version__)))
    topdirs = args['<directory>']
    if not topdirs:
        topdirs = ['./']
    action = args['--action']
    verbose = args['--verbose']
    # set block size as int
    try:
        bs = int(args['--block-size'])
        args['--block-size'] = bs
    except ValueError:
-        print('Invalid block size "%s"' % args['--block-size'])
+        print('Invalid block size "{}"'.format(args['--block-size']))
        sys.exit(1)
    hashalgs = args['--digest-algs'].split(',')
    hashalgs_uniq = _uniq_list(hashalgs)
    if len(hashalgs) != len(hashalgs_uniq):
        print('Duplicate hash algorithms specified')
        sys.exit(1)
    block_size = args['--block-size']
    simulate = args['--simulate']
    keep_prefix = args['--keep']
    dest_dir = args['--move']
    safe_mode = args['--safe-mode']
-    if action == 'print':
+    sweeper = Sweeper(topdirs=topdirs, hashalgs=hashalgs,
-        dups = file_dups(topdirs, args['--digest-alg'], args['--block-size'])
+                      block_size=block_size, verbose=verbose,
-        print(json.dumps(dict(dups), indent=4))
+                      safe_mode=safe_mode)
    if action == 'print' or action == 'pprint':
        dups = sweeper.file_dups()
        # defaultdict(list) -> dict
        spam = dict(dups)
        if spam:
            if action == 'pprint':
                for _, fpaths in _dict_iter_items(spam):
                    for path in fpaths:
                        print(path)
                    if fpaths:
                        print('')
            else:
                print(json.dumps({k: v for k, v in _remap_keys_to_str(spam)},
                                 indent=4))
    elif action == 'move':
-        mv_file_dups(topdirs, args['--digest-alg'], args['--block-size'],
+        sweeper.mv(dest_dir, simulate, keep_prefix)
                     args['--move'])
    elif action == 'remove':
-        rm_file_dups(topdirs, args['--digest-alg'], args['--block-size'])
+        sweeper.rm(simulate, keep_prefix)
    else:
-        print('Invalid action "%s"' % action)
+        print('Invalid action "{}"'.format(action))
 # if used as script call main function
 if __name__ == '__main__':
-   main()
+    main()
--- a/test/init.pyc
+++ b/test/init.pyc
--- a/test/test_sweeper.py
+++ b/test/test_sweeper.py
@ -3,7 +3,7 @@
 # License: GPLv3
 import unittest
-from sweeper import file_dups
+from sweeper import Sweeper
 import os
 mydir = os.path.dirname(os.path.realpath(__file__))
@ -11,7 +11,8 @@ mydir = os.path.dirname(os.path.realpath(__file__))
 class TestSweeper(unittest.TestCase):
    def test_file_dups_dups(self):
-        dups = file_dups([os.path.join(mydir, 'testfiles_dups')], 'md5')
+        swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')])
        dups = swp.file_dups()
        dups_exist = False
        for h, flist in dups.items():
            if len(flist) > 1:
@ -19,10 +20,40 @@ class TestSweeper(unittest.TestCase):
        self.assertTrue(dups_exist)
    def test_file_dups_nodups(self):
-        dups = file_dups([os.path.join(mydir, 'testfiles_nodups')], 'md5')
+        swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_nodups')])
        dups = swp.file_dups()
        for h, flist in dups.items():
            self.assertTrue(len(flist) == 1)
    # does not actually test safe_mode, we would need to find
    # hash collision
    def test_file_dups_safe_mode(self):
        swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')],
                      safe_mode=True)
        dups = swp.file_dups()
        # flag is always bound: when no duplicates are found the test
        # fails on the assertion instead of raising UnboundLocalError
        dups_exist = any(len(flist) > 1 for flist in dups.values())
        self.assertTrue(dups_exist)
    def test_iter_file_dups_dups(self):
        # iterating sweeper over directory with duplicates must yield at
        # least once, every time with non empty dups list that does not
        # hold the yielded file path
        swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')])
        yielded = 0
        for filepath, _hash, dups in swp:
            yielded += 1
            self.assertNotIn(filepath, dups)
            self.assertTrue(len(dups) > 0)
        self.assertTrue(yielded > 0)
    def test_iter_file_dups_nodups(self):
        # iterating sweeper over directory without duplicates must not
        # yield anything, any() stops at the first yielded item
        swp = Sweeper([os.path.join(mydir, 'testfiles_nodups')])
        dups_exist = any(True for _ in swp)
        self.assertFalse(dups_exist)
 if __name__ == '__main__':
    unittest.main()
Author	SHA1	Message	Date
Darko Poljak	0c04f67b93	Rewriten as class Sweeper with code improvements and optimizations.	2014-08-10 00:06:32 +02:00
Darko Poljak	41cd0fe6c6	Small fixes.	2014-08-09 20:59:23 +02:00
Darko Poljak	499b85bcfa	Small changes.	2014-08-09 20:56:09 +02:00
Darko Poljak	2f5af0f2fb	Version inc.	2014-08-09 20:54:02 +02:00
Darko Poljak	a5cf8d66df	Some code improvements.	2014-08-09 20:52:53 +02:00
darko-poljak	b41a4afa8b	change default hash alg to sha1	2014-02-08 20:27:30 +01:00
darko-poljak	ea547d83c5	deleted unwanted pyc file tracking	2014-02-08 19:17:10 +01:00
darko-poljak	867cab7f25	make with nested to work with 2.6	2014-02-08 19:11:55 +01:00
darko-poljak	40ddd90e9b	add ignore entries	2014-02-07 19:21:33 +01:00
darko-poljak	24a0ee4c04	update authors file	2014-02-07 19:21:11 +01:00
Darko Poljak	4d015b6be8	fix example for new iter_file_dups function	2014-02-07 13:04:37 +01:00
Darko Poljak	c38d74feb7	cleanups and version inc	2014-02-07 08:30:48 +01:00
Darko Poljak	4a45525d00	cleanups and version inc	2014-02-07 08:30:18 +01:00
Darko Poljak	d2186f1f26	cleanups and version inc	2014-02-07 08:29:54 +01:00
Darko Poljak	9e714732fb	added file_dups_immediate function	2014-02-06 14:43:54 +01:00
Darko Poljak	5040c33f96	added file_dups_immediate function	2014-02-06 14:43:32 +01:00
Darko Poljak	3cc886f057	added file_dups_immediate function	2014-02-06 14:42:51 +01:00
Darko Poljak	12df9e5a7e	added missing verbose if condition	2014-02-06 14:22:43 +01:00
Darko Poljak	ff0c1a2895	use __version__ from sweeper.py	2014-02-06 14:05:24 +01:00
Darko Poljak	c2d0a49d4f	use os.path.realpath to eliminate symbolic links	2014-02-06 11:24:29 +01:00
Darko Poljak	19b8349e8c	added --safe-mode test (see method comment :))	2014-02-06 10:11:06 +01:00
Darko Poljak	6d57adc215	added --safe-mode	2014-02-06 10:09:52 +01:00
Darko Poljak	395f2234d5	multiple hash algs support for dups detecting	2014-02-06 08:48:18 +01:00
Darko Poljak	7aaeb3e98f	cache file paths when counting in verbose mode	2014-02-06 08:07:06 +01:00
darko-poljak	bd88491cb0	added authors file (added matthewi)	2014-02-05 21:37:57 +01:00
darko-poljak	cccadf41f7	updated TODO file	2014-02-05 21:34:56 +01:00
Matthew Isaacs	17fa4e7279	import py3k print function from __future to maintain compat with python 2.6+	2014-02-05 11:33:09 -06:00
darko-poljak	43ebde240d	done with current TODO list	2014-01-31 22:45:54 +01:00
darko-poljak	cff9388a4e	switch to v0.4.1	2014-01-29 21:14:15 +01:00
darko-poljak	1be4e6ce4a	lots of improvements	2014-01-29 21:10:30 +01:00
darko-poljak	8cc0897926	added TODO file	2014-01-28 23:38:29 +01:00
darko-poljak	beb9734330	Merge branch 'master' of https://github.com/darko-poljak/sweeper	2014-01-28 22:39:27 +01:00
darko-poljak	1f4cb3e177	synchronization	2014-01-28 22:39:07 +01:00
Darko Poljak	a3acd60556	replaced % operator with .format()	2014-01-28 12:51:51 +01:00
Darko Poljak	ef57c4a04d	replaced % operator with .format()	2014-01-28 12:51:29 +01:00
Darko Poljak	c7f34614dd	switch to version 0.4.0, added --version parameter	2014-01-28 09:09:31 +01:00
Darko Poljak	55279e1aee	switch to version 0.4.0	2014-01-28 09:08:38 +01:00
Darko Poljak	cf0ff95b36	pep8 tunning	2014-01-28 08:53:51 +01:00
Darko Poljak	8cb421c6e4	added pprint action	2014-01-28 08:50:58 +01:00
Darko Poljak	3ddd76fcce	added tests for iter_file_dups	2014-01-28 08:36:14 +01:00
Darko Poljak	63bb9779dd	fixed missing imports	2014-01-28 08:35:17 +01:00
Darko Poljak	4b8a7d51be	added rethash parameter to iter_file_dups	2014-01-28 07:30:26 +01:00
Darko Poljak	a60ede31fb	added rethash parameter to iter_file_dups	2014-01-28 07:29:23 +01:00
Darko Poljak	609418975e	Update README.rst	2014-01-27 13:33:04 +01:00
darko-poljak	30620884c9	suppress empty dict printing when no duplicates found	2014-01-27 13:20:25 +01:00
darko-poljak	42202b901b	pep8 fixes	2014-01-27 09:42:43 +01:00