Commit 0c04f67b authored by Darko Poljak

Rewritten as class Sweeper with code improvements and optimizations.

parent 41cd0fe6
Pipeline #59 failed
@@ -10,28 +10,31 @@ Print duplicates

 .. code:: python

-    from sweeper import file_dups
-    dups = file_dups(['images1', 'images2'])
+    from sweeper import Sweeper
+    swp = Sweeper(['images1', 'images2'])
+    dups = swp.file_dups()
     print(dups)

 Remove duplicate files

 .. code:: python

-    from sweeper import rm_file_dups
-    rm_file_dups(['images'])
+    from sweeper import Sweeper
+    swp = Sweeper(['images1', 'images2'])
+    swp.rm()

 Perform custom action

 .. code:: python

-    from sweeper import iter_file_dups
-    for f, h, dups in iter_file_dups(['images']):
+    from sweeper import Sweeper
+    swp = Sweeper(['images'])
+    for f, h, dups in swp:
         print('encountered {} which duplicates with already found duplicate files {} with hash {}'.format(f, dups, h))

 As script::

-    python sweeper.py --help
+    python -m sweeper/sweeper --help

 As installed console script::
......
* Play it safe and add a byte-by-byte comparison option for files that hash as duplicates?
  Or use one more, different hash algorithm? (A hedged sketch of the byte-by-byte idea follows.)
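A minimal sketch of the byte-by-byte double-check, independent of sweeper's internals; ``verify_group`` is a hypothetical helper name, not part of sweeper's API:

.. code:: python

    import filecmp
    from itertools import combinations

    def verify_group(paths):
        """Hypothetical helper: confirm that files grouped under one hash
        really have identical content, comparing them byte by byte
        (filecmp.cmp with shallow=False reads the actual file data)."""
        confirmed = []
        for a, b in combinations(paths, 2):
            if filecmp.cmp(a, b, shallow=False):
                confirmed.append((a, b))
        return confirmed

The alternative mentioned above would be to hash each candidate group again with a second, different algorithm and keep only the groups whose second digests also match.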
 from __future__ import absolute_import
-from .sweeper import file_dups, mv_file_dups, rm_file_dups, iter_file_dups
+from .sweeper import Sweeper
-__all__ = ['file_dups', 'mv_file_dups', 'rm_file_dups', 'iter_file_dups']
+__all__ = ['Sweeper']
@@ -3,7 +3,7 @@
 # License: GPLv3

 import unittest
-from sweeper import file_dups, iter_file_dups
+from sweeper import Sweeper
 import os

 mydir = os.path.dirname(os.path.realpath(__file__))
@@ -11,7 +11,8 @@ mydir = os.path.dirname(os.path.realpath(__file__))
 class TestSweeper(unittest.TestCase):
     def test_file_dups_dups(self):
-        dups = file_dups([os.path.join(mydir, 'testfiles_dups')])
+        swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')])
+        dups = swp.file_dups()
         dups_exist = False
         for h, flist in dups.items():
             if len(flist) > 1:
@@ -19,24 +20,26 @@ class TestSweeper(unittest.TestCase):
         self.assertTrue(dups_exist)

     def test_file_dups_nodups(self):
-        dups = file_dups([os.path.join(mydir, 'testfiles_nodups')])
+        swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_nodups')])
+        dups = swp.file_dups()
         for h, flist in dups.items():
             self.assertTrue(len(flist) == 1)

     # does not actually test safe_mode; we would need to find a hash
     # collision (a hedged sketch of faking one follows this diff)
     def test_file_dups_safe_mode(self):
-        dups = file_dups([os.path.join(mydir, 'testfiles_dups')],
-                         safe_mode=True)
+        swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')],
+                      safe_mode=True)
+        dups = swp.file_dups()
         for h, flist in dups.items():
             if len(flist) > 1:
                 dups_exist = True
         self.assertTrue(dups_exist)

     def test_iter_file_dups_dups(self):
-        it = iter_file_dups([os.path.join(mydir, 'testfiles_dups')])
+        swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')])
         dups_exist = False
-        for x in it:
+        for x in swp:
             dups_exist = True
             filepath, h, dups = x
             self.assertNotIn(filepath, dups)
@@ -44,9 +47,9 @@ class TestSweeper(unittest.TestCase):
         self.assertTrue(dups_exist)

     def test_iter_file_dups_nodups(self):
-        it = iter_file_dups([os.path.join(mydir, 'testfiles_nodups')])
+        swp = Sweeper([os.path.join(mydir, 'testfiles_nodups')])
         dups_exist = False
-        for x in it:
+        for x in swp:
             dups_exist = True
             break
         self.assertFalse(dups_exist)
......
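One way to exercise ``safe_mode`` without hunting for a real hash collision is to fake one. The sketch below rests on assumptions: it guesses that ``sweeper.sweeper`` hashes files through ``hashlib.md5`` (the patch target must match the real implementation to have any effect), and ``FakeHash`` / ``TestSafeModeCollision`` are hypothetical names, not part of the project:

.. code:: python

    import os
    import unittest
    from unittest import mock

    from sweeper import Sweeper

    class FakeHash:
        """Hypothetical digest reporting the same value for every file,
        simulating a hash collision between files with different content."""
        def __init__(self, data=b''):
            pass

        def update(self, data):
            pass

        def hexdigest(self):
            return 'collision'

    class TestSafeModeCollision(unittest.TestCase):
        # assumption: sweeper.sweeper calls hashlib.md5; adjust the patch
        # target to whatever hash the real module uses
        @mock.patch('sweeper.sweeper.hashlib.md5', FakeHash)
        def test_safe_mode_rejects_fake_collision(self):
            mydir = os.path.dirname(os.path.realpath(__file__))
            swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_nodups')],
                          safe_mode=True)
            # every file now "hashes" to the same value, so only a
            # byte-by-byte check can keep distinct files apart
            for h, flist in swp.file_dups().items():
                self.assertTrue(len(flist) == 1)

If that patch target is right, running the same check with ``safe_mode=False`` would lump every file under the single fake digest, which is exactly the false positive the byte-by-byte option is meant to prevent.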