Commit 0c04f67b authored by Darko Poljak

Rewritten as class Sweeper with code improvements and optimizations.

parent 41cd0fe6
Pipeline #59 failed with stages
...@@ -10,28 +10,31 @@ Print duplicates ...@@ -10,28 +10,31 @@ Print duplicates
.. code:: python .. code:: python
from sweeper import file_dups from sweeper import Sweeper
dups = file_dups(['images1', 'images2']) swp = Sweeper(['images1', 'images2'])
dups = swp.file_dups()
print(dups) print(dups)
Remove duplicate files Remove duplicate files
.. code:: python .. code:: python
from sweeper import rm_file_dups from sweeper import Sweeper
rm_file_dups(['images']) swp = Sweeper(['images1', 'images2'])
swp.rm()
Perform custom action Perform custom action
.. code:: python .. code:: python
from sweeper import iter_file_dups from sweeper import Sweeper
for f, h, dups in iter_file_dups(['images']): swp = Sweeper(['images'])
for f, h, dups in swp:
print('encountered {} which duplicates with already found duplicate files {} with hash {}'.format(f, dups, h)) print('encountered {} which duplicates with already found duplicate files {} with hash {}'.format(f, dups, h))
As script:: As script::
python sweeper.py --help python -m sweeper.sweeper --help
As installed console script:: As installed console script::
......
* Play it safe and add byte by byte comparison option for hash dup files?
Or use one more, different, hash algorithm?
from __future__ import absolute_import from __future__ import absolute_import
from .sweeper import file_dups, mv_file_dups, rm_file_dups, iter_file_dups from .sweeper import Sweeper
__all__ = ['file_dups', 'mv_file_dups', 'rm_file_dups', 'iter_file_dups'] __all__ = ['Sweeper']
This diff is collapsed.
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# License: GPLv3 # License: GPLv3
import unittest import unittest
from sweeper import file_dups, iter_file_dups from sweeper import Sweeper
import os import os
mydir = os.path.dirname(os.path.realpath(__file__)) mydir = os.path.dirname(os.path.realpath(__file__))
...@@ -11,7 +11,8 @@ mydir = os.path.dirname(os.path.realpath(__file__)) ...@@ -11,7 +11,8 @@ mydir = os.path.dirname(os.path.realpath(__file__))
class TestSweeper(unittest.TestCase): class TestSweeper(unittest.TestCase):
def test_file_dups_dups(self): def test_file_dups_dups(self):
dups = file_dups([os.path.join(mydir, 'testfiles_dups')]) swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')])
dups = swp.file_dups()
dups_exist = False dups_exist = False
for h, flist in dups.items(): for h, flist in dups.items():
if len(flist) > 1: if len(flist) > 1:
...@@ -19,24 +20,26 @@ class TestSweeper(unittest.TestCase): ...@@ -19,24 +20,26 @@ class TestSweeper(unittest.TestCase):
self.assertTrue(dups_exist) self.assertTrue(dups_exist)
def test_file_dups_nodups(self): def test_file_dups_nodups(self):
dups = file_dups([os.path.join(mydir, 'testfiles_nodups')]) swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_nodups')])
dups = swp.file_dups()
for h, flist in dups.items(): for h, flist in dups.items():
self.assertTrue(len(flist) == 1) self.assertTrue(len(flist) == 1)
# does not actually test safe_mode, we would need to find # does not actually test safe_mode, we would need to find
# hash collision # hash collision
def test_file_dups_safe_mode(self): def test_file_dups_safe_mode(self):
dups = file_dups([os.path.join(mydir, 'testfiles_dups')], swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')],
safe_mode=True) safe_mode=True)
dups = swp.file_dups()
for h, flist in dups.items(): for h, flist in dups.items():
if len(flist) > 1: if len(flist) > 1:
dups_exist = True dups_exist = True
self.assertTrue(dups_exist) self.assertTrue(dups_exist)
def test_iter_file_dups_dups(self): def test_iter_file_dups_dups(self):
it = iter_file_dups([os.path.join(mydir, 'testfiles_dups')]) swp = Sweeper(topdirs=[os.path.join(mydir, 'testfiles_dups')])
dups_exist = False dups_exist = False
for x in it: for x in swp:
dups_exist = True dups_exist = True
filepath, h, dups = x filepath, h, dups = x
self.assertNotIn(filepath, dups) self.assertNotIn(filepath, dups)
...@@ -44,9 +47,9 @@ class TestSweeper(unittest.TestCase): ...@@ -44,9 +47,9 @@ class TestSweeper(unittest.TestCase):
self.assertTrue(dups_exist) self.assertTrue(dups_exist)
def test_iter_file_dups_nodups(self): def test_iter_file_dups_nodups(self):
it = iter_file_dups([os.path.join(mydir, 'testfiles_nodups')]) swp = Sweeper([os.path.join(mydir, 'testfiles_nodups')])
dups_exist = False dups_exist = False
for x in it: for x in swp:
dups_exist = True dups_exist = True
break break
self.assertFalse(dups_exist) self.assertFalse(dups_exist)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment