Added comments.
This commit is contained in:
parent
660d6bd342
commit
c5fdf029e2
1 changed files with 20 additions and 0 deletions
|
@ -34,6 +34,7 @@ import os
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
|
# some differences in python versions
|
||||||
if sys.version_info[0] == 3:
|
if sys.version_info[0] == 3:
|
||||||
def _do_encode(buf):
|
def _do_encode(buf):
|
||||||
return buf
|
return buf
|
||||||
|
@ -49,6 +50,10 @@ else:
|
||||||
|
|
||||||
|
|
||||||
def _filehash(filepath, hashalg, block_size):
|
def _filehash(filepath, hashalg, block_size):
|
||||||
|
"""Calculate secure hash for given file content using
|
||||||
|
specified hash algorithm. Use block_size block size
|
||||||
|
when reading file content.
|
||||||
|
"""
|
||||||
md = hashlib.new(hashalg)
|
md = hashlib.new(hashalg)
|
||||||
with open(filepath, "rb") as f:
|
with open(filepath, "rb") as f:
|
||||||
for buf in iter(lambda: f.read(block_size), b''):
|
for buf in iter(lambda: f.read(block_size), b''):
|
||||||
|
@ -57,6 +62,10 @@ def _filehash(filepath, hashalg, block_size):
|
||||||
|
|
||||||
|
|
||||||
def file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
|
def file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
|
||||||
|
"""Find duplicate files in directory list. Return directory
|
||||||
|
with keys equal to file hash value and value as list of
|
||||||
|
file paths whose content is the same.
|
||||||
|
"""
|
||||||
dups = defaultdict(list)
|
dups = defaultdict(list)
|
||||||
for topdir in topdirs:
|
for topdir in topdirs:
|
||||||
for dirpath, dirnames, filenames in os.walk(topdir):
|
for dirpath, dirnames, filenames in os.walk(topdir):
|
||||||
|
@ -69,12 +78,18 @@ def file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
|
||||||
|
|
||||||
|
|
||||||
def rm_file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
    """Remove duplicate files found in specified directory list.

    First file in each duplicate group is kept; every other path in
    the group is deleted from disk.

    :param topdirs: directories to scan for duplicates
        (NOTE(review): mutable default argument -- shared across calls;
        harmless here because it is never mutated, but a ``None``
        sentinel would be cleaner)
    :param hashalg: hash algorithm name accepted by ``hashlib.new()``
    :param block_size: block size in bytes used when hashing file content
    """
    for files in do_with_file_dups(topdirs, hashalg, block_size):
        # Skip the first path so one copy survives, as the docstring
        # promises.  The original iterated over the whole list and
        # removed every copy of each duplicated file.
        for f in files[1:]:
            os.remove(f)
|
|
||||||
|
|
||||||
def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, dest_dir='dups'):
|
def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, dest_dir='dups'):
|
||||||
|
"""Move duplicate files found in specified directory list.
|
||||||
|
First file in list is kept in the original directory.
|
||||||
|
"""
|
||||||
if not os.path.exists(dest_dir):
|
if not os.path.exists(dest_dir):
|
||||||
os.mkdir(dest_dir)
|
os.mkdir(dest_dir)
|
||||||
if not os.path.isdir(dest_dir):
|
if not os.path.isdir(dest_dir):
|
||||||
|
@ -87,12 +102,16 @@ def mv_file_dups(topdirs=['./'], hashalg='md5', block_size=4096, dest_dir='dups'
|
||||||
|
|
||||||
|
|
||||||
def do_with_file_dups(topdirs=['./'], hashalg='md5', block_size=4096):
    """Yield list of duplicate files when found in specified directory list.

    :param topdirs: directories to scan for duplicates
    :param hashalg: hash algorithm name accepted by ``hashlib.new()``
    :param block_size: block size in bytes used when hashing file content
    :returns: generator of lists of file paths sharing the same content hash
    """
    dups = file_dups(topdirs, hashalg, block_size)
    # dict.itervalues() does not exist on Python 3, yet this module
    # explicitly supports Python 3 (see the sys.version_info branch at
    # the top of the file); .values() iterates fine on both versions.
    for fpaths in dups.values():
        yield fpaths
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
|
"""Main when used as script. See usage (--help).
|
||||||
|
"""
|
||||||
import json
|
import json
|
||||||
|
|
||||||
topdirs = args['<directory>']
|
topdirs = args['<directory>']
|
||||||
|
@ -119,6 +138,7 @@ def main(args):
|
||||||
print('Invalid action "%s"' % action)
|
print('Invalid action "%s"' % action)
|
||||||
|
|
||||||
|
|
||||||
|
# if used as script call main function
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from docopt import docopt
|
from docopt import docopt
|
||||||
arguments = docopt(__doc__)
|
arguments = docopt(__doc__)
|
||||||
|
|
Loading…
Reference in a new issue