uncloud-mravi/uncloud/filescanner/main.py

86 lines
2.5 KiB
Python
Executable file

import glob
import os
import pathlib
import subprocess as sp
import time
import argparse
import bitmath
from uuid import uuid4
from . import logger
from uncloud.common.settings import settings
from uncloud.shared import shared
arg_parser = argparse.ArgumentParser('filescanner', add_help=False)
arg_parser.add_argument('--hostname', required=True)
def sha512sum(file: str):
"""Use sha512sum utility to compute sha512 sum of arg:file
IF arg:file does not exists:
raise FileNotFoundError exception
ELSE IF sum successfully computer:
return computed sha512 sum
ELSE:
return None
"""
if not isinstance(file, str):
raise TypeError
try:
output = sp.check_output(['sha512sum', file], stderr=sp.PIPE)
except sp.CalledProcessError as e:
error = e.stderr.decode('utf-8')
if 'No such file or directory' in error:
raise FileNotFoundError from None
else:
output = output.decode('utf-8').strip()
output = output.split(' ')
return output[0]
return None
def track_file(file, base_dir, host):
file_path = file.relative_to(base_dir)
file_str = str(file)
# Get Username
try:
owner = file_path.parts[0]
except IndexError:
pass
else:
file_path = file_path.relative_to(owner)
creation_date = time.ctime(os.stat(file_str).st_ctime)
entry_key = os.path.join(settings['etcd']['file_prefix'], str(uuid4()))
entry_value = {
'filename': str(file_path),
'owner': owner,
'sha512sum': sha512sum(file_str),
'creation_date': creation_date,
'size': str(bitmath.Byte(os.path.getsize(file_str)).to_MB()),
'host': host
}
logger.info('Tracking %s', file_str)
shared.etcd_client.put(entry_key, entry_value, value_in_json=True)
def main(hostname, debug=False):
base_dir = settings['storage']['file_dir']
# Recursively Get All Files and Folder below BASE_DIR
files = glob.glob('{}/**'.format(base_dir), recursive=True)
files = [pathlib.Path(f) for f in files if pathlib.Path(f).is_file()]
# Files that are already tracked
tracked_files = [
pathlib.Path(os.path.join(base_dir, f.value['owner'], f.value['filename']))
for f in shared.etcd_client.get_prefix(settings['etcd']['file_prefix'], value_in_json=True)
if f.value['host'] == hostname
]
untracked_files = set(files) - set(tracked_files)
for file in untracked_files:
track_file(file, base_dir, hostname)