import argparse
import glob
import hashlib
import os
import pathlib
import subprocess as sp  # noqa: F401 - no longer used below; kept so the module namespace is unchanged
import time
from uuid import uuid4

import bitmath

from . import logger
from uncloud.common.shared import shared

# Command-line options understood by the file scanner.
arg_parser = argparse.ArgumentParser('filescanner', add_help=False)
arg_parser.add_argument('--hostname', required=True)

# Hash files in 1 MiB pieces so a large file is never loaded into memory at once.
_HASH_CHUNK_SIZE = 1024 * 1024


def sha512sum(file: str):
    """Compute the SHA-512 digest of the file at path ``file``.

    The digest is computed in-process with :mod:`hashlib` rather than by
    parsing the output of the external ``sha512sum`` utility, so the result
    does not depend on the system locale, on the utility being installed, or
    on how the utility escapes unusual file names.

    :param file: path of the file to hash; must be a ``str``.
    :return: the lowercase hexadecimal digest, or ``None`` if the file exists
        but could not be read (e.g. it is a directory or is not readable).
    :raises TypeError: if ``file`` is not a ``str``.
    :raises FileNotFoundError: if ``file`` does not exist.
    """
    if not isinstance(file, str):
        raise TypeError

    digest = hashlib.sha512()
    try:
        with open(file, 'rb') as stream:
            for chunk in iter(lambda: stream.read(_HASH_CHUNK_SIZE), b''):
                digest.update(chunk)
    except FileNotFoundError:
        # A missing file is the one failure callers are told about explicitly.
        raise
    except OSError:
        # Any other read failure is reported as "no checksum available".
        return None
    return digest.hexdigest()


def track_file(file, base_dir, host):
    """Register ``file`` in etcd as a file stored on ``host``.

    The first path component of ``file`` below ``base_dir`` is recorded as the
    owner, and the remainder of the path as the file name. The entry is stored
    as JSON under a fresh UUID key below ``shared.settings['etcd']['file_prefix']``.

    :param file: ``pathlib.Path`` of the file to track; must be located under
        ``base_dir`` (``Path.relative_to`` raises ``ValueError`` otherwise).
    :param base_dir: root directory that holds the per-owner directories.
    :param host: host name stored in the entry's ``host`` field.
    """
    file_path = file.relative_to(base_dir)
    file_str = str(file)

    # Get Username: the first component below base_dir names the owner.
    if not file_path.parts:
        # ``file`` is base_dir itself, so there is no owner component. Skip it
        # instead of failing later on an unbound ``owner`` variable.
        logger.warning('Not tracking %s: no owner directory below %s', file_str, base_dir)
        return
    owner = file_path.parts[0]
    file_path = file_path.relative_to(owner)

    file_stat = os.stat(file_str)
    # NOTE: st_ctime is the inode change time on Unix, not a true creation time.
    creation_date = time.ctime(file_stat.st_ctime)

    entry_key = os.path.join(shared.settings['etcd']['file_prefix'], str(uuid4()))
    entry_value = {
        'filename': str(file_path),
        'owner': owner,
        'sha512sum': sha512sum(file_str),
        'creation_date': creation_date,
        'size': str(bitmath.Byte(file_stat.st_size).to_MB()),
        'host': host,
    }

    logger.info('Tracking %s', file_str)
    shared.etcd_client.put(entry_key, entry_value, value_in_json=True)


def main(arguments):
    """Track every file below the storage directory that etcd does not know yet.

    Files found under ``shared.settings['storage']['file_dir']`` are compared
    with the etcd entries whose ``host`` field equals this host; each file
    without such an entry is passed to :func:`track_file`.

    :param arguments: mapping that provides the ``'hostname'`` of this host.
    """
    hostname = arguments['hostname']
    base_dir = shared.settings['storage']['file_dir']

    # Recursively Get All Files and Folder below BASE_DIR.
    # base_dir is escaped so glob metacharacters in it are taken literally.
    # Note that glob does not match dot-prefixed (hidden) entries by default.
    pattern = '{}/**'.format(glob.escape(str(base_dir)))
    candidates = (pathlib.Path(found) for found in glob.glob(pattern, recursive=True))
    files = {candidate for candidate in candidates if candidate.is_file()}

    # Files that are already tracked for this host, rebuilt as absolute paths.
    tracked_files = {
        pathlib.Path(os.path.join(base_dir, entry.value['owner'], entry.value['filename']))
        for entry in shared.etcd_client.get_prefix(
            shared.settings['etcd']['file_prefix'], value_in_json=True
        )
        if entry.value['host'] == hostname
    }

    for file in files - tracked_files:
        track_file(file, base_dir, hostname)