diff --git a/uncloud/filescanner/main.py b/uncloud/filescanner/main.py index c81fbbe..e4d807c 100755 --- a/uncloud/filescanner/main.py +++ b/uncloud/filescanner/main.py @@ -4,6 +4,7 @@ import pathlib import subprocess as sp import time import argparse +import bitmath from uuid import uuid4 @@ -28,66 +29,59 @@ def sha512sum(file: str): if not isinstance(file, str): raise TypeError try: - output = sp.check_output(["sha512sum", file], stderr=sp.PIPE) + output = sp.check_output(['sha512sum', file], stderr=sp.PIPE) except sp.CalledProcessError as e: - error = e.stderr.decode("utf-8") - if "No such file or directory" in error: + error = e.stderr.decode('utf-8') + if 'No such file or directory' in error: raise FileNotFoundError from None else: - output = output.decode("utf-8").strip() - output = output.split(" ") + output = output.decode('utf-8').strip() + output = output.split(' ') return output[0] return None def track_file(file, base_dir): - file_id = uuid4() + file_path = file.relative_to(base_dir) # Get Username - owner = pathlib.Path(file).parts[len(pathlib.Path(base_dir).parts)] + try: + owner = file_path.parts[0] + except IndexError: + pass + else: + file_path = file_path.relative_to(owner) + creation_date = time.ctime(os.stat(file).st_ctime) - # Get Creation Date of File - # Here, we are assuming that ctime is creation time - # which is mostly not true. - creation_date = time.ctime(os.stat(file).st_ctime) + entry_key = os.path.join(settings['etcd']['file_prefix'], str(uuid4())) + entry_value = { + 'filename': str(file_path), + 'owner': owner, + 'sha512sum': sha512sum(str(file)), + 'creation_date': creation_date, + 'size': str(bitmath.Byte(os.path.getsize(str(file))).to_MB()), + } - file_path = pathlib.Path(file).parts[-1] + logger.info('Tracking %s', file) - # Create Entry - entry_key = os.path.join( - settings["etcd"]["file_prefix"], str(file_id) - ) - entry_value = { - "filename": file_path, - "owner": owner, - "sha512sum": sha512sum(file), - "creation_date": creation_date, - "size": os.path.getsize(file), - } - - logger.info("Tracking %s", file) - - shared.etcd_client.put(entry_key, entry_value, value_in_json=True) - os.setxattr(file, "user.utracked", b"True") + shared.etcd_client.put(entry_key, entry_value, value_in_json=True) def main(debug=False): - base_dir = settings["storage"]["file_dir"] - + base_dir = pathlib.Path(settings['storage']['file_dir']) # Recursively Get All Files and Folder below BASE_DIR - files = glob.glob("{}/**".format(base_dir), recursive=True) + files = glob.glob('{}/**'.format(base_dir), recursive=True) + files = [pathlib.Path(f) for f in files if pathlib.Path(f).is_file()] - # Retain only Files - files = [file for file in files if os.path.isfile(file)] - - untracked_files = [] - for file in files: - try: - os.getxattr(file, "user.utracked") - except OSError: - track_file(file, base_dir) - untracked_files.append(file) + # Files that are already tracked + tracked_files = [ + pathlib.Path(os.path.join(base_dir, f.value['owner'], f.value['filename'])) + for f in shared.etcd_client.get_prefix(settings['etcd']['file_prefix'], value_in_json=True) + ] + untracked_files = set(files) - set(tracked_files) + for file in untracked_files: + track_file(file, base_dir) -if __name__ == "__main__": +if __name__ == '__main__': main()