"""Register not-yet-tracked files from the storage directory in etcd."""

import glob
import os
import pathlib
import subprocess as sp
import time
from uuid import uuid4

from . import logger
from ucloud.settings import settings
from ucloud.shared import shared


def sha512sum(file: str):
    """Compute the SHA-512 digest of *file* using the ``sha512sum`` utility.

    Args:
        file: Path of the file to hash.

    Returns:
        The digest as reported by the utility, or ``None`` when the utility
        exits with an error other than "No such file or directory".

    Raises:
        TypeError: If *file* is not a ``str``.
        FileNotFoundError: If the utility reports that *file* does not exist.
    """
    if not isinstance(file, str):
        raise TypeError(f"file must be a str, not {type(file).__name__}")

    try:
        output = sp.check_output(
            # "--" ends option parsing so a path starting with "-" is
            # treated as a file name rather than as an option.
            ["sha512sum", "--", file],
            stderr=sp.PIPE,
            # Pin the message locale: the error text is matched below.
            env=dict(os.environ, LC_ALL="C"),
        )
    except sp.CalledProcessError as e:
        error = e.stderr.decode("utf-8", errors="replace")
        if "No such file or directory" in error:
            raise FileNotFoundError(file) from None
        return None

    # Output is "<digest>  <name>". Only the digest field is decoded, so a
    # file name that is not valid UTF-8 cannot make decoding fail. GNU
    # sha512sum prefixes the line with a backslash when the file name
    # contains a backslash or newline; drop it so only the digest remains.
    digest = output.strip().split(b" ", 1)[0].lstrip(b"\\")
    return digest.decode("ascii")


def track_file(file, base_dir):
    """Record metadata about *file* in etcd and mark the file as tracked.

    The entry is stored under ``settings["etcd"]["file_prefix"]`` joined with
    a freshly generated UUID. After the entry is written, the extended
    attribute ``user.utracked`` is set on the file.

    Args:
        file: Path of the file to track.
        base_dir: Directory that *file* lives under; the path component of
            *file* immediately following *base_dir* is recorded as the owner.
    """
    file_id = uuid4()
    path = pathlib.Path(file)

    # The owner is the first path component below base_dir.
    owner = path.parts[len(pathlib.Path(base_dir).parts)]

    # Here we assume that ctime is the creation time, which is mostly
    # not true.
    creation_date = time.ctime(os.stat(file).st_ctime)

    entry_key = os.path.join(settings["etcd"]["file_prefix"], str(file_id))
    entry_value = {
        "filename": path.name,
        "owner": owner,
        "sha512sum": sha512sum(file),
        "creation_date": creation_date,
        "size": os.path.getsize(file),
    }

    logger.info("Tracking %s", file)

    shared.etcd_client.put(entry_key, entry_value, value_in_json=True)
    os.setxattr(file, "user.utracked", b"True")


def main():
    """Track every file below the storage directory that is not yet marked.

    A file counts as untracked when reading its ``user.utracked`` extended
    attribute raises ``OSError``.
    """
    base_dir = settings["storage"]["file_dir"]

    # Recursively collect everything below base_dir.
    # NOTE: glob does not match names starting with "." by default.
    entries = glob.glob(f"{base_dir}/**", recursive=True)

    for file in entries:
        # Directories are skipped; only regular files are tracked.
        if not os.path.isfile(file):
            continue
        try:
            os.getxattr(file, "user.utracked")
        except OSError:
            # Any failure to read the marker is treated as "not tracked".
            track_file(file, base_dir)


if __name__ == "__main__":
    main()