import glob
import os
import pathlib
import subprocess as sp
import time
from uuid import uuid4

from . import logger
from ucloud.config import env_vars, etcd_client

# Name passed to getxattr()/setxattr() to mark a file as already tracked.
# NOTE: both helpers prepend "user." themselves, so the attribute actually
# stored on disk is "user.user.utracked". The name is kept as-is on purpose:
# changing it would make every previously tracked file look untracked.
_TRACKED_ATTR = "user.utracked"

# Index (within pathlib.Path(file).parts) of the path component used as owner.
# Everything after that component is stored as the file's relative path.
# NOTE(review): presumably BASE_DIR is an absolute path two directories deep,
# so that parts[3] is the user's directory -- confirm against deployment.
_OWNER_PART_INDEX = 3


def getxattr(file, attr):
    """Get the user extended attribute (arg:attr) of a file (arg:file).

    The attribute name is prefixed with "user." before `getfattr` is invoked.

    Returns the attribute value decoded as UTF-8, or None when `getfattr`
    exits with a non-zero status (the failure is logged in that case).
    """
    attr = "user." + attr
    try:
        raw_value = sp.check_output(
            ['getfattr', file, '--name', attr,
             '--only-values', '--absolute-names'],
            stderr=sp.DEVNULL,
        )
    except sp.CalledProcessError as e:
        logger.exception(e)
        return None
    return raw_value.decode("utf-8")


def setxattr(file, attr, value):
    """Set the user extended attribute (arg:attr) of a file (arg:file).

    The attribute name is prefixed with "user." and arg:value is converted
    with str() before `setfattr` is invoked. Raises
    subprocess.CalledProcessError when `setfattr` exits with a non-zero
    status.
    """
    attr = "user." + attr
    sp.check_output(['setfattr', file, '--name', attr, '--value', str(value)])


def sha512sum(file: str):
    """Use the sha512sum utility to compute the sha512 sum of arg:file.

    IF arg:file is not a str:
        raise TypeError
    IF arg:file does not exist:
        raise FileNotFoundError
    ELSE IF the sum is successfully computed:
        return the computed sha512 sum (hex string)
    ELSE:
        return None
    """
    if not isinstance(file, str):
        raise TypeError

    try:
        # Force the C locale so the error text matched below does not change
        # with the user's language settings.
        output = sp.check_output(
            ["sha512sum", file],
            stderr=sp.PIPE,
            env=dict(os.environ, LC_ALL="C"),
        )
    except sp.CalledProcessError as e:
        error = e.stderr.decode("utf-8")
        if "No such file or directory" in error:
            raise FileNotFoundError from None
        return None

    digest = output.decode("utf-8").strip().split(" ")[0]
    # GNU sha512sum starts the line with a backslash when the file name
    # contains a backslash or newline; that marker is not part of the hash.
    return digest.lstrip("\\")


# Fail fast at import time when the xattr command line tools are missing.
try:
    sp.check_output(['which', 'getfattr'])
    sp.check_output(['which', 'setfattr'])
except Exception as e:
    logger.exception(e)
    print('Make sure you have getfattr and setfattr available')
    # Same exit status as exit(1), without relying on the `site` builtin.
    raise SystemExit(1)


def main():
    """Scan BASE_DIR and register every not-yet-tracked file in etcd.

    For each regular file below BASE_DIR that does not carry the tracking
    extended attribute, an entry (relative filename, owner, sha512 sum,
    creation date, size) is written to etcd under FILE_PREFIX/<uuid4> and the
    file is then marked as tracked.
    """
    base_dir = env_vars.get("BASE_DIR")
    file_prefix = env_vars.get("FILE_PREFIX")

    # Recursively get everything below BASE_DIR, then retain only files
    candidates = glob.glob("{}/**".format(base_dir), recursive=True)
    files = [path for path in candidates if os.path.isfile(path)]

    untracked_files = [
        path for path in files if not getxattr(path, _TRACKED_ATTR)
    ]

    for file in untracked_files:
        parts = pathlib.Path(file).parts

        # A file needs an owner component plus at least one component after
        # it. Skip anything shallower instead of aborting the whole scan.
        if len(parts) <= _OWNER_PART_INDEX + 1:
            print("Skipping {} (not inside an owner directory)".format(file))
            continue

        file_id = uuid4()

        # Get Username
        owner = parts[_OWNER_PART_INDEX]

        # File Path excluding base and username
        file_path = os.path.join(*parts[_OWNER_PART_INDEX + 1:])

        # Here, we are assuming that ctime is creation time
        # which is mostly not true.
        stat_result = os.stat(file)
        creation_date = time.ctime(stat_result.st_ctime)
        size = stat_result.st_size

        # Compute sha512 sum
        sha_sum = sha512sum(file)

        # Create Entry
        entry_key = os.path.join(file_prefix, str(file_id))
        entry_value = {
            "filename": file_path,
            "owner": owner,
            "sha512sum": sha_sum,
            "creation_date": creation_date,
            "size": size,
        }

        print("Tracking {}".format(file))

        # Insert the entry first, and mark the file as tracked only once
        # the entry has been written.
        etcd_client.put(entry_key, entry_value, value_in_json=True)
        setxattr(file, _TRACKED_ATTR, True)


if __name__ == "__main__":
    main()