"""Scan the storage directory and register not-yet-tracked files in etcd.

Files are marked as tracked through the ``user.utracked`` extended
attribute, which is read and written with the external ``getfattr`` and
``setfattr`` utilities.
"""

import glob
import os
import pathlib
import subprocess as sp
import time
import sys

from uuid import uuid4

from . import logger
from ucloud.settings import settings
from ucloud.shared import shared


def getxattr(file, attr):
    """Get specified user extended attribute (arg:attr) of a file (arg:file).

    The attribute name is looked up in the ``user.`` namespace.

    Returns the attribute value decoded as UTF-8, or None when ``getfattr``
    exits with a non-zero status (e.g. the attribute is not set).
    """
    attr = "user." + attr
    try:
        value = sp.check_output(
            ['getfattr', file, '--name', attr,
             '--only-values', '--absolute-names'],
            stderr=sp.DEVNULL,
        )
    except sp.CalledProcessError:
        return None
    return value.decode("utf-8")


def setxattr(file, attr, value):
    """Set specified user extended attribute (arg:attr) equal to (arg:value)
    of a file (arg:file).

    The value is stored as ``str(value)`` in the ``user.`` namespace.
    Raises subprocess.CalledProcessError if ``setfattr`` fails.
    """
    attr = "user." + attr
    sp.check_output(['setfattr', file, '--name', attr, '--value', str(value)])


def sha512sum(file: str):
    """Use sha512sum utility to compute sha512 sum of arg:file

    IF arg:file is not a str:
        raise TypeError exception
    IF arg:file does not exist:
        raise FileNotFoundError exception
    ELSE IF sum successfully computed:
        return computed sha512 sum
    ELSE:
        return None
    """
    if not isinstance(file, str):
        raise TypeError

    try:
        output = sp.check_output(["sha512sum", file], stderr=sp.PIPE)
    except sp.CalledProcessError as e:
        error = e.stderr.decode("utf-8", errors="replace")
        # The error text is locale dependent, so also check the filesystem
        # directly to honour the documented FileNotFoundError contract.
        if "No such file or directory" in error or not os.path.exists(file):
            raise FileNotFoundError from None
        return None

    # Output looks like "<digest>  <filename>"; keep the digest only.
    digest = output.decode("utf-8").strip().partition(" ")[0]
    # GNU sha512sum starts the line with a backslash when the file name
    # contains a backslash, newline or carriage return; that marker is not
    # part of the digest.
    return digest.lstrip("\\")


# Both utilities are required by getxattr()/setxattr(); refuse to continue
# without them.
try:
    sp.check_output(['which', 'getfattr'])
    sp.check_output(['which', 'setfattr'])
except (sp.CalledProcessError, OSError):
    # CalledProcessError: `which` did not find the tool.
    # OSError: `which` itself could not be executed.
    logger.error("You don't seems to have both getfattr and setfattr")
    sys.exit(1)


def main():
    """Register every untracked file below the storage directory in etcd.

    A file counts as untracked when its ``user.utracked`` extended attribute
    is missing or empty. For each such file an entry is put into etcd under
    a fresh UUID key, and the file is then marked as tracked.
    """
    base_dir = settings['storage']['file_dir']
    base_depth = len(pathlib.Path(base_dir).parts)

    # Recursively get all files and folders below base_dir
    paths = glob.glob("{}/**".format(base_dir), recursive=True)

    # Retain only regular files that are not yet marked as tracked
    untracked_files = [
        path for path in paths
        if os.path.isfile(path) and not getxattr(path, "utracked")
    ]

    for file in untracked_files:
        file_id = uuid4()
        file_parts = pathlib.Path(file).parts

        # Username is the first path component below base_dir
        owner = file_parts[base_depth]

        stat_result = os.stat(file)

        # Here, we are assuming that ctime is creation time,
        # which is mostly not true (on Unix it is the time of the
        # last metadata change).
        creation_date = time.ctime(stat_result.st_ctime)

        size = stat_result.st_size
        sha_sum = sha512sum(file)
        file_path = file_parts[-1]

        # Create entry
        entry_key = os.path.join(settings['etcd']['file_prefix'], str(file_id))
        entry_value = {
            "filename": file_path,
            "owner": owner,
            "sha512sum": sha_sum,
            "creation_date": creation_date,
            "size": size,
        }

        logger.info("Tracking %s", file)

        shared.etcd_client.put(entry_key, entry_value, value_in_json=True)
        # Mark the file only after the entry has been stored.
        setxattr(file, "utracked", True)


if __name__ == "__main__":
    main()