"""Register untracked files below BASE_DIR in etcd.

Every regular file found below ``BASE_DIR`` that does not yet carry the
tracking extended attribute gets a metadata record (relative file name,
owner, sha512 sum, ctime-based date, size) stored in etcd under
``FILE_PREFIX/<uuid4>``; the file is then marked through ``setfattr`` so
later runs leave it alone.

Configuration is read through ``decouple.config``: ``BASE_DIR``,
``FILE_PREFIX`` and ``ETCD_URL``.
"""
import glob
import hashlib
import os
import pathlib
import shutil
import subprocess as sp
import sys
import time
from uuid import uuid4

from decouple import config
from etcd3_wrapper import Etcd3Wrapper


def getxattr(file, attr):
    """Return the value of extended attribute ``user.<attr>`` of *file*.

    The value is read with the external ``getfattr`` tool and decoded as
    UTF-8.  ``None`` is returned when ``getfattr`` exits with a non-zero
    status (for example because the attribute is not set).
    """
    try:
        attr = "user." + attr
        value = sp.check_output(['getfattr', file,
                                 '--name', attr,
                                 '--only-values',
                                 '--absolute-names'])
        value = value.decode("utf-8")
    except sp.CalledProcessError:
        value = None

    return value


def setxattr(file, attr, value):
    """Set extended attribute ``user.<attr>`` of *file* to ``str(value)``.

    Uses the external ``setfattr`` tool.

    Raises:
        subprocess.CalledProcessError: if ``setfattr`` exits non-zero.
    """
    attr = "user." + attr
    sp.check_output(['setfattr', file,
                     '--name', attr,
                     '--value', str(value)])


def sha512sum(filename):
    """Return the hex-encoded SHA-512 digest of the file *filename*.

    The file is read in 64 KiB chunks so that large files do not have to
    fit in memory.
    """
    _sum = hashlib.sha512()
    buffer_size = 2**16

    with open(filename, "rb") as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for data in iter(lambda: f.read(buffer_size), b""):
            _sum.update(data)

    return _sum.hexdigest()


# Both helpers above shell out to these tools, so fail early if one is
# missing.  shutil.which avoids depending on an external `which` binary.
missing_tools = [
    tool for tool in ("getfattr", "setfattr") if shutil.which(tool) is None
]
if missing_tools:
    print("Missing required tool(s): {}".format(", ".join(missing_tools)))
    print('Make sure you have getfattr and setfattr available')
    # sys.exit rather than the site-provided exit(), which is not
    # guaranteed to exist (e.g. under `python -S`).
    sys.exit(1)


BASE_DIR = config("BASE_DIR")
FILE_PREFIX = config("FILE_PREFIX")

# Position of the owner (user name) component in pathlib's ``parts`` of a
# scanned file; everything after it is the path stored as "filename".
# NOTE(review): hard-coded, so it presumably expects BASE_DIR to be two
# directories below "/" (e.g. /var/www/<owner>/...) - confirm.
OWNER_PART_INDEX = 3

etcd_client = Etcd3Wrapper(host=config("ETCD_URL"))

# Recursively get all files and folders below BASE_DIR
files = glob.glob("{}/**".format(BASE_DIR), recursive=True)

# Retain only files
files = list(filter(os.path.isfile, files))

# NOTE: getxattr/setxattr prepend "user." themselves, so the attribute
# really stored on disk is "user.user.utracked".  Reads and writes agree,
# and the name is kept as is so files marked by earlier runs stay marked.
untracked_files = [
    f for f in files if not getxattr(f, "user.utracked")
]

# Set lookup keeps this linear instead of scanning the list per file.
_untracked_lookup = set(untracked_files)
tracked_files = [f for f in files if f not in _untracked_lookup]

for file in untracked_files:
    parts = pathlib.Path(file).parts

    # A path without at least one component after the owner has no
    # relative file name; indexing/joining it would raise and abort the
    # whole run, so skip it instead.
    if len(parts) <= OWNER_PART_INDEX + 1:
        print("Skipping {}: no path component below the owner "
              "directory".format(file))
        continue

    file_id = uuid4()

    # Get username
    owner = parts[OWNER_PART_INDEX]

    # Get creation date of file.
    # Here we are assuming that ctime is the creation time, which is
    # mostly not true.
    creation_date = time.ctime(os.stat(file).st_ctime)

    # Get file size
    size = os.path.getsize(file)

    # Compute sha512 sum
    sha_sum = sha512sum(file)

    # File path excluding base and username
    file_path = os.path.join(*parts[OWNER_PART_INDEX + 1:])

    # Create entry
    entry_key = os.path.join(FILE_PREFIX, str(file_id))
    entry_value = {
        "filename": file_path,
        "owner": owner,
        "sha512sum": sha_sum,
        "creation_date": creation_date,
        "size": size
    }

    print("Tracking {}".format(file))

    # Insert entry, then mark the file so later runs skip it
    etcd_client.put(entry_key, entry_value, value_in_json=True)
    setxattr(file, "user.utracked", True)