"""Register untracked files found below BASE_DIR in etcd.

Every regular file below BASE_DIR that does not carry a truthy
``user.utracked`` extended attribute gets a metadata entry (relative
filename, owner, sha512 sum, creation date, size) stored in etcd under
``FILE_PREFIX/<uuid4>`` and is then marked with ``user.utracked``.

The expected layout is ``BASE_DIR/<owner>/<path to file>``: the first path
component below BASE_DIR is recorded as the owner, the remainder as the
filename.
"""
import glob
import hashlib
import os
import pathlib
import time
from uuid import uuid4

import xattr
from decouple import config
from etcd3_wrapper import Etcd3Wrapper


def getxattr(f, attr, symlink=False):
    """Return extended attribute ``attr`` of file ``f`` decoded as UTF-8.

    ``symlink`` is passed through to ``xattr.getxattr`` unchanged.
    Returns ``None`` when reading the attribute raises ``OSError``.
    """
    try:
        return xattr.getxattr(f, attr, symlink).decode("utf-8")
    except OSError:
        return None


def sha512sum(filename):
    """Return the hexadecimal SHA-512 digest of the file ``filename``.

    The file is read in 64 KiB chunks so it never has to fit in memory.
    """
    _sum = hashlib.sha512()
    buffer_size = 2**16
    with open(filename, "rb") as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for data in iter(lambda: f.read(buffer_size), b""):
            _sum.update(data)
    return _sum.hexdigest()


BASE_DIR = config("BASE_DIR")
FILE_PREFIX = config("FILE_PREFIX")

etcd_client = Etcd3Wrapper()

# Recursively get everything below BASE_DIR and retain only regular files.
files = [
    f for f in glob.glob(f"{BASE_DIR}/**", recursive=True) if os.path.isfile(f)
]

# Partition in a single pass: a file counts as tracked when its
# "user.utracked" attribute exists and is non-empty.
untracked_files = []
tracked_files = []
for f in files:
    if getxattr(f, "user.utracked"):
        tracked_files.append(f)
    else:
        untracked_files.append(f)

base_path = pathlib.Path(BASE_DIR)

for file in untracked_files:
    # Work relative to BASE_DIR so the result does not depend on how many
    # path components BASE_DIR itself has.
    relative_parts = pathlib.Path(file).relative_to(base_path).parts
    if len(relative_parts) < 2:
        # A file sitting directly in BASE_DIR has no owner directory.
        print(f"Skipping {file}: not inside an owner directory")
        continue

    file_id = uuid4()

    # First component below BASE_DIR is the username; the rest is the
    # file path excluding base and username.
    owner = relative_parts[0]
    file_path = os.path.join(*relative_parts[1:])

    # One stat call supplies both the timestamp and the size.
    stat_result = os.stat(file)

    # Here, we are assuming that ctime is creation time, which is mostly
    # not true: on Unix st_ctime is the time of the last metadata change.
    creation_date = time.ctime(stat_result.st_ctime)
    size = stat_result.st_size

    sha_sum = sha512sum(file)

    entry_key = f"{FILE_PREFIX}/{file_id}"
    entry_value = {
        "filename": file_path,
        "owner": owner,
        "sha512sum": sha_sum,
        "creation_date": creation_date,
        "size": size,
    }

    print(f"Tracking {file}")

    # Insert the entry, then mark the file so later runs skip it.
    etcd_client.put(entry_key, entry_value, value_in_json=True)
    xattr.setxattr(file, b"user.utracked", b"True")