88 lines
2 KiB
Python
88 lines
2 KiB
Python
|
import os
|
||
|
import glob
|
||
|
import xattr
|
||
|
import pathlib
|
||
|
import time
|
||
|
import hashlib
|
||
|
|
||
|
from decouple import config
|
||
|
from etcd3_wrapper import Etcd3Wrapper
|
||
|
from uuid import uuid4
|
||
|
|
||
|
|
||
|
def getxattr(f, attr, symlink=False):
    """Read extended attribute *attr* of *f* and return it as text.

    Args:
        f: File to query; handed unchanged to ``xattr.getxattr``.
        attr: Name of the extended attribute to read.
        symlink: Forwarded unchanged as the third positional argument of
            ``xattr.getxattr``.

    Returns:
        The attribute value decoded as UTF-8, or ``None`` when the lookup
        raises ``OSError``.
    """
    try:
        raw_value = xattr.getxattr(f, attr, symlink)
    except OSError:
        # Any OS-level failure of the lookup is reported as "no value".
        return None
    return raw_value.decode("utf-8")
|
||
|
|
||
|
|
||
|
def sha512sum(filename):
    """Return the hexadecimal SHA-512 digest of the file at *filename*.

    The file is opened in binary mode and hashed in 64 KiB chunks, so the
    whole content is never held in memory at once.
    """
    chunk_size = 2**16
    digest = hashlib.sha512()

    with open(filename, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)

    return digest.hexdigest()
|
||
|
|
||
|
|
||
|
# Settings read through decouple's config().
BASE_DIR = config("BASE_DIR")
FILE_PREFIX = config("FILE_PREFIX")

etcd_client = Etcd3Wrapper()

# Recursively collect everything below BASE_DIR, keeping regular files only
# (glob also returns directories).
files = [
    path
    for path in glob.glob(f"{BASE_DIR}/**", recursive=True)
    if os.path.isfile(path)
]

# Split the files in a single pass, reading the marker attribute once per
# file. A file counts as tracked when the "user.utracked" attribute holds a
# non-empty value; a missing or unreadable attribute means untracked.
# NOTE: the attribute name must stay identical to the one written when a file
# is registered, so its spelling is deliberately left untouched.
untracked_files = []
tracked_files = []
for path in files:
    if getxattr(path, "user.utracked"):
        tracked_files.append(path)
    else:
        untracked_files.append(path)
|
||
|
# Register every untracked file in etcd, then flag it on disk so that later
# runs no longer pick it up.
# Expected layout: <BASE_DIR>/<owner>/<path inside the owner's directory>.
base_path = pathlib.Path(BASE_DIR)
for file in untracked_files:
    # Resolve owner and path relative to BASE_DIR instead of relying on fixed
    # indices into the absolute path, which only work for one specific depth
    # of BASE_DIR.
    relative_parts = pathlib.Path(file).relative_to(base_path).parts
    if len(relative_parts) < 2:
        # A file sitting directly in BASE_DIR has no owner directory; skip it
        # rather than aborting the whole run.
        print(f"Skipping {file}: not inside an owner directory")
        continue

    file_id = uuid4()

    # Get Username (first directory level below BASE_DIR)
    owner = relative_parts[0]

    # One stat call provides both the timestamp and the size.
    file_stat = os.stat(file)

    # Get Creation Date of File
    # Here, we are assuming that ctime is creation time
    # which is mostly not true.
    creation_date = time.ctime(file_stat.st_ctime)

    # Get File Size
    size = file_stat.st_size

    # Compute sha512 sum
    sha_sum = sha512sum(file)

    # File Path excluding base and username
    file_path = os.path.join(*relative_parts[1:])

    # Create Entry
    entry_key = f"{FILE_PREFIX}/{file_id}"
    entry_value = {
        "filename": file_path,
        "owner": owner,
        "sha512sum": sha_sum,
        "creation_date": creation_date,
        "size": size,
    }

    print(f"Tracking {file}")

    # Insert Entry, and only afterwards mark the file as tracked, so a failed
    # insert leaves the file eligible for the next run.
    etcd_client.put(entry_key, entry_value, value_in_json=True)
    xattr.setxattr(file, b"user.utracked", b"True")
|