uncloud/ucloud/filescanner/main.py

127 lines
3.4 KiB
Python
Raw Normal View History

import glob
import os
import pathlib
import subprocess as sp
import time
from uuid import uuid4
from ucloud.filescanner import logger
from ucloud.config import env_vars, etcd_client
def getxattr(file, attr):
"""Get specified user extended attribute (arg:attr) of a file (arg:file)"""
try:
attr = "user." + attr
value = sp.check_output(['getfattr', file,
'--name', attr,
'--only-values',
'--absolute-names'], stderr=sp.DEVNULL)
value = value.decode("utf-8")
except sp.CalledProcessError as e:
logger.exception(e)
value = None
return value
def setxattr(file, attr, value):
"""Set specified user extended attribute (arg:attr) equal to (arg:value)
of a file (arg:file)"""
attr = "user." + attr
sp.check_output(['setfattr', file,
'--name', attr,
'--value', str(value)])
def sha512sum(file: str):
"""Use sha512sum utility to compute sha512 sum of arg:file
IF arg:file does not exists:
raise FileNotFoundError exception
ELSE IF sum successfully computer:
return computed sha512 sum
ELSE:
return None
"""
if not isinstance(file, str): raise TypeError
try:
output = sp.check_output(["sha512sum", file], stderr=sp.PIPE)
except sp.CalledProcessError as e:
error = e.stderr.decode("utf-8")
if "No such file or directory" in error:
raise FileNotFoundError from None
else:
output = output.decode("utf-8").strip()
output = output.split(" ")
return output[0]
return None
try:
sp.check_output(['which', 'getfattr'])
sp.check_output(['which', 'setfattr'])
except Exception as e:
logger.exception(e)
print('Make sure you have getfattr and setfattr available')
exit(1)
def main():
BASE_DIR = env_vars.get("BASE_DIR")
FILE_PREFIX = env_vars.get("FILE_PREFIX")
# Recursively Get All Files and Folder below BASE_DIR
files = glob.glob("{}/**".format(BASE_DIR), recursive=True)
# Retain only Files
files = list(filter(os.path.isfile, files))
untracked_files = list(
filter(lambda f: not bool(getxattr(f, "user.utracked")), files)
)
tracked_files = list(
filter(lambda f: f not in untracked_files, files)
)
for file in untracked_files:
file_id = uuid4()
# Get Username
owner = pathlib.Path(file).parts[3]
# Get Creation Date of File
# Here, we are assuming that ctime is creation time
# which is mostly not true.
creation_date = time.ctime(os.stat(file).st_ctime)
# Get File Size
size = os.path.getsize(file)
# Compute sha512 sum
sha_sum = sha512sum(file)
# File Path excluding base and username
file_path = pathlib.Path(file).parts[4:]
file_path = os.path.join(*file_path)
# Create Entry
entry_key = os.path.join(FILE_PREFIX, str(file_id))
entry_value = {
"filename": file_path,
"owner": owner,
"sha512sum": sha_sum,
"creation_date": creation_date,
"size": size
}
print("Tracking {}".format(file))
# Insert Entry
etcd_client.put(entry_key, entry_value, value_in_json=True)
setxattr(file, "user.utracked", True)
if __name__ == "__main__":
main()