uncloud/ucloud/filescanner/main.py

126 lines
3.4 KiB
Python
Raw Normal View History

import glob
import os
import pathlib
import subprocess as sp
import time
from uuid import uuid4
2019-12-03 11:49:10 +00:00
from . import logger
2019-12-08 12:51:40 +00:00
from ucloud.config import config, etcd_client
def getxattr(file, attr):
    """Read the user extended attribute arg:attr from arg:file.

    The attribute name is prefixed with "user." before it is handed to the
    `getfattr` command line utility.

    Returns the attribute value decoded as UTF-8, or None when `getfattr`
    exits with a non-zero status (the failure is logged).
    """
    command = [
        'getfattr', file,
        '--name', "user." + attr,
        '--only-values',
        '--absolute-names',
    ]
    try:
        # stderr is discarded; only the raw attribute value is captured
        raw_value = sp.check_output(command, stderr=sp.DEVNULL)
        return raw_value.decode("utf-8")
    except sp.CalledProcessError as e:
        logger.exception(e)
        return None
def setxattr(file, attr, value):
    """Write the user extended attribute arg:attr of arg:file.

    The attribute name is prefixed with "user." and arg:value is converted
    with str() before both are handed to the `setfattr` command line utility.
    A non-zero exit status of `setfattr` raises subprocess.CalledProcessError.
    """
    command = [
        'setfattr', file,
        '--name', "user." + attr,
        '--value', str(value),
    ]
    sp.check_output(command)
def sha512sum(file: str):
    """Compute the sha512 sum of arg:file

    The file is hashed in-process with hashlib instead of parsing the output
    of the external `sha512sum` utility. This keeps the result independent of
    the system locale (error message matching) and of the utility's output
    escaping for unusual file names.

    IF arg:file is not a str:
        raise TypeError exception
    IF arg:file does not exist:
        raise FileNotFoundError exception
    ELSE IF sum successfully computed:
        return computed sha512 sum (lowercase hexadecimal string)
    ELSE (e.g. arg:file is a directory or is not readable):
        return None
    """
    # Function-scope import so this block does not depend on the
    # module-level import list.
    import hashlib

    if not isinstance(file, str):
        raise TypeError(
            "file must be a str, got {}".format(type(file).__name__)
        )

    digest = hashlib.sha512()
    try:
        with open(file, "rb") as stream:
            # Read in 1 MiB chunks so large files are never fully in memory
            for chunk in iter(lambda: stream.read(1024 * 1024), b""):
                digest.update(chunk)
    except FileNotFoundError:
        # Missing file is the one failure callers are told about explicitly
        raise
    except OSError:
        # Any other I/O failure (directory, permissions, ...) yields None
        return None
    return digest.hexdigest()
# Import-time sanity check: the scanner shells out to getfattr/setfattr,
# so refuse to start when `which` cannot locate either of them.
try:
    for _tool in ('getfattr', 'setfattr'):
        sp.check_output(['which', _tool])
except Exception as e:
    logger.exception(e)
    print('Make sure you have getfattr and setfattr available')
    exit(1)
def main():
    """Scan the storage directory once and register untracked files in etcd.

    Every regular file found recursively below config['storage']["FILE_DIR"]
    whose "user.utracked" extended attribute is unset (or empty) is treated
    as untracked. For each such file an entry holding its relative filename,
    owner, sha512 sum, creation date and size is stored in etcd under
    config['storage']["FILE_PREFIX"]/<uuid4>, and the file is then marked by
    setting the "user.utracked" attribute.

    Files are expected to live at BASE_DIR/<owner>/<path...>; files placed
    directly in BASE_DIR have no owner directory and are skipped.

    NOTE: getxattr/setxattr prepend "user." themselves, so the attribute that
    ends up on disk is "user.user.utracked". The name is kept as is so that
    files marked by earlier runs are still recognised.
    """
    BASE_DIR = config['storage']["FILE_DIR"]
    FILE_PREFIX = config['storage']["FILE_PREFIX"]
    base_path = pathlib.Path(BASE_DIR)

    # Recursively get all files and folders below BASE_DIR, retain only files
    candidates = glob.glob("{}/**".format(BASE_DIR), recursive=True)
    files = [f for f in candidates if os.path.isfile(f)]

    # Evaluated eagerly, before any attribute is written in the loop below
    untracked_files = [f for f in files if not getxattr(f, "user.utracked")]

    for file in untracked_files:
        # Path components relative to BASE_DIR: (<owner>, <path...>)
        relative_parts = pathlib.Path(file).relative_to(base_path).parts
        if len(relative_parts) < 2:
            # No owner directory: owner and filename cannot be derived
            print("Skipping {} (not inside an owner directory)".format(file))
            continue

        file_id = uuid4()

        # Get Username
        owner = relative_parts[0]

        # File Path excluding base and username
        file_path = os.path.join(*relative_parts[1:])

        # Here, we are assuming that ctime is creation time
        # which is mostly not true.
        file_stat = os.stat(file)
        creation_date = time.ctime(file_stat.st_ctime)
        size = file_stat.st_size

        # Compute sha512 sum
        sha_sum = sha512sum(file)

        # Create Entry
        entry_key = os.path.join(FILE_PREFIX, str(file_id))
        entry_value = {
            "filename": file_path,
            "owner": owner,
            "sha512sum": sha_sum,
            "creation_date": creation_date,
            "size": size,
        }

        print("Tracking {}".format(file))

        # Insert Entry, then mark the file so later scans skip it
        etcd_client.put(entry_key, entry_value, value_in_json=True)
        setxattr(file, "user.utracked", True)


if __name__ == "__main__":
    main()