forked from uncloud/uncloud
126 lines
3.4 KiB
Python
Executable file
126 lines
3.4 KiB
Python
Executable file
import glob
|
|
import os
|
|
import pathlib
|
|
import subprocess as sp
|
|
import time
|
|
from uuid import uuid4
|
|
|
|
from ucloud.filescanner import logger
|
|
from ucloud.config import env_vars, etcd_client
|
|
|
|
|
|
def getxattr(file, attr):
    """Read a user extended attribute from a file via the ``getfattr`` tool.

    The attribute actually queried is ``"user." + attr``; callers pass the
    name without that namespace prefix.

    Returns the attribute value decoded as UTF-8, or ``None`` when
    ``getfattr`` exits with a non-zero status (the error is logged).
    """
    qualified_name = "user." + attr
    command = [
        'getfattr', file,
        '--name', qualified_name,
        '--only-values',
        '--absolute-names',
    ]
    try:
        # stderr is discarded; only the exit status signals failure.
        raw_value = sp.check_output(command, stderr=sp.DEVNULL)
    except sp.CalledProcessError as err:
        logger.exception(err)
        return None
    return raw_value.decode("utf-8")
|
|
|
|
|
|
def setxattr(file, attr, value):
    """Write a user extended attribute onto a file via the ``setfattr`` tool.

    The attribute written is ``"user." + attr`` and the stored value is
    ``str(value)``. ``subprocess.CalledProcessError`` propagates to the
    caller if ``setfattr`` exits with a non-zero status.
    """
    command = [
        'setfattr', file,
        '--name', "user." + attr,
        '--value', str(value),
    ]
    sp.check_output(command)
|
|
|
|
|
|
def sha512sum(file: str):
    """Compute the SHA-512 digest of the file at path ``file``.

    The digest is computed in-process with ``hashlib`` instead of shelling
    out to the ``sha512sum`` utility. That removes the dependence on the
    utility's locale-specific error text, on its escaping of unusual file
    names (a leading backslash in the output), and on file names that start
    with ``-`` being parsed as options.

    Returns:
        The lowercase hexadecimal SHA-512 digest, or ``None`` when the path
        exists but cannot be read (e.g. it is a directory or access is
        denied).

    Raises:
        TypeError: if ``file`` is not a ``str``.
        FileNotFoundError: if ``file`` does not exist.
    """
    # Function-local import so this block does not rely on the module's
    # top-level import list.
    import hashlib

    if not isinstance(file, str):
        raise TypeError(
            "file must be a str path, got {}".format(type(file).__name__)
        )

    digest = hashlib.sha512()
    try:
        with open(file, "rb") as stream:
            # Read in 1 MiB chunks so large files are never fully in memory.
            for chunk in iter(lambda: stream.read(1024 * 1024), b""):
                digest.update(chunk)
    except FileNotFoundError:
        # Documented contract: a missing file is reported to the caller.
        raise
    except OSError:
        # Any other read failure maps to "no checksum available".
        return None
    return digest.hexdigest()
|
|
|
|
|
|
# Import-time sanity check: getxattr()/setxattr() shell out to the
# getfattr/setfattr command-line tools, so stop immediately when either of
# them cannot be located with `which`.
try:
    sp.check_output(['which', 'getfattr'])
    sp.check_output(['which', 'setfattr'])
except Exception as e:
    logger.exception(e)
    print('Make sure you have getfattr and setfattr available')
    # Raise SystemExit directly: the bare `exit` name is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    raise SystemExit(1)
|
|
|
|
|
|
def main():
    """Register every not-yet-tracked file below BASE_DIR in etcd.

    Reads ``BASE_DIR`` and ``FILE_PREFIX`` from ``env_vars``, walks
    ``BASE_DIR`` recursively, and for each regular file whose tracking
    attribute is unset: gathers owner, relative path, size, ctime and
    SHA-512 sum, stores them as JSON under ``FILE_PREFIX/<uuid4>`` via
    ``etcd_client.put``, and then marks the file as tracked with
    ``setxattr``.

    Files whose path has fewer than five components are skipped with a
    message, because owner and relative path cannot both be derived from
    them.
    """
    BASE_DIR = env_vars.get("BASE_DIR")
    FILE_PREFIX = env_vars.get("FILE_PREFIX")

    # Recursively get all files and folders below BASE_DIR
    candidates = glob.glob("{}/**".format(BASE_DIR), recursive=True)

    # NOTE: getxattr()/setxattr() prepend "user." themselves, so the
    # attribute queried here is "user.user.utracked". The name is kept as-is
    # so files marked by earlier runs are still recognised as tracked.
    untracked_files = [
        f for f in candidates
        if os.path.isfile(f) and not getxattr(f, "user.utracked")
    ]

    for file in untracked_files:
        parts = pathlib.Path(file).parts

        # The fixed indices below take component 3 as the owner and
        # everything from component 4 on as the file path.
        # NOTE(review): this presumes BASE_DIR is an absolute path exactly
        # two directories deep (e.g. /var/www) -- confirm against deployment.
        if len(parts) < 5:
            print("Skipping {}: expected <base>/<owner>/<path> layout".format(file))
            continue

        file_id = uuid4()

        # Get username
        owner = parts[3]

        # Get creation date of file.
        # Here, we are assuming that ctime is creation time,
        # which is mostly not true.
        creation_date = time.ctime(os.stat(file).st_ctime)

        # Get file size
        size = os.path.getsize(file)

        # Compute sha512 sum
        sha_sum = sha512sum(file)

        # File path excluding base and username
        file_path = os.path.join(*parts[4:])

        # Create entry
        entry_key = os.path.join(FILE_PREFIX, str(file_id))
        entry_value = {
            "filename": file_path,
            "owner": owner,
            "sha512sum": sha_sum,
            "creation_date": creation_date,
            "size": size,
        }

        print("Tracking {}".format(file))

        # Insert entry, then flag the file so later scans skip it
        etcd_client.put(entry_key, entry_value, value_in_json=True)
        setxattr(file, "user.utracked", True)
|
|
|
|
|
|
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|