uncloud/ucloud/filescanner/main.py

89 lines
2.2 KiB
Python
Executable file

import glob
import os
import pathlib
import subprocess as sp
import time
from uuid import uuid4
from . import logger
from ucloud.settings import settings
from ucloud.shared import shared
def sha512sum(file: str):
"""Use sha512sum utility to compute sha512 sum of arg:file
IF arg:file does not exists:
raise FileNotFoundError exception
ELSE IF sum successfully computer:
return computed sha512 sum
ELSE:
return None
"""
if not isinstance(file, str):
raise TypeError
try:
output = sp.check_output(["sha512sum", file], stderr=sp.PIPE)
except sp.CalledProcessError as e:
error = e.stderr.decode("utf-8")
if "No such file or directory" in error:
raise FileNotFoundError from None
else:
output = output.decode("utf-8").strip()
output = output.split(" ")
return output[0]
return None
def track_file(file, base_dir):
file_id = uuid4()
# Get Username
owner = pathlib.Path(file).parts[len(pathlib.Path(base_dir).parts)]
# Get Creation Date of File
# Here, we are assuming that ctime is creation time
# which is mostly not true.
creation_date = time.ctime(os.stat(file).st_ctime)
file_path = pathlib.Path(file).parts[-1]
# Create Entry
entry_key = os.path.join(
settings["etcd"]["file_prefix"], str(file_id)
)
entry_value = {
"filename": file_path,
"owner": owner,
"sha512sum": sha512sum(file),
"creation_date": creation_date,
"size": os.path.getsize(file),
}
logger.info("Tracking %s", file)
shared.etcd_client.put(entry_key, entry_value, value_in_json=True)
os.setxattr(file, "user.utracked", b"True")
def main():
base_dir = settings["storage"]["file_dir"]
# Recursively Get All Files and Folder below BASE_DIR
files = glob.glob("{}/**".format(base_dir), recursive=True)
# Retain only Files
files = [file for file in files if os.path.isfile(file)]
untracked_files = []
for file in files:
try:
os.getxattr(file, "user.utracked")
except OSError:
track_file(file, base_dir)
untracked_files.append(file)
if __name__ == "__main__":
main()