ucloud-{api,scheduler,host,filescanner,imagescanner,metadata} combined
This commit is contained in:
commit
da77ac65eb
29 changed files with 3941 additions and 0 deletions
109
filescanner/main.py
Executable file
109
filescanner/main.py
Executable file
|
|
@ -0,0 +1,109 @@
|
|||
import os
|
||||
import glob
|
||||
import pathlib
|
||||
import time
|
||||
import hashlib
|
||||
import subprocess as sp
|
||||
|
||||
from decouple import config
|
||||
from etcd3_wrapper import Etcd3Wrapper
|
||||
from uuid import uuid4
|
||||
|
||||
|
||||
|
||||
def getxattr(file, attr):
    """Read an extended attribute of ``file`` using the ``getfattr`` tool.

    ``attr`` is given without its namespace: ``"user."`` is always prepended
    here, so a caller passing ``"user.foo"`` addresses ``user.user.foo``.

    Returns the attribute value decoded as UTF-8, or ``None`` when
    ``getfattr`` exits with a non-zero status (for example because the
    attribute is not set on the file).
    """
    attr = "user." + attr
    try:
        # Options come first and "--" ends option parsing, so a path that
        # starts with "-" can never be mistaken for a command-line option.
        value = sp.check_output(['getfattr',
                                 '--name', attr,
                                 '--only-values',
                                 '--absolute-names',
                                 '--', file])
    except sp.CalledProcessError:
        return None

    return value.decode("utf-8")
|
||||
|
||||
def setxattr(file, attr, value):
    """Set an extended attribute on ``file`` using the ``setfattr`` tool.

    ``"user."`` is always prepended to ``attr`` and ``value`` is stored as
    ``str(value)``.

    Raises ``subprocess.CalledProcessError`` when ``setfattr`` exits with a
    non-zero status.
    """
    attr = "user." + attr
    # Options come first and "--" ends option parsing, so a path that starts
    # with "-" can never be mistaken for a command-line option.
    sp.check_output(['setfattr',
                     '--name', attr,
                     '--value', str(value),
                     '--', file])
|
||||
|
||||
|
||||
def sha512sum(filename):
    """Return the hex-encoded SHA-512 digest of the file at ``filename``.

    The file is read in 64 KiB chunks so it never has to be held in memory
    in full.
    """
    chunk_size = 2**16
    digest = hashlib.sha512()

    with open(filename, "rb") as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)

    return digest.hexdigest()
|
||||
|
||||
|
||||
# getxattr()/setxattr() shell out to these tools, so refuse to start when
# either one is missing from PATH.
try:
    sp.check_output(['which', 'getfattr'])
    sp.check_output(['which', 'setfattr'])
except (sp.CalledProcessError, OSError) as e:
    # CalledProcessError: `which` did not find the tool.
    # OSError: `which` itself could not be executed.
    print(e)
    print('Make sure you have getfattr and setfattr available')
    # SystemExit instead of exit(): the latter is a `site` convenience that
    # is not defined when Python runs with -S.
    raise SystemExit(1)
|
||||
|
||||
|
||||
# Directory that is scanned recursively for files.
BASE_DIR = config("BASE_DIR")

# Key prefix under which one entry per file is stored in etcd.
FILE_PREFIX = config("FILE_PREFIX")

etcd_client = Etcd3Wrapper(host=config("ETCD_URL"))

# Recursively list everything below BASE_DIR and keep regular files only.
files = [
    path
    for path in glob.glob("{}/**".format(BASE_DIR), recursive=True)
    if os.path.isfile(path)
]

# A file counts as untracked while its "utracked" marker attribute is unset
# or empty (getxattr returns None when getfattr fails).
untracked_files = [f for f in files if not getxattr(f, "user.utracked")]

# Set lookup keeps this partition linear instead of quadratic in len(files).
_untracked_lookup = set(untracked_files)
tracked_files = [f for f in files if f not in _untracked_lookup]
|
||||
# Register every untracked file in etcd, then mark it as tracked.
# Expected layout: BASE_DIR/<owner>/<path inside the owner's directory>
base_path = pathlib.Path(BASE_DIR)

for file in untracked_files:
    # Work relative to BASE_DIR rather than with fixed indices into the
    # absolute path, so the depth of BASE_DIR does not matter.
    relative_parts = pathlib.Path(file).relative_to(base_path).parts

    if len(relative_parts) < 2:
        # The file sits directly in BASE_DIR: there is no owner directory
        # and no remaining path to record, so leave it untracked.
        print("Skipping {} (not inside an owner directory)".format(file))
        continue

    file_id = uuid4()

    # Get Username (first directory below BASE_DIR)
    owner = relative_parts[0]

    # File Path excluding base and username
    file_path = os.path.join(*relative_parts[1:])

    stat_result = os.stat(file)

    # Get Creation Date of File
    # Here, we are assuming that ctime is creation time
    # which is mostly not true.
    creation_date = time.ctime(stat_result.st_ctime)

    # Get File Size
    size = stat_result.st_size

    # Compute sha512 sum
    sha_sum = sha512sum(file)

    # Create Entry
    entry_key = os.path.join(FILE_PREFIX, str(file_id))
    entry_value = {
        "filename": file_path,
        "owner": owner,
        "sha512sum": sha_sum,
        "creation_date": creation_date,
        "size": size,
    }

    print("Tracking {}".format(file))

    # Insert Entry
    etcd_client.put(entry_key, entry_value, value_in_json=True)

    # Mark the file only after the entry was stored, so a failed put is
    # retried on the next run. setxattr prepends "user." itself, so the
    # attribute written is "user.user.utracked" -- the same name the
    # getxattr(f, "user.utracked") lookup reads.
    setxattr(file, "user.utracked", True)
|
||||
Loading…
Add table
Add a link
Reference in a new issue