2019-10-25 06:42:40 +00:00
|
|
|
import glob
|
2019-11-18 17:39:57 +00:00
|
|
|
import os
|
2019-10-25 06:42:40 +00:00
|
|
|
import pathlib
|
|
|
|
import subprocess as sp
|
2019-11-18 17:39:57 +00:00
|
|
|
import time
|
2019-12-21 09:36:55 +00:00
|
|
|
|
2019-11-18 17:39:57 +00:00
|
|
|
from uuid import uuid4
|
2019-10-25 06:42:40 +00:00
|
|
|
|
2019-12-03 11:49:10 +00:00
|
|
|
from . import logger
|
2019-12-22 07:26:48 +00:00
|
|
|
from ucloud.settings import settings
|
|
|
|
from ucloud.shared import shared
|
2019-10-25 06:42:40 +00:00
|
|
|
|
|
|
|
|
2019-11-02 15:42:24 +00:00
|
|
|
def sha512sum(file: str):
    """Compute the SHA-512 digest of the file at path ``file``.

    The digest is computed in-process with ``hashlib`` rather than by
    shelling out to the ``sha512sum`` utility. This keeps the result
    independent of whether the utility is installed, of the locale of its
    error messages, and of file names the utility treats specially (names
    that look like options, or names containing a backslash or newline,
    for which the utility prefixes its output with ``\\``).

    Args:
        file: Path of the file to hash. Must be a ``str``.

    Returns:
        The lowercase hexadecimal SHA-512 digest of the file contents, or
        ``None`` if the path exists but cannot be read as a file (for
        example it is a directory, or permission is denied).

    Raises:
        TypeError: If ``file`` is not a ``str``.
        FileNotFoundError: If ``file`` does not exist.
    """
    # Function-scope import so this block does not depend on a change to
    # the module-level import list.
    import hashlib

    if not isinstance(file, str):
        raise TypeError

    digest = hashlib.sha512()
    try:
        with open(file, "rb") as stream:
            # Read in fixed-size chunks so large files are never loaded
            # into memory at once.
            for chunk in iter(lambda: stream.read(1024 * 1024), b""):
                digest.update(chunk)
    except FileNotFoundError:
        # Missing file is the one error callers are told to expect.
        raise
    except OSError:
        # Any other I/O failure maps to "could not compute" (None),
        # matching the documented fallback.
        return None
    return digest.hexdigest()
|
2019-10-25 06:42:40 +00:00
|
|
|
|
|
|
|
|
2019-12-23 07:58:04 +00:00
|
|
|
def track_file(file, base_dir):
    """Register ``file`` in etcd and mark it as tracked on disk.

    A fresh UUID is generated for the file and an entry holding its name,
    owner, SHA-512 digest, change time and size is stored as JSON under
    ``settings["etcd"]["file_prefix"]``. Afterwards the extended attribute
    ``user.utracked`` is set on the file.

    Args:
        file: Path of the file to track.
        base_dir: Storage root; the path component of ``file`` directly
            below it is recorded as the owner.
    """
    identifier = uuid4()
    path_parts = pathlib.Path(file).parts

    # The owner is the first path component after those of base_dir.
    base_depth = len(pathlib.Path(base_dir).parts)
    owner = path_parts[base_depth]

    # st_ctime is used as the "creation date", although on most systems
    # it is the time of the last metadata change rather than creation.
    stat_result = os.stat(file)
    creation_date = time.ctime(stat_result.st_ctime)

    # Only the final path component is stored as the file name.
    filename = path_parts[-1]

    # Build the etcd key/value pair for this file.
    prefix = settings["etcd"]["file_prefix"]
    entry_key = os.path.join(prefix, str(identifier))
    checksum = sha512sum(file)
    size = os.path.getsize(file)
    entry_value = {
        "filename": filename,
        "owner": owner,
        "sha512sum": checksum,
        "creation_date": creation_date,
        "size": size,
    }

    logger.info("Tracking %s", file)

    shared.etcd_client.put(entry_key, entry_value, value_in_json=True)
    # Flag the file so later scans can recognise it as already tracked.
    os.setxattr(file, "user.utracked", b"True")
|
2019-10-25 06:42:40 +00:00
|
|
|
|
2019-11-18 17:39:57 +00:00
|
|
|
|
2019-11-02 15:42:24 +00:00
|
|
|
def main():
    """Track every not-yet-tracked regular file below the storage dir.

    The storage root is read from ``settings["storage"]["file_dir"]``.
    A file counts as tracked when its ``user.utracked`` extended attribute
    can be read; every other regular file is passed to ``track_file``.
    """
    base_dir = settings["storage"]["file_dir"]

    # Recursively collect everything below base_dir. The result contains
    # directories as well as files; note that glob does not match names
    # beginning with a dot for this pattern.
    candidates = glob.glob("{}/**".format(base_dir), recursive=True)

    # Retain only regular files.
    files = [path for path in candidates if os.path.isfile(path)]

    for file in files:
        try:
            os.getxattr(file, "user.utracked")
        except OSError:
            # The marker attribute could not be read: treat as untracked.
            already_tracked = False
        else:
            already_tracked = True

        # Tracking happens outside the except block so that a failure in
        # track_file is not reported as chained to the getxattr error.
        if not already_tracked:
            track_file(file, base_dir)
|
2019-10-25 06:42:40 +00:00
|
|
|
|
|
|
|
|
2019-11-02 15:42:24 +00:00
|
|
|
# Run the tracker only when this module is executed as the entry point,
# not when it is imported.
if __name__ == "__main__":
    main()
|