2019-10-25 06:42:40 +00:00
|
|
|
import glob
|
2019-11-18 17:39:57 +00:00
|
|
|
import os
|
2019-10-25 06:42:40 +00:00
|
|
|
import pathlib
|
|
|
|
import subprocess as sp
|
2019-11-18 17:39:57 +00:00
|
|
|
import time
|
2020-01-03 13:38:59 +00:00
|
|
|
import argparse
|
2020-01-05 12:21:26 +00:00
|
|
|
import bitmath
|
2019-12-21 09:36:55 +00:00
|
|
|
|
2019-11-18 17:39:57 +00:00
|
|
|
from uuid import uuid4
|
2019-10-25 06:42:40 +00:00
|
|
|
|
2019-12-03 11:49:10 +00:00
|
|
|
from . import logger
|
2020-01-06 07:25:59 +00:00
|
|
|
from uncloud.common.settings import settings
|
2020-01-08 19:40:05 +00:00
|
|
|
from uncloud.common.shared import shared
|
2019-10-25 06:42:40 +00:00
|
|
|
|
2020-01-03 13:38:59 +00:00
|
|
|
# Command-line interface of the file scanner.  Automatic ``-h/--help``
# handling is switched off on this parser (presumably because it is meant
# to be embedded in a parent parser -- confirm against the caller).
arg_parser = argparse.ArgumentParser(prog='filescanner', add_help=False)

# Mandatory option; main() uses the host name to select which tracked
# entries belong to this machine.
arg_parser.add_argument(
    '--hostname',
    required=True,
)
|
2020-01-03 13:38:59 +00:00
|
|
|
|
|
|
|
|
2019-11-02 15:42:24 +00:00
|
|
|
def sha512sum(file: str):
    """Compute the SHA-512 hex digest of the file at path ``file``.

    The digest is computed in-process with :mod:`hashlib` instead of
    spawning the external ``sha512sum`` utility.  This avoids depending on
    the utility being installed, on the locale of its error messages, and
    on how it interprets unusual file names (``-``, names starting with
    ``-``, or names containing backslashes/newlines).

    :param file: path of the file to hash; must be a ``str``.
    :return: the lowercase hexadecimal SHA-512 digest, or ``None`` when the
        path exists but cannot be read (e.g. it is a directory or access
        is denied).
    :raises TypeError: if ``file`` is not a ``str``.
    :raises FileNotFoundError: if ``file`` does not exist.
    """
    # Local import so the block does not rely on a file-level import.
    import hashlib

    if not isinstance(file, str):
        raise TypeError(
            'file must be a str, got {}'.format(type(file).__name__)
        )

    digest = hashlib.sha512()
    try:
        with open(file, 'rb') as stream:
            # Read in fixed-size chunks so large files are never loaded
            # into memory in one piece.
            for chunk in iter(lambda: stream.read(1024 * 1024), b''):
                digest.update(chunk)
    except FileNotFoundError:
        # A missing file is reported to the caller.
        raise
    except OSError:
        # Any other I/O failure means "sum could not be computed".
        return None

    return digest.hexdigest()
|
2019-10-25 06:42:40 +00:00
|
|
|
|
|
|
|
|
2020-01-07 12:57:44 +00:00
|
|
|
def track_file(file, base_dir, host):
    """Record ``file`` in etcd as a tracked file stored on ``host``.

    The first path component of ``file`` below ``base_dir`` is taken as the
    owner; the remainder of the path is stored as the file name.  The entry
    is written under a fresh UUID key below ``settings['etcd']['file_prefix']``.

    :param file: path object of the file to track; must support
        ``relative_to`` (``main`` passes a ``pathlib.Path``).
    :param base_dir: directory that ``file`` is located under.
    :param host: value stored under the entry's ``'host'`` key.
    :raises ValueError: propagated from ``relative_to`` when ``file`` is
        not located under ``base_dir``.
    """
    file_str = str(file)
    file_path = file.relative_to(base_dir)

    # Split "<owner>/<rest>".  When there is no component below base_dir,
    # fall back to an empty owner so that building the entry below cannot
    # fail on an unbound name; an empty owner still joins cleanly when the
    # path is rebuilt with os.path.join(base_dir, owner, filename).
    parts = file_path.parts
    if parts:
        owner = parts[0]
        file_path = file_path.relative_to(owner)
    else:
        owner = ''

    # One stat call supplies both timestamp and size, so the two values
    # describe the same state of the file.
    # NOTE(review): on Unix st_ctime is the time of the last metadata
    # change, not the creation time -- confirm this is what is wanted.
    file_stat = os.stat(file_str)
    creation_date = time.ctime(file_stat.st_ctime)

    entry_key = os.path.join(settings['etcd']['file_prefix'], str(uuid4()))
    entry_value = {
        'filename': str(file_path),
        'owner': owner,
        'sha512sum': sha512sum(file_str),
        'creation_date': creation_date,
        'size': str(bitmath.Byte(file_stat.st_size).to_MB()),
        'host': host,
    }

    logger.info('Tracking %s', file_str)
    shared.etcd_client.put(entry_key, entry_value, value_in_json=True)
|
2019-10-25 06:42:40 +00:00
|
|
|
|
2019-11-18 17:39:57 +00:00
|
|
|
|
2020-01-07 12:57:44 +00:00
|
|
|
def main(hostname, debug=False):
    """Track every file below the storage directory that is not yet tracked.

    Files found under ``settings['storage']['file_dir']`` are compared with
    the etcd entries below ``settings['etcd']['file_prefix']`` whose
    ``'host'`` equals ``hostname``; each file without such an entry is
    handed to ``track_file``.

    :param hostname: host name used to filter existing entries and stored
        with newly tracked files.
    :param debug: accepted but not used by this function.
    """
    base_dir = settings['storage']['file_dir']

    # Everything glob finds recursively below base_dir, reduced to regular
    # files.  NOTE(review): glob's ``**`` does not match dot-files by
    # default, so hidden files are not picked up -- confirm that is intended.
    pattern = '{}/**'.format(base_dir)
    on_disk = set()
    for name in glob.glob(pattern, recursive=True):
        candidate = pathlib.Path(name)
        if candidate.is_file():
            on_disk.add(candidate)

    # Paths of the files this host already has an etcd entry for.
    already_tracked = set()
    entries = shared.etcd_client.get_prefix(
        settings['etcd']['file_prefix'], value_in_json=True
    )
    for entry in entries:
        record = entry.value
        if record['host'] != hostname:
            continue
        joined = os.path.join(base_dir, record['owner'], record['filename'])
        already_tracked.add(pathlib.Path(joined))

    for file in on_disk - already_tracked:
        track_file(file, base_dir, hostname)
|