From 180f6f4989133b4a27833d6f9eebc7cc26cf02ce Mon Sep 17 00:00:00 2001
From: meow <ahmedbilal96@gmail.com>
Date: Sun, 5 Jan 2020 17:21:26 +0500
Subject: [PATCH] No longer using xattrs as they don't work on tmpfs/rootfs

---
 uncloud/filescanner/main.py | 78 +++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 42 deletions(-)

diff --git a/uncloud/filescanner/main.py b/uncloud/filescanner/main.py
index c81fbbe..e4d807c 100755
--- a/uncloud/filescanner/main.py
+++ b/uncloud/filescanner/main.py
@@ -4,6 +4,7 @@ import pathlib
 import subprocess as sp
 import time
 import argparse
+import bitmath
 
 from uuid import uuid4
 
@@ -28,66 +29,59 @@ def sha512sum(file: str):
     if not isinstance(file, str):
         raise TypeError
     try:
-        output = sp.check_output(["sha512sum", file], stderr=sp.PIPE)
+        output = sp.check_output(['sha512sum', file], stderr=sp.PIPE)
     except sp.CalledProcessError as e:
-        error = e.stderr.decode("utf-8")
-        if "No such file or directory" in error:
+        error = e.stderr.decode('utf-8')
+        if 'No such file or directory' in error:
             raise FileNotFoundError from None
     else:
-        output = output.decode("utf-8").strip()
-        output = output.split(" ")
+        output = output.decode('utf-8').strip()
+        output = output.split(' ')
         return output[0]
     return None
 
 
 def track_file(file, base_dir):
-    file_id = uuid4()
+    file_path = file.relative_to(base_dir)
 
     # Get Username
-    owner = pathlib.Path(file).parts[len(pathlib.Path(base_dir).parts)]
+    try:
+        owner = file_path.parts[0]
+    except IndexError:
+        pass
+    else:
+        file_path = file_path.relative_to(owner)
+        creation_date = time.ctime(os.stat(file).st_ctime)
 
-    # Get Creation Date of File
-    # Here, we are assuming that ctime is creation time
-    # which is mostly not true.
-    creation_date = time.ctime(os.stat(file).st_ctime)
+        entry_key = os.path.join(settings['etcd']['file_prefix'], str(uuid4()))
+        entry_value = {
+            'filename': str(file_path),
+            'owner': owner,
+            'sha512sum': sha512sum(str(file)),
+            'creation_date': creation_date,
+            'size': str(bitmath.Byte(os.path.getsize(str(file))).to_MB()),
+        }
 
-    file_path = pathlib.Path(file).parts[-1]
+        logger.info('Tracking %s', file)
 
-    # Create Entry
-    entry_key = os.path.join(
-        settings["etcd"]["file_prefix"], str(file_id)
-    )
-    entry_value = {
-        "filename": file_path,
-        "owner": owner,
-        "sha512sum": sha512sum(file),
-        "creation_date": creation_date,
-        "size": os.path.getsize(file),
-    }
-
-    logger.info("Tracking %s", file)
-
-    shared.etcd_client.put(entry_key, entry_value, value_in_json=True)
-    os.setxattr(file, "user.utracked", b"True")
+        shared.etcd_client.put(entry_key, entry_value, value_in_json=True)
 
 
 def main(debug=False):
-    base_dir = settings["storage"]["file_dir"]
-
+    base_dir = pathlib.Path(settings['storage']['file_dir'])
     # Recursively Get All Files and Folder below BASE_DIR
-    files = glob.glob("{}/**".format(base_dir), recursive=True)
+    files = glob.glob('{}/**'.format(base_dir), recursive=True)
+    files = [pathlib.Path(f) for f in files if pathlib.Path(f).is_file()]
 
-    # Retain only Files
-    files = [file for file in files if os.path.isfile(file)]
-
-    untracked_files = []
-    for file in files:
-        try:
-            os.getxattr(file, "user.utracked")
-        except OSError:
-            track_file(file, base_dir)
-            untracked_files.append(file)
+    # Files that are already tracked
+    tracked_files = [
+        pathlib.Path(os.path.join(base_dir, f.value['owner'], f.value['filename']))
+        for f in shared.etcd_client.get_prefix(settings['etcd']['file_prefix'], value_in_json=True)
+    ]
+    untracked_files = set(files) - set(tracked_files)
+    for file in untracked_files:
+        track_file(file, base_dir)
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()