From cc0ca68498aba73f023a8c07855418eff16fab63 Mon Sep 17 00:00:00 2001 From: meow Date: Mon, 25 Nov 2019 11:52:36 +0500 Subject: [PATCH] * Refactoring * Fix issue that caused a new image store to be created at every start of ucloud-api. * VM migration API call now takes a hostname instead of a host key. * StorageHandler classes are introduced. They transparently handle importing an image, creating a VM image from a base image, resizing a VM image, deleting a VM image, etc. * Loggers added to __init__.py of every ucloud component's subpackage. * Trivial timeout events are no longer logged. * Fix issue that prevented removal of stopped VMs (i.e. VMs that were successfully migrated). * Improved unit handling added, e.g. MB, Mb, mB and mb are all treated as megabytes. * VM migration is now possible on IPv6 hosts. * Destination VM (the receiving side of a VM migration) now correctly expects incoming data on a free ephemeral port. * Tracebacks are no longer output to the screen; instead they go to the log file. * All sanity checks are put into a single file. These checks are run by ucloud.py before running any ucloud component. --- api/helper.py | 2 +- api/main.py | 99 ++-- api/schemas.py | 6 +- common/classes.py | 22 - common/helpers.py | 15 + common/storage_handlers.py | 158 ++++++ common/vm.py | 4 - config.py | 17 +- docs/source/diagram-code/ucloud | 44 ++ docs/source/images/ucloud.svg | 494 ++++++++++++++++++ docs/source/index.rst | 8 +- docs/source/introduction/installation.rst | 34 +- docs/source/misc/todo.rst | 12 + docs/source/theory/summary.rst | 98 ++++ .../installation-troubleshooting.rst | 24 + filescanner/__init__.py | 3 + filescanner/main.py | 9 +- host/helper.py | 13 + host/main.py | 25 +- host/qmp/__init__.py | 1 + host/virtualmachine.py | 151 ++---- imagescanner/main.py | 56 +- sanity_checks.py | 33 ++ scheduler/helper.py | 8 +- scheduler/main.py | 12 +- ucloud.py | 47 +- 26 files changed, 1101 insertions(+), 294 deletions(-) create mode 100644 common/storage_handlers.py create mode 100644 docs/source/diagram-code/ucloud create mode 100644 docs/source/images/ucloud.svg create mode 100644 docs/source/theory/summary.rst create mode 100644 docs/source/troubleshooting/installation-troubleshooting.rst create mode 100644 host/helper.py create mode 100644 sanity_checks.py diff --git a/api/helper.py b/api/helper.py index a45bd16..eb32373 100755 --- a/api/helper.py +++ b/api/helper.py @@ -22,7 +22,7 @@ def check_otp(name, realm, token): except binascii.Error: return 400 - response = requests.get( + response = requests.post( "{OTP_SERVER}{OTP_VERIFY_ENDPOINT}".format( OTP_SERVER=env_vars.get("OTP_SERVER", ""), OTP_VERIFY_ENDPOINT=env_vars.get("OTP_VERIFY_ENDPOINT", "verify"), diff --git a/api/main.py b/api/main.py index e621ce1..59b7dc0 100644 --- a/api/main.py +++ b/api/main.py @@ -1,16 +1,16 @@ import json -import os import subprocess -from uuid import uuid4 - import pynetbox + +from uuid import uuid4 +from os.path import join as join_path + from flask import Flask, request from flask_restful import Resource, Api from common import counters from common.request import RequestEntry, RequestType -from common.vm import VMStatus -from config import (etcd_client, request_pool, vm_pool, host_pool, env_vars) +from config import (etcd_client, request_pool, vm_pool, host_pool, env_vars, image_storage_handler) from . 
import schemas from .helper import generate_mac, mac2ipv6 from api import logger @@ -20,13 +20,15 @@ api = Api(app) class CreateVM(Resource): + """API Request to Handle Creation of VM""" + @staticmethod def post(): data = request.json validator = schemas.CreateVMSchema(data) if validator.is_valid(): vm_uuid = uuid4().hex - vm_key = os.path.join(env_vars.get("VM_PREFIX"), vm_uuid) + vm_key = join_path(env_vars.get("VM_PREFIX"), vm_uuid) specs = { "cpu": validator.specs["cpu"], "ram": validator.specs["ram"], @@ -67,14 +69,14 @@ class VmStatus(Resource): validator = schemas.VMStatusSchema(data) if validator.is_valid(): vm = vm_pool.get( - os.path.join(env_vars.get("VM_PREFIX"), data["uuid"]) + join_path(env_vars.get("VM_PREFIX"), data["uuid"]) ) vm_value = vm.value.copy() vm_value["ip"] = [] for network_and_mac in vm.network: network_name, mac = network_and_mac network = etcd_client.get( - os.path.join( + join_path( env_vars.get("NETWORK_PREFIX"), data["name"], network_name, @@ -96,7 +98,7 @@ class CreateImage(Resource): validator = schemas.CreateImageSchema(data) if validator.is_valid(): file_entry = etcd_client.get( - os.path.join(env_vars.get("FILE_PREFIX"), data["uuid"]) + join_path(env_vars.get("FILE_PREFIX"), data["uuid"]) ) file_entry_value = json.loads(file_entry.value) @@ -109,7 +111,7 @@ class CreateImage(Resource): "visibility": "public", } etcd_client.put( - os.path.join(env_vars.get("IMAGE_PREFIX"), data["uuid"]), + join_path(env_vars.get("IMAGE_PREFIX"), data["uuid"]), json.dumps(image_entry_json), ) @@ -123,8 +125,9 @@ class ListPublicImages(Resource): images = etcd_client.get_prefix( env_vars.get("IMAGE_PREFIX"), value_in_json=True ) - r = {} - r["images"] = [] + r = { + "images": [] + } for image in images: image_key = "{}:{}".format( image.value["store_name"], image.value["name"] @@ -143,46 +146,22 @@ class VMAction(Resource): if validator.is_valid(): vm_entry = vm_pool.get( - os.path.join(env_vars.get("VM_PREFIX"), data["uuid"]) + join_path(env_vars.get("VM_PREFIX"), data["uuid"]) ) action = data["action"] if action == "start": - vm_entry.status = VMStatus.requested_start - vm_pool.put(vm_entry) action = "schedule" if action == "delete" and vm_entry.hostname == "": - try: - path_without_protocol = vm_entry.path[ - vm_entry.path.find(":") + 1: - ] - - if env_vars.get("WITHOUT_CEPH"): - command_to_delete = [ - "rm", - "-rf", - os.path.join("/var/vm", vm_entry.uuid), - ] - else: - command_to_delete = [ - "rbd", - "rm", - path_without_protocol, - ] - - subprocess.check_output( - command_to_delete, stderr=subprocess.PIPE - ) - except subprocess.CalledProcessError as e: - if "No such file" in e.stderr.decode("utf-8"): + if image_storage_handler.is_vm_image_exists(vm_entry.uuid): + r_status = image_storage_handler.delete_vm_image(vm_entry.uuid) + if r_status: etcd_client.client.delete(vm_entry.key) return {"message": "VM successfully deleted"} else: - logger.exception(e) - return { - "message": "Some error occurred while deleting VM" - } + logger.error("Some error occurred while deleting VM") + return {"message": "VM deletion unsuccessful"} else: etcd_client.client.delete(vm_entry.key) return {"message": "VM successfully deleted"} @@ -211,8 +190,8 @@ class VMMigration(Resource): r = RequestEntry.from_scratch( type=RequestType.ScheduleVM, uuid=vm.uuid, - destination=os.path.join( - env_vars.get("HOST_PREFIX"), data["destination"] + destination=join_path( + env_vars.get("HOST_PREFIX"), validator.destination.value ), migration=True, request_prefix=env_vars.get("REQUEST_PREFIX") @@ 
-289,7 +268,7 @@ class CreateHost(Resource): data = request.json validator = schemas.CreateHostSchema(data) if validator.is_valid(): - host_key = os.path.join(env_vars.get("HOST_PREFIX"), uuid4().hex) + host_key = join_path(env_vars.get("HOST_PREFIX"), uuid4().hex) host_entry = { "specs": data["specs"], "hostname": data["hostname"], @@ -327,7 +306,7 @@ class GetSSHKeys(Resource): if not validator.key_name.value: # {user_prefix}/{realm}/{name}/key/ - etcd_key = os.path.join( + etcd_key = join_path( env_vars.get('USER_PREFIX'), data["realm"], data["name"], @@ -344,7 +323,7 @@ class GetSSHKeys(Resource): else: # {user_prefix}/{realm}/{name}/key/{key_name} - etcd_key = os.path.join( + etcd_key = join_path( env_vars.get('USER_PREFIX'), data["realm"], data["name"], @@ -373,7 +352,7 @@ class AddSSHKey(Resource): if validator.is_valid(): # {user_prefix}/{realm}/{name}/key/{key_name} - etcd_key = os.path.join( + etcd_key = join_path( env_vars.get("USER_PREFIX"), data["realm"], data["name"], @@ -403,7 +382,7 @@ class RemoveSSHKey(Resource): if validator.is_valid(): # {user_prefix}/{realm}/{name}/key/{key_name} - etcd_key = os.path.join( + etcd_key = join_path( env_vars.get("USER_PREFIX"), data["realm"], data["name"], @@ -462,7 +441,7 @@ class CreateNetwork(Resource): else: network_entry["ipv6"] = "fd00::/64" - network_key = os.path.join( + network_key = join_path( env_vars.get("NETWORK_PREFIX"), data["name"], data["network_name"], @@ -480,7 +459,7 @@ class ListUserNetwork(Resource): validator = schemas.OTPSchema(data) if validator.is_valid(): - prefix = os.path.join( + prefix = join_path( env_vars.get("NETWORK_PREFIX"), data["name"] ) networks = etcd_client.get_prefix(prefix, value_in_json=True) @@ -517,15 +496,17 @@ api.add_resource(CreateNetwork, "/network/create") def main(): - data = { - "is_public": True, - "type": "ceph", - "name": "images", - "description": "first ever public image-store", - "attributes": {"list": [], "key": [], "pool": "images"}, - } + image_stores = list(etcd_client.get_prefix(env_vars.get('IMAGE_STORE_PREFIX'), value_in_json=True)) + if len(image_stores) == 0: + data = { + "is_public": True, + "type": "ceph", + "name": "images", + "description": "first ever public image-store", + "attributes": {"list": [], "key": [], "pool": "images"}, + } - etcd_client.put(os.path.join(env_vars.get('IMAGE_STORE_PREFIX'), uuid4().hex), json.dumps(data)) + etcd_client.put(join_path(env_vars.get('IMAGE_STORE_PREFIX'), uuid4().hex), json.dumps(data)) app.run(host="::", debug=True) diff --git a/api/schemas.py b/api/schemas.py index 28a1bc1..e50d9f0 100755 --- a/api/schemas.py +++ b/api/schemas.py @@ -381,12 +381,14 @@ class VmMigrationSchema(OTPSchema): super().__init__(data=data, fields=fields) def destination_validation(self): - host_key = self.destination.value - host = host_pool.get(host_key) + hostname = self.destination.value + host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None) if not host: self.add_error("No Such Host ({}) exists".format(self.destination.value)) elif host.status != HostStatus.alive: self.add_error("Destination Host is dead") + else: + self.destination.value = host.key def validation(self): vm = vm_pool.get(self.uuid.value) diff --git a/common/classes.py b/common/classes.py index 2cea033..2eae809 100644 --- a/common/classes.py +++ b/common/classes.py @@ -1,28 +1,6 @@ -from decouple import Config, RepositoryEnv, UndefinedValueError from etcd3_wrapper import EtcdEntry -class EnvironmentVariables: - def __init__(self, env_file): - try: - env_config 
= Config(RepositoryEnv(env_file)) - except FileNotFoundError: - print("{} does not exists".format(env_file)) - exit(1) - else: - self.config = env_config - - def get(self, *args, **kwargs): - """Return value of var from env_vars""" - try: - value = self.config.get(*args, **kwargs) - except UndefinedValueError as e: - print(e) - exit(1) - else: - return value - - class SpecificEtcdEntryBase: def __init__(self, e: EtcdEntry): self.key = e.key diff --git a/common/helpers.py index c0d64e4..1bdf0b4 100644 --- a/common/helpers.py +++ b/common/helpers.py @@ -1,5 +1,9 @@ import logging import socket +import requests +import json + +from ipaddress import ip_address from os.path import join as join_path @@ -37,3 +41,14 @@ def get_ipv4_address(): address = s.getsockname()[0] return address + + +def get_ipv6_address(): + try: + r = requests.get("https://api6.ipify.org?format=json") + content = json.loads(r.content.decode("utf-8")) + ip = ip_address(content["ip"]).exploded + except Exception as e: + logging.exception(e) + else: + return ip diff --git a/common/storage_handlers.py new file mode 100644 index 0000000..c74bca8 --- /dev/null +++ b/common/storage_handlers.py @@ -0,0 +1,158 @@ +import shutil +import subprocess as sp +import os +import stat + +from abc import ABC +from host import logger +from os.path import join as join_path + + +class ImageStorageHandler(ABC): + def __init__(self, image_base, vm_base): + self.image_base = image_base + self.vm_base = vm_base + + def import_image(self, src, dest, protect=False): + """Put an image at the destination + :param src: An image file. + :param dest: A path where :param src: is to be put. + :param protect: If protect is true then the destination image is protected (e.g. made read-only). + The source must exist on the filesystem. + """ + + raise NotImplementedError() + + def make_vm_image(self, src, dest): + """Copy an image from src to dest + + :param src: A path + :param dest: A path + + src and dest must be on the same storage system, i.e. both on the file system or both on CEPH etc. 
+ """ + raise NotImplementedError() + + def resize_vm_image(self, path, size): + """Resize image located at :param path: + :param path: The file which is to be resized + :param size: Size must be in Megabytes + """ + raise NotImplementedError() + + def delete_vm_image(self, path): + raise NotImplementedError() + + def execute_command(self, command, report=True): + command = list(map(str, command)) + try: + output = sp.check_output(command, stderr=sp.PIPE) + except Exception as e: + if report: + print(e) + logger.exception(e) + return False + return True + + def vm_path_string(self, path): + raise NotImplementedError() + + def qemu_path_string(self, path): + raise NotImplementedError() + + def is_vm_image_exists(self, path): + raise NotImplementedError() + + +class FileSystemBasedImageStorageHandler(ImageStorageHandler): + def import_image(self, src, dest, protect=False): + dest = join_path(self.image_base, dest) + try: + shutil.copy(src, dest) + if protect: + os.chmod(dest, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) + except Exception as e: + logger.exception(e) + return False + return True + + def make_vm_image(self, src, dest): + src = join_path(self.image_base, src) + dest = join_path(self.vm_base, dest) + try: + shutil.copy(src, dest) + except Exception as e: + logger.exception(e) + return False + return True + + def resize_vm_image(self, path, size): + path = join_path(self.vm_base, path) + command = ["qemu-img", "resize", "-f", "raw", path, "{}M".format(size)] + if self.execute_command(command): + return True + else: + self.delete_vm_image(path) + return False + + def delete_vm_image(self, path): + path = join_path(self.vm_base, path) + try: + os.remove(path) + except Exception as e: + logger.exception(e) + return False + return True + + def vm_path_string(self, path): + return join_path(self.vm_base, path) + + def qemu_path_string(self, path): + return self.vm_path_string(path) + + def is_vm_image_exists(self, path): + path = join_path(self.vm_base, path) + command = ["ls", path] + return self.execute_command(command, report=False) + + +class CEPHBasedImageStorageHandler(ImageStorageHandler): + def import_image(self, src, dest, protect=False): + dest = join_path(self.image_base, dest) + command = ["rbd", "import", src, dest] + if protect: + snap_create_command = ["rbd", "snap", "create", "{}@protected".format(dest)] + snap_protect_command = ["rbd", "snap", "protect", "{}@protected".format(dest)] + + return self.execute_command(command) and self.execute_command(snap_create_command) and\ + self.execute_command(snap_protect_command) + + return self.execute_command(command) + + def make_vm_image(self, src, dest): + src = join_path(self.image_base, src) + dest = join_path(self.vm_base, dest) + + command = ["rbd", "clone", "{}@protected".format(src), dest] + return self.execute_command(command) + + def resize_vm_image(self, path, size): + path = join_path(self.vm_base, path) + command = ["rbd", "resize", path, "--size", size] + return self.execute_command(command) + + def delete_vm_image(self, path): + path = join_path(self.vm_base, path) + command = ["rbd", "rm", path] + return self.execute_command(command) + + def vm_path_string(self, path): + return join_path(self.vm_base, path) + + def qemu_path_string(self, path): + return "rbd:{}".format(self.vm_path_string(path)) + + def is_vm_image_exists(self, path): + path = join_path(self.vm_base, path) + command = ["rbd", "info", path] + return self.execute_command(command, report=False) diff --git a/common/vm.py b/common/vm.py index 
c778fac..c1c1928 100644 --- a/common/vm.py +++ b/common/vm.py @@ -60,10 +60,6 @@ class VMEntry(SpecificEtcdEntryBase): self.log = self.log[:5] self.log.append("{} - {}".format(datetime.now().isoformat(), msg)) - @property - def path(self): - return "rbd:uservms/{}".format(self.uuid) - class VmPool: def __init__(self, etcd_client, vm_prefix): diff --git a/config.py b/config.py index 5729fed..1048320 100644 --- a/config.py +++ b/config.py @@ -1,14 +1,16 @@ from etcd3_wrapper import Etcd3Wrapper -from common.classes import EnvironmentVariables from common.host import HostPool from common.request import RequestPool from common.vm import VmPool +from common.storage_handlers import FileSystemBasedImageStorageHandler, CEPHBasedImageStorageHandler +from decouple import Config, RepositoryEnv -env_vars = EnvironmentVariables('/etc/ucloud/ucloud.conf') + +env_vars = Config(RepositoryEnv('/etc/ucloud/ucloud.conf')) etcd_wrapper_args = () -etcd_wrapper_kwargs = {"host": env_vars.get("ETCD_URL")} +etcd_wrapper_kwargs = {'host': env_vars.get('ETCD_URL')} etcd_client = Etcd3Wrapper(*etcd_wrapper_args, **etcd_wrapper_kwargs) @@ -17,3 +19,12 @@ vm_pool = VmPool(etcd_client, env_vars.get('VM_PREFIX')) request_pool = RequestPool(etcd_client, env_vars.get('REQUEST_PREFIX')) running_vms = [] + +__storage_backend = env_vars.get("STORAGE_BACKEND") +if __storage_backend == "filesystem": + image_storage_handler = FileSystemBasedImageStorageHandler(vm_base=env_vars.get("VM_DIR"), + image_base=env_vars.get("IMAGE_DIR")) +elif __storage_backend == "ceph": + image_storage_handler = CEPHBasedImageStorageHandler(vm_base="ssd", image_base="ssd") +else: + raise Exception("Unknown Image Storage Handler") diff --git a/docs/source/diagram-code/ucloud b/docs/source/diagram-code/ucloud new file mode 100644 index 0000000..5e73b3d --- /dev/null +++ b/docs/source/diagram-code/ucloud @@ -0,0 +1,44 @@ +graph LR + style ucloud fill:#FFD2FC + style cron fill:#FFF696 + style infrastructure fill:#BDF0FF + subgraph ucloud[ucloud] + ucloud-cli[CLI]-->ucloud-api[API] + ucloud-api-->ucloud-scheduler[Scheduler] + ucloud-api-->ucloud-imagescanner[Image Scanner] + ucloud-api-->ucloud-host[Host] + ucloud-scheduler-->ucloud-host + + ucloud-host-->need-networking{VM need Networking} + need-networking-->|Yes| networking-scripts + need-networking-->|No| VM[Virtual Machine] + need-networking-->|SLAAC?| radvd + networking-scripts-->VM + networking-scripts--Create Networks Devices-->networking-scripts + subgraph cron[Cron Jobs] + ucloud-imagescanner + ucloud-filescanner[File Scanner] + ucloud-filescanner--Track User files-->ucloud-filescanner + end + subgraph infrastructure[Infrastructure] + radvd + etcd + networking-scripts[Networking Scripts] + ucloud-imagescanner-->image-store + image-store{Image Store} + image-store-->|CEPH| ceph + image-store-->|FILE| file-system + ceph[CEPH] + file-system[File System] + end +subgraph virtual-machine[Virtual Machine] + VM + VM-->ucloud-init + +end + +subgraph metadata-group[Metadata Server] +metadata-->ucloud-init +ucloud-init<-->metadata +end +end diff --git a/docs/source/images/ucloud.svg b/docs/source/images/ucloud.svg new file mode 100644 index 0000000..f7e33f8 --- /dev/null +++ b/docs/source/images/ucloud.svg @@ -0,0 +1,494 @@ +
[docs/source/images/ucloud.svg: SVG markup omitted. It is the rendered form of the Mermaid diagram in docs/source/diagram-code/ucloud, with the same node labels: ucloud, Cron Jobs, Infrastructure, Virtual Machine, Metadata Server, CLI, API, Scheduler, Image Scanner, Host, File Scanner, Networking Scripts, radvd, etcd, Image Store, CEPH, File System, ucloud-init, metadata.]
\ No newline at end of file diff --git a/docs/source/index.rst index 0307de8..6443af1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,7 +1,7 @@ .. ucloud documentation master file, created by -sphinx-quickstart on Mon Nov 11 19:08:16 2019. -You can adapt this file completely to your liking, but it should at least -contain the root `toctree` directive. + sphinx-quickstart on Mon Nov 11 19:08:16 2019. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. Welcome to ucloud's documentation! ================================== @@ -15,7 +15,9 @@ Welcome to ucloud's documentation! usage/usage-for-admins usage/usage-for-users usage/how-to-create-an-os-image-for-ucloud + theory/summary misc/todo + troubleshooting/installation-troubleshooting Indices and tables ================== diff --git a/docs/source/introduction/installation.rst index b271ab9..0f36714 100644 --- a/docs/source/introduction/installation.rst +++ b/docs/source/introduction/installation.rst @@ -135,7 +135,7 @@ You just need to update **AUTH_SEED** in the below code to match your auth's seed ETCD_URL=localhost - WITHOUT_CEPH=True + STORAGE_BACKEND=filesystem BASE_DIR=/var/www IMAGE_DIR=/var/image @@ -195,3 +195,35 @@ profile e.g *~/.profile* alias uotp='cd /root/uotp/ && pipenv run python app.py' and run :code:`source ~/.profile` + + +Arch +----- + +.. code-block:: sh + + # Update/Upgrade + pacman -Syuu + pacman -S python3 qemu chrony python-pip + + pip3 install pipenv + + cat > /etc/chrony.conf << EOF + server 0.arch.pool.ntp.org + server 1.arch.pool.ntp.org + server 2.arch.pool.ntp.org + EOF + + systemctl start chronyd + systemctl enable chronyd + + # Create non-root user and allow it sudo access + # without password + useradd -m ucloud + echo "ucloud ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + + sudo -H -u ucloud bash -c 'cd /home/ucloud && git clone https://aur.archlinux.org/yay.git && cd yay && makepkg -si' + sudo -H -u ucloud bash -c 'yay -S etcd' + + systemctl start etcd + systemctl enable etcd \ No newline at end of file diff --git a/docs/source/misc/todo.rst index 3b85e89..4f7fde4 100644 --- a/docs/source/misc/todo.rst +++ b/docs/source/misc/todo.rst @@ -1,6 +1,18 @@ TODO ==== +* **Check Authentication:** Nico reported that some endpoints + even work without providing a token. (ListUserVM) + +* Put overrides for **IMAGE_BASE**, **VM_BASE** in **ImageStorageHandler**. + +* Document the rule "Always use only one StorageHandler". + +* Create Network Manager + * That would handle tasks like bringing interfaces up/down + * Create VXLANs, Bridges, TAPs. + * Remove them when they are no longer used. + * Check for :code:`etcd3.exceptions.ConnectionFailedError` when calling some etcd operation to avoid crashing whole application. * Throw KeyError instead of returning None when some key is not found in etcd. diff --git a/docs/source/theory/summary.rst new file mode 100644 index 0000000..62f6200 --- /dev/null +++ b/docs/source/theory/summary.rst @@ -0,0 +1,98 @@ +Summary +======= + +.. image:: /images/ucloud.svg +
+**ucloud-cli** interacts with **ucloud-api** to perform the following operations: + +- Create/Delete/Start/Stop/Migrate/Probe (status of) Virtual Machines +- Create/Delete Networks +- Add/Get/Delete SSH Keys +- Create an OS image out of a file (tracked by file_scanner) +- List a user's files/networks/VMs +- Add a Host + +ucloud can currently store OS images on + +* File System +* `CEPH `_ + + +**ucloud-api** in turn creates appropriate requests which are taken +up by suitable components of ucloud. For example, if a user uses ucloud-cli +to create a VM, **ucloud-api** creates a **ScheduleVMRequest** containing +things like a pointer to the VM's entry, which holds the specs and networking +configuration of the VM. + +**ucloud-scheduler** accepts requests for VM scheduling and +migration. It finds a host, from a list of available hosts, on which +the incoming VM can run and schedules the VM on that host. + +**ucloud-host** runs on host servers, i.e. servers that +actually run virtual machines, and accepts requests +intended only for them. It creates/deletes/starts/stops/migrates +virtual machines. It also arranges the network resources needed for an +incoming VM. + +**ucloud-filescanner** keeps track of users' files, which are needed +later for creating OS images. + +**ucloud-imagescanner** converts image files from qcow2 format to raw +format, which is then imported into the image store. + +* In case of **File System**, the converted image is copied to + :file:`/var/image/` or the path referred to by the :envvar:`IMAGE_PATH` environment variable + mentioned in :file:`/etc/ucloud/ucloud.conf`. + +* In case of **CEPH**, the converted image is imported into the + specific pool (it depends on the image store to which the image + belongs) of CEPH Block Storage. + +**ucloud-metadata** provides the metadata that is used to contextualize +VMs. When a VM is created, it is just a clone (duplicate) of the OS +image from which it was created. So, to differentiate between my +VM and your VM, the VM needs to be contextualized. This works +as follows: + +.. note:: + Actually, ucloud-init makes the GET request. You can also try it + yourself using curl, but ucloud-init does that for you. + +* The VM makes a GET request to http://metadata, which resolves to the actual + address of the metadata server. The metadata server looks at the IPv6 + address of the requester and extracts the MAC address, which is possible + because the IPv6 address is an + `IPv6 EUI-64 `_ address. + Metadata uses this MAC address to find the actual VM to which it belongs, + along with its owner, ssh keys and much more. Then, metadata returns these + details back to the calling VM in JSON format. These details are + then used by **ucloud-init**, which is explained next. + +**ucloud-init** gets the metadata from **ucloud-metadata** to contextualize +the VM. Specifically, it gets the owner's ssh keys (or any other keys the +owner of the VM added to the authorized keys for this VM) and puts them into the +ssh server's authorized keys (on the VM) so that the owner can access +the VM using ssh. It also installs software needed for the correct +behavior of the VM, e.g. rdnssd (needed for `SLAAC `_). 
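The MAC-address recovery that the metadata server performs can be sketched in a
few lines of Python. This is only an illustration of the EUI-64 convention
described above, not the actual ucloud-metadata code:

.. code-block:: python

    from ipaddress import ip_address

    def mac_from_eui64(ipv6):
        # The low 64 bits of an EUI-64 address are the interface identifier.
        iid = ip_address(ipv6).packed[8:]
        assert iid[3:5] == b'\xff\xfe', 'not an EUI-64 interface identifier'
        # Flip the universal/local bit back and drop the inserted ff:fe.
        first = iid[0] ^ 0x02
        mac = bytes([first]) + iid[1:3] + iid[5:]
        return ':'.join('{:02x}'.format(b) for b in mac)

    print(mac_from_eui64('fe80::0214:22ff:fe01:2345'))  # -> 00:14:22:01:23:45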
+ diff --git a/docs/source/troubleshooting/installation-troubleshooting.rst new file mode 100644 index 0000000..4d9dda4 --- /dev/null +++ b/docs/source/troubleshooting/installation-troubleshooting.rst @@ -0,0 +1,24 @@ +Installation Troubleshooting +============================ + +etcd doesn't start +------------------ + +.. code-block:: sh + + [root@archlinux ~]# systemctl start etcd + Job for etcd.service failed because the control process exited with error code. + See "systemctl status etcd.service" and "journalctl -xe" for details + +Possible solution +~~~~~~~~~~~~~~~~~ +Try :code:`cat /etc/hosts`; if its output contains the following + +.. code-block:: sh + + 127.0.0.1 localhost.localdomain localhost + ::1 localhost localhost.localdomain + + +then, unfortunately, we can't help you. But if it doesn't, you can add the +lines above to :file:`/etc/hosts` to fix the issue. diff --git a/filescanner/__init__.py index e69de29..eea436a 100644 --- a/filescanner/__init__.py +++ b/filescanner/__init__.py @@ -0,0 +1,3 @@ +import logging + +logger = logging.getLogger(__name__) diff --git a/filescanner/main.py index d1ffa46..b80169c 100755 --- a/filescanner/main.py +++ b/filescanner/main.py @@ -6,7 +6,7 @@ import time from uuid import uuid4 from etcd3_wrapper import Etcd3Wrapper - +from filescanner import logger from config import env_vars @@ -17,9 +17,10 @@ def getxattr(file, attr): value = sp.check_output(['getfattr', file, '--name', attr, '--only-values', - '--absolute-names']) + '--absolute-names'], stderr=sp.DEVNULL) value = value.decode("utf-8") - except sp.CalledProcessError: + except sp.CalledProcessError as e: + logger.exception(e) value = None return value @@ -63,7 +64,7 @@ try: sp.check_output(['which', 'getfattr']) sp.check_output(['which', 'setfattr']) except Exception as e: - print(e) + logger.exception(e) print('Make sure you have getfattr and setfattr available') exit(1) diff --git a/host/helper.py new file mode 100644 index 0000000..edcb82d --- /dev/null +++ b/host/helper.py @@ -0,0 +1,13 @@ +import socket +from contextlib import closing + + +def find_free_port(): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + try: + s.bind(('', 0)) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + except Exception: + return None + else: + return s.getsockname()[1] diff --git a/host/main.py index 5b5e620..f512fee 100755 --- a/host/main.py +++ b/host/main.py @@ -1,6 +1,5 @@ import argparse import multiprocessing as mp -import os import time from etcd3_wrapper import Etcd3Wrapper @@ -10,13 +9,17 @@ from config import (vm_pool, request_pool, etcd_client, running_vms, etcd_wrapper_args, etcd_wrapper_kwargs, HostPool, env_vars) + +from .helper import find_free_port from . import virtualmachine from host import logger -def update_heartbeat(host): + +def update_heartbeat(hostname): + """Update the last heartbeat time for :param hostname: in etcd""" client = Etcd3Wrapper(*etcd_wrapper_args, **etcd_wrapper_kwargs) host_pool = HostPool(client, env_vars.get('HOST_PREFIX')) - this_host = next(filter(lambda h: h.hostname == host, host_pool.hosts), None) + this_host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None) while True: this_host.update_heartbeat() @@ -35,17 +38,22 @@ def maintenance(host): # whether this host vm is successfully migrated. If yes # then we shutdown "vm1" on this host. 
+ to_be_removed = [] for running_vm in running_vms: with vm_pool.get_put(running_vm.key) as vm_entry: if vm_entry.hostname != host.key and not vm_entry.in_migration: running_vm.handle.shutdown() - vm_entry.add_log("VM on source host shutdown.") + logger.info("VM migration not completed successfully.") + to_be_removed.append(running_vm) + + for r in to_be_removed: + running_vms.remove(r) + # To check vm running according to etcd entries alleged_running_vms = vm_pool.by_status("RUNNING", vm_pool.by_host(host.key)) for vm_entry in alleged_running_vms: _vm = virtualmachine.get_vm(running_vms, vm_entry.key) - # Whether, the allegedly running vm is in our # running_vms list or not if it is said to be # running on this host but it is not then we @@ -64,10 +72,6 @@ def maintenance(host): def main(hostname): - assert env_vars.get('WITHOUT_CEPH') and os.path.isdir(env_vars.get('VM_DIR')), ( - "You have set env_vars.get('WITHOUT_CEPH') to True. So, the vm directory mentioned" - " in .env file must exists. But, it don't.") - heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,)) host_pool = HostPool(etcd_client, env_vars.get('HOST_PREFIX')) @@ -99,7 +103,6 @@ def main(hostname): request_event = RequestEntry(request_event) if request_event.type == "TIMEOUT": - logger.info("Timeout Event") maintenance(host) continue @@ -121,7 +124,7 @@ def main(hostname): virtualmachine.delete(vm_entry) elif request_event.type == RequestType.InitVMMigration: - virtualmachine.init_migration(vm_entry, host.key) + virtualmachine.start(vm_entry, host.key, find_free_port()) elif request_event.type == RequestType.TransferVM: virtualmachine.transfer(request_event) diff --git a/host/qmp/__init__.py b/host/qmp/__init__.py index ba15838..775b397 100755 --- a/host/qmp/__init__.py +++ b/host/qmp/__init__.py @@ -304,6 +304,7 @@ class QEMUMachine(object): LOG.debug('Command: %r', ' '.join(self._qemu_full_args)) if self._iolog: LOG.debug('Output: %r', self._iolog) + raise Exception(self._iolog) raise def _launch(self): diff --git a/host/virtualmachine.py b/host/virtualmachine.py index 80e9846..5000410 100755 --- a/host/virtualmachine.py +++ b/host/virtualmachine.py @@ -4,27 +4,28 @@ # For QEMU Monitor Protocol Commands Information, See # https://qemu.weilnetz.de/doc/qemu-doc.html#pcsys_005fmonitor -import errno import os import random import subprocess as sp import tempfile import time + from functools import wraps -from os.path import join +from os.path import join as join_path from string import Template from typing import Union import bitmath import sshtunnel -from common.helpers import get_ipv4_address +from common.helpers import get_ipv6_address from common.request import RequestEntry, RequestType from common.vm import VMEntry, VMStatus -from config import etcd_client, request_pool, running_vms, vm_pool, env_vars +from config import etcd_client, request_pool, running_vms, vm_pool, env_vars, image_storage_handler from . 
import qmp from host import logger + class VM: def __init__(self, key, handle, vnc_socket_file): self.key = key # type: str @@ -106,24 +107,16 @@ def update_radvd_conf(etcd_client): sp.check_output(['systemctl', 'restart', 'radvd']) -def get_start_command_args( - vm_entry, vnc_sock_filename: str, migration=False, migration_port=4444, -): +def get_start_command_args(vm_entry, vnc_sock_filename: str, migration=False, migration_port=None): threads_per_core = 1 - vm_memory = int(bitmath.parse_string(vm_entry.specs["ram"]).to_MB()) + vm_memory = int(bitmath.parse_string_unsafe(vm_entry.specs["ram"]).to_MB()) vm_cpus = int(vm_entry.specs["cpu"]) vm_uuid = vm_entry.uuid vm_networks = vm_entry.network - if env_vars.get('WITHOUT_CEPH'): - command = "-drive file={},format=raw,if=virtio,cache=none".format( - os.path.join(env_vars.get('VM_DIR'), vm_uuid) - ) - else: - command = "-drive file=rbd:uservms/{},format=raw,if=virtio,cache=none".format( - vm_uuid - ) - + command = "-drive file={},format=raw,if=virtio,cache=none".format( + image_storage_handler.qemu_path_string(vm_uuid) + ) command += " -device virtio-rng-pci -vnc unix:{}".format(vnc_sock_filename) command += " -m {} -smp cores={},threads={}".format( vm_memory, vm_cpus, threads_per_core @@ -131,7 +124,7 @@ def get_start_command_args( command += " -name {}".format(vm_uuid) if migration: - command += " -incoming tcp:0:{}".format(migration_port) + command += " -incoming tcp:[::]:{}".format(migration_port) tap = None for network_and_mac in vm_networks: @@ -154,7 +147,7 @@ def get_start_command_args( return command.split(" ") -def create_vm_object(vm_entry, migration=False, migration_port=4444): +def create_vm_object(vm_entry, migration=False, migration_port=None): # NOTE: If migration suddenly stop working, having different # VNC unix filename on source and destination host can # be a possible cause of it. @@ -198,61 +191,19 @@ def need_running_vm(func): def create(vm_entry: VMEntry): - vm_hdd = int(bitmath.parse_string(vm_entry.specs["os-ssd"]).to_MB()) - - if env_vars.get('WITHOUT_CEPH'): - _command_to_create = [ - "cp", - os.path.join(env_vars.get('IMAGE_DIR'), vm_entry.image_uuid), - os.path.join(env_vars.get('VM_DIR'), vm_entry.uuid), - ] - - _command_to_extend = [ - "qemu-img", - "resize", - "-f", "raw", - os.path.join(env_vars.get('VM_DIR'), vm_entry.uuid), - "{}M".format(vm_hdd), - ] + if image_storage_handler.is_vm_image_exists(vm_entry.uuid): + # File Already exists. No Problem Continue + logger.debug("Image for vm %s exists", vm_entry.uuid) else: - _command_to_create = [ - "rbd", - "clone", - "images/{}@protected".format(vm_entry.image_uuid), - "uservms/{}".format(vm_entry.uuid), - ] - - _command_to_extend = [ - "rbd", - "resize", - "uservms/{}".format(vm_entry.uuid), - "--size", - vm_hdd, - ] - - try: - sp.check_output(_command_to_create) - except sp.CalledProcessError as e: - if e.returncode == errno.EEXIST: - logger.debug("Image for vm %s exists", vm_entry.uuid) - # File Already exists. No Problem Continue - return - - # This exception catches all other exceptions - # i.e FileNotFound (BaseImage), pool Does Not Exists etc. 
- logger.exception(e) - - vm_entry.status = "ERROR" - else: - try: - sp.check_output(_command_to_extend) - except Exception as e: - logger.exception(e) - else: - logger.info("New VM Created") + vm_hdd = int(bitmath.parse_string_unsafe(vm_entry.specs["os-ssd"]).to_MB()) + if image_storage_handler.make_vm_image(src=vm_entry.image_uuid, dest=vm_entry.uuid): + if not image_storage_handler.resize_vm_image(path=vm_entry.uuid, size=vm_hdd): + vm_entry.status = "ERROR" + else: + logger.info("New VM Created") -def start(vm_entry: VMEntry): +def start(vm_entry: VMEntry, destination_host_key=None, migration_port=None): _vm = get_vm(running_vms, vm_entry.key) # VM already running. No need to proceed further. @@ -260,8 +211,12 @@ def start(vm_entry: VMEntry): logger.info("VM %s already running", vm_entry.uuid) return else: - create(vm_entry) - launch_vm(vm_entry) + if destination_host_key: + launch_vm(vm_entry, migration=True, migration_port=migration_port, + destination_host_key=destination_host_key) + else: + create(vm_entry) + launch_vm(vm_entry) @need_running_vm @@ -278,18 +233,9 @@ def stop(vm_entry): def delete(vm_entry): logger.info("Deleting VM | %s", vm_entry) stop(vm_entry) - path_without_protocol = vm_entry.path[vm_entry.path.find(":") + 1:] - if env_vars.get('WITHOUT_CEPH'): - vm_deletion_command = ["rm", os.path.join(env_vars.get('VM_DIR'), vm_entry.uuid)] - else: - vm_deletion_command = ["rbd", "rm", path_without_protocol] - - try: - sp.check_output(vm_deletion_command) - except Exception as e: - logger.exception(e) - else: + r_status = image_storage_handler.delete_vm_image(vm_entry.uuid) + if r_status: etcd_client.client.delete(vm_entry.key) @@ -301,15 +247,16 @@ def transfer(request_event): _host, _port = request_event.parameters["host"], request_event.parameters["port"] _uuid = request_event.uuid _destination = request_event.destination_host_key - vm = get_vm(running_vms, join(env_vars.get('VM_PREFIX'), _uuid)) + vm = get_vm(running_vms, join_path(env_vars.get('VM_PREFIX'), _uuid)) if vm: tunnel = sshtunnel.SSHTunnelForwarder( - (_host, 22), + _host, ssh_username=env_vars.get("ssh_username"), ssh_pkey=env_vars.get("ssh_pkey"), - ssh_private_key_password=env_vars.get("ssh_private_key_password"), remote_bind_address=("127.0.0.1", _port), + ssh_proxy_enabled=True, + ssh_proxy=(_host, 22) ) try: tunnel.start() @@ -317,7 +264,7 @@ def transfer(request_event): logger.exception("Couldn't establish connection to (%s, 22)", _host) else: vm.handle.command( - "migrate", uri="tcp:{}:{}".format(_host, tunnel.local_bind_port) + "migrate", uri="tcp:0.0.0.0:{}".format(tunnel.local_bind_port) ) status = vm.handle.command("query-migrate")["status"] @@ -340,38 +287,22 @@ def transfer(request_event): tunnel.close() -def init_migration(vm_entry, destination_host_key): - # This function would run on destination host i.e host on which the vm - # would be transferred after migration. - # This host would be responsible for starting VM that would receive - # state of VM running on source host. - - _vm = get_vm(running_vms, vm_entry.key) - - if _vm: - # VM already running. No need to proceed further. 
- logger.info("%s Already running", _vm.key) - return - - launch_vm(vm_entry, migration=True, migration_port=4444, - destination_host_key=destination_host_key) - - def launch_vm(vm_entry, migration=False, migration_port=None, destination_host_key=None): logger.info("Starting %s", vm_entry.key) vm = create_vm_object(vm_entry, migration=migration, migration_port=migration_port) try: vm.handle.launch() - except Exception as e: - logger.exception(e) + except Exception: + logger.exception("Error occurred while starting VM") + vm.handle.shutdown() if migration: # We don't care whether MachineError or any other error occurred - vm.handle.shutdown() + pass else: # Error during typical launch of a vm - vm_entry.add_log("Error Occurred while starting VM") + vm.handle.shutdown() vm_entry.declare_killed() vm_pool.put(vm_entry) else: @@ -383,7 +314,7 @@ def launch_vm(vm_entry, migration=False, migration_port=None, destination_host_k r = RequestEntry.from_scratch( type=RequestType.TransferVM, hostname=vm_entry.hostname, - parameters={"host": get_ipv4_address(), "port": 4444}, + parameters={"host": get_ipv6_address(), "port": migration_port}, uuid=vm_entry.uuid, destination_host_key=destination_host_key, request_prefix=env_vars.get("REQUEST_PREFIX") diff --git a/imagescanner/main.py index 97da589..4b41642 100755 --- a/imagescanner/main.py +++ b/imagescanner/main.py @@ -1,9 +1,9 @@ import json import os import subprocess -import sys -from config import etcd_client, env_vars +from os.path import join as join_path +from config import etcd_client, env_vars, image_storage_handler from imagescanner import logger @@ -20,20 +20,6 @@ def qemu_img_type(path): def main(): - # If you are using env_vars.get('WITHOUT_CEPH') FLAG in .env - # then please make sure that env_vars.get('IMAGE_DIR') directory - # exists otherwise this script would fail - if env_vars.get('WITHOUT_CEPH') and not os.path.isdir(env_vars.get('IMAGE_DIR')): - print("You have set env_vars.get('WITHOUT_CEPH') to True. So," - "the {} must exists. 
But, it don't".format(env_vars.get('IMAGE_DIR'))) - sys.exit(1) - - try: - subprocess.check_output(['which', 'qemu-img']) - except Exception: - print("qemu-img missing") - sys.exit(1) - # We want to get images entries that requests images to be created images = etcd_client.get_prefix(env_vars.get('IMAGE_PREFIX'), value_in_json=True) images_to_be_created = list(filter(lambda im: im.value['status'] == 'TO_BE_CREATED', images)) @@ -44,7 +30,7 @@ def main(): image_owner = image.value['owner'] image_filename = image.value['filename'] image_store_name = image.value['store_name'] - image_full_path = os.path.join(env_vars.get('BASE_DIR'), image_owner, image_filename) + image_full_path = join_path(env_vars.get('BASE_DIR'), image_owner, image_filename) image_stores = etcd_client.get_prefix(env_vars.get('IMAGE_STORE_PREFIX'), value_in_json=True) user_image_store = next(filter( @@ -58,43 +44,25 @@ def main(): logger.exception(e) else: # At least our basic data is available - qemu_img_convert_command = ["qemu-img", "convert", "-f", "qcow2", "-O", "raw", image_full_path, "image.raw"] - if env_vars.get('WITHOUT_CEPH'): - image_import_command = ["mv", "image.raw", os.path.join(env_vars.get('IMAGE_DIR'), image_uuid)] - snapshot_creation_command = ["true"] - snapshot_protect_command = ["true"] - else: - image_import_command = ["rbd", "import", "image.raw", - "{}/{}".format(image_store_pool, image_uuid)] - snapshot_creation_command = ["rbd", "snap", "create", - "{}/{}@protected".format(image_store_pool, image_uuid)] - snapshot_protect_command = ["rbd", "snap", "protect", - "{}/{}@protected".format(image_store_pool, image_uuid)] - - # First check whether the image is qcow2 - if qemu_img_type(image_full_path) == "qcow2": try: # Convert .qcow2 to .raw subprocess.check_output(qemu_img_convert_command) - - # Import image either to ceph/filesystem - subprocess.check_output(image_import_command) - - # Create and Protect Snapshot - subprocess.check_output(snapshot_creation_command) - subprocess.check_output(snapshot_protect_command) - except Exception as e: logger.exception(e) - else: - # Everything is successfully done - image.value["status"] = "CREATED" - etcd_client.put(image.key, json.dumps(image.value)) + # Import and Protect + r_status = image_storage_handler.import_image(src="image.raw", + dest=image_uuid, + protect=True) + if r_status: + # Everything is successfully done + image.value["status"] = "CREATED" + etcd_client.put(image.key, json.dumps(image.value)) + else: # The user provided image is either not found or of invalid format image.value["status"] = "INVALID_IMAGE" diff --git a/sanity_checks.py new file mode 100644 index 0000000..2c645a5 --- /dev/null +++ b/sanity_checks.py @@ -0,0 +1,33 @@ +import sys +import subprocess as sp + +from os.path import isdir +from config import env_vars + + +def check(): + ######################### + # ucloud-image-scanner # + ######################### + if env_vars.get('STORAGE_BACKEND') == 'filesystem' and not isdir(env_vars.get('IMAGE_DIR')): + print("You have set STORAGE_BACKEND to filesystem. So, " + "the {} directory must exist. But, it doesn't.".format(env_vars.get('IMAGE_DIR'))) + sys.exit(1) + + try: + sp.check_output(['which', 'qemu-img']) + except Exception: + print("qemu-img missing") + sys.exit(1) + + ############### + # ucloud-host # + ############### + + if env_vars.get('STORAGE_BACKEND') == 'filesystem' and not isdir(env_vars.get('VM_DIR')): + print("You have set STORAGE_BACKEND to filesystem. So, the vm directory mentioned" + " in the .env file must exist. But, it doesn't.") + sys.exit(1) +if __name__ == "__main__": + check() \ No newline at end of file diff --git a/scheduler/helper.py index 81b5869..79bfd70 100755 --- a/scheduler/helper.py +++ b/scheduler/helper.py @@ -23,16 +23,16 @@ def remaining_resources(host_specs, vms_specs): for component in _vms_specs: if isinstance(_vms_specs[component], str): - _vms_specs[component] = int(bitmath.parse_string(_vms_specs[component]).to_MB()) + _vms_specs[component] = int(bitmath.parse_string_unsafe(_vms_specs[component]).to_MB()) elif isinstance(_vms_specs[component], list): - _vms_specs[component] = map(lambda x: int(bitmath.parse_string(x).to_MB()), _vms_specs[component]) + _vms_specs[component] = map(lambda x: int(bitmath.parse_string_unsafe(x).to_MB()), _vms_specs[component]) _vms_specs[component] = reduce(lambda x, y: x + y, _vms_specs[component], 0) for component in _remaining: if isinstance(_remaining[component], str): - _remaining[component] = int(bitmath.parse_string(_remaining[component]).to_MB()) + _remaining[component] = int(bitmath.parse_string_unsafe(_remaining[component]).to_MB()) elif isinstance(_remaining[component], list): - _remaining[component] = map(lambda x: int(bitmath.parse_string(x).to_MB()), _remaining[component]) + _remaining[component] = map(lambda x: int(bitmath.parse_string_unsafe(x).to_MB()), _remaining[component]) _remaining[component] = reduce(lambda x, y: x + y, _remaining[component], 0) _remaining.subtract(_vms_specs) diff --git a/scheduler/main.py index 507ac44..1d8dc44 100755 --- a/scheduler/main.py +++ b/scheduler/main.py @@ -23,8 +23,6 @@ def main(): ]: for request_event in request_iterator: request_entry = RequestEntry(request_event) - logger.debug("%s, %s", request_entry.key, request_entry.value) - # Never Run time critical mechanism inside timeout # mechanism because timeout mechanism only comes # when no other event is happening. 
It means under @@ -33,10 +31,10 @@ # Detect hosts that are dead and set their status # to "DEAD", and their VMs' status to "KILLED" - logger.debug("TIMEOUT event occured") dead_hosts = dead_host_detection() - logger.debug("Dead hosts: %s", dead_hosts) - dead_host_mitigation(dead_hosts) + if dead_hosts: + logger.debug("Dead hosts: %s", dead_hosts) + dead_host_mitigation(dead_hosts) # If there are VMs that weren't assigned a host # because there wasn't a host available which @@ -52,6 +50,8 @@ request_pool.put(r) elif request_entry.type == RequestType.ScheduleVM: + logger.debug("%s, %s", request_entry.key, request_entry.value) + vm_entry = vm_pool.get(request_entry.uuid) if vm_entry is None: logger.info("Trying to act on {} but it is deleted".format(request_entry.uuid)) @@ -67,7 +67,7 @@ hosts=[host_pool.get(request_entry.destination)]) except NoSuitableHostFound: logger.info("Requested destination host doesn't have enough capacity" - "to hold %s", vm_entry.uuid) + " to hold %s" % vm_entry.uuid) else: r = RequestEntry.from_scratch(type=RequestType.InitVMMigration, uuid=request_entry.uuid, diff --git a/ucloud.py index 8774fa3..28979b3 100644 --- a/ucloud.py +++ b/ucloud.py @@ -3,6 +3,7 @@ import multiprocessing as mp import logging from os.path import join as join_path +from sanity_checks import check if __name__ == "__main__": arg_parser = argparse.ArgumentParser(prog='ucloud', @@ -21,30 +22,36 @@ format="%(name)s %(asctime)s: %(levelname)s - %(message)s", datefmt="%d-%b-%y %H:%M:%S", ) + try: + check() - if args.component == 'api': - from api.main import main + if args.component == 'api': + from api.main import main - main() - elif args.component == 'host': - from host.main import main + main() + elif args.component == 'host': + from host.main import main - hostname = args.component_args - mp.set_start_method('spawn') - main(*hostname) - elif args.component == 'scheduler': - from scheduler.main import main + hostname = args.component_args + mp.set_start_method('spawn') + main(*hostname) + elif args.component == 'scheduler': + from scheduler.main import main - main() - elif args.component == 'filescanner': - from filescanner.main import main + main() + elif args.component == 'filescanner': + from filescanner.main import main - main() - elif args.component == 'imagescanner': - from imagescanner.main import main + main() + elif args.component == 'imagescanner': + from imagescanner.main import main - main() - elif args.component == 'metadata': - from metadata.main import main + main() + elif args.component == 'metadata': + from metadata.main import main - main() + main() + + except Exception as e: + logging.exception(e) + print(e) \ No newline at end of file
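The switch from bitmath.parse_string to bitmath.parse_string_unsafe in
scheduler/helper.py and host/virtualmachine.py is what implements the
"MB, Mb, mB, mb are all megabytes" bullet of this commit. A minimal sketch of
the difference, assuming a bitmath release that provides parse_string_unsafe:

    import bitmath

    # parse_string() is strict about unit spelling and rejects inputs such
    # as "10mb"; parse_string_unsafe() ignores case and treats every prefix
    # as a byte-based unit, so all four spellings parse to the same value.
    for spec in ("10MB", "10mb", "10Mb", "10mB"):
        print(spec, "->", int(bitmath.parse_string_unsafe(spec).to_MB()))  # 10 each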