From d13a4bcc3778b5032f117857587f2b7857c56096 Mon Sep 17 00:00:00 2001 From: meow Date: Mon, 30 Dec 2019 20:05:12 +0500 Subject: [PATCH] Remove pending vm handling mechanism from scheduler + fixed issue that update VM's hostname even on migration failure --- scripts/ucloud | 2 +- ucloud/api/main.py | 4 +- ucloud/host/main.py | 93 +++++++++++------------------------ ucloud/host/virtualmachine.py | 14 +++--- ucloud/scheduler/main.py | 21 +------- 5 files changed, 40 insertions(+), 94 deletions(-) diff --git a/scripts/ucloud b/scripts/ucloud index 9d05118..05e47a5 100755 --- a/scripts/ucloud +++ b/scripts/ucloud @@ -24,7 +24,7 @@ sys.excepthook = exception_hook if __name__ == '__main__': # Setting up root logger logger = logging.getLogger() - logger.setLevel(logging.INFO) + logger.setLevel(logging.DEBUG) syslog_handler = SysLogHandler(address='/dev/log') syslog_handler.setLevel(logging.DEBUG) diff --git a/ucloud/api/main.py b/ucloud/api/main.py index d4cdbe9..85133df 100644 --- a/ucloud/api/main.py +++ b/ucloud/api/main.py @@ -567,7 +567,7 @@ def main(): settings["etcd"]["image_store_prefix"], value_in_json=True ) ) - if len(image_stores) == 0: + if not image_stores: data = { "is_public": True, "type": "ceph", @@ -583,7 +583,7 @@ def main(): json.dumps(data), ) - app.run(host="::", debug=True) + app.run(host="::", debug=False) if __name__ == "__main__": diff --git a/ucloud/host/main.py b/ucloud/host/main.py index 88dfb7c..ed734b5 100755 --- a/ucloud/host/main.py +++ b/ucloud/host/main.py @@ -28,10 +28,8 @@ def maintenance(host): vmm = VMM() running_vms = vmm.discover() for vm_uuid in running_vms: - if ( - vmm.is_running(vm_uuid) - and vmm.get_status(vm_uuid) == "running" - ): + if vmm.is_running(vm_uuid) and vmm.get_status(vm_uuid) == "running": + logger.debug('VM {} is running on {}'.format(vm_uuid, host)) vm = shared.vm_pool.get( join_path(settings["etcd"]["vm_prefix"], vm_uuid) ) @@ -43,32 +41,18 @@ def maintenance(host): def main(hostname): host_pool = shared.host_pool - host = next( - filter(lambda h: h.hostname == hostname, host_pool.hosts), None - ) - assert host is not None, "No such host with name = {}".format( - hostname - ) + host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None) + assert host is not None, "No such host with name = {}".format(hostname) try: - heartbeat_updating_process = mp.Process( - target=update_heartbeat, args=(hostname,) - ) + heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,)) heartbeat_updating_process.start() except Exception as e: - raise Exception( - "ucloud-host heartbeat updating mechanism is not working" - ) from e + raise Exception("ucloud-host heartbeat updating mechanism is not working") from e for events_iterator in [ - shared.etcd_client.get_prefix( - settings["etcd"]["request_prefix"], value_in_json=True - ), - shared.etcd_client.watch_prefix( - settings["etcd"]["request_prefix"], - timeout=10, - value_in_json=True, - ), + shared.etcd_client.get_prefix(settings["etcd"]["request_prefix"], value_in_json=True), + shared.etcd_client.watch_prefix(settings["etcd"]["request_prefix"], timeout=10, value_in_json=True) ]: for request_event in events_iterator: request_event = RequestEntry(request_event) @@ -76,52 +60,35 @@ def main(hostname): if request_event.type == "TIMEOUT": maintenance(host.key) - if request_event.hostname == host.key: - logger.debug("VM Request: %s", request_event) - - shared.request_pool.client.client.delete( - request_event.key - ) + elif request_event.hostname == host.key: + logger.debug("VM Request: %s on Host %s", request_event, host.hostname) + shared.request_pool.client.client.delete(request_event.key) vm_entry = shared.etcd_client.get( - join_path( - settings["etcd"]["vm_prefix"], - request_event.uuid, - ) + join_path(settings["etcd"]["vm_prefix"], request_event.uuid) ) + logger.debug("VM hostname: {}".format(vm_entry.value)) + vm = virtualmachine.VM(vm_entry) + if request_event.type == RequestType.StartVM: + vm.start() - if vm_entry: - vm = virtualmachine.VM(vm_entry) - if request_event.type == RequestType.StartVM: - vm.start() + elif request_event.type == RequestType.StopVM: + vm.stop() - elif request_event.type == RequestType.StopVM: - vm.stop() + elif request_event.type == RequestType.DeleteVM: + vm.delete() - elif request_event.type == RequestType.DeleteVM: - vm.delete() + elif request_event.type == RequestType.InitVMMigration: + vm.start(destination_host_key=host.key) - elif ( - request_event.type - == RequestType.InitVMMigration - ): - vm.start(destination_host_key=host.key) - - elif request_event.type == RequestType.TransferVM: - host = host_pool.get( - request_event.destination_host_key + elif request_event.type == RequestType.TransferVM: + destination_host = host_pool.get(request_event.destination_host_key) + if destination_host: + vm.migrate( + destination_host=destination_host.hostname, + destination_sock_path=request_event.destination_sock_path, ) - if host: - vm.migrate( - destination_host=host.hostname, - destination_sock_path=request_event.destination_sock_path, - ) - else: - logger.error( - "Host %s not found!", - request_event.destination_host_key, - ) - else: - logger.info("VM Entry missing") + else: + logger.error("Host %s not found!", request_event.destination_host_key) if __name__ == "__main__": diff --git a/ucloud/host/virtualmachine.py b/ucloud/host/virtualmachine.py index d795b3f..8f6c79e 100755 --- a/ucloud/host/virtualmachine.py +++ b/ucloud/host/virtualmachine.py @@ -38,13 +38,14 @@ class VM: else: self.uuid = vm_entry.key.split("/")[-1] self.host_key = self.vm["hostname"] + logger.debug('VM Hostname {}'.format(self.host_key)) def get_qemu_args(self): command = ( - "-name {owner}_{name}" - " -drive file={file},format=raw,if=virtio,cache=none" + "-drive file={file},format=raw,if=virtio,cache=none" " -device virtio-rng-pci" " -m {memory} -smp cores={cores},threads={threads}" + " -name {owner}_{name}" ).format( owner=self.vm["owner"], name=self.vm["name"], @@ -67,11 +68,7 @@ class VM: except Exception as err: declare_stopped(self.vm) self.vm["log"].append("Cannot Setup Network Properly") - logger.error( - "Cannot Setup Network Properly for vm %s", - self.uuid, - exc_info=err, - ) + logger.error("Cannot Setup Network Properly for vm %s", self.uuid, exc_info=err) else: self.vmm.start( uuid=self.uuid, @@ -81,6 +78,7 @@ class VM: ) status = self.vmm.get_status(self.uuid) + logger.debug('VM {} status is {}'.format(self.uuid, status)) if status == "running": self.vm["status"] = VMStatus.running self.vm["vnc_socket"] = self.vmm.get_vnc(self.uuid) @@ -99,7 +97,7 @@ class VM: else: self.stop() declare_stopped(self.vm) - + logger.debug('VM {} has hostname {}'.format(self.uuid, self.vm['hostname'])) self.sync() def stop(self): diff --git a/ucloud/scheduler/main.py b/ucloud/scheduler/main.py index 051b338..d64017a 100755 --- a/ucloud/scheduler/main.py +++ b/ucloud/scheduler/main.py @@ -17,8 +17,6 @@ from . import logger def main(): - pending_vms = [] - for request_iterator in [ shared.etcd_client.get_prefix( settings["etcd"]["request_prefix"], value_in_json=True @@ -44,24 +42,8 @@ def main(): logger.debug("Dead hosts: %s", dead_hosts) dead_host_mitigation(dead_hosts) - # If there are VMs that weren't assigned a host - # because there wasn't a host available which - # meets requirement of that VM then we would - # create a new ScheduleVM request for that VM - # on our behalf. - while pending_vms: - pending_vm_entry = pending_vms.pop() - r = RequestEntry.from_scratch( - type="ScheduleVM", - uuid=pending_vm_entry.uuid, - hostname=pending_vm_entry.hostname, - request_prefix=settings["etcd"][ - "request_prefix" - ], - ) - shared.request_pool.put(r) - elif request_entry.type == RequestType.ScheduleVM: + print(request_event.value) logger.debug( "%s, %s", request_entry.key, request_entry.value ) @@ -86,7 +68,6 @@ def main(): ) shared.vm_pool.put(vm_entry) - pending_vms.append(vm_entry) logger.info("No Resource Left. Emailing admin....")