Remove pending VM handling mechanism from scheduler + fix issue where a VM's hostname was updated even on migration failure

This commit is contained in:
ahmadbilalkhalid 2019-12-30 20:05:12 +05:00
parent d2d6c6bf5c
commit d13a4bcc37
5 changed files with 40 additions and 94 deletions

View file

@ -24,7 +24,7 @@ sys.excepthook = exception_hook
if __name__ == '__main__':
# Setting up root logger
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.setLevel(logging.DEBUG)
syslog_handler = SysLogHandler(address='/dev/log')
syslog_handler.setLevel(logging.DEBUG)

View file

@ -567,7 +567,7 @@ def main():
settings["etcd"]["image_store_prefix"], value_in_json=True
)
)
if len(image_stores) == 0:
if not image_stores:
data = {
"is_public": True,
"type": "ceph",
@ -583,7 +583,7 @@ def main():
json.dumps(data),
)
app.run(host="::", debug=True)
app.run(host="::", debug=False)
if __name__ == "__main__":

View file

@ -28,10 +28,8 @@ def maintenance(host):
vmm = VMM()
running_vms = vmm.discover()
for vm_uuid in running_vms:
if (
vmm.is_running(vm_uuid)
and vmm.get_status(vm_uuid) == "running"
):
if vmm.is_running(vm_uuid) and vmm.get_status(vm_uuid) == "running":
logger.debug('VM {} is running on {}'.format(vm_uuid, host))
vm = shared.vm_pool.get(
join_path(settings["etcd"]["vm_prefix"], vm_uuid)
)
@ -43,32 +41,18 @@ def maintenance(host):
def main(hostname):
host_pool = shared.host_pool
host = next(
filter(lambda h: h.hostname == hostname, host_pool.hosts), None
)
assert host is not None, "No such host with name = {}".format(
hostname
)
host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
assert host is not None, "No such host with name = {}".format(hostname)
try:
heartbeat_updating_process = mp.Process(
target=update_heartbeat, args=(hostname,)
)
heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,))
heartbeat_updating_process.start()
except Exception as e:
raise Exception(
"ucloud-host heartbeat updating mechanism is not working"
) from e
raise Exception("ucloud-host heartbeat updating mechanism is not working") from e
for events_iterator in [
shared.etcd_client.get_prefix(
settings["etcd"]["request_prefix"], value_in_json=True
),
shared.etcd_client.watch_prefix(
settings["etcd"]["request_prefix"],
timeout=10,
value_in_json=True,
),
shared.etcd_client.get_prefix(settings["etcd"]["request_prefix"], value_in_json=True),
shared.etcd_client.watch_prefix(settings["etcd"]["request_prefix"], timeout=10, value_in_json=True)
]:
for request_event in events_iterator:
request_event = RequestEntry(request_event)
@ -76,52 +60,35 @@ def main(hostname):
if request_event.type == "TIMEOUT":
maintenance(host.key)
if request_event.hostname == host.key:
logger.debug("VM Request: %s", request_event)
shared.request_pool.client.client.delete(
request_event.key
)
elif request_event.hostname == host.key:
logger.debug("VM Request: %s on Host %s", request_event, host.hostname)
shared.request_pool.client.client.delete(request_event.key)
vm_entry = shared.etcd_client.get(
join_path(
settings["etcd"]["vm_prefix"],
request_event.uuid,
)
join_path(settings["etcd"]["vm_prefix"], request_event.uuid)
)
logger.debug("VM hostname: {}".format(vm_entry.value))
vm = virtualmachine.VM(vm_entry)
if request_event.type == RequestType.StartVM:
vm.start()
if vm_entry:
vm = virtualmachine.VM(vm_entry)
if request_event.type == RequestType.StartVM:
vm.start()
elif request_event.type == RequestType.StopVM:
vm.stop()
elif request_event.type == RequestType.StopVM:
vm.stop()
elif request_event.type == RequestType.DeleteVM:
vm.delete()
elif request_event.type == RequestType.DeleteVM:
vm.delete()
elif request_event.type == RequestType.InitVMMigration:
vm.start(destination_host_key=host.key)
elif (
request_event.type
== RequestType.InitVMMigration
):
vm.start(destination_host_key=host.key)
elif request_event.type == RequestType.TransferVM:
host = host_pool.get(
request_event.destination_host_key
elif request_event.type == RequestType.TransferVM:
destination_host = host_pool.get(request_event.destination_host_key)
if destination_host:
vm.migrate(
destination_host=destination_host.hostname,
destination_sock_path=request_event.destination_sock_path,
)
if host:
vm.migrate(
destination_host=host.hostname,
destination_sock_path=request_event.destination_sock_path,
)
else:
logger.error(
"Host %s not found!",
request_event.destination_host_key,
)
else:
logger.info("VM Entry missing")
else:
logger.error("Host %s not found!", request_event.destination_host_key)
if __name__ == "__main__":

View file

@ -38,13 +38,14 @@ class VM:
else:
self.uuid = vm_entry.key.split("/")[-1]
self.host_key = self.vm["hostname"]
logger.debug('VM Hostname {}'.format(self.host_key))
def get_qemu_args(self):
command = (
"-name {owner}_{name}"
" -drive file={file},format=raw,if=virtio,cache=none"
"-drive file={file},format=raw,if=virtio,cache=none"
" -device virtio-rng-pci"
" -m {memory} -smp cores={cores},threads={threads}"
" -name {owner}_{name}"
).format(
owner=self.vm["owner"],
name=self.vm["name"],
@ -67,11 +68,7 @@ class VM:
except Exception as err:
declare_stopped(self.vm)
self.vm["log"].append("Cannot Setup Network Properly")
logger.error(
"Cannot Setup Network Properly for vm %s",
self.uuid,
exc_info=err,
)
logger.error("Cannot Setup Network Properly for vm %s", self.uuid, exc_info=err)
else:
self.vmm.start(
uuid=self.uuid,
@ -81,6 +78,7 @@ class VM:
)
status = self.vmm.get_status(self.uuid)
logger.debug('VM {} status is {}'.format(self.uuid, status))
if status == "running":
self.vm["status"] = VMStatus.running
self.vm["vnc_socket"] = self.vmm.get_vnc(self.uuid)
@ -99,7 +97,7 @@ class VM:
else:
self.stop()
declare_stopped(self.vm)
logger.debug('VM {} has hostname {}'.format(self.uuid, self.vm['hostname']))
self.sync()
def stop(self):

View file

@ -17,8 +17,6 @@ from . import logger
def main():
pending_vms = []
for request_iterator in [
shared.etcd_client.get_prefix(
settings["etcd"]["request_prefix"], value_in_json=True
@ -44,24 +42,8 @@ def main():
logger.debug("Dead hosts: %s", dead_hosts)
dead_host_mitigation(dead_hosts)
# If there are VMs that weren't assigned a host
# because there wasn't a host available which
# meets requirement of that VM then we would
# create a new ScheduleVM request for that VM
# on our behalf.
while pending_vms:
pending_vm_entry = pending_vms.pop()
r = RequestEntry.from_scratch(
type="ScheduleVM",
uuid=pending_vm_entry.uuid,
hostname=pending_vm_entry.hostname,
request_prefix=settings["etcd"][
"request_prefix"
],
)
shared.request_pool.put(r)
elif request_entry.type == RequestType.ScheduleVM:
print(request_event.value)
logger.debug(
"%s, %s", request_entry.key, request_entry.value
)
@ -86,7 +68,6 @@ def main():
)
shared.vm_pool.put(vm_entry)
pending_vms.append(vm_entry)
logger.info("No Resource Left. Emailing admin....")