ucloud-vm/main.py

128 lines
4.7 KiB
Python
Raw Normal View History

2019-07-04 05:19:40 +00:00
import argparse
2019-07-27 09:05:35 +00:00
import threading
import time
2019-09-03 16:18:43 +00:00
import virtualmachine
2019-09-03 16:18:43 +00:00
from ucloud_common.host import HostEntry
from ucloud_common.request import RequestEntry, RequestType
2019-09-03 16:18:43 +00:00
from config import (vm_pool, host_pool, request_pool,
2019-09-13 11:26:26 +00:00
etcd_client, logging, running_vms,
REQUEST_PREFIX)
2019-07-11 08:31:46 +00:00
2019-07-27 09:05:35 +00:00
def update_heartbeat(host: HostEntry):
    """Keep publishing this host's heartbeat; never returns.

    Each cycle refreshes the heartbeat on ``host``, stores the entry via
    ``host_pool.put``, waits ten seconds and then logs the heartbeat time
    that was just published.

    Args:
        host: Entry of the host whose heartbeat is kept up to date.
    """
    pause_seconds = 10
    while True:
        host.update_heartbeat()
        host_pool.put(host)

        time.sleep(pause_seconds)
        logging.info("Updated last heartbeat time %s", host.last_heartbeat)
def maintenance(host):
    """Reconcile the VMs tracked on this host with their etcd entries.

    Two passes are made:

    1. Every VM in ``running_vms`` whose entry now names another host
       (and is not flagged ``in_migration``) is shut down locally and
       dropped from ``running_vms``.
    2. Every entry that is RUNNING on this host according to ``vm_pool``
       but has no running counterpart in ``running_vms`` is declared
       killed and stored back.

    Args:
        host: Entry of this host; only ``host.key`` is read.
    """
    # Pass 1: capture successful migrations. Suppose this host runs "vm1"
    # and the user asked to migrate it elsewhere. On success the
    # destination host sets the VM's hostname to itself, so a hostname
    # mismatch outside of an ongoing migration means the local "vm1" has
    # to be shut down.
    migrated_away = []
    for running_vm in running_vms:
        with vm_pool.get_put(running_vm.key) as vm_entry:
            if vm_entry.hostname != host.key and not vm_entry.in_migration:
                running_vm.handle.shutdown()
                vm_entry.add_log("VM on source host shutdown.")
                migrated_away.append(running_vm)

    # Forget the VMs that were just shut down; otherwise they would stay
    # in running_vms forever and be shut down again on every maintenance
    # run. Removal is deferred so the list is not mutated while iterated.
    for running_vm in migrated_away:
        running_vms.remove(running_vm)

    # Pass 2: check the VMs that are running according to etcd. This
    # captures a poweroff/shutdown initiated by the user inside the VM,
    # or a crash of the VM process.
    alleged_running_vms = vm_pool.by_status("RUNNING", vm_pool.by_host(host.key))
    for vm_entry in alleged_running_vms:
        _vm = virtualmachine.get_vm(running_vms, vm_entry.key)

        # Either we do not track the VM at all, or we track it but its
        # handle reports that it is no longer running.
        if not _vm or not _vm.handle.is_running():
            vm_entry.add_log(
                "{} is not running but is said to be running.\n"
                "So, shutting it down and declare it killed".format(vm_entry.key)
            )
            vm_entry.declare_killed()
            vm_pool.put(vm_entry)
            if _vm:
                running_vms.remove(_vm)
def main():
    """Run the host daemon for the host named on the command line.

    Looks the host up in ``host_pool``, starts the heartbeat thread and
    then consumes request events: first the ones already stored under
    ``REQUEST_PREFIX``, then the ones delivered by the watch. A "TIMEOUT"
    event triggers ``maintenance``; a request addressed to this host (or
    naming it as destination) is deleted and dispatched to the matching
    ``virtualmachine`` operation.

    Raises:
        SystemExit: with code 1 when the host is unknown, with code -1
            when the heartbeat thread cannot be started.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument("hostname", help="Name of this host. e.g /v1/host/1")
    args = argparser.parse_args()

    host = host_pool.get(args.hostname)
    if not host:
        print("No Such Host")
        # SystemExit instead of the site-provided exit(), which is meant
        # for the interactive shell and is not guaranteed to exist.
        raise SystemExit(1)

    logging.info("%s Session Started %s", '*' * 5, '*' * 5)

    # Under heavy load the timeout event does not arrive predictably
    # (intentionally, because customer requests get higher priority).
    # That would delay the heartbeat update and make the scheduler believe
    # the host is dead while it is alive, so the heartbeat is updated from
    # a separate thread.
    # The thread is a daemon so that it cannot outlive the event loop
    # below: a host that stopped handling requests must not keep
    # advertising itself as alive.
    heartbeat_updating_thread = threading.Thread(
        target=update_heartbeat, args=(host,), daemon=True
    )
    try:
        heartbeat_updating_thread.start()
    except Exception as e:
        print("No Need To Go Further. Our heartbeat updating mechanism is not working")
        logging.exception(e)
        raise SystemExit(-1)

    for events_iterator in [
        etcd_client.get_prefix(REQUEST_PREFIX, value_in_json=True),
        etcd_client.watch_prefix(REQUEST_PREFIX, timeout=10, value_in_json=True),
    ]:
        for request_event in events_iterator:
            request_event = RequestEntry(request_event)

            if request_event.type == "TIMEOUT":
                logging.info("Timeout Event")
                maintenance(host)
                continue

            # The event is directed toward this host OR this host is the
            # destination of an InitVMMigration.
            if (request_event.hostname == host.key
                    or request_event.destination == host.key):
                logging.debug("EVENT: %s", request_event)

                request_pool.client.client.delete(request_event.key)
                vm_entry = vm_pool.get(request_event.uuid)

                if request_event.type == RequestType.TransferVM:
                    # Works on the request itself, not on vm_entry.
                    virtualmachine.transfer(request_event)
                elif not vm_entry:
                    # NOTE(review): assumes vm_pool.get() yields a falsy
                    # value for an unknown key, the way host_pool.get() is
                    # treated above - confirm. Without this guard the
                    # handlers below would be called without a VM entry.
                    logging.info(
                        "VM entry %s is missing; request %s dropped",
                        request_event.uuid, request_event.key,
                    )
                elif request_event.type == RequestType.StartVM:
                    virtualmachine.start(vm_entry)
                elif request_event.type == RequestType.StopVM:
                    virtualmachine.stop(vm_entry)
                elif request_event.type == RequestType.DeleteVM:
                    virtualmachine.delete(vm_entry)
                elif request_event.type == RequestType.InitVMMigration:
                    virtualmachine.init_migration(vm_entry, host.key)

                logging.info("Running VMs %s", running_vms)
2019-07-03 13:02:21 +00:00
# Start the daemon only when this file is executed as a script, so that
# importing the module does not parse argv or start the event loop.
if __name__ == "__main__":
    main()