2019-07-18 10:56:46 +00:00
|
|
|
# TODO: Use Unix File Socket for VNC instead of TCP
|
|
|
|
|
|
|
|
# QEMU Manual
|
|
|
|
# https://qemu.weilnetz.de/doc/qemu-doc.html
|
2019-07-04 05:19:40 +00:00
|
|
|
|
|
|
|
# For QEMU Monitor Protocol Commands Information, See
|
|
|
|
# https://qemu.weilnetz.de/doc/qemu-doc.html#pcsys_005fmonitor
|
2019-07-03 13:02:21 +00:00
|
|
|
|
2019-07-04 05:19:40 +00:00
|
|
|
import argparse
|
2019-07-03 13:02:21 +00:00
|
|
|
import qmp
|
2019-07-11 08:31:46 +00:00
|
|
|
import logging
|
2019-07-18 10:56:46 +00:00
|
|
|
import subprocess
|
2019-07-27 09:05:35 +00:00
|
|
|
import threading
|
|
|
|
import time
|
2019-07-03 13:02:21 +00:00
|
|
|
|
|
|
|
from etcd3_wrapper import Etcd3Wrapper
|
2019-07-11 08:31:46 +00:00
|
|
|
from dataclasses import dataclass
|
|
|
|
from typing import Union
|
|
|
|
from functools import wraps
|
2019-07-27 09:05:35 +00:00
|
|
|
|
|
|
|
from ucloud_common.rbd import RBD
|
|
|
|
from ucloud_common.vm import VmPool, VMEntry, VMStatus
|
|
|
|
from ucloud_common.host import HostPool, HostEntry
|
2019-07-03 13:02:21 +00:00
|
|
|
|
2019-07-11 08:31:46 +00:00
|
|
|
# VM records (etcd key + QEMU handle) for the machines launched by this process.
running_vms = []

# VNC display numbers that have not been handed to a VM yet.
vnc_port_pool = list(range(100))

# Shared etcd client used for every read, write and watch in this module.
client = Etcd3Wrapper()

# Both pools are created in main(); they stay unset until then.
VM_POOL = None
HOST_POOL = None

# Append every record from DEBUG upwards to log.txt.
logging.basicConfig(
    filename="log.txt",
    filemode="a",
    level=logging.DEBUG,
    format="%(asctime)s: %(levelname)s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)
|
|
|
|
|
2019-07-27 09:05:35 +00:00
|
|
|
@dataclass
class VM:
    """Record tying the etcd key of a VM to the QEMU machine that runs it."""

    # etcd key of the VM entry this machine belongs to
    key: str
    # handle used to launch, query and shut down the QEMU process
    vm: qmp.QEMUMachine
|
2019-07-11 08:31:46 +00:00
|
|
|
|
2019-07-25 09:59:28 +00:00
|
|
|
|
2019-07-27 09:05:35 +00:00
|
|
|
def update_heartbeat(host: HostEntry):
    """Keep publishing the heartbeat of *host*; never returns.

    Each round asks *host* to refresh its heartbeat, stores the entry through
    ``HOST_POOL``, sleeps for ten seconds and then logs the heartbeat time.
    Because the loop is endless, this is meant to run in its own thread.
    """
    pause_seconds = 10
    while True:
        host.update_heartbeat()
        HOST_POOL.put(host)
        time.sleep(pause_seconds)

        logging.info(f"Updated last heartbeat time {host.last_heartbeat}")
|
2019-07-18 10:56:46 +00:00
|
|
|
|
|
|
|
|
2019-07-11 08:31:46 +00:00
|
|
|
def need_running_vm(func):
    """Decorator: call *func* only for a VM this process runs and can reach.

    The wrapped function takes one argument ``e`` (a VM entry with a ``key``
    attribute).  The wrapper looks the VM up in ``running_vms`` and sends it a
    ``query-status`` command.  *func* is invoked, and its result returned,
    only when both steps succeed; otherwise the failure is logged and
    ``None`` is returned.
    """

    @wraps(func)
    def wrapper(e):
        vm = get_vm(running_vms, e.key)
        if vm is None:
            logging.info(
                f"{func.__name__} failed because VM {e.key} is not running"
            )
            return None

        try:
            status = vm.vm.command("query-status")
        except OSError:
            # The monitor is unreachable: do not run func against it.
            # (Previously func was still called after logging the failure.)
            logging.exception(
                f"{func.__name__} failed - VM {e.key} - Unknown Error"
            )
            return None

        logging.debug(f"VM Status Check - {status}")
        return func(e)

    return wrapper
|
|
|
|
|
|
|
|
|
2019-07-27 09:05:35 +00:00
|
|
|
def create_vm(vm):
    """Clone the image of *vm* into the ``uservms`` pool and request its start.

    The image entry is looked up in etcd under ``/v1/image/<image_uuid>``.
    When it exists, ``rbd clone`` copies the ``@protected`` snapshot of the
    image to ``uservms/<vm.uuid>``.  Only after a successful clone is the
    entry's status set to ``VMStatus.requested_start`` and written back
    through ``VM_POOL``.

    :param vm: VM entry providing ``value``, ``image_uuid``, ``uuid`` and
        ``key``.
    """
    image = client.get(
        f"/v1/image/{vm.value['image_uuid']}", value_in_json=True
    )
    if not image:
        logging.info(
            f"Image {vm.value['image_uuid']} not found, can't create VM {vm.key}"
        )
        return

    logging.debug(image)

    logging.info("Creating New VM...")
    # Built as a list so that no argument is ever re-split on whitespace.
    clone_command = [
        "rbd",
        "clone",
        f"images/{vm.image_uuid}@protected",
        f"uservms/{vm.uuid}",
    ]
    try:
        # check_call raises on a non-zero exit status; plain call() only
        # returned it, so a failed clone used to be treated as a success.
        subprocess.check_call(clone_command)
    except (subprocess.CalledProcessError, OSError):
        logging.exception("Can't clone image")
        return

    vm.status = VMStatus.requested_start
    VM_POOL.put(vm)
|
2019-07-11 08:31:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
def start_vm(vm_path, e):
    """Launch a QEMU process for VM entry *e* from the image at *vm_path*.

    Steps, each of which may end the function early:

    1. List the ``uservms`` pool; give up when it cannot be listed.
    2. When the last ``/``-separated component of *vm_path* is not in that
       listing, delete the entry's key from etcd.
    3. When the VM is already in ``running_vms``, just record it as running.
    4. Take a VNC display from ``vnc_port_pool`` and launch QEMU.  On success
       the VM is added to ``running_vms`` and stored as running; on failure
       the entry is declared killed and the display goes back to the pool.

    :param vm_path: drive argument handed to QEMU, e.g. ``rbd:uservms/<uuid>``.
    :param e: VM entry; its ``key`` identifies the VM.
    """
    try:
        user_vms = RBD.ls("uservms")
    except Exception:
        # Best effort: any listing failure skips this start request, but the
        # cause is now logged and KeyboardInterrupt/SystemExit pass through.
        logging.exception("Can't access uservms pool")
        return

    if vm_path.split("/")[-1] not in user_vms:
        logging.info(f"Image file of vm {e.key} does not exists")
        logging.info(f"Deleting vm {e.key}")
        client.client.delete(e.key)
        return

    if get_vm(running_vms, e.key):
        logging.info(f"{e.key} already running")
        e.status = VMStatus.running
        VM_POOL.put(e)
        return

    # FIXME: There should be better vnc port allocation scheme
    if not vnc_port_pool:
        # pop(0) on an empty pool would raise IndexError into the event loop.
        logging.error(f"No free VNC display left, can't start {e.key}")
        e.declare_killed()
        VM_POOL.put(e)
        return
    vnc_display = vnc_port_pool.pop(0)

    vm = qmp.QEMUMachine(
        "/usr/bin/qemu-system-x86_64",
        test_dir="vm_socklog",
        args=[
            vm_path,
            "-boot",
            "c",  # First Boot Hard drive
            "-m",
            "1024",  # RAM limit
            "-vnc",
            f":{vnc_display}",  # Allow VNC
        ],
    )

    launched = False
    try:
        logging.info(f"Starting {e.key}")
        vm.launch()
        launched = vm.is_running()
        if launched:
            running_vms.append(VM(e.key, vm))
            e.status = VMStatus.running
            VM_POOL.put(e)
    except (qmp.QEMUMachineError, TypeError):
        logging.exception(f"Machine Error Occurred on {e.key}")
        e.declare_killed()
        VM_POOL.put(e)
    else:
        if launched:
            logging.info(f"Started Successfully {e.key}")
        else:
            e.declare_killed()
            VM_POOL.put(e)

    if not launched:
        # Hand the unused display back.  It goes to the end of the pool so
        # that a display which is busy for another reason is not retried
        # immediately by the next start request.
        vnc_port_pool.append(vnc_display)
|
|
|
|
|
|
|
|
|
|
|
|
@need_running_vm
def suspend_vm(e):
    """Pause the VM of entry *e* and store the suspended state on success.

    Sends ``stop`` to the machine and then checks ``query-status``.  Only a
    reported status of ``paused`` updates the entry and writes it through
    ``VM_POOL``; any other status is merely logged.
    """
    machine = get_vm(running_vms, e.key).vm
    machine.command("stop")

    reported = machine.command("query-status")["status"]
    if reported != "paused":
        logging.info(f"Suspending VM {e.key} failed")
        return

    e.status = VMStatus.suspended
    VM_POOL.put(e)
    logging.info(f"Successfully suspended VM {e.key}")
|
|
|
|
|
|
|
|
|
|
|
|
@need_running_vm
def resume_vm(e):
    """Continue the VM of entry *e* and store the running state on success.

    Sends ``cont`` to the machine and then checks ``query-status``.  Only a
    reported status of ``running`` updates the entry and writes it through
    ``VM_POOL``; any other status is merely logged.
    """
    machine = get_vm(running_vms, e.key).vm
    machine.command("cont")

    reported = machine.command("query-status")["status"]
    if reported != "running":
        logging.info(f"Resuming VM {e.key} failed")
        return

    e.status = VMStatus.running
    VM_POOL.put(e)
    logging.info(f"Successfully resumed VM {e.key}")
|
|
|
|
|
|
|
|
|
|
|
|
@need_running_vm
def shutdown_vm(e):
    """Shut the VM of entry *e* down and forget it once it has stopped.

    When the machine is still running after ``shutdown()`` nothing else
    happens.  Otherwise the entry is stored with ``VMStatus.stopped`` and the
    VM record is removed from ``running_vms``.
    """
    record = get_vm(running_vms, e.key)
    record.vm.shutdown()

    if record.vm.is_running():
        # Still alive: leave the stored state and the bookkeeping untouched.
        return

    logging.info(f"VM {e.key} shutdown successfully")
    e.status = VMStatus.stopped
    VM_POOL.put(e)
    running_vms.remove(record)
|
|
|
|
|
|
|
|
|
|
|
|
def delete_vm(e):
    """Shut the VM of entry *e* down, then delete its key from etcd."""
    # FIXME: Implementation obsoleted after Ceph integration
    logging.info(f"Deleting VM {e.key}")

    # shutdown_vm only logs when the VM is not running here, so the key is
    # deleted in either case.
    shutdown_vm(e)

    etcd = client.client
    etcd.delete(e.key)
|
2019-07-03 13:02:21 +00:00
|
|
|
|
|
|
|
|
2019-07-11 08:31:46 +00:00
|
|
|
def get_vm(vm_list: list, vm_key) -> Union[VM, None]:
    """Return the first item of *vm_list* whose ``key`` equals *vm_key*.

    ``None`` is returned when no item matches (including an empty list).
    """
    for candidate in vm_list:
        if candidate.key == vm_key:
            return candidate
    return None
|
|
|
|
|
|
|
|
|
2019-07-27 09:05:35 +00:00
|
|
|
def maintenence(host):
    """Declare killed every VM recorded as RUNNING on *host* that is not.

    The entries of *host* with status ``RUNNING`` are fetched through
    ``VM_POOL``.  An entry counts as dead when ``running_vms`` holds no
    record for its key, or when the recorded machine reports that it is not
    running.  Dead entries are declared killed, written back, and their
    record (if any) is dropped from ``running_vms``.

    :param host: host entry; its ``key`` selects the VMs to check.
    """
    _vms = VM_POOL.by_host(host.key)
    alleged_running_vms = VM_POOL.by_status("RUNNING", _vms)

    for vm in alleged_running_vms:
        _vm = get_vm(running_vms, vm.key)
        if _vm is not None and _vm.vm.is_running():
            continue

        logging.debug(f"{_vm} {vm.key}")
        logging.info(f"{vm.key} is not running but is said to be running")
        logging.info(f"Updating {vm.key} status to KILLED")
        vm.declare_killed()
        VM_POOL.put(vm)

        # running_vms stores VM records, not pool entries, and there may be
        # no record at all.  Removing the pool entry `vm`, as was done
        # before, raises ValueError whenever it is not found in the list.
        if _vm is not None:
            running_vms.remove(_vm)
|
2019-07-23 05:25:50 +00:00
|
|
|
|
|
|
|
|
2019-07-18 10:56:46 +00:00
|
|
|
def main():
    """Run the host agent: publish heartbeats and act on VM events.

    The single command-line argument is the etcd key of this host
    (e.g. ``/v1/host/1``).  After creating the host and VM pools, the
    function exits with status 1 when the host is unknown.  Otherwise it
    starts the heartbeat thread and then processes first the existing
    entries under ``/v1/vm/`` and afterwards the events of a watch on that
    prefix.  Events addressed to this host are dispatched on their status.
    """
    global HOST_POOL, VM_POOL

    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "hostname", help="Name of this host. e.g /v1/host/1"
    )
    args = argparser.parse_args()

    HOST_POOL = HostPool(client, "/v1/host")
    VM_POOL = VmPool(client, "/v1/vm")

    host = HOST_POOL.get(args.hostname)
    if not host:
        print("No Such Host")
        # Same exit status as before, without relying on the `exit` helper
        # that only exists when the `site` module is loaded.
        raise SystemExit(1)

    # Under heavy load the timeout event does not arrive predictably.  That
    # delays the heartbeat update, and the scheduler then believes this host
    # is dead although it is alive.  The heartbeat is therefore updated from
    # a separate thread.
    #
    # The thread is a daemon so that it dies with the event loop: a
    # non-daemon thread would keep the process alive and keep advertising a
    # healthy host after the loop below had crashed.
    heartbeat_updating_thread = threading.Thread(
        target=update_heartbeat, args=(host,), daemon=True
    )
    heartbeat_updating_thread.start()

    for events_iterator in [
        client.get_prefix("/v1/vm/", value_in_json=True),
        client.watch_prefix("/v1/vm/", timeout=10, value_in_json=True),
    ]:
        for e in events_iterator:
            e = VMEntry(e)

            if e.status == "TIMEOUT":
                logging.info("Timeout")
                maintenence(host)
                continue

            if hasattr(e, "migration_destination"):
                e_migration_destination = e.value["migration_destination"]
            else:
                e_migration_destination = ""

            # Act only when the event is directed toward this host, or when
            # this host is the destination of a REQUESTED_MIGRATION.
            if e.hostname == host.key or e_migration_destination == host.key:
                logging.debug(f"EVENT: {e}")

                if e.status == "SCHEDULED_DEPLOY":
                    create_vm(e)

                elif e.status == "REQUESTED_SUSPEND":
                    suspend_vm(e)

                elif e.status == "REQUESTED_RESUME":
                    resume_vm(e)

                elif e.status == "REQUESTED_START":
                    vm_path = f"rbd:uservms/{e.uuid}"
                    start_vm(vm_path, e)

                elif e.status == "REQUESTED_SHUTDOWN":
                    shutdown_vm(e)

                elif e.status == "DELETED":
                    delete_vm(e)

                # TODO: handle REQUESTED_MIGRATION (not implemented yet)

                logging.info(f"Running VMs {running_vms}")
|
2019-07-03 13:02:21 +00:00
|
|
|
|
|
|
|
|
2019-07-18 10:56:46 +00:00
|
|
|
# Start the agent only when the file is executed as a script, so that merely
# importing the module does not parse sys.argv or enter the event loop.
if __name__ == "__main__":
    main()
|