Totally revamped ucloud-vm

This commit is contained in:
ahmadbilalkhalid 2019-07-11 13:31:46 +05:00
parent 5518164c15
commit ebbd04ad03
6 changed files with 3910 additions and 85 deletions

3
.gitignore vendored
View file

@ -3,3 +3,6 @@
__pycache__/
venv/
.env
log.txt
vm_socklog/

@ -1 +1 @@
Subproject commit cb2a416a17d6789e613ba3b9957917770f4211e1
Subproject commit 615a0709186e74ee5c9ae78f385fd0c4c4b3834d

3167
log.txt Normal file

File diff suppressed because it is too large Load diff

282
main.py
View file

@ -1,127 +1,247 @@
# TODO
# 1. Verify that commands successfully did what they are
# supposed to do by querying state of VM using QMP.
# 2. Implement Monitoring of VM.
# For QEMU Monitor Protocol Commands Information, See
# https://qemu.weilnetz.de/doc/qemu-doc.html#pcsys_005fmonitor
import json
import shutil
import os
import subprocess
import argparse
import qmp
import logging
import os
import shutil
from etcd3_wrapper import Etcd3Wrapper
from dataclasses import dataclass
from typing import Union
from functools import wraps
from decouple import config
def get_vm_start_cmd(owner_dir, vm_uuid, vm=False):
    """Build the shell command line that boots a VM with QEMU.

    The disk image is ``<owner_dir>/.vm/<vm_uuid>.raw`` and the QMP control
    socket is ``<owner_dir>/.vm/<vm_uuid>-sock``.

    :param owner_dir: directory that holds the owner's ``.vm`` folder.
    :param vm_uuid: identifier used for the image and socket file names.
    :param vm: when truthy, ``-display none`` is appended to the command.
    :return: the command as a single space-separated string.
    """
    vm_dir = f"{owner_dir}/.vm"
    pieces = [
        "qemu-system-x86_64",
        f"{vm_dir}/{vm_uuid}.raw",
        "-boot c -net nic -net user -m 256",
        f"-qmp unix:{vm_dir}/{vm_uuid}-sock,server,nowait",
        "-daemonize",
    ]
    if vm:
        pieces.append("-display none")
    return " ".join(pieces)
def get_qemu_mon(sock_file):
    """Connect a QMP monitor client to *sock_file*.

    :param sock_file: path of the VM's QMP unix socket.
    :return: the connected ``qmp.QEMUMonitorProtocol`` object, or ``None``
        when connecting raises ``FileNotFoundError``.
    """
    monitor = qmp.QEMUMonitorProtocol(sock_file)
    try:
        monitor.connect()
    except FileNotFoundError:
        return None
    else:
        return monitor
def main(hostname, is_vm):
running_vms = []
client = Etcd3Wrapper()
events = client.watch_prefix("/v1/vm/")
# events = client.get_prefix("/v1/vm/")
for e in events:
e.value = json.loads(e.value)
e_hostname = e.value["hostname"]
e_status = e.value["status"]
vm_uuid = e.key.split("/")[-1]
owner_dir = f"/var/www/{e.value['owner']}"
@dataclass
class VM:
    """Record tying an etcd VM key to the QEMU machine handle started for it."""

    # Key of the VM entry the machine was started for (``e.key`` of the event).
    key: str
    # Handle used to issue QMP commands to, and shut down, the QEMU process.
    vm: qmp.QEMUMachine
# If it is not for me then skip it
if e_hostname != hostname:
continue
print(e_status, e)
if e_status == "SCHEDULED_DEPLOY":
# Root logger setup: append DEBUG-and-above records to log.txt using a
# "<timestamp>: <LEVEL> - <message>" layout.
logging.basicConfig(
level=logging.DEBUG,
filename="log.txt",
filemode="a",
format="%(asctime)s: %(levelname)s - %(message)s",
datefmt="%d-%b-%y %H:%M:%S",
)
def need_running_vm(func):
    """Decorator: call ``func(e)`` only for a VM that is tracked and reachable.

    The wrapper looks up ``e.key`` in ``running_vms`` and probes the machine
    with the QMP ``query-status`` command before delegating.

    :param func: handler taking a single event ``e`` (with ``key``).
    :return: the wrapped handler. It returns ``func(e)`` on success and
        ``None`` when the VM is not tracked or the status probe raises
        ``OSError``.
    """

    @wraps(func)
    def wrapper(e):
        vm = get_vm(running_vms, e.key)
        if not vm:
            logging.info(f"{func.__name__} failed because VM {e.key} is not running")
            return None

        try:
            status = vm.vm.command("query-status")
        except OSError:
            # The monitor is unreachable; running the handler would only
            # repeat the failure with an uncaught exception, so stop here.
            logging.info(f"{func.__name__} failed - VM {e.key} - Unknown Error")
            return None

        logging.debug(f"VM Status Check - {status}")
        return func(e)

    return wrapper
def create_vm(owner_dir, vm_uuid, e):
image = client.get(
f"/v1/image/{e.value['image_uuid']}", value_in_json=True
)
if image:
logging.debug(image)
image_uuid = e.value["image_uuid"]
print(image)
print("Creating New VM...")
logging.info("Creating New VM...")
os.makedirs(f"{owner_dir}/.vm", exist_ok=True)
if not os.path.isfile(f"{owner_dir}/.vm/{vm_uuid}.raw"):
shutil.copy(
f"/var/vm/{image_uuid}.raw",
f"{owner_dir}/.vm/{vm_uuid}.raw",
f"/var/vm/{image_uuid}.raw", f"{owner_dir}/.vm/{vm_uuid}.raw"
)
e.value["status"] = "REQUESTED_START"
client.put(e.key, json.dumps(e.value))
elif e_status == "REQUESTED_SUSPEND":
m = get_qemu_mon(f"{owner_dir}/.vm/{vm_uuid}-sock")
if m:
print("Suspending")
m.command("stop")
m.close()
def start_vm(vm_path, e):
    """Launch the VM described by event *e* from the image at *vm_path*.

    The outcome is written back to the store under ``e.key``:

    * image file missing -> status ``DELETED``;
    * VM already tracked in ``running_vms`` -> status ``RUNNING``;
    * launch succeeded and the process is alive -> tracked, status ``RUNNING``;
    * process not alive after launch, or ``qmp.QEMUMachineError`` /
      ``TypeError`` raised -> status ``KILLED``.

    :param vm_path: path of the VM's disk image.
    :param e: event whose ``key`` names the VM and whose ``value`` dict
        carries its ``status``.
    """
    if not os.path.isfile(vm_path):
        logging.info(f"Image file of vm {e.key} does not exists")
        logging.info(f"Setting vm {e.key} status to DELETED")
        e.value["status"] = "DELETED"
        client.put(e.key, json.dumps(e.value))
        return

    if get_vm(running_vms, e.key):
        logging.info(f"{e.key} already running")
        e.value["status"] = "RUNNING"
        client.put(e.key, json.dumps(e.value))
        return

    # NOTE(review): the VNC display number is the current count of tracked
    # VMs; once a VM has been removed from running_vms this can repeat a
    # display that a still-running VM holds -- confirm and allocate properly.
    qemu_args = [
        vm_path,
        "-boot", "c",
        "-net", "nic",
        "-net", "user",
        "-m", "1024",
        "-vnc", f":{len(running_vms)}",
    ]
    machine = qmp.QEMUMachine(
        "/usr/bin/qemu-system-x86_64",
        test_dir="vm_socklog",
        args=qemu_args,
    )

    try:
        logging.info(f"Starting {e.key}")
        machine.launch()
        if not machine.is_running():
            e.value["status"] = "KILLED"
            client.put(e.key, e.value, value_in_json=True)
            return
        running_vms.append(VM(e.key, machine))
        e.value["status"] = "RUNNING"
        client.put(e.key, e.value, value_in_json=True)
    except (qmp.QEMUMachineError, TypeError):
        logging.info(f"Machine Error Occurred on {e.key}")
        e.value["status"] = "KILLED"
        client.put(e.key, e.value, value_in_json=True)
    else:
        logging.info(f"Started Successfully {e.key}")
@need_running_vm
def suspend_vm(e):
    """Pause the VM named by ``e.key`` and record the result.

    Sends the QMP ``stop`` command, then re-reads ``query-status``. When the
    reported status is ``paused`` the entry's status is set to ``SUSPENDED``
    and written back under ``e.key``; otherwise only a failure is logged.

    :param e: event whose ``key`` names the VM and whose ``value`` dict
        carries its ``status``.
    """
    vm = get_vm(running_vms, e.key)
    vm.vm.command("stop")
    if vm.vm.command("query-status")["status"] == "paused":
        e.value["status"] = "SUSPENDED"
        client.put(e.key, json.dumps(e.value))
        logging.info(f"Successfully suspended VM {e.key}")
    else:
        # Report through the log only: the VM is running here, it merely did
        # not reach the paused state, so "VM Not Running" on stdout was wrong.
        logging.info(f"Suspending VM {e.key} failed")
@need_running_vm
def resume_vm(e):
    """Resume the VM named by ``e.key`` and record the result.

    Sends the QMP ``cont`` command, then re-reads ``query-status``. When the
    reported status is ``running`` the entry's status is set to ``RUNNING``
    and written back under ``e.key``; otherwise only a failure is logged.

    :param e: event whose ``key`` names the VM and whose ``value`` dict
        carries its ``status``.
    """
    machine = get_vm(running_vms, e.key).vm
    machine.command("cont")
    state = machine.command("query-status")["status"]
    if state != "running":
        logging.info(f"Resuming VM {e.key} failed")
        return
    e.value["status"] = "RUNNING"
    client.put(e.key, json.dumps(e.value))
    logging.info(f"Successfully resumed VM {e.key}")
@need_running_vm
def shutdown_vm(e):
    """Shut down the VM named by ``e.key`` and stop tracking it.

    After calling the machine's ``shutdown()``, and only if the process is
    no longer running, the entry's status is set to ``STOPPED``, written
    back under ``e.key``, and the VM is removed from ``running_vms``.

    :param e: event whose ``key`` names the VM and whose ``value`` dict
        carries its ``status``.
    """
    tracked = get_vm(running_vms, e.key)
    tracked.vm.shutdown()
    if tracked.vm.is_running():
        return
    logging.info(f"VM {e.key} shutdown successfully")
    e.value["status"] = "STOPPED"
    client.put(e.key, json.dumps(e.value))
    running_vms.remove(tracked)
def delete_vm(e):
    """Shut down the VM named by ``e.key``, remove its image and its entry.

    The VM is first shut down via ``shutdown_vm``. If the key still exists
    in the store, the disk image ``<BASE_DIR>/<owner>/.vm/<uuid>.raw`` is
    removed when present and the key is deleted; otherwise a message is
    logged and nothing else happens.

    :param e: event whose ``key`` ends in the VM uuid and whose ``value``
        dict carries the ``owner``.
    """
    logging.info(f"Deleting VM {e.key}")
    shutdown_vm(e)

    vm = client.get(e.key, value_in_json=True)
    if not vm:
        logging.info(f"Cannot delete key {e.key} because it doesn't exists")
        return

    vm_id = e.key.split('/')[-1]
    vm_owner = e.value['owner']
    # The image is stored with a ".raw" suffix; without it the path never
    # exists and the disk image would be left behind.
    vm_path = f"{config('BASE_DIR')}/{vm_owner}/.vm/{vm_id}.raw"
    if os.path.exists(vm_path):
        os.remove(vm_path)
    client.client.delete(e.key)
    logging.info(f"VM {vm.key} deleted")
def get_vm(vm_list: list, vm_key) -> Union[VM, None]:
    """Return the first entry of *vm_list* whose ``key`` equals *vm_key*.

    :param vm_list: iterable of objects exposing a ``key`` attribute.
    :param vm_key: key to look for.
    :return: the matching entry, or ``None`` when there is none.
    """
    for candidate in vm_list:
        if candidate.key == vm_key:
            return candidate
    return None
def main(hostname):
events = client.watch_prefix("/v1/vm/", timeout=10)
# events = client.get_prefix("/v1/vm/")
for e in events:
try:
e.value = json.loads(e.value)
except json.JSONDecodeError:
logging.error(f"Invalid JSON {e.value}")
continue
e_status = e.value["status"]
if e_status == "TIMEOUT":
logging.info("Timeout")
_vms = filter(lambda v: v.value["hostname"] == hostname, client.get_prefix("/v1/vm", value_in_json=True))
alleged_running_vms = filter(lambda v: v.value["status"] == "RUNNING", _vms)
for vm in alleged_running_vms:
_vm = get_vm(running_vms, vm.key)
if (_vm and not _vm.vm.is_running()) or _vm is None:
logging.debug(f"{_vm} {vm.key}")
logging.info(f"{vm.key} is not running but is said to be running")
logging.info(f"Updating {vm.key} status to KILLED")
vm.value["status"] = "KILLED"
client.put(vm.key, json.dumps(vm.value))
continue
e_hostname = e.value["hostname"]
vm_uuid = e.key.split("/")[-1]
owner_dir = f"{config('BASE_DIR')}/{e.value['owner']}"
# If it is not for me then skip it
if e_hostname != hostname:
continue
logging.debug(f"EVENT: {e}")
if e_status == "SCHEDULED_DEPLOY":
create_vm(owner_dir, vm_uuid, e)
elif e_status == "REQUESTED_SUSPEND":
suspend_vm(e)
elif e_status == "REQUESTED_RESUME":
m = get_qemu_mon(f"{owner_dir}/.vm/{vm_uuid}-sock")
if m:
print("Resuming")
m.command("cont")
m.close()
e.value["status"] = "RESUMED"
client.put(e.key, json.dumps(e.value))
else:
print("VM Not Running")
resume_vm(e)
elif e_status == "REQUESTED_START":
m = get_qemu_mon(f"{owner_dir}/.vm/{vm_uuid}-sock")
if m:
m.close()
print("VM already running")
e.value["status"] = "RUNNING"
client.put(e.key, e.value, value_in_json=True)
else:
print("Starting VM")
subprocess.run(
get_vm_start_cmd(owner_dir, vm_uuid, is_vm).split(" ")
)
e.value["status"] = "RUNNING"
client.put(e.key, e.value, value_in_json=True)
else:
continue
vm_path = f"{owner_dir}/.vm/{vm_uuid}.raw"
start_vm(vm_path, e)
elif e_status == "REQUESTED_SHUTDOWN":
shutdown_vm(e)
elif e_status == "DELETED":
delete_vm(e)
logging.info(f"Running VMs {running_vms}")
argparser = argparse.ArgumentParser()
argparser.add_argument("hostname", help="Name of this host. e.g /v1/host/1")
argparser.add_argument("--vm", type=bool, default=False)
args = argparser.parse_args()
main(args.hostname, args.vm)
main(args.hostname)

535
qmp/__init__.py Normal file
View file

@ -0,0 +1,535 @@
# QEMU library
#
# Copyright (C) 2015-2016 Red Hat Inc.
# Copyright (C) 2012 IBM Corp.
#
# Authors:
# Fam Zheng <famz@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.
#
# Based on qmp.py.
#
import errno
import logging
import os
import subprocess
import re
import shutil
import socket
import tempfile
from . import qmp
# Module-level logger, named after this package.
LOG = logging.getLogger(__name__)
# Mapping host architecture to any additional architectures it can
# support which often includes its 32 bit cousin.
# Consulted by kvm_available() when the target differs from the host.
ADDITIONAL_ARCHES = {
"x86_64" : "i386",
"aarch64" : "armhf"
}
def kvm_available(target_arch=None):
    """Tell whether KVM can be used, optionally for a given target arch.

    :param target_arch: architecture name to run; when falsy only the
        ``/dev/kvm`` access check is performed.
    :return: ``False`` when *target_arch* is neither the host architecture
        (``os.uname()[4]``) nor the one listed for the host in
        ``ADDITIONAL_ARCHES``; otherwise whether ``/dev/kvm`` is both
        readable and writable.
    """
    host_arch = os.uname()[4]
    foreign_target = bool(target_arch) and target_arch != host_arch
    if foreign_target and ADDITIONAL_ARCHES.get(host_arch) != target_arch:
        return False
    return os.access("/dev/kvm", os.R_OK | os.W_OK)
class QEMUMachineError(Exception):
    """Raised when an operation on a QEMUMachine fails."""
class QEMUMachineAddDeviceError(QEMUMachineError):
    """Raised when a request to add a device cannot be fulfilled.

    Such failures stem from limitations, lack of information or conflicting
    requests on the QEMUMachine methods. This exception does not represent
    failures reported by the QEMU binary itself.
    """
class MonitorResponseError(qmp.QMPError):
    """Error carrying an erroneous QMP monitor reply.

    The exception message is ``reply["error"]["desc"]`` when those keys are
    present, otherwise the reply itself. The full reply is kept on the
    ``reply`` attribute.
    """

    def __init__(self, reply):
        try:
            message = reply["error"]["desc"]
        except KeyError:
            # No structured description available: fall back to the raw reply.
            message = reply
        super(MonitorResponseError, self).__init__(message)
        self.reply = reply
class QEMUMachine(object):
    """
    A QEMU VM

    Use this object as a context manager to ensure the QEMU process terminates::

        with VM(binary) as vm:
            ...
        # vm is guaranteed to be shut down here
    """

    def __init__(self, binary, args=None, wrapper=None, name=None,
                 test_dir="/var/tmp", monitor_address=None,
                 socket_scm_helper=None):
        '''
        Initialize a QEMUMachine

        @param binary: path to the qemu binary
        @param args: list of extra arguments
        @param wrapper: list of arguments used as prefix to qemu binary
        @param name: prefix for socket and log file names (default: qemu-PID)
        @param test_dir: where to create socket and log file
        @param monitor_address: address for QMP monitor
        @param socket_scm_helper: helper program, required for send_fd_scm()
        @note: Qemu process is not started until launch() is used.
        '''
        if args is None:
            args = []
        if wrapper is None:
            wrapper = []
        if name is None:
            name = "qemu-%d" % os.getpid()
        self._name = name
        self._monitor_address = monitor_address
        self._vm_monitor = None
        self._qemu_log_path = None
        self._qemu_log_file = None
        self._popen = None
        self._binary = binary
        self._args = list(args)     # Force copy args in case we modify them
        self._wrapper = wrapper
        self._events = []
        self._iolog = None
        self._socket_scm_helper = socket_scm_helper
        self._qmp = None
        self._qemu_full_args = None
        self._test_dir = test_dir
        self._temp_dir = None
        self._launched = False
        self._machine = None
        self._console_set = False
        self._console_device_type = None
        self._console_address = None
        self._console_socket = None

        # just in case logging wasn't configured by the main script:
        logging.basicConfig(level=logging.DEBUG)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.shutdown()
        # Never suppress an exception raised inside the ``with`` body.
        return False

    # This can be used to add an unused monitor instance.
    def add_monitor_null(self):
        """Append ``-monitor null`` to the extra QEMU arguments."""
        self._args.append('-monitor')
        self._args.append('null')

    def add_fd(self, fd, fdset, opaque, opts=''):
        """
        Pass a file descriptor to the VM

        Appends an ``-add-fd fd=..,set=..,opaque=..[,opts]`` argument and
        marks *fd* inheritable where the platform supports it.
        Returns self so calls can be chained.
        """
        options = ['fd=%d' % fd,
                   'set=%d' % fdset,
                   'opaque=%s' % opaque]
        if opts:
            options.append(opts)

        # This did not exist before 3.4, but since then it is
        # mandatory for our purpose
        if hasattr(os, 'set_inheritable'):
            os.set_inheritable(fd, True)

        self._args.append('-add-fd')
        self._args.append(','.join(options))
        return self

    # Exactly one of fd and file_path must be given.
    # (If it is file_path, the helper will open that file and pass its
    # own fd)
    def send_fd_scm(self, fd=None, file_path=None):
        """
        Run the socket_scm_helper to hand a file descriptor to the VM.

        Returns the helper's exit code. Raises QEMUMachineError when no
        helper path is configured or the helper does not exist.
        """
        # In iotest.py, the qmp should always use unix socket.
        assert self._qmp.is_scm_available()
        if self._socket_scm_helper is None:
            raise QEMUMachineError("No path to socket_scm_helper set")
        if not os.path.exists(self._socket_scm_helper):
            raise QEMUMachineError("%s does not exist" %
                                   self._socket_scm_helper)

        # This did not exist before 3.4, but since then it is
        # mandatory for our purpose
        if hasattr(os, 'set_inheritable'):
            os.set_inheritable(self._qmp.get_sock_fd(), True)
            if fd is not None:
                os.set_inheritable(fd, True)

        fd_param = ["%s" % self._socket_scm_helper,
                    "%d" % self._qmp.get_sock_fd()]

        if file_path is not None:
            assert fd is None
            fd_param.append(file_path)
        else:
            assert fd is not None
            fd_param.append(str(fd))

        # The context manager closes our /dev/null handle once the helper
        # has finished; the child keeps its own copy while it runs.
        with open(os.path.devnull, 'rb') as devnull:
            proc = subprocess.Popen(fd_param, stdin=devnull,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT, close_fds=False)
            output = proc.communicate()[0]
        if output:
            LOG.debug(output)

        return proc.returncode

    @staticmethod
    def _remove_if_exists(path):
        """
        Remove file object at path if it exists
        """
        try:
            os.remove(path)
        except OSError as exception:
            if exception.errno == errno.ENOENT:
                return
            raise

    def is_running(self):
        """Return True while a launched QEMU process has not exited."""
        return self._popen is not None and self._popen.poll() is None

    def exitcode(self):
        """Return the process exit code, or None if not launched/still running."""
        if self._popen is None:
            return None
        return self._popen.poll()

    def get_pid(self):
        """Return the PID of the running QEMU process, or None if not running."""
        if not self.is_running():
            return None
        return self._popen.pid

    def _load_io_log(self):
        """Read the QEMU log file (if one was created) into ``self._iolog``."""
        if self._qemu_log_path is not None:
            with open(self._qemu_log_path, "r") as iolog:
                self._iolog = iolog.read()

    def _base_args(self):
        """Build the arguments every launch needs (monitor, display, console)."""
        if isinstance(self._monitor_address, tuple):
            moncdev = "socket,id=mon,host=%s,port=%s" % (
                self._monitor_address[0],
                self._monitor_address[1])
        else:
            moncdev = 'socket,id=mon,path=%s' % self._vm_monitor
        args = ['-chardev', moncdev,
                '-mon', 'chardev=mon,mode=control',
                '-display', 'none', '-vga', 'none']
        if self._machine is not None:
            args.extend(['-machine', self._machine])
        if self._console_set:
            self._console_address = os.path.join(self._temp_dir,
                                                 self._name + "-console.sock")
            chardev = ('socket,id=console,path=%s,server,nowait' %
                       self._console_address)
            args.extend(['-chardev', chardev])
            if self._console_device_type is None:
                args.extend(['-serial', 'chardev:console'])
            else:
                device = '%s,chardev=console' % self._console_device_type
                args.extend(['-device', device])
        return args

    def _pre_launch(self):
        """Create the temp dir, log file and QMP server socket for a launch."""
        self._temp_dir = tempfile.mkdtemp(dir=self._test_dir)
        if self._monitor_address is not None:
            self._vm_monitor = self._monitor_address
        else:
            self._vm_monitor = os.path.join(self._temp_dir,
                                            self._name + "-monitor.sock")
        self._qemu_log_path = os.path.join(self._temp_dir, self._name + ".log")
        self._qemu_log_file = open(self._qemu_log_path, 'wb')

        self._qmp = qmp.QEMUMonitorProtocol(self._vm_monitor,
                                            server=True)

    def _post_launch(self):
        """Accept the QMP connection from the freshly started QEMU."""
        self._qmp.accept()

    def _post_shutdown(self):
        """Release the log file, console socket and temp dir, if present."""
        if self._qemu_log_file is not None:
            self._qemu_log_file.close()
            self._qemu_log_file = None

        self._qemu_log_path = None

        if self._console_socket is not None:
            self._console_socket.close()
            self._console_socket = None

        if self._temp_dir is not None:
            shutil.rmtree(self._temp_dir)
            self._temp_dir = None

    def launch(self):
        """
        Launch the VM and make sure we cleanup and expose the
        command line/output in case of exception

        Raises QEMUMachineError if the VM was already launched; any error
        raised while launching is re-raised after cleanup.
        """

        if self._launched:
            raise QEMUMachineError('VM already launched')

        self._iolog = None
        self._qemu_full_args = None
        try:
            self._launch()
            self._launched = True
        except BaseException:
            # Clean up for every kind of failure (interrupts included),
            # then let the original exception propagate.
            self.shutdown()

            LOG.debug('Error launching VM')
            if self._qemu_full_args:
                LOG.debug('Command: %r', ' '.join(self._qemu_full_args))
            if self._iolog:
                LOG.debug('Output: %r', self._iolog)
            raise

    def _launch(self):
        """
        Launch the VM and establish a QMP connection
        """
        self._pre_launch()
        self._qemu_full_args = (self._wrapper + [self._binary] +
                                self._base_args() + self._args)
        LOG.debug('VM launch command: %r', ' '.join(self._qemu_full_args))
        # The child gets its own copy of the /dev/null descriptor, so ours
        # can be closed as soon as the process has been spawned.
        with open(os.path.devnull, 'rb') as devnull:
            self._popen = subprocess.Popen(self._qemu_full_args,
                                           stdin=devnull,
                                           stdout=self._qemu_log_file,
                                           stderr=subprocess.STDOUT,
                                           shell=False,
                                           close_fds=False)
        self._post_launch()

    def wait(self):
        """
        Wait for the VM to power off
        """
        self._popen.wait()
        self._qmp.close()
        self._load_io_log()
        self._post_shutdown()

    def shutdown(self):
        """
        Terminate the VM and clean up
        """
        if self.is_running():
            try:
                self._qmp.cmd('quit')
                self._qmp.close()
            except Exception:
                # The polite QMP 'quit' failed; fall back to killing QEMU.
                self._popen.kill()
            self._popen.wait()

        self._load_io_log()
        self._post_shutdown()

        exitcode = self.exitcode()
        if exitcode is not None and exitcode < 0:
            # A negative exit code means QEMU was terminated by a signal.
            msg = 'qemu received signal %i: %s'
            if self._qemu_full_args:
                command = ' '.join(self._qemu_full_args)
            else:
                command = ''
            LOG.warning(msg, -exitcode, command)

        self._launched = False

    def qmp(self, cmd, conv_keys=True, **args):
        """
        Invoke a QMP command and return the response dict

        With conv_keys true, underscores in keyword names are turned into
        dashes before the arguments are sent.
        """
        qmp_args = dict()
        for key, value in args.items():
            if conv_keys:
                qmp_args[key.replace('_', '-')] = value
            else:
                qmp_args[key] = value

        return self._qmp.cmd(cmd, args=qmp_args)

    def command(self, cmd, conv_keys=True, **args):
        """
        Invoke a QMP command.
        On success return the response dict.
        On failure raise an exception.
        """
        reply = self.qmp(cmd, conv_keys, **args)
        if reply is None:
            raise qmp.QMPError("Monitor is closed")
        if "error" in reply:
            raise MonitorResponseError(reply)
        return reply["return"]

    def get_qmp_event(self, wait=False):
        """
        Poll for one queued QMP events and return it
        """
        if len(self._events) > 0:
            return self._events.pop(0)
        return self._qmp.pull_event(wait=wait)

    def get_qmp_events(self, wait=False):
        """
        Poll for queued QMP events and return a list of dicts
        """
        events = self._qmp.get_events(wait=wait)
        events.extend(self._events)
        del self._events[:]
        self._qmp.clear_events()
        return events

    @staticmethod
    def event_match(event, match=None):
        """
        Check if an event matches optional match criteria.

        The match criteria takes the form of a matching subdict. The event is
        checked to be a superset of the subdict, recursively, with matching
        values whenever the subdict values are not None.

        This has a limitation that you cannot explicitly check for None values.

        Examples, with the subdict queries on the left:
         - None matches any object.
         - {"foo": None} matches {"foo": {"bar": 1}}
         - {"foo": None} matches {"foo": 5}
         - {"foo": {"abc": None}} does not match {"foo": {"bar": 1}}
         - {"foo": {"rab": 2}} matches {"foo": {"bar": 1, "rab": 2}}
        """
        if match is None:
            return True

        try:
            for key in match:
                if key in event:
                    if not QEMUMachine.event_match(event[key], match[key]):
                        return False
                else:
                    return False
            return True
        except TypeError:
            # either match or event wasn't iterable (not a dict)
            return match == event

    def event_wait(self, name, timeout=60.0, match=None):
        """
        event_wait waits for and returns a named event from QMP with a timeout.

        name: The event to wait for.
        timeout: QEMUMonitorProtocol.pull_event timeout parameter.
        match: Optional match criteria. See event_match for details.
        """
        return self.events_wait([(name, match)], timeout)

    def events_wait(self, events, timeout=60.0):
        """
        events_wait waits for and returns a named event from QMP with a timeout.

        events: a sequence of (name, match_criteria) tuples.
                The match criteria are optional and may be None.
                See event_match for details.
        timeout: QEMUMonitorProtocol.pull_event timeout parameter.
        """
        def _match(event):
            for name, match in events:
                if (event['event'] == name and
                        self.event_match(event, match)):
                    return True
            return False

        # Search cached events
        for event in self._events:
            if _match(event):
                self._events.remove(event)
                return event

        # Poll for new events; non-matching ones are cached for later calls.
        while True:
            event = self._qmp.pull_event(wait=timeout)
            if _match(event):
                return event
            self._events.append(event)

    def get_log(self):
        """
        After self.shutdown or failed qemu execution, this returns the output
        of the qemu process.
        """
        return self._iolog

    def add_args(self, *args):
        """
        Adds to the list of extra arguments to be given to the QEMU binary
        """
        self._args.extend(args)

    def set_machine(self, machine_type):
        """
        Sets the machine type

        If set, the machine type will be added to the base arguments
        of the resulting QEMU command line.
        """
        self._machine = machine_type

    def set_console(self, device_type=None):
        """
        Sets the device type for a console device

        If set, the console device and a backing character device will
        be added to the base arguments of the resulting QEMU command
        line.

        This is a convenience method that will either use the provided
        device type, or default to a "-serial chardev:console" command
        line argument.

        The actual setting of command line arguments will be done at
        machine launch time, as it depends on the temporary directory
        to be created.

        @param device_type: the device type, such as "isa-serial".  If
                            None is given (the default value) a "-serial
                            chardev:console" command line argument will
                            be used instead, resorting to the machine's
                            default device type.
        """
        self._console_set = True
        self._console_device_type = device_type

    @property
    def console_socket(self):
        """
        Returns a socket connected to the console
        """
        if self._console_socket is None:
            self._console_socket = socket.socket(socket.AF_UNIX,
                                                 socket.SOCK_STREAM)
            self._console_socket.connect(self._console_address)
        return self._console_socket

View file