Refactoring, VMM added, uncloud-host mostly new, migration is better now

This commit is contained in:
ahmadbilalkhalid 2019-12-28 15:39:11 +05:00
parent cd9d4cb78c
commit ba515f0b48
12 changed files with 423 additions and 364 deletions

View file

@ -40,7 +40,7 @@ setup(name='ucloud',
'colorama', 'colorama',
'sphinx-rtd-theme', 'sphinx-rtd-theme',
'etcd3 @ https://github.com/kragniz/python-etcd3/tarball/master#egg=etcd3', 'etcd3 @ https://github.com/kragniz/python-etcd3/tarball/master#egg=etcd3',
'werkzeug' 'werkzeug', 'marshmallow'
], ],
scripts=['scripts/ucloud'], scripts=['scripts/ucloud'],
data_files=[(os.path.expanduser('~/ucloud/'), ['conf/ucloud.conf'])], data_files=[(os.path.expanduser('~/ucloud/'), ['conf/ucloud.conf'])],

View file

@ -1,7 +1,6 @@
import json import json
import pynetbox import pynetbox
import logging import logging
import urllib3
from uuid import uuid4 from uuid import uuid4
from os.path import join as join_path from os.path import join as join_path
@ -78,6 +77,7 @@ class CreateVM(Resource):
"vnc_socket": "", "vnc_socket": "",
"network": list(zip(data["network"], macs, tap_ids)), "network": list(zip(data["network"], macs, tap_ids)),
"metadata": {"ssh-keys": []}, "metadata": {"ssh-keys": []},
"in_migration": False
} }
shared.etcd_client.put(vm_key, vm_entry, value_in_json=True) shared.etcd_client.put(vm_key, vm_entry, value_in_json=True)
@ -216,16 +216,13 @@ class VMMigration(Resource):
if validator.is_valid(): if validator.is_valid():
vm = shared.vm_pool.get(data["uuid"]) vm = shared.vm_pool.get(data["uuid"])
r = RequestEntry.from_scratch(type=RequestType.InitVMMigration,
uuid=vm.uuid,
hostname=join_path(
settings['etcd']['host_prefix'], validator.destination.value
),
request_prefix=settings['etcd']['request_prefix'])
r = RequestEntry.from_scratch(
type=RequestType.ScheduleVM,
uuid=vm.uuid,
destination=join_path(
settings['etcd']['host_prefix'], validator.destination.value
),
migration=True,
request_prefix=settings['etcd']['request_prefix']
)
shared.request_pool.put(r) shared.request_pool.put(r)
return {"message": "VM Migration Initialization Queued"}, 200 return {"message": "VM Migration Initialization Queued"}, 200
else: else:

View file

@ -30,7 +30,7 @@ def generate_mac(uaa=False, multicast=False, oui=None, separator=':', byte_fmt='
def create_dev(script, _id, dev, ip=None): def create_dev(script, _id, dev, ip=None):
command = [script, _id, dev] command = [script, str(_id), dev]
if ip: if ip:
command.append(ip) command.append(ip)
try: try:

View file

@ -19,6 +19,7 @@ class RequestType:
class RequestEntry(SpecificEtcdEntryBase): class RequestEntry(SpecificEtcdEntryBase):
def __init__(self, e): def __init__(self, e):
self.destination_host_key = None
self.type = None # type: str self.type = None # type: str
self.migration = None # type: bool self.migration = None # type: bool
self.destination = None # type: str self.destination = None # type: str

39
ucloud/common/schemas.py Normal file
View file

@ -0,0 +1,39 @@
import bitmath
from marshmallow import fields, Schema
class StorageUnit(fields.Field):
    """Marshmallow field that maps size strings to ``bitmath`` objects and back.

    Loading parses the incoming value with ``bitmath.parse_string_unsafe``;
    dumping renders the value with ``str()``.
    """

    def _serialize(self, value, attr, obj, **kwargs):
        """Return ``str(value)``, or ``None`` when there is no value.

        Passing ``None`` through avoids emitting the literal string ``'None'``,
        which could not be parsed back as a size.
        """
        if value is None:
            return None
        return str(value)

    def _deserialize(self, value, attr, data, **kwargs):
        """Parse ``value`` into a bitmath object.

        :raises marshmallow.ValidationError: if ``value`` is not a parseable size,
            so that schema users only have to handle one exception type.
        """
        # Imported locally: the module-level import only brings in
        # ``fields`` and ``Schema``.
        from marshmallow import ValidationError

        try:
            return bitmath.parse_string_unsafe(value)
        except ValueError as err:
            raise ValidationError('Invalid storage size: {!r}'.format(value)) from err
class SpecsSchema(Schema):
    """Schema of a VM's resource specification (nested under ``specs`` in VMSchema)."""

    cpu = fields.Int()
    # Sizes are handled by StorageUnit, i.e. parsed with bitmath on load and
    # rendered with str() on dump.
    ram = StorageUnit()
    # The key contains a hyphen, so it cannot be used as the Python attribute
    # name; both the serialized key (data_key) and the key in the loaded data
    # (attribute) are set to 'os-ssd'.
    os_ssd = StorageUnit(data_key='os-ssd', attribute='os-ssd')
    hdd = fields.List(StorageUnit())
class VMSchema(Schema):
    """Schema of a VM entry as stored (JSON-encoded) in etcd."""

    name = fields.Str()
    owner = fields.Str()
    owner_realm = fields.Str()
    specs = fields.Nested(SpecsSchema)
    status = fields.Str()
    log = fields.List(fields.Str())
    vnc_socket = fields.Str()
    image_uuid = fields.Str()
    hostname = fields.Str()
    metadata = fields.Dict()
    # Each network entry is a (network name, MAC address, tap id) triple;
    # this matches how the entry is built with zip(network, macs, tap_ids)
    # when a VM is created.
    network = fields.List(fields.Tuple((fields.Str(), fields.Str(), fields.Int())))
    in_migration = fields.Bool()
class NetworkSchema(Schema):
    """Schema of a network entry as stored in etcd."""

    # The fields are declared with a leading underscore but read from / written
    # to the plain 'id' and 'type' keys (presumably to avoid shadowing the
    # ``id`` and ``type`` builtins in the class body).
    _id = fields.Int(data_key='id', attribute='id')
    _type = fields.Str(data_key='type', attribute='type')
    ipv6 = fields.Str()

View file

@ -19,8 +19,8 @@ class ImageStorageHandler(ABC):
def import_image(self, image_src, image_dest, protect=False): def import_image(self, image_src, image_dest, protect=False):
"""Put an image at the destination """Put an image at the destination
:param src: An Image file :param image_src: An Image file
:param dest: A path where :param src: is to be put. :param image_dest: A path where :param src: is to be put.
:param protect: If protect is true then the dest is protect (readonly etc) :param protect: If protect is true then the dest is protect (readonly etc)
The obj must exist on filesystem. The obj must exist on filesystem.
""" """
@ -30,8 +30,8 @@ class ImageStorageHandler(ABC):
def make_vm_image(self, image_path, path): def make_vm_image(self, image_path, path):
"""Copy image from src to dest """Copy image from src to dest
:param src: A path :param image_path: A path
:param dest: A path :param path: A path
src and destination must be on same storage system i.e both on file system or both on CEPH etc. src and destination must be on same storage system i.e both on file system or both on CEPH etc.
""" """

View file

@ -12,6 +12,12 @@ class VMStatus:
error = "ERROR" # An error occurred that cannot be resolved automatically error = "ERROR" # An error occurred that cannot be resolved automatically
def declare_stopped(vm):
    """Reset the given VM mapping to the "stopped" state, in place.

    Clears the host assignment, drops the migration flag and sets the status
    to ``VMStatus.stopped``.

    :param vm: mutable mapping describing a VM (item assignment is used).
    """
    stopped_state = (
        ('hostname', ''),
        ('in_migration', False),
        ('status', VMStatus.stopped),
    )
    for field_name, field_value in stopped_state:
        vm[field_name] = field_value
class VMEntry(SpecificEtcdEntryBase): class VMEntry(SpecificEtcdEntryBase):
def __init__(self, e): def __init__(self, e):

View file

@ -1,17 +1,16 @@
import argparse import argparse
import multiprocessing as mp import multiprocessing as mp
import time import time
import sys
from ucloud.common.request import RequestEntry, RequestType from ucloud.common.request import RequestEntry, RequestType
from ucloud.common.host import HostPool
from ucloud.shared import shared from ucloud.shared import shared
from ucloud.settings import settings from ucloud.settings import settings
from ucloud.common.vm import VMStatus
from ucloud.vmm import VMM
from os.path import join as join_path
from . import virtualmachine, logger from . import virtualmachine, logger
vmm = virtualmachine.VMM()
def update_heartbeat(hostname): def update_heartbeat(hostname):
"""Update Last HeartBeat Time for :param hostname: in etcd""" """Update Last HeartBeat Time for :param hostname: in etcd"""
@ -25,6 +24,16 @@ def update_heartbeat(hostname):
time.sleep(10) time.sleep(10)
def maintenance():
    """Reconcile etcd with the VMs that are actually running on this host.

    Every VM discovered by the VMM whose QMP socket answers and whose QEMU
    status is ``'running'`` gets its etcd entry set to ``VMStatus.running``.
    VMs without an etcd entry are skipped with a warning instead of aborting
    the whole maintenance pass.
    """
    vmm = VMM()
    for vm_uuid in vmm.discover():
        if not (vmm.is_running(vm_uuid) and vmm.get_status(vm_uuid) == 'running'):
            continue

        vm = shared.vm_pool.get(join_path(settings['etcd']['vm_prefix'], vm_uuid))
        if not vm:
            # A socket file may outlive its etcd entry (e.g. the entry was
            # deleted); there is nothing to update in that case.
            logger.warning('Running VM %s has no entry in etcd. Skipping it.', vm_uuid)
            continue

        vm.status = VMStatus.running
        shared.vm_pool.put(vm)
def main(hostname): def main(hostname):
host_pool = shared.host_pool host_pool = shared.host_pool
host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None) host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
@ -34,8 +43,7 @@ def main(hostname):
heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,)) heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,))
heartbeat_updating_process.start() heartbeat_updating_process.start()
except Exception as e: except Exception as e:
logger.exception(e) raise e.__class__('ucloud-host heartbeat updating mechanism is not working') from e
sys.exit("No Need To Go Further. ucloud-host heartbeat updating mechanism is not working")
for events_iterator in [ for events_iterator in [
shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True), shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True),
@ -45,36 +53,37 @@ def main(hostname):
request_event = RequestEntry(request_event) request_event = RequestEntry(request_event)
if request_event.type == "TIMEOUT": if request_event.type == "TIMEOUT":
vmm.maintenance(host) maintenance()
continue
# If the event is directed toward me OR I am destination of a InitVMMigration if request_event.hostname == host.key:
if request_event.hostname == host.key or request_event.destination == host.key:
logger.debug("VM Request: %s", request_event) logger.debug("VM Request: %s", request_event)
shared.request_pool.client.client.delete(request_event.key) shared.request_pool.client.client.delete(request_event.key)
vm_entry = shared.vm_pool.get(request_event.uuid) vm_entry = shared.etcd_client.get(join_path(settings['etcd']['vm_prefix'], request_event.uuid))
if vm_entry: if vm_entry:
vm = virtualmachine.VM(vm_entry)
if request_event.type == RequestType.StartVM: if request_event.type == RequestType.StartVM:
vmm.start(vm_entry) vm.start()
elif request_event.type == RequestType.StopVM: elif request_event.type == RequestType.StopVM:
vmm.stop(vm_entry) vm.stop()
elif request_event.type == RequestType.DeleteVM: elif request_event.type == RequestType.DeleteVM:
vmm.delete(vm_entry) vm.delete()
elif request_event.type == RequestType.InitVMMigration: elif request_event.type == RequestType.InitVMMigration:
vmm.start(vm_entry, host.key) vm.start(destination_host_key=host.key)
elif request_event.type == RequestType.TransferVM: elif request_event.type == RequestType.TransferVM:
vmm.transfer(request_event) host = host_pool.get(request_event.destination_host_key)
if host:
vm.migrate(destination=host.hostname)
else:
logger.error('Host %s not found!', request_event.destination_host_key)
else: else:
logger.info("VM Entry missing") logger.info("VM Entry missing")
logger.info("Running VMs %s", vmm.running_vms)
if __name__ == "__main__": if __name__ == "__main__":
argparser = argparse.ArgumentParser() argparser = argparse.ArgumentParser()

View file

@ -6,344 +6,189 @@
import os import os
import subprocess as sp import subprocess as sp
import tempfile
import time
import ipaddress import ipaddress
from functools import wraps
from string import Template from string import Template
from typing import Union
from os.path import join as join_path from os.path import join as join_path
import bitmath
import sshtunnel
from ucloud.common.helpers import get_ipv6_address
from ucloud.common.request import RequestEntry, RequestType from ucloud.common.request import RequestEntry, RequestType
from ucloud.common.vm import VMEntry, VMStatus from ucloud.common.vm import VMStatus, declare_stopped
from ucloud.common.network import create_dev, delete_network_interface, find_free_port from ucloud.common.network import create_dev, delete_network_interface
from ucloud.common.schemas import VMSchema, NetworkSchema
from ucloud.host import logger from ucloud.host import logger
from ucloud.shared import shared from ucloud.shared import shared
from ucloud.settings import settings from ucloud.settings import settings
from ucloud.vmm import VMM
from . import qmp from marshmallow import ValidationError
def maintenance():
pass
class VM: class VM:
def __init__(self, key, handle, vnc_socket_file): def __init__(self, vm_entry):
self.key = key # type: str self.schema = VMSchema()
self.handle = handle # type: qmp.QEMUMachine self.vmm = VMM()
self.vnc_socket_file = vnc_socket_file # type: tempfile.NamedTemporaryFile self.key = vm_entry.key
def __repr__(self):
return "VM({})".format(self.key)
def capture_all_exception(func):
@wraps(func)
def wrapper(*args, **kwargs):
try: try:
func(*args, **kwargs) self.vm = self.schema.loads(vm_entry.value)
except Exception: except ValidationError:
logger.exception('Unhandled exception occur in %s. For more details see Syslog.', __name__) logger.exception('Couldn\'t validate VM Entry', vm_entry.value)
self.vm = None
return wrapper
class VMM:
def __init__(self):
self.etcd_client = shared.etcd_client
self.storage_handler = shared.storage_handler
self.running_vms = []
def get_start_command_args(self, vm_entry, vnc_sock_filename: str, migration=False, migration_port=None):
threads_per_core = 1
vm_memory = int(bitmath.parse_string_unsafe(vm_entry.specs['ram']).to_MB())
vm_cpus = int(vm_entry.specs['cpu'])
vm_uuid = vm_entry.uuid
vm_networks = vm_entry.network
command = '-name {}_{}'.format(vm_entry.owner, vm_entry.name)
command += ' -drive file={},format=raw,if=virtio,cache=none'.format(
self.storage_handler.qemu_path_string(vm_uuid)
)
command += ' -device virtio-rng-pci -vnc unix:{}'.format(vnc_sock_filename)
command += ' -m {} -smp cores={},threads={}'.format(
vm_memory, vm_cpus, threads_per_core
)
if migration:
command += ' -incoming tcp:[::]:{}'.format(migration_port)
for network_mac_and_tap in vm_networks:
network_name, mac, tap = network_mac_and_tap
_key = os.path.join(settings['etcd']['network_prefix'], vm_entry.owner, network_name)
network = self.etcd_client.get(_key, value_in_json=True)
network_type = network.value["type"]
network_id = str(network.value["id"])
network_ipv6 = network.value["ipv6"]
if network_type == "vxlan":
tap = create_vxlan_br_tap(_id=network_id,
_dev=settings['network']['vxlan_phy_dev'],
tap_id=tap,
ip=network_ipv6)
all_networks = self.etcd_client.get_prefix('/v1/network/', value_in_json=True)
if ipaddress.ip_network(network_ipv6).is_global:
update_radvd_conf(all_networks)
command += " -netdev tap,id=vmnet{net_id},ifname={tap},script=no,downscript=no" \
" -device virtio-net-pci,netdev=vmnet{net_id},mac={mac}" \
.format(tap=tap, net_id=network_id, mac=mac)
return command.split(" ")
def create_vm_object(self, vm_entry, migration=False, migration_port=None):
vnc_sock_file = tempfile.NamedTemporaryFile()
qemu_args = self.get_start_command_args(
vm_entry=vm_entry,
vnc_sock_filename=vnc_sock_file.name,
migration=migration,
migration_port=migration_port,
)
qemu_machine = qmp.QEMUMachine("/usr/bin/qemu-system-x86_64", args=qemu_args)
return VM(vm_entry.key, qemu_machine, vnc_sock_file)
@staticmethod
def get_vm(vm_list: list, vm_key) -> Union[VM, None]:
return next((vm for vm in vm_list if vm.key == vm_key), None)
@capture_all_exception
def create(self, vm_entry: VMEntry):
if self.storage_handler.is_vm_image_exists(vm_entry.uuid):
# File Already exists. No Problem Continue
logger.debug("Image for vm %s exists", vm_entry.uuid)
return None
else: else:
vm_hdd = int(bitmath.parse_string_unsafe(vm_entry.specs["os-ssd"]).to_MB()) self.uuid = vm_entry.key.split('/')[-1]
if self.storage_handler.make_vm_image(src=vm_entry.image_uuid, dest=vm_entry.uuid): self.host_key = self.vm['hostname']
if not self.storage_handler.resize_vm_image(path=vm_entry.uuid, size=vm_hdd):
vm_entry.status = VMStatus.error
else:
logger.info("New VM Created")
@capture_all_exception def get_qemu_args(self):
def start(self, vm_entry: VMEntry, destination_host_key=None): command = (
_vm = self.get_vm(self.running_vms, vm_entry.key) '-name {owner}_{name}'
' -drive file={file},format=raw,if=virtio,cache=none'
' -device virtio-rng-pci'
' -m {memory} -smp cores={cores},threads={threads}'
).format(owner=self.vm['owner'], name=self.vm['name'],
memory=int(self.vm['specs']['ram'].to_MB()), cores=self.vm['specs']['cpu'],
threads=1, file=shared.storage_handler.qemu_path_string(self.uuid))
# VM already running. No need to proceed further. return command.split(' ')
if _vm:
logger.info("VM %s already running" % vm_entry.uuid)
return
else:
logger.info("Trying to start %s" % vm_entry.uuid)
if destination_host_key:
migration_port = find_free_port()
self.launch_vm(vm_entry, migration=True, migration_port=migration_port,
destination_host_key=destination_host_key)
else:
self.create(vm_entry)
self.launch_vm(vm_entry)
@capture_all_exception def start(self, destination_host_key=None):
def stop(self, vm_entry): migration = False
vm = self.get_vm(self.running_vms, vm_entry.key) if destination_host_key:
vm.handle.shutdown() migration = True
if not vm.handle.is_running():
vm_entry.add_log("Shutdown successfully")
vm_entry.declare_stopped()
shared.vm_pool.put(vm_entry)
self.running_vms.remove(vm)
delete_vm_network(vm_entry)
@capture_all_exception self.create()
def delete(self, vm_entry):
logger.info("Deleting VM | %s", vm_entry)
self.stop(vm_entry)
if self.storage_handler.is_vm_image_exists(vm_entry.uuid):
r_status = self.storage_handler.delete_vm_image(vm_entry.uuid)
if r_status:
shared.etcd_client.client.delete(vm_entry.key)
else:
shared.etcd_client.client.delete(vm_entry.key)
@capture_all_exception
def transfer(self, request_event):
# This function would run on source host i.e host on which the vm
# is running initially. This host would be responsible for transferring
# vm state to destination host.
_host, _port = request_event.parameters["host"], request_event.parameters["port"]
_uuid = request_event.uuid
_destination = request_event.destination_host_key
vm = self.get_vm(self.running_vms, join_path(settings['etcd']['vm_prefix'], _uuid))
if vm:
tunnel = sshtunnel.SSHTunnelForwarder(
_host,
ssh_username=settings['ssh']['username'],
ssh_pkey=settings['ssh']['private_key_path'],
remote_bind_address=("127.0.0.1", _port),
ssh_proxy_enabled=True,
ssh_proxy=(_host, 22)
)
try:
tunnel.start()
except sshtunnel.BaseSSHTunnelForwarderError:
logger.exception("Couldn't establish connection to (%s, 22)", _host)
else:
vm.handle.command(
"migrate", uri="tcp:0.0.0.0:{}".format(tunnel.local_bind_port)
)
status = vm.handle.command("query-migrate")["status"]
while status not in ["failed", "completed"]:
time.sleep(2)
status = vm.handle.command("query-migrate")["status"]
with shared.vm_pool.get_put(request_event.uuid) as source_vm:
if status == "failed":
source_vm.add_log("Migration Failed")
elif status == "completed":
# If VM is successfully migrated then shutdown the VM
# on this host and update hostname to destination host key
source_vm.add_log("Successfully migrated")
source_vm.hostname = _destination
self.running_vms.remove(vm)
vm.handle.shutdown()
source_vm.in_migration = False # VM transfer finished
finally:
tunnel.close()
@capture_all_exception
def launch_vm(self, vm_entry, migration=False, migration_port=None, destination_host_key=None):
logger.info("Starting %s" % vm_entry.key)
vm = self.create_vm_object(vm_entry, migration=migration, migration_port=migration_port)
try: try:
vm.handle.launch() network_args = self.create_network_dev()
except Exception: except Exception as err:
logger.exception("Error Occured while starting VM") declare_stopped(self.vm)
vm.handle.shutdown() self.vm['log'].append('Cannot Setup Network Properly')
logger.error('Cannot Setup Network Properly for vm %s', self.uuid, exc_info=err)
if migration:
# We don't care whether MachineError or any other error occurred
pass
else:
# Error during typical launch of a vm
vm.handle.shutdown()
vm_entry.declare_killed()
shared.vm_pool.put(vm_entry)
else: else:
vm_entry.vnc_socket = vm.vnc_socket_file.name self.vmm.start(uuid=self.uuid, migration=migration,
self.running_vms.append(vm) *self.get_qemu_args(), *network_args)
if migration: status = self.vmm.get_status(self.uuid)
vm_entry.in_migration = True if status == 'running':
self.vm['status'] = VMStatus.running
self.vm['vnc_socket'] = self.vmm.get_vnc(self.uuid)
elif status == 'inmigrate':
r = RequestEntry.from_scratch( r = RequestEntry.from_scratch(
type=RequestType.TransferVM, type=RequestType.TransferVM, # Transfer VM
hostname=vm_entry.hostname, hostname=self.host_key, # Which VM should get this request. It is source host
parameters={"host": get_ipv6_address(), "port": migration_port}, uuid=self.uuid, # uuid of VM
uuid=vm_entry.uuid, destination_host_key=destination_host_key, # Where source host transfer VM
destination_host_key=destination_host_key,
request_prefix=settings['etcd']['request_prefix'] request_prefix=settings['etcd']['request_prefix']
) )
shared.request_pool.put(r) shared.request_pool.put(r)
else: else:
# Typical launching of a vm self.stop()
vm_entry.status = VMStatus.running declare_stopped(self.vm)
vm_entry.add_log("Started successfully")
shared.vm_pool.put(vm_entry) self.sync()
@capture_all_exception def stop(self):
def maintenance(self, host): self.vmm.stop(self.uuid)
# To capture vm running according to running_vms list self.delete_network_dev()
declare_stopped(self.vm)
self.sync()
# This is to capture successful migration of a VM. def migrate(self, destination):
# Suppose, this host is running "vm1" and user initiated self.vmm.transfer(src_uuid=self.uuid, dest_uuid=self.uuid, host=destination)
# request to migrate this "vm1" to some other host. On,
# successful migration the destination host would set
# the vm hostname to itself. Thus, we are checking
# whether this host vm is successfully migrated. If yes
# then we shutdown "vm1" on this host.
logger.debug("Starting Maintenance!!")
to_be_removed = []
for running_vm in self.running_vms:
with shared.vm_pool.get_put(running_vm.key) as vm_entry:
if vm_entry.hostname != host.key and not vm_entry.in_migration:
running_vm.handle.shutdown()
logger.info("VM migration not completed successfully.")
to_be_removed.append(running_vm)
for r in to_be_removed: def create_network_dev(self):
self.running_vms.remove(r) command = ''
for network_mac_and_tap in self.vm['network']:
network_name, mac, tap = network_mac_and_tap
# To check vm running according to etcd entries _key = os.path.join(settings['etcd']['network_prefix'], self.vm['owner'], network_name)
alleged_running_vms = shared.vm_pool.by_status("RUNNING", shared.vm_pool.by_host(host.key)) network = shared.etcd_client.get(_key, value_in_json=True)
network_schema = NetworkSchema()
try:
network = network_schema.load(network.value)
except ValidationError:
continue
for vm_entry in alleged_running_vms: if network['type'] == "vxlan":
_vm = self.get_vm(self.running_vms, vm_entry.key) tap = create_vxlan_br_tap(_id=network['id'],
# Whether, the allegedly running vm is in our _dev=settings['network']['vxlan_phy_dev'],
# running_vms list or not if it is said to be tap_id=tap,
# running on this host but it is not then we ip=network['ipv6'])
# need to shut it down
# This is to capture poweroff/shutdown of a VM all_networks = shared.etcd_client.get_prefix(settings['etcd']['network_prefix'],
# initiated by user inside VM. OR crash of VM by some value_in_json=True)
# user running process
if (_vm and not _vm.handle.is_running()) or not _vm: if ipaddress.ip_network(network['ipv6']).is_global:
logger.debug("_vm = %s, is_running() = %s" % (_vm, _vm.handle.is_running())) update_radvd_conf(all_networks)
vm_entry.add_log("""{} is not running but is said to be running.
So, shutting it down and declare it killed""".format(vm_entry.key)) command += '-netdev tap,id=vmnet{net_id},ifname={tap},script=no,downscript=no' \
vm_entry.declare_killed() ' -device virtio-net-pci,netdev=vmnet{net_id},mac={mac}' \
shared.vm_pool.put(vm_entry) .format(tap=tap, net_id=network['id'], mac=mac)
if _vm:
self.running_vms.remove(_vm) return command.split(' ')
def delete_network_dev(self):
try:
for network in self.vm['network']:
network_name = network[0]
_ = network[1] # tap_mac
tap_id = network[2]
delete_network_interface('tap{}'.format(tap_id))
owners_vms = shared.vm_pool.by_owner(self.vm['owner'])
owners_running_vms = shared.vm_pool.by_status(VMStatus.running,
_vms=owners_vms)
networks = map(
lambda n: n[0], map(lambda vm: vm.network, owners_running_vms)
)
networks_in_use_by_user_vms = [vm[0] for vm in networks]
if network_name not in networks_in_use_by_user_vms:
network_entry = resolve_network(network[0], self.vm['owner'])
if network_entry:
network_type = network_entry.value["type"]
network_id = network_entry.value["id"]
if network_type == "vxlan":
delete_network_interface('br{}'.format(network_id))
delete_network_interface('vxlan{}'.format(network_id))
except Exception:
logger.exception("Exception in network interface deletion")
def create(self):
if shared.storage_handler.is_vm_image_exists(self.uuid):
# File Already exists. No Problem Continue
logger.debug("Image for vm %s exists", self.uuid)
else:
if shared.storage_handler.make_vm_image(src=self.vm['image_uuid'], dest=self.uuid):
if not shared.storage_handler.resize_vm_image(path=self.uuid,
size=int(self.vm['specs']['os-ssd'].to_MB())):
self.vm['status'] = VMStatus.error
else:
logger.info("New VM Created")
def sync(self):
shared.etcd_client.put(self.key, self.schema.dump(self.vm), value_in_json=True)
def delete(self):
self.stop()
if shared.storage_handler.is_vm_image_exists(self.uuid):
r_status = shared.storage_handler.delete_vm_image(self.uuid)
if r_status:
shared.etcd_client.client.delete(self.key)
else:
shared.etcd_client.client.delete(self.key)
def resolve_network(network_name, network_owner): def resolve_network(network_name, network_owner):
network = shared.etcd_client.get(join_path(settings['etcd']['network_prefix'], network = shared.etcd_client.get(
network_owner, join_path(settings['etcd']['network_prefix'], network_owner, network_name), value_in_json=True
network_name), )
value_in_json=True)
return network return network
def delete_vm_network(vm_entry):
try:
for network in vm_entry.network:
network_name = network[0]
tap_mac = network[1]
tap_id = network[2]
delete_network_interface('tap{}'.format(tap_id))
owners_vms = shared.vm_pool.by_owner(vm_entry.owner)
owners_running_vms = shared.vm_pool.by_status(VMStatus.running,
_vms=owners_vms)
networks = map(
lambda n: n[0], map(lambda vm: vm.network, owners_running_vms)
)
networks_in_use_by_user_vms = [vm[0] for vm in networks]
if network_name not in networks_in_use_by_user_vms:
network_entry = resolve_network(network[0], vm_entry.owner)
if network_entry:
network_type = network_entry.value["type"]
network_id = network_entry.value["id"]
if network_type == "vxlan":
delete_network_interface('br{}'.format(network_id))
delete_network_interface('vxlan{}'.format(network_id))
except Exception:
logger.exception("Exception in network interface deletion")
def create_vxlan_br_tap(_id, _dev, tap_id, ip=None): def create_vxlan_br_tap(_id, _dev, tap_id, ip=None):
network_script_base = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'network') network_script_base = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'network')
vxlan = create_dev(script=os.path.join(network_script_base, 'create-vxlan.sh'), vxlan = create_dev(script=os.path.join(network_script_base, 'create-vxlan.sh'),
@ -377,10 +222,12 @@ def update_radvd_conf(all_networks):
) )
for net in networks if networks.get(net) for net in networks if networks.get(net)
] ]
with open('/etc/radvd.conf', 'w') as radvd_conf: with open('/etc/radvd.conf', 'w') as radvd_conf:
radvd_conf.writelines(content) radvd_conf.writelines(content)
try: try:
sp.check_output(['systemctl', 'restart', 'radvd']) sp.check_output(['systemctl', 'restart', 'radvd'])
except Exception: except sp.CalledProcessError:
sp.check_output(['service', 'radvd', 'restart']) try:
sp.check_output(['service', 'radvd', 'restart'])
except sp.CalledProcessError as err:
raise err.__class__('Cannot start/restart radvd service', err.cmd) from err

View file

@ -95,7 +95,7 @@ def dead_host_mitigation(dead_hosts_keys):
vms_hosted_on_dead_host = shared.vm_pool.by_host(host_key) vms_hosted_on_dead_host = shared.vm_pool.by_host(host_key)
for vm in vms_hosted_on_dead_host: for vm in vms_hosted_on_dead_host:
vm.declare_killed() vm.status = 'UNKNOWN'
shared.vm_pool.put(vm) shared.vm_pool.put(vm)
shared.host_pool.put(host) shared.host_pool.put(host)

View file

@ -56,35 +56,14 @@ def main():
continue continue
shared.etcd_client.client.delete(request_entry.key) # consume Request shared.etcd_client.client.delete(request_entry.key) # consume Request
# If the Request is about a VM which is labelled as "migration" try:
# and has a destination assign_host(vm_entry)
if hasattr(request_entry, "migration") and request_entry.migration \ except NoSuitableHostFound:
and hasattr(request_entry, "destination") and request_entry.destination: vm_entry.add_log("Can't schedule VM. No Resource Left.")
try: shared.vm_pool.put(vm_entry)
get_suitable_host(vm_specs=vm_entry.specs,
hosts=[shared.host_pool.get(request_entry.destination)])
except NoSuitableHostFound:
logger.info("Requested destination host doesn't have enough capacity"
"to hold %s" % vm_entry.uuid)
else:
r = RequestEntry.from_scratch(type=RequestType.InitVMMigration,
uuid=request_entry.uuid,
destination=request_entry.destination,
request_prefix=settings['etcd']['request_prefix'])
shared.request_pool.put(r)
# If the Request is about a VM that just want to get started/created pending_vms.append(vm_entry)
else: logger.info("No Resource Left. Emailing admin....")
# assign_host only returns None when we couldn't be able to assign
# a host to a VM because of resource constraints
try:
assign_host(vm_entry)
except NoSuitableHostFound:
vm_entry.add_log("Can't schedule VM. No Resource Left.")
shared.vm_pool.put(vm_entry)
pending_vms.append(vm_entry)
logger.info("No Resource Left. Emailing admin....")
if __name__ == "__main__": if __name__ == "__main__":

181
ucloud/vmm/__init__.py Normal file
View file

@ -0,0 +1,181 @@
import os
import subprocess as sp
import logging
import socket
import json
import tempfile
import time
from contextlib import suppress
from multiprocessing import Process
from os.path import join as join_path
from os.path import isdir
logger = logging.getLogger(__name__)
class VMQMPHandles:
    """Context manager that yields a negotiated QMP connection to a VM.

    On entry it connects to the unix socket at ``path``, consumes the QMP
    greeting, sends ``qmp_capabilities`` and consumes the reply. The ``with``
    target is the ``(socket, text file)`` pair. Both are always closed on
    exit, and also when entering fails.

    :param path: filesystem path of the VM's QMP unix socket.
    """

    def __init__(self, path):
        self.path = path
        self.sock = socket.socket(socket.AF_UNIX)
        self.file = self.sock.makefile()

    def __enter__(self):
        try:
            self.sock.connect(self.path)

            # eat qmp greetings
            self.file.readline()

            # init qmp
            self.sock.sendall(b'{ "execute": "qmp_capabilities" }')
            self.file.readline()
        except Exception:
            # __exit__ is not called when __enter__ raises, so release the
            # socket here or it would leak on every failed connection.
            self._close()
            raise

        return self.sock, self.file

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._close()

        if exc_type:
            logger.error('Couldn\'t get handle for VM.',
                         exc_info=(exc_type, exc_val, exc_tb))

        # Let the original exception (same type, full message and traceback)
        # propagate instead of replacing it with a freshly constructed one.
        return False

    def _close(self):
        """Close the file wrapper and the underlying socket."""
        self.file.close()
        self.sock.close()
class TransferVM(Process):
    """Background process that live-migrates a VM to another host.

    It forwards a local unix socket to the destination's incoming-migration
    socket over ssh, tells the source QEMU to migrate to that local socket and
    polls ``query-migrate`` until the migration reaches a final state.

    :param src_uuid: uuid of the VM on this (source) host.
    :param dest_uuid: uuid of the VM on the destination host.
    :param host: destination host name/address used for the ssh connection.
    :param socket_dir: directory holding the migration sockets on both hosts.
    """

    # query-migrate statuses after which no further progress is made.
    # 'none' (or a missing status) means no migration was ever started.
    _FINAL_MIGRATION_STATES = ('none', 'completed', 'failed', 'cancelled')

    def __init__(self, src_uuid, dest_uuid, host, socket_dir):
        self.src_uuid = src_uuid
        self.dest_uuid = dest_uuid
        self.host = host

        self.src_sock_path = os.path.join(socket_dir, self.src_uuid)
        self.dest_sock_path = os.path.join(socket_dir, self.dest_uuid)

        super().__init__()

    def run(self):
        """Set up the ssh tunnel, start the migration and wait for it to end."""
        # ssh refuses to create the forwarding socket if the path already exists
        with suppress(FileNotFoundError):
            os.remove(self.src_sock_path)

        command = ['ssh', '-nNT', '-L', '{}:{}'.format(self.src_sock_path, self.dest_sock_path),
                   'root@{}'.format(self.host)]

        try:
            p = sp.Popen(command)
        except Exception as e:
            logger.error('Couldn\'t forward unix socks over ssh.', exc_info=e)
            return

        try:
            # give ssh some time to establish the tunnel
            time.sleep(2)
            vmm = VMM()
            logger.debug('Executing: ssh forwarding command: %s', command)
            success, _ = vmm.execute_command(
                self.src_uuid, command='migrate',
                arguments={'uri': 'unix:{}'.format(self.src_sock_path)}
            )
            if not success:
                logger.error('Migration of VM %s could not be started.', self.src_uuid)
                return

            # The migration can only proceed while the tunnel is alive.
            while p.poll() is None:
                success, output = vmm.execute_command(self.src_uuid, command='query-migrate')
                if not success:
                    return

                status = output['return'].get('status')
                logger.info('Migration Status: %s', status)
                # Intermediate states such as 'setup' or 'active' keep polling;
                # only a final state ends the transfer.
                if status is None or status in self._FINAL_MIGRATION_STATES:
                    return
                time.sleep(0.2)
        finally:
            # Always tear the tunnel down; otherwise the ssh child outlives
            # this process and is never reaped.
            if p.poll() is None:
                p.terminate()
            p.wait()
class VMM:
    """Virtual Machine Manager: launches QEMU and talks to it over QMP.

    A VM is addressed by its uuid. Its QMP unix socket lives at
    ``<vmm_backend>/<uuid>`` and incoming-migration sockets live in
    ``<vmm_backend>/sock/``.

    :param qemu_path: path of the QEMU binary to execute.
    :param vmm_backend: directory in which the per-VM QMP sockets are kept.
    """

    def __init__(self, qemu_path='/usr/bin/qemu-system-x86_64',
                 vmm_backend=os.path.expanduser('~/ucloud/vmm/')):
        self.qemu_path = qemu_path
        self.vmm_backend = vmm_backend
        self.socket_dir = os.path.join(self.vmm_backend, 'sock')

    def is_running(self, uuid):
        """Return True if the VM's QMP socket accepts a connection and greets us.

        A socket file that cannot be used is removed, so that stale files are
        not reported by :meth:`discover` again.
        """
        sock_path = os.path.join(self.vmm_backend, uuid)
        try:
            # The context manager closes the probe connection deterministically.
            with socket.socket(socket.AF_UNIX) as sock:
                sock.connect(sock_path)
                recv = sock.recv(4096)
        except Exception as err:
            # unix sock doesn't exists or it is closed
            logger.info('VM %s sock either don\'t exists or it is closed. '
                        'It mean VM is stopped.', uuid, exc_info=err)
        else:
            # if we receive greetings from qmp it mean VM is running
            if len(recv) > 0:
                return True

        with suppress(FileNotFoundError):
            os.remove(sock_path)

        return False

    def start(self, *args, uuid, migration=False):
        """Launch a daemonized QEMU process for ``uuid`` unless it already runs.

        :param args: extra QEMU command line arguments (drives, network, ...).
        :param uuid: uuid of the VM; also the name of its QMP socket file.
        :param migration: when true, QEMU is started with ``-incoming`` so it
            waits for migration data on ``<socket_dir>/<uuid>``.

        A non-zero QEMU exit status is logged, not raised.
        """
        migration_args = ()
        if migration:
            migration_args = ('-incoming', 'unix:{}'.format(os.path.join(self.socket_dir, uuid)))

        if self.is_running(uuid):
            logger.warning('Cannot start VM. It is already running.')
            return

        qmp_arg = ('-qmp', 'unix:{},server,nowait'.format(os.path.join(self.vmm_backend, uuid)))

        # Only a unique path is needed: the placeholder file is removed when
        # the context exits and QEMU creates the socket itself. The path could
        # in theory be taken by something else in between.
        with tempfile.NamedTemporaryFile() as vnc_placeholder:
            vnc_path = vnc_placeholder.name
        vnc_arg = ('-vnc', 'unix:{}'.format(vnc_path))

        command = [self.qemu_path, *args, *qmp_arg, *migration_args, *vnc_arg, '-daemonize']
        try:
            sp.check_output(command, stderr=sp.PIPE)
        except sp.CalledProcessError as err:
            logger.exception('Error occurred while starting VM.\nDetail %s', err.stderr.decode('utf-8'))
        else:
            # give QEMU a moment to bring up its sockets
            time.sleep(2)

    def execute_command(self, uuid, command, **kwargs):
        """Send one QMP command to the VM and return ``(success, output)``.

        :param uuid: uuid of the target VM.
        :param command: QMP command name (the ``execute`` member).
        :param kwargs: further top-level members of the QMP message,
            e.g. ``arguments={...}``.
        :return: ``(True, parsed reply)`` when the reply contains ``return``,
            ``(False, parsed reply)`` for any other valid JSON reply and
            ``(False, None)`` when the command could not be executed or the
            reply is not valid JSON.
        """
        try:
            with VMQMPHandles(os.path.join(self.vmm_backend, uuid)) as (sock_handle, file_handle):
                command_to_execute = {
                    'execute': command,
                    **kwargs
                }
                sock_handle.sendall(json.dumps(command_to_execute).encode('utf-8'))
                raw_output = file_handle.readline()
        except Exception:
            logger.exception('Error occurred while executing command and getting valid output from qmp')
            return False, None

        try:
            output = json.loads(raw_output)
        except ValueError:
            # json.JSONDecodeError is a ValueError; an empty reply ends up here too
            logger.exception('QMP Output isn\'t valid JSON. %s', raw_output)
            return False, None

        return 'return' in output, output

    def stop(self, uuid):
        """Ask QEMU to quit; return whether the command was acknowledged."""
        success, _ = self.execute_command(command='quit', uuid=uuid)
        return success

    def get_status(self, uuid):
        """Return QEMU's run state for the VM, or ``'STOPPED'`` if it cannot be queried."""
        success, output = self.execute_command(command='query-status', uuid=uuid)
        if success:
            return output['return']['status']
        return 'STOPPED'

    def discover(self):
        """Return the uuids that have a (non-directory) entry in ``vmm_backend``.

        Directories, such as the migration socket directory, are skipped.
        A missing backend directory means no VM was ever started and yields
        an empty list.
        """
        try:
            entries = os.listdir(self.vmm_backend)
        except FileNotFoundError:
            return []

        return [
            uuid for uuid in entries
            if not isdir(join_path(self.vmm_backend, uuid))
        ]

    def get_vnc(self, uuid):
        """Return the VNC ``service`` reported by ``query-vnc``, or ``None`` on failure."""
        success, output = self.execute_command(uuid, command='query-vnc')
        if success:
            return output['return']['service']
        return None

    def transfer(self, src_uuid, dest_uuid, host):
        """Start migrating ``src_uuid`` to ``dest_uuid`` on ``host`` in a background process."""
        p = TransferVM(src_uuid, dest_uuid, socket_dir=self.socket_dir, host=host)
        p.start()