forked from uncloud/uncloud
* Refactoring
* Fix issue that caused a new image store to be created at every start of ucloud-api.
* The VM migration API call now takes a hostname instead of a host key.
* StorageHandler classes are introduced. They transparently handle importing an image, making a VM out of an image, resizing a VM image, deleting a VM image, etc.
* Loggers added to __init__.py of every ucloud component's subpackage.
* Trivial timeout events are no longer logged.
* Fix issue that prevented removal of stopped VMs (i.e. VMs that were successfully migrated).
* Improved unit handling, e.g. MB, Mb, mB and mb are all megabytes.
* VM migration is now possible on an IPv6 host.
* The destination VM (the receiving side of a migration) now correctly expects incoming data on a free ephemeral port.
* Tracebacks are no longer printed to the screen; they go to the log file instead.
* All sanity checks are put into a single file. These checks are run by ucloud.py before running any ucloud component.
This commit is contained in:
parent 6fa77bce4d
commit cc0ca68498

26 changed files with 1101 additions and 294 deletions
@@ -22,7 +22,7 @@ def check_otp(name, realm, token):
     except binascii.Error:
         return 400
 
-    response = requests.get(
+    response = requests.post(
         "{OTP_SERVER}{OTP_VERIFY_ENDPOINT}".format(
             OTP_SERVER=env_vars.get("OTP_SERVER", ""),
             OTP_VERIFY_ENDPOINT=env_vars.get("OTP_VERIFY_ENDPOINT", "verify"),

api/main.py — 99 lines changed
@@ -1,16 +1,16 @@
 import json
 import os
 import subprocess
-from uuid import uuid4
 
 import pynetbox
 
+from uuid import uuid4
+from os.path import join as join_path
 
 from flask import Flask, request
 from flask_restful import Resource, Api
 
 from common import counters
 from common.request import RequestEntry, RequestType
 from common.vm import VMStatus
-from config import (etcd_client, request_pool, vm_pool, host_pool, env_vars)
+from config import (etcd_client, request_pool, vm_pool, host_pool, env_vars, image_storage_handler)
 from . import schemas
 from .helper import generate_mac, mac2ipv6
+from api import logger
@@ -20,13 +20,15 @@ api = Api(app)
 
 
 class CreateVM(Resource):
     """API Request to Handle Creation of VM"""
 
     @staticmethod
     def post():
         data = request.json
         validator = schemas.CreateVMSchema(data)
         if validator.is_valid():
             vm_uuid = uuid4().hex
-            vm_key = os.path.join(env_vars.get("VM_PREFIX"), vm_uuid)
+            vm_key = join_path(env_vars.get("VM_PREFIX"), vm_uuid)
             specs = {
                 "cpu": validator.specs["cpu"],
                 "ram": validator.specs["ram"],
@@ -67,14 +69,14 @@ class VmStatus(Resource):
         validator = schemas.VMStatusSchema(data)
         if validator.is_valid():
             vm = vm_pool.get(
-                os.path.join(env_vars.get("VM_PREFIX"), data["uuid"])
+                join_path(env_vars.get("VM_PREFIX"), data["uuid"])
             )
             vm_value = vm.value.copy()
             vm_value["ip"] = []
             for network_and_mac in vm.network:
                 network_name, mac = network_and_mac
                 network = etcd_client.get(
-                    os.path.join(
+                    join_path(
                         env_vars.get("NETWORK_PREFIX"),
                         data["name"],
                         network_name,
@@ -96,7 +98,7 @@ class CreateImage(Resource):
         validator = schemas.CreateImageSchema(data)
         if validator.is_valid():
             file_entry = etcd_client.get(
-                os.path.join(env_vars.get("FILE_PREFIX"), data["uuid"])
+                join_path(env_vars.get("FILE_PREFIX"), data["uuid"])
             )
             file_entry_value = json.loads(file_entry.value)
 
@@ -109,7 +111,7 @@ class CreateImage(Resource):
                 "visibility": "public",
             }
             etcd_client.put(
-                os.path.join(env_vars.get("IMAGE_PREFIX"), data["uuid"]),
+                join_path(env_vars.get("IMAGE_PREFIX"), data["uuid"]),
                 json.dumps(image_entry_json),
             )
 
@@ -123,8 +125,9 @@ class ListPublicImages(Resource):
         images = etcd_client.get_prefix(
             env_vars.get("IMAGE_PREFIX"), value_in_json=True
         )
-        r = {}
-        r["images"] = []
+        r = {
+            "images": []
+        }
         for image in images:
             image_key = "{}:{}".format(
                 image.value["store_name"], image.value["name"]
@@ -143,46 +146,22 @@ class VMAction(Resource):
 
         if validator.is_valid():
             vm_entry = vm_pool.get(
-                os.path.join(env_vars.get("VM_PREFIX"), data["uuid"])
+                join_path(env_vars.get("VM_PREFIX"), data["uuid"])
             )
             action = data["action"]
 
             if action == "start":
                 vm_entry.status = VMStatus.requested_start
                 vm_pool.put(vm_entry)
                 action = "schedule"
 
             if action == "delete" and vm_entry.hostname == "":
-                try:
-                    path_without_protocol = vm_entry.path[
-                        vm_entry.path.find(":") + 1:
-                    ]
-
-                    if env_vars.get("WITHOUT_CEPH"):
-                        command_to_delete = [
-                            "rm",
-                            "-rf",
-                            os.path.join("/var/vm", vm_entry.uuid),
-                        ]
-                    else:
-                        command_to_delete = [
-                            "rbd",
-                            "rm",
-                            path_without_protocol,
-                        ]
-
-                    subprocess.check_output(
-                        command_to_delete, stderr=subprocess.PIPE
-                    )
-                except subprocess.CalledProcessError as e:
-                    if "No such file" in e.stderr.decode("utf-8"):
+                if image_storage_handler.is_vm_image_exists(vm_entry.uuid):
+                    r_status = image_storage_handler.delete_vm_image(vm_entry.uuid)
+                    if r_status:
                         etcd_client.client.delete(vm_entry.key)
                         return {"message": "VM successfully deleted"}
-                    else:
-                        logger.exception(e)
-                        return {
-                            "message": "Some error occurred while deleting VM"
-                        }
+                    logger.error("Some Error Occurred while deleting VM")
+                    return {"message": "VM deletion unsuccessfull"}
                 else:
                     etcd_client.client.delete(vm_entry.key)
                     return {"message": "VM successfully deleted"}
@@ -211,8 +190,8 @@ class VMMigration(Resource):
             r = RequestEntry.from_scratch(
                 type=RequestType.ScheduleVM,
                 uuid=vm.uuid,
-                destination=os.path.join(
-                    env_vars.get("HOST_PREFIX"), data["destination"]
+                destination=join_path(
+                    env_vars.get("HOST_PREFIX"), validator.destination.value
                 ),
                 migration=True,
                 request_prefix=env_vars.get("REQUEST_PREFIX")
@@ -289,7 +268,7 @@ class CreateHost(Resource):
         data = request.json
         validator = schemas.CreateHostSchema(data)
         if validator.is_valid():
-            host_key = os.path.join(env_vars.get("HOST_PREFIX"), uuid4().hex)
+            host_key = join_path(env_vars.get("HOST_PREFIX"), uuid4().hex)
             host_entry = {
                 "specs": data["specs"],
                 "hostname": data["hostname"],
@@ -327,7 +306,7 @@ class GetSSHKeys(Resource):
         if not validator.key_name.value:
 
             # {user_prefix}/{realm}/{name}/key/
-            etcd_key = os.path.join(
+            etcd_key = join_path(
                 env_vars.get('USER_PREFIX'),
                 data["realm"],
                 data["name"],
@@ -344,7 +323,7 @@ class GetSSHKeys(Resource):
         else:
 
             # {user_prefix}/{realm}/{name}/key/{key_name}
-            etcd_key = os.path.join(
+            etcd_key = join_path(
                 env_vars.get('USER_PREFIX'),
                 data["realm"],
                 data["name"],
@@ -373,7 +352,7 @@ class AddSSHKey(Resource):
         if validator.is_valid():
 
             # {user_prefix}/{realm}/{name}/key/{key_name}
-            etcd_key = os.path.join(
+            etcd_key = join_path(
                 env_vars.get("USER_PREFIX"),
                 data["realm"],
                 data["name"],
@@ -403,7 +382,7 @@ class RemoveSSHKey(Resource):
         if validator.is_valid():
 
             # {user_prefix}/{realm}/{name}/key/{key_name}
-            etcd_key = os.path.join(
+            etcd_key = join_path(
                 env_vars.get("USER_PREFIX"),
                 data["realm"],
                 data["name"],
@@ -462,7 +441,7 @@ class CreateNetwork(Resource):
             else:
                 network_entry["ipv6"] = "fd00::/64"
 
-            network_key = os.path.join(
+            network_key = join_path(
                 env_vars.get("NETWORK_PREFIX"),
                 data["name"],
                 data["network_name"],
@@ -480,7 +459,7 @@ class ListUserNetwork(Resource):
         validator = schemas.OTPSchema(data)
 
         if validator.is_valid():
-            prefix = os.path.join(
+            prefix = join_path(
                 env_vars.get("NETWORK_PREFIX"), data["name"]
             )
             networks = etcd_client.get_prefix(prefix, value_in_json=True)
@@ -517,15 +496,17 @@ api.add_resource(CreateNetwork, "/network/create")
 
 
 def main():
-    data = {
-        "is_public": True,
-        "type": "ceph",
-        "name": "images",
-        "description": "first ever public image-store",
-        "attributes": {"list": [], "key": [], "pool": "images"},
-    }
+    image_stores = list(etcd_client.get_prefix(env_vars.get('IMAGE_STORE_PREFIX'), value_in_json=True))
+    if len(image_stores) == 0:
+        data = {
+            "is_public": True,
+            "type": "ceph",
+            "name": "images",
+            "description": "first ever public image-store",
+            "attributes": {"list": [], "key": [], "pool": "images"},
+        }
 
-    etcd_client.put(os.path.join(env_vars.get('IMAGE_STORE_PREFIX'), uuid4().hex), json.dumps(data))
+        etcd_client.put(join_path(env_vars.get('IMAGE_STORE_PREFIX'), uuid4().hex), json.dumps(data))
 
     app.run(host="::", debug=True)
@@ -381,12 +381,14 @@ class VmMigrationSchema(OTPSchema):
         super().__init__(data=data, fields=fields)
 
     def destination_validation(self):
-        host_key = self.destination.value
-        host = host_pool.get(host_key)
+        hostname = self.destination.value
+        host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
         if not host:
             self.add_error("No Such Host ({}) exists".format(self.destination.value))
+        elif host.status != HostStatus.alive:
+            self.add_error("Destination Host is dead")
+        else:
+            self.destination.value = host.key
 
     def validation(self):
         vm = vm_pool.get(self.uuid.value)
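With destination_validation() above, the API now resolves a hostname to a host key server-side. A minimal sketch of the new hostname-based migration call; the endpoint path and the OTP field names are assumptions (only `uuid` and `destination` come from this hunk):

    import requests

    payload = {
        "uuid": "<vm-uuid>",
        "destination": "host1.example.com",  # a hostname now, not an etcd host key
        "name": "<user>", "realm": "<realm>", "token": "<otp>",  # assumed OTP fields
    }
    # the endpoint path below is hypothetical
    print(requests.post("http://[::1]:5000/vm/migrate", json=payload).json())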
@@ -1,28 +1,6 @@
-from decouple import Config, RepositoryEnv, UndefinedValueError
 from etcd3_wrapper import EtcdEntry
 
 
-class EnvironmentVariables:
-    def __init__(self, env_file):
-        try:
-            env_config = Config(RepositoryEnv(env_file))
-        except FileNotFoundError:
-            print("{} does not exists".format(env_file))
-            exit(1)
-        else:
-            self.config = env_config
-
-    def get(self, *args, **kwargs):
-        """Return value of var from env_vars"""
-        try:
-            value = self.config.get(*args, **kwargs)
-        except UndefinedValueError as e:
-            print(e)
-            exit(1)
-        else:
-            return value
-
-
 class SpecificEtcdEntryBase:
     def __init__(self, e: EtcdEntry):
         self.key = e.key
@@ -1,5 +1,9 @@
 import logging
 import socket
+import requests
+import json
+
+from ipaddress import ip_address
 
 from os.path import join as join_path
@@ -37,3 +41,14 @@ def get_ipv4_address():
         address = s.getsockname()[0]
 
     return address
 
 
+def get_ipv6_address():
+    try:
+        r = requests.get("https://api6.ipify.org?format=json")
+        content = json.loads(r.content.decode("utf-8"))
+        ip = ip_address(content["ip"]).exploded
+    except Exception as e:
+        logging.exception(e)
+    else:
+        return ip

common/storage_handlers.py — 158 lines (new file)
@@ -0,0 +1,158 @@
import shutil
import subprocess as sp
import os
import stat

from abc import ABC
from host import logger
from os.path import join as join_path


class ImageStorageHandler(ABC):
    def __init__(self, image_base, vm_base):
        self.image_base = image_base
        self.vm_base = vm_base

    def import_image(self, image_src, image_dest, protect=False):
        """Put an image at the destination
        :param src: An Image file
        :param dest: A path where :param src: is to be put.
        :param protect: If protect is true then the dest is protect (readonly etc)

        The obj must exist on filesystem.
        """
        raise NotImplementedError()

    def make_vm_image(self, image_path, path):
        """Copy image from src to dest

        :param src: A path
        :param dest: A path

        src and destination must be on same storage system i.e both on file system or both on CEPH etc.
        """
        raise NotImplementedError()

    def resize_vm_image(self, path, size):
        """Resize image located at :param path:
        :param path: The file which is to be resized
        :param size: Size must be in Megabytes
        """
        raise NotImplementedError()

    def delete_vm_image(self, path):
        raise NotImplementedError()

    def execute_command(self, command, report=True):
        command = list(map(str, command))
        try:
            output = sp.check_output(command, stderr=sp.PIPE)
        except Exception as e:
            if report:
                print(e)
                logger.exception(e)
            return False
        return True

    def vm_path_string(self, path):
        raise NotImplementedError()

    def qemu_path_string(self, path):
        raise NotImplementedError()

    def is_vm_image_exists(self, path):
        raise NotImplementedError()


class FileSystemBasedImageStorageHandler(ImageStorageHandler):
    def import_image(self, src, dest, protect=False):
        dest = join_path(self.image_base, dest)
        try:
            shutil.copy(src, dest)
            if protect:
                os.chmod(dest, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
        except Exception as e:
            logger.exception(e)
            return False
        return True

    def make_vm_image(self, src, dest):
        src = join_path(self.image_base, src)
        dest = join_path(self.vm_base, dest)
        try:
            shutil.copy(src, dest)
        except Exception as e:
            logger.exception(e)
            return False
        return True

    def resize_vm_image(self, path, size):
        path = join_path(self.vm_base, path)
        command = ["qemu-img", "resize", "-f", "raw", path, "{}M".format(size)]
        if self.execute_command(command):
            return True
        else:
            self.delete_vm_image(path)
            return False

    def delete_vm_image(self, path):
        path = join_path(self.vm_base, path)
        try:
            os.remove(path)
        except Exception as e:
            logger.exception(e)
            return False
        return True

    def vm_path_string(self, path):
        return join_path(self.vm_base, path)

    def qemu_path_string(self, path):
        return self.vm_path_string(path)

    def is_vm_image_exists(self, path):
        path = join_path(self.vm_base, path)
        command = ["ls", path]
        return self.execute_command(command, report=False)


class CEPHBasedImageStorageHandler(ImageStorageHandler):
    def import_image(self, src, dest, protect=False):
        dest = join_path(self.image_base, dest)
        command = ["rbd", "import", src, dest]
        if protect:
            snap_create_command = ["rbd", "snap", "create", "{}@protected".format(dest)]
            snap_protect_command = ["rbd", "snap", "protect", "{}@protected".format(dest)]

            return self.execute_command(command) and self.execute_command(snap_create_command) and\
                self.execute_command(snap_protect_command)

        return self.execute_command(command)

    def make_vm_image(self, src, dest):
        src = join_path(self.image_base, src)
        dest = join_path(self.vm_base, dest)

        command = ["rbd", "clone", "{}@protected".format(src), dest]
        return self.execute_command(command)

    def resize_vm_image(self, path, size):
        path = join_path(self.vm_base, path)
        command = ["rbd", "resize", path, "--size", size]
        return self.execute_command(command)

    def delete_vm_image(self, path):
        path = join_path(self.vm_base, path)
        command = ["rbd", "rm", path]
        return self.execute_command(command)

    def vm_path_string(self, path):
        return join_path(self.vm_base, path)

    def qemu_path_string(self, path):
        return "rbd:{}".format(self.vm_path_string(path))

    def is_vm_image_exists(self, path):
        path = join_path(self.vm_base, path)
        command = ["rbd", "info", path]
        return self.execute_command(command, report=False)
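A sketch of how these handlers are meant to be used; the method names come from the file above, while the paths and UUIDs are illustrative:

    # Illustrative only: image_base/vm_base normally come from config.py
    handler = FileSystemBasedImageStorageHandler(image_base="/var/image", vm_base="/var/vm")
    if handler.import_image("image.raw", "base-image-uuid", protect=True):
        handler.make_vm_image("base-image-uuid", "vm-uuid")
        handler.resize_vm_image("vm-uuid", 10240)  # size in megabytes
        print(handler.qemu_path_string("vm-uuid"))  # value for the QEMU -drive option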
@@ -60,10 +60,6 @@ class VMEntry(SpecificEtcdEntryBase):
         self.log = self.log[:5]
         self.log.append("{} - {}".format(datetime.now().isoformat(), msg))
 
-    @property
-    def path(self):
-        return "rbd:uservms/{}".format(self.uuid)
-
 
 class VmPool:
     def __init__(self, etcd_client, vm_prefix):

config.py — 17 lines changed
@@ -1,14 +1,16 @@
 from etcd3_wrapper import Etcd3Wrapper
 
-from common.classes import EnvironmentVariables
 from common.host import HostPool
 from common.request import RequestPool
 from common.vm import VmPool
+from common.storage_handlers import FileSystemBasedImageStorageHandler, CEPHBasedImageStorageHandler
+from decouple import Config, RepositoryEnv
 
-env_vars = EnvironmentVariables('/etc/ucloud/ucloud.conf')
+env_vars = Config(RepositoryEnv('/etc/ucloud/ucloud.conf'))
 
 etcd_wrapper_args = ()
-etcd_wrapper_kwargs = {"host": env_vars.get("ETCD_URL")}
+etcd_wrapper_kwargs = {'host': env_vars.get('ETCD_URL')}
 
 etcd_client = Etcd3Wrapper(*etcd_wrapper_args, **etcd_wrapper_kwargs)
@@ -17,3 +19,12 @@ vm_pool = VmPool(etcd_client, env_vars.get('VM_PREFIX'))
 request_pool = RequestPool(etcd_client, env_vars.get('REQUEST_PREFIX'))
 
 running_vms = []
 
+__storage_backend = env_vars.get("STORAGE_BACKEND")
+if __storage_backend == "filesystem":
+    image_storage_handler = FileSystemBasedImageStorageHandler(vm_base=env_vars.get("VM_DIR"),
+                                                               image_base=env_vars.get("IMAGE_DIR"))
+elif __storage_backend == "ceph":
+    image_storage_handler = CEPHBasedImageStorageHandler(vm_base="ssd", image_base="ssd")
+else:
+    raise Exception("Unknown Image Storage Handler")
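For reference, a minimal /etc/ucloud/ucloud.conf fragment that satisfies this selection logic; the variable names are taken from the diff and the installation docs, the values are examples:

    ETCD_URL=localhost
    STORAGE_BACKEND=filesystem
    IMAGE_DIR=/var/image
    VM_DIR=/var/vm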
docs/source/diagram-code/ucloud — 44 lines (new file)

@@ -0,0 +1,44 @@
graph LR
    style ucloud fill:#FFD2FC
    style cron fill:#FFF696
    style infrastructure fill:#BDF0FF

    subgraph ucloud[ucloud]
        ucloud-cli[CLI]-->ucloud-api[API]
        ucloud-api-->ucloud-scheduler[Scheduler]
        ucloud-api-->ucloud-imagescanner[Image Scanner]
        ucloud-api-->ucloud-host[Host]
        ucloud-scheduler-->ucloud-host

        ucloud-host-->need-networking{VM need Networking}
        need-networking-->|Yes| networking-scripts
        need-networking-->|No| VM[Virtual Machine]
        need-networking-->|SLAAC?| radvd
        networking-scripts-->VM
        networking-scripts--Create Networks Devices-->networking-scripts
        subgraph cron[Cron Jobs]
            ucloud-imagescanner
            ucloud-filescanner[File Scanner]
            ucloud-filescanner--Track User files-->ucloud-filescanner
        end
        subgraph infrastructure[Infrastructure]
            radvd
            etcd
            networking-scripts[Networking Scripts]
            ucloud-imagescanner-->image-store
            image-store{Image Store}
            image-store-->|CEPH| ceph
            image-store-->|FILE| file-system
            ceph[CEPH]
            file-system[File System]
        end
        subgraph virtual-machine[Virtual Machine]
            VM
            VM-->ucloud-init
        end

        subgraph metadata-group[Metadata Server]
            metadata-->ucloud-init
            ucloud-init<-->metadata
        end
    end

docs/source/images/ucloud.svg — 494 lines (new file)
Diff suppressed: SVG image, 37 KiB.
@@ -1,7 +1,7 @@
 .. ucloud documentation master file, created by
-sphinx-quickstart on Mon Nov 11 19:08:16 2019.
-You can adapt this file completely to your liking, but it should at least
-contain the root `toctree` directive.
+   sphinx-quickstart on Mon Nov 11 19:08:16 2019.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
 
 Welcome to ucloud's documentation!
 ==================================
@@ -15,7 +15,9 @@ Welcome to ucloud's documentation!
    usage/usage-for-admins
    usage/usage-for-users
    usage/how-to-create-an-os-image-for-ucloud
+   theory/summary
    misc/todo
+   troubleshooting/installation-troubleshooting
 
 Indices and tables
 ==================
@@ -135,7 +135,7 @@ You just need to update **AUTH_SEED** in the below code to match your auth's seed
 
     ETCD_URL=localhost
 
-    WITHOUT_CEPH=True
+    STORAGE_BACKEND=filesystem
 
     BASE_DIR=/var/www
     IMAGE_DIR=/var/image
@@ -195,3 +195,35 @@ profile e.g *~/.profile*
     alias uotp='cd /root/uotp/ && pipenv run python app.py'
 
 and run :code:`source ~/.profile`
 
 
+Arch
+-----
+
+.. code-block:: sh
+
+    # Update/Upgrade
+    pacman -Syuu
+    pacman -S python3 qemu chrony python-pip
+
+    pip3 install pipenv
+
+    cat > /etc/chrony.conf << EOF
+    server 0.arch.pool.ntp.org
+    server 1.arch.pool.ntp.org
+    server 2.arch.pool.ntp.org
+    EOF
+
+    systemctl start chronyd
+    systemctl enable chronyd
+
+    # Create non-root user and allow it sudo access
+    # without password
+    useradd -m ucloud
+    echo "ucloud ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
+
+    sudo -H -u ucloud bash -c 'cd /home/ucloud && git clone https://aur.archlinux.org/yay.git && cd yay && makepkg -si'
+    sudo -H -u ucloud bash -c 'yay -S etcd'
+
+    systemctl start etcd
+    systemctl enable etcd
@@ -1,6 +1,18 @@
 TODO
 ====
 
 * **Check Authentication:** Nico reported that some endpoints
   even work without providing token. (ListUserVM)
 
+* Put overrides for **IMAGE_BASE**, **VM_BASE** in **ImageStorageHandler**.
+
+* Put "Always use only one StorageHandler"
+
+* Create Network Manager
+
+  * That would handle tasks like up/down an interface
+  * Create VXLANs, Bridges, TAPs.
+  * Remove them when they are no longer used.
+
+* Check for :code:`etcd3.exceptions.ConnectionFailedError` when calling some etcd operation to
+  avoid crashing whole application.
+* Throw KeyError instead of returning None when some key is not found in etcd.

docs/source/theory/summary.rst — 98 lines (new file)
@@ -0,0 +1,98 @@
Summary
=======

.. image:: /images/ucloud.svg

.. code-block::

                              <cli>
                                |
                                |
                                |
    +-------------------------<api>
    |                           |
    |       |```````````````|```````````````|
    |       |               |               |
    | <file_scanner>   <scheduler>   <image_scanner>
    |                           |
    |                           |
    +-------------------------<host>
                                |
                                |
                                |
         Virtual Machine------<init>------<metadata>

**ucloud-cli** interacts with **ucloud-api** to do the following operations:

- Create/Delete/Start/Stop/Migrate/Probe (status of) Virtual Machines
- Create/Delete Networks
- Add/Get/Delete SSH Keys
- Create an OS image out of a file (tracked by file_scanner)
- List a user's files/networks/vms
- Add Host

ucloud can currently store OS images on

* File System
* `CEPH <https://ceph.io/>`_

**ucloud-api** in turn creates appropriate requests which are taken
by suitable components of ucloud. For example, if a user uses ucloud-cli
to create a VM, **ucloud-api** would create a **ScheduleVMRequest** containing
things like a pointer to the VM's entry, which holds the specs and networking
configuration of the VM.

**ucloud-scheduler** accepts requests for VM scheduling and
migration. It finds a host from a list of available hosts on which
the incoming VM can run and schedules it on that host.

**ucloud-host** runs on host servers, i.e. servers that
actually run virtual machines, and accepts requests
intended only for them. It creates/deletes/starts/stops/migrates
virtual machines. It also arranges the network resources needed for the
incoming VM.

**ucloud-filescanner** keeps track of users' files, which would be needed
later for creating OS images.

**ucloud-imagescanner** converts image files from qcow2 format to raw
format, which is then imported into the image store.

* In case of **File System**, the converted image is copied to
  :file:`/var/image/` or the path referred to by the :envvar:`IMAGE_PATH` environment variable
  mentioned in :file:`/etc/ucloud/ucloud.conf`.

* In case of **CEPH**, the converted image is imported into a
  specific pool (it depends on the image store to which the image
  belongs) of CEPH Block Storage.

**ucloud-metadata** provides metadata which is used to contextualize
VMs. When a VM is created, it is just a clone (duplicate) of the OS
image from which it was created. So, to differentiate between my
VM and your VM, the VM needs to be contextualized. This works
like the following

.. note::
   Actually, ucloud-init makes the GET request. You can also try it
   yourself using curl, but ucloud-init does that for you.

* The VM makes a GET request to http://metadata, which resolves to the actual
  address of the metadata server. The metadata server looks at the IPv6
  address of the requester and extracts the MAC address, which is possible
  because the IPv6 address is
  `IPv6 EUI-64 <https://community.cisco.com/t5/networking-documents/understanding-ipv6-eui-64-bit-address/ta-p/3116953>`_.
  Metadata uses this MAC address to find the actual VM to which it belongs,
  along with its owner, ssh keys and much more. Then, metadata returns these
  details back to the calling VM in JSON format. These details are
  then used by **ucloud-init**, which is explained next.

**ucloud-init** gets the metadata from **ucloud-metadata** to contextualize
the VM. Specifically, it gets the owner's ssh keys (or any other keys the
owner of the VM added to the authorized keys for this VM) and puts them into the ssh
server's (installed on the VM) authorized keys, so that the owner can access
the VM using ssh. It also installs software that is needed for the correct
behavior of the VM, e.g. rdnssd (needed for `SLAAC <https://en.wikipedia.org/wiki/IPv6#Stateless_address_autoconfiguration_(SLAAC)>`_).
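The note above says the metadata fetch can be reproduced by hand. A hypothetical session from inside a VM, assuming a requests-based client (the response fields shown are illustrative, not a documented schema):

    import requests

    # equivalent to: curl http://metadata
    print(requests.get("http://metadata").json())
    # e.g. {"name": "myvm", "ssh-keys": ["ssh-rsa AAAA... user@laptop"]}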
docs/source/troubleshooting/installation-troubleshooting.rst — 24 lines (new file)

@@ -0,0 +1,24 @@
Installation Troubleshooting
============================

etcd doesn't start
------------------

.. code-block:: sh

    [root@archlinux ~]# systemctl start etcd
    Job for etcd.service failed because the control process exited with error code.
    See "systemctl status etcd.service" and "journalctl -xe" for details

possible solution
~~~~~~~~~~~~~~~~~
Run :code:`cat /etc/hosts` and check whether its output contains the following

.. code-block:: sh

    127.0.0.1 localhost.localdomain localhost
    ::1 localhost localhost.localdomain


If it does, then unfortunately we can't help you. But if it doesn't contain the
above, you can put the above in :file:`/etc/hosts` to fix the issue.
@@ -0,0 +1,3 @@
import logging

logger = logging.getLogger(__name__)
@@ -6,7 +6,7 @@ import time
 from uuid import uuid4
 
 from etcd3_wrapper import Etcd3Wrapper
 
+from filescanner import logger
 from config import env_vars
 
@@ -17,9 +17,10 @@ def getxattr(file, attr):
         value = sp.check_output(['getfattr', file,
                                  '--name', attr,
                                  '--only-values',
-                                 '--absolute-names'])
+                                 '--absolute-names'], stderr=sp.DEVNULL)
         value = value.decode("utf-8")
-    except sp.CalledProcessError:
+    except sp.CalledProcessError as e:
+        logger.exception(e)
         value = None
 
     return value
@@ -63,7 +64,7 @@ try:
     sp.check_output(['which', 'getfattr'])
     sp.check_output(['which', 'setfattr'])
 except Exception as e:
-    print(e)
+    logger.exception(e)
     print('Make sure you have getfattr and setfattr available')
     exit(1)
 

host/helper.py — 13 lines (new file)
@@ -0,0 +1,13 @@
import socket
from contextlib import closing


def find_free_port():
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
        try:
            s.bind(('', 0))
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        except Exception:
            return None
        else:
            return s.getsockname()[1]
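This is how the "free ephemeral port" from the commit message is found. Note the port is probed and then released, so in principle another process could grab it before QEMU binds it; host/main.py calls it right before building the -incoming option. A hypothetical usage sketch:

    port = find_free_port()
    if port is None:
        raise RuntimeError("no free ephemeral port available")
    incoming = "-incoming tcp:[::]:{}".format(port)  # matches virtualmachine.py below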
host/main.py — 25 lines changed

@@ -1,6 +1,5 @@
 import argparse
 import multiprocessing as mp
-import os
 import time
 
 from etcd3_wrapper import Etcd3Wrapper
@@ -10,13 +9,17 @@ from config import (vm_pool, request_pool,
                     etcd_client, running_vms,
                     etcd_wrapper_args, etcd_wrapper_kwargs,
                     HostPool, env_vars)
 
+from .helper import find_free_port
 from . import virtualmachine
+from host import logger
 
 
-def update_heartbeat(host):
+def update_heartbeat(hostname):
     """Update Last HeartBeat Time for :param hostname: in etcd"""
     client = Etcd3Wrapper(*etcd_wrapper_args, **etcd_wrapper_kwargs)
     host_pool = HostPool(client, env_vars.get('HOST_PREFIX'))
-    this_host = next(filter(lambda h: h.hostname == host, host_pool.hosts), None)
+    this_host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
 
     while True:
         this_host.update_heartbeat()
@@ -35,17 +38,22 @@ def maintenance(host):
     # whether this host vm is successfully migrated. If yes
     # then we shutdown "vm1" on this host.
 
+    to_be_removed = []
     for running_vm in running_vms:
         with vm_pool.get_put(running_vm.key) as vm_entry:
             if vm_entry.hostname != host.key and not vm_entry.in_migration:
                 running_vm.handle.shutdown()
                 vm_entry.add_log("VM on source host shutdown.")
+                logger.info("VM migration not completed successfully.")
+                to_be_removed.append(running_vm)
+
+    for r in to_be_removed:
+        running_vms.remove(r)
 
     # To check vm running according to etcd entries
     alleged_running_vms = vm_pool.by_status("RUNNING", vm_pool.by_host(host.key))
 
     for vm_entry in alleged_running_vms:
         _vm = virtualmachine.get_vm(running_vms, vm_entry.key)
 
         # Whether, the allegedly running vm is in our
         # running_vms list or not if it is said to be
         # running on this host but it is not then we
@@ -64,10 +72,6 @@ def maintenance(host):
 
 
 def main(hostname):
-    assert env_vars.get('WITHOUT_CEPH') and os.path.isdir(env_vars.get('VM_DIR')), (
-        "You have set env_vars.get('WITHOUT_CEPH') to True. So, the vm directory mentioned"
-        " in .env file must exists. But, it don't.")
-
     heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,))
 
     host_pool = HostPool(etcd_client, env_vars.get('HOST_PREFIX'))
@@ -99,7 +103,6 @@ def main(hostname):
         request_event = RequestEntry(request_event)
 
         if request_event.type == "TIMEOUT":
-            logger.info("Timeout Event")
             maintenance(host)
             continue
 
@@ -121,7 +124,7 @@ def main(hostname):
             virtualmachine.delete(vm_entry)
 
         elif request_event.type == RequestType.InitVMMigration:
-            virtualmachine.init_migration(vm_entry, host.key)
+            virtualmachine.start(vm_entry, host.key, find_free_port())
 
         elif request_event.type == RequestType.TransferVM:
             virtualmachine.transfer(request_event)
@@ -304,6 +304,7 @@ class QEMUMachine(object):
             LOG.debug('Command: %r', ' '.join(self._qemu_full_args))
             if self._iolog:
                 LOG.debug('Output: %r', self._iolog)
+                raise Exception(self._iolog)
             raise
 
     def _launch(self):
@@ -4,27 +4,28 @@
 # For QEMU Monitor Protocol Commands Information, See
 # https://qemu.weilnetz.de/doc/qemu-doc.html#pcsys_005fmonitor
 
 import errno
 import os
 import random
 import subprocess as sp
 import tempfile
 import time
 
 from functools import wraps
-from os.path import join
+from os.path import join as join_path
 from string import Template
 from typing import Union
 
 import bitmath
 import sshtunnel
 
-from common.helpers import get_ipv4_address
+from common.helpers import get_ipv6_address
 from common.request import RequestEntry, RequestType
 from common.vm import VMEntry, VMStatus
-from config import etcd_client, request_pool, running_vms, vm_pool, env_vars
+from config import etcd_client, request_pool, running_vms, vm_pool, env_vars, image_storage_handler
 from . import qmp
+from host import logger
 
 
 class VM:
     def __init__(self, key, handle, vnc_socket_file):
         self.key = key  # type: str
@@ -106,24 +107,16 @@ def update_radvd_conf(etcd_client):
     sp.check_output(['systemctl', 'restart', 'radvd'])
 
 
-def get_start_command_args(
-    vm_entry, vnc_sock_filename: str, migration=False, migration_port=4444,
-):
+def get_start_command_args(vm_entry, vnc_sock_filename: str, migration=False, migration_port=None):
     threads_per_core = 1
-    vm_memory = int(bitmath.parse_string(vm_entry.specs["ram"]).to_MB())
+    vm_memory = int(bitmath.parse_string_unsafe(vm_entry.specs["ram"]).to_MB())
     vm_cpus = int(vm_entry.specs["cpu"])
     vm_uuid = vm_entry.uuid
     vm_networks = vm_entry.network
 
-    if env_vars.get('WITHOUT_CEPH'):
-        command = "-drive file={},format=raw,if=virtio,cache=none".format(
-            os.path.join(env_vars.get('VM_DIR'), vm_uuid)
-        )
-    else:
-        command = "-drive file=rbd:uservms/{},format=raw,if=virtio,cache=none".format(
-            vm_uuid
-        )
+    command = "-drive file={},format=raw,if=virtio,cache=none".format(
+        image_storage_handler.qemu_path_string(vm_uuid)
+    )
 
     command += " -device virtio-rng-pci -vnc unix:{}".format(vnc_sock_filename)
     command += " -m {} -smp cores={},threads={}".format(
         vm_memory, vm_cpus, threads_per_core
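The switch from parse_string to parse_string_unsafe is what makes "MB, Mb, mB, mb" all mean megabytes: per bitmath's documented behavior, parse_string_unsafe is case-insensitive and treats units as byte-based. A quick check:

    import bitmath

    for spec in ("10MB", "10Mb", "10mB", "10mb"):
        print(spec, int(bitmath.parse_string_unsafe(spec).to_MB()))  # 10 each
    # the strict parse_string("10mb") raises ValueError, and "10Mb" would mean megabits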
@@ -131,7 +124,7 @@ def get_start_command_args(
     command += " -name {}".format(vm_uuid)
 
     if migration:
-        command += " -incoming tcp:0:{}".format(migration_port)
+        command += " -incoming tcp:[::]:{}".format(migration_port)
 
     tap = None
     for network_and_mac in vm_networks:
@@ -154,7 +147,7 @@ def get_start_command_args(
     return command.split(" ")
 
 
-def create_vm_object(vm_entry, migration=False, migration_port=4444):
+def create_vm_object(vm_entry, migration=False, migration_port=None):
     # NOTE: If migration suddenly stop working, having different
     # VNC unix filename on source and destination host can
     # be a possible cause of it.
@@ -198,61 +191,19 @@ def need_running_vm(func):
 
 
 def create(vm_entry: VMEntry):
-    vm_hdd = int(bitmath.parse_string(vm_entry.specs["os-ssd"]).to_MB())
-
-    if env_vars.get('WITHOUT_CEPH'):
-        _command_to_create = [
-            "cp",
-            os.path.join(env_vars.get('IMAGE_DIR'), vm_entry.image_uuid),
-            os.path.join(env_vars.get('VM_DIR'), vm_entry.uuid),
-        ]
-
-        _command_to_extend = [
-            "qemu-img",
-            "resize",
-            "-f", "raw",
-            os.path.join(env_vars.get('VM_DIR'), vm_entry.uuid),
-            "{}M".format(vm_hdd),
-        ]
+    if image_storage_handler.is_vm_image_exists(vm_entry.uuid):
+        # File Already exists. No Problem Continue
+        logger.debug("Image for vm %s exists", vm_entry.uuid)
     else:
-        _command_to_create = [
-            "rbd",
-            "clone",
-            "images/{}@protected".format(vm_entry.image_uuid),
-            "uservms/{}".format(vm_entry.uuid),
-        ]
-
-        _command_to_extend = [
-            "rbd",
-            "resize",
-            "uservms/{}".format(vm_entry.uuid),
-            "--size",
-            vm_hdd,
-        ]
-
-    try:
-        sp.check_output(_command_to_create)
-    except sp.CalledProcessError as e:
-        if e.returncode == errno.EEXIST:
-            logger.debug("Image for vm %s exists", vm_entry.uuid)
-            # File Already exists. No Problem Continue
-            return
-
-        # This exception catches all other exceptions
-        # i.e FileNotFound (BaseImage), pool Does Not Exists etc.
-        logger.exception(e)
-
-        vm_entry.status = "ERROR"
-    else:
-        try:
-            sp.check_output(_command_to_extend)
-        except Exception as e:
-            logger.exception(e)
-        else:
-            logger.info("New VM Created")
+        vm_hdd = int(bitmath.parse_string_unsafe(vm_entry.specs["os-ssd"]).to_MB())
+        if image_storage_handler.make_vm_image(src=vm_entry.image_uuid, dest=vm_entry.uuid):
+            if not image_storage_handler.resize_vm_image(path=vm_entry.uuid, size=vm_hdd):
+                vm_entry.status = "ERROR"
+            else:
+                logger.info("New VM Created")
 
 
-def start(vm_entry: VMEntry):
+def start(vm_entry: VMEntry, destination_host_key=None, migration_port=None):
     _vm = get_vm(running_vms, vm_entry.key)
 
     # VM already running. No need to proceed further.
@@ -260,8 +211,12 @@ def start(vm_entry: VMEntry):
         logger.info("VM %s already running", vm_entry.uuid)
         return
     else:
-        create(vm_entry)
-        launch_vm(vm_entry)
+        if destination_host_key:
+            launch_vm(vm_entry, migration=True, migration_port=migration_port,
+                      destination_host_key=destination_host_key)
+        else:
+            create(vm_entry)
+            launch_vm(vm_entry)
 
 
 @need_running_vm
@@ -278,18 +233,9 @@ def stop(vm_entry):
 def delete(vm_entry):
     logger.info("Deleting VM | %s", vm_entry)
     stop(vm_entry)
-    path_without_protocol = vm_entry.path[vm_entry.path.find(":") + 1:]
-
-    if env_vars.get('WITHOUT_CEPH'):
-        vm_deletion_command = ["rm", os.path.join(env_vars.get('VM_DIR'), vm_entry.uuid)]
-    else:
-        vm_deletion_command = ["rbd", "rm", path_without_protocol]
-
-    try:
-        sp.check_output(vm_deletion_command)
-    except Exception as e:
-        logger.exception(e)
-    else:
-        etcd_client.client.delete(vm_entry.key)
+    r_status = image_storage_handler.delete_vm_image(vm_entry.uuid)
+    if r_status:
+        etcd_client.client.delete(vm_entry.key)
@@ -301,15 +247,16 @@ def transfer(request_event):
     _host, _port = request_event.parameters["host"], request_event.parameters["port"]
     _uuid = request_event.uuid
     _destination = request_event.destination_host_key
-    vm = get_vm(running_vms, join(env_vars.get('VM_PREFIX'), _uuid))
+    vm = get_vm(running_vms, join_path(env_vars.get('VM_PREFIX'), _uuid))
 
     if vm:
         tunnel = sshtunnel.SSHTunnelForwarder(
-            (_host, 22),
+            _host,
             ssh_username=env_vars.get("ssh_username"),
             ssh_pkey=env_vars.get("ssh_pkey"),
             ssh_private_key_password=env_vars.get("ssh_private_key_password"),
             remote_bind_address=("127.0.0.1", _port),
+            ssh_proxy_enabled=True,
+            ssh_proxy=(_host, 22)
         )
         try:
             tunnel.start()
@@ -317,7 +264,7 @@ def transfer(request_event):
         logger.exception("Couldn't establish connection to (%s, 22)", _host)
     else:
         vm.handle.command(
-            "migrate", uri="tcp:{}:{}".format(_host, tunnel.local_bind_port)
+            "migrate", uri="tcp:0.0.0.0:{}".format(tunnel.local_bind_port)
         )
 
         status = vm.handle.command("query-migrate")["status"]
@@ -340,38 +287,22 @@ def transfer(request_event):
             tunnel.close()
 
 
-def init_migration(vm_entry, destination_host_key):
-    # This function would run on destination host i.e host on which the vm
-    # would be transferred after migration.
-    # This host would be responsible for starting VM that would receive
-    # state of VM running on source host.
-
-    _vm = get_vm(running_vms, vm_entry.key)
-
-    if _vm:
-        # VM already running. No need to proceed further.
-        logger.info("%s Already running", _vm.key)
-        return
-
-    launch_vm(vm_entry, migration=True, migration_port=4444,
-              destination_host_key=destination_host_key)
-
-
 def launch_vm(vm_entry, migration=False, migration_port=None, destination_host_key=None):
     logger.info("Starting %s", vm_entry.key)
 
     vm = create_vm_object(vm_entry, migration=migration, migration_port=migration_port)
     try:
         vm.handle.launch()
-    except Exception as e:
-        logger.exception(e)
+    except Exception:
+        logger.exception("Error Occured while starting VM")
+        vm.handle.shutdown()
 
         if migration:
             # We don't care whether MachineError or any other error occurred
-            vm.handle.shutdown()
+            pass
         else:
             # Error during typical launch of a vm
             vm_entry.add_log("Error Occurred while starting VM")
-            vm.handle.shutdown()
             vm_entry.declare_killed()
             vm_pool.put(vm_entry)
     else:
@@ -383,7 +314,7 @@ def launch_vm(vm_entry, migration=False, migration_port=None, destination_host_key=None):
         r = RequestEntry.from_scratch(
             type=RequestType.TransferVM,
             hostname=vm_entry.hostname,
-            parameters={"host": get_ipv4_address(), "port": 4444},
+            parameters={"host": get_ipv6_address(), "port": migration_port},
             uuid=vm_entry.uuid,
             destination_host_key=destination_host_key,
             request_prefix=env_vars.get("REQUEST_PREFIX")
@@ -1,9 +1,9 @@
 import json
 import os
 import subprocess
 import sys
 
-from config import etcd_client, env_vars
+from os.path import join as join_path
+from config import etcd_client, env_vars, image_storage_handler
+from imagescanner import logger
 
@@ -20,20 +20,6 @@ def qemu_img_type(path):
 
 
 def main():
-    # If you are using env_vars.get('WITHOUT_CEPH') FLAG in .env
-    # then please make sure that env_vars.get('IMAGE_DIR') directory
-    # exists otherwise this script would fail
-    if env_vars.get('WITHOUT_CEPH') and not os.path.isdir(env_vars.get('IMAGE_DIR')):
-        print("You have set env_vars.get('WITHOUT_CEPH') to True. So,"
-              "the {} must exists. But, it don't".format(env_vars.get('IMAGE_DIR')))
-        sys.exit(1)
-
-    try:
-        subprocess.check_output(['which', 'qemu-img'])
-    except Exception:
-        print("qemu-img missing")
-        sys.exit(1)
-
     # We want to get images entries that requests images to be created
     images = etcd_client.get_prefix(env_vars.get('IMAGE_PREFIX'), value_in_json=True)
     images_to_be_created = list(filter(lambda im: im.value['status'] == 'TO_BE_CREATED', images))
@@ -44,7 +30,7 @@ def main():
         image_owner = image.value['owner']
         image_filename = image.value['filename']
         image_store_name = image.value['store_name']
-        image_full_path = os.path.join(env_vars.get('BASE_DIR'), image_owner, image_filename)
+        image_full_path = join_path(env_vars.get('BASE_DIR'), image_owner, image_filename)
 
         image_stores = etcd_client.get_prefix(env_vars.get('IMAGE_STORE_PREFIX'), value_in_json=True)
         user_image_store = next(filter(
@@ -58,43 +44,25 @@ def main():
             logger.exception(e)
         else:
             # At least our basic data is available
 
             qemu_img_convert_command = ["qemu-img", "convert", "-f", "qcow2",
                                         "-O", "raw", image_full_path, "image.raw"]
 
-            if env_vars.get('WITHOUT_CEPH'):
-                image_import_command = ["mv", "image.raw", os.path.join(env_vars.get('IMAGE_DIR'), image_uuid)]
-                snapshot_creation_command = ["true"]
-                snapshot_protect_command = ["true"]
-            else:
-                image_import_command = ["rbd", "import", "image.raw",
-                                        "{}/{}".format(image_store_pool, image_uuid)]
-                snapshot_creation_command = ["rbd", "snap", "create",
-                                             "{}/{}@protected".format(image_store_pool, image_uuid)]
-                snapshot_protect_command = ["rbd", "snap", "protect",
-                                            "{}/{}@protected".format(image_store_pool, image_uuid)]
-
             # First check whether the image is qcow2
 
             if qemu_img_type(image_full_path) == "qcow2":
                 try:
                     # Convert .qcow2 to .raw
                     subprocess.check_output(qemu_img_convert_command)
-
-                    # Import image either to ceph/filesystem
-                    subprocess.check_output(image_import_command)
-
-                    # Create and Protect Snapshot
-                    subprocess.check_output(snapshot_creation_command)
-                    subprocess.check_output(snapshot_protect_command)
-
                 except Exception as e:
                     logger.exception(e)
-
                 else:
-                    # Everything is successfully done
-                    image.value["status"] = "CREATED"
-                    etcd_client.put(image.key, json.dumps(image.value))
+                    # Import and Protect
+                    r_status = image_storage_handler.import_image(src="image.raw",
+                                                                  dest=image_uuid,
+                                                                  protect=True)
+                    if r_status:
+                        # Everything is successfully done
+                        image.value["status"] = "CREATED"
+                        etcd_client.put(image.key, json.dumps(image.value))
 
             else:
                 # The user provided image is either not found or of invalid format
                 image.value["status"] = "INVALID_IMAGE"

sanity_checks.py — 33 lines (new file)
@@ -0,0 +1,33 @@
import sys
import subprocess as sp

from os.path import isdir
from config import env_vars


def check():
    #########################
    # ucloud-image-scanner #
    #########################
    if env_vars.get('STORAGE_BACKEND') == 'filesystem' and not isdir(env_vars.get('IMAGE_DIR')):
        print("You have set STORAGE_BACKEND to filesystem. So,"
              "the {} must exists. But, it don't".format(env_vars.get('IMAGE_DIR')))
        sys.exit(1)

    try:
        sp.check_output(['which', 'qemu-img'])
    except Exception:
        print("qemu-img missing")
        sys.exit(1)

    ###############
    # ucloud-host #
    ###############

    if env_vars.get('STORAGE_BACKEND') == 'filesystem' and not isdir(env_vars.get('VM_DIR')):
        print("You have set STORAGE_BACKEND to filesystem. So, the vm directory mentioned"
              " in .env file must exists. But, it don't.")
        sys.exit(1)


if __name__ == "__main__":
    check()
@@ -23,16 +23,16 @@ def remaining_resources(host_specs, vms_specs):
 
     for component in _vms_specs:
         if isinstance(_vms_specs[component], str):
-            _vms_specs[component] = int(bitmath.parse_string(_vms_specs[component]).to_MB())
+            _vms_specs[component] = int(bitmath.parse_string_unsafe(_vms_specs[component]).to_MB())
         elif isinstance(_vms_specs[component], list):
-            _vms_specs[component] = map(lambda x: int(bitmath.parse_string(x).to_MB()), _vms_specs[component])
+            _vms_specs[component] = map(lambda x: int(bitmath.parse_string_unsafe(x).to_MB()), _vms_specs[component])
             _vms_specs[component] = reduce(lambda x, y: x + y, _vms_specs[component], 0)
 
     for component in _remaining:
         if isinstance(_remaining[component], str):
-            _remaining[component] = int(bitmath.parse_string(_remaining[component]).to_MB())
+            _remaining[component] = int(bitmath.parse_string_unsafe(_remaining[component]).to_MB())
         elif isinstance(_remaining[component], list):
-            _remaining[component] = map(lambda x: int(bitmath.parse_string(x).to_MB()), _remaining[component])
+            _remaining[component] = map(lambda x: int(bitmath.parse_string_unsafe(x).to_MB()), _remaining[component])
             _remaining[component] = reduce(lambda x, y: x + y, _remaining[component], 0)
 
     _remaining.subtract(_vms_specs)
@@ -23,8 +23,6 @@ def main():
         ]:
             for request_event in request_iterator:
                 request_entry = RequestEntry(request_event)
-                logger.debug("%s, %s", request_entry.key, request_entry.value)
-
                 # Never Run time critical mechanism inside timeout
                 # mechanism because timeout mechanism only comes
                 # when no other event is happening. It means under
@@ -33,10 +31,10 @@ def main():
 
                 # Detect hosts that are dead and set their status
                 # to "DEAD", and their VMs' status to "KILLED"
-                logger.debug("TIMEOUT event occured")
                 dead_hosts = dead_host_detection()
-                logger.debug("Dead hosts: %s", dead_hosts)
-                dead_host_mitigation(dead_hosts)
+                if dead_hosts:
+                    logger.debug("Dead hosts: %s", dead_hosts)
+                    dead_host_mitigation(dead_hosts)
 
                 # If there are VMs that weren't assigned a host
                 # because there wasn't a host available which
@@ -52,6 +50,8 @@ def main():
                 request_pool.put(r)
 
             elif request_entry.type == RequestType.ScheduleVM:
+                logger.debug("%s, %s", request_entry.key, request_entry.value)
+
                 vm_entry = vm_pool.get(request_entry.uuid)
                 if vm_entry is None:
                     logger.info("Trying to act on {} but it is deleted".format(request_entry.uuid))
@@ -67,7 +67,7 @@ def main():
                                        hosts=[host_pool.get(request_entry.destination)])
                 except NoSuitableHostFound:
                     logger.info("Requested destination host doesn't have enough capacity"
-                                "to hold %s", vm_entry.uuid)
+                                "to hold %s" % vm_entry.uuid)
                 else:
                     r = RequestEntry.from_scratch(type=RequestType.InitVMMigration,
                                                   uuid=request_entry.uuid,

ucloud.py — 47 lines changed
@@ -3,6 +3,7 @@ import multiprocessing as mp
 import logging
 
 from os.path import join as join_path
+from sanity_checks import check
 
 if __name__ == "__main__":
     arg_parser = argparse.ArgumentParser(prog='ucloud',
@@ -21,30 +22,36 @@ if __name__ == "__main__":
         format="%(name)s %(asctime)s: %(levelname)s - %(message)s",
         datefmt="%d-%b-%y %H:%M:%S",
     )
+    try:
+        check()
 
-    if args.component == 'api':
-        from api.main import main
-        main()
-    elif args.component == 'host':
-        from host.main import main
-        hostname = args.component_args
-        mp.set_start_method('spawn')
-        main(*hostname)
-    elif args.component == 'scheduler':
-        from scheduler.main import main
-        main()
-    elif args.component == 'filescanner':
-        from filescanner.main import main
-        main()
-    elif args.component == 'imagescanner':
-        from imagescanner.main import main
-        main()
-    elif args.component == 'metadata':
-        from metadata.main import main
-        main()
+        if args.component == 'api':
+            from api.main import main
+            main()
+        elif args.component == 'host':
+            from host.main import main
+            hostname = args.component_args
+            mp.set_start_method('spawn')
+            main(*hostname)
+        elif args.component == 'scheduler':
+            from scheduler.main import main
+            main()
+        elif args.component == 'filescanner':
+            from filescanner.main import main
+            main()
+        elif args.component == 'imagescanner':
+            from imagescanner.main import main
+            main()
+        elif args.component == 'metadata':
+            from metadata.main import main
+            main()
+
+    except Exception as e:
+        logging.exception(e)
+        print(e)
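Based on the argparse setup above, every component is launched through this single entry point, with the sanity checks running first. Hypothetical invocations (the hostname argument is illustrative):

    python ucloud.py api
    python ucloud.py host host1.example.com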