Efforts to make ucloud a python package

This commit is contained in:
ahmadbilalkhalid 2019-12-03 15:40:41 +05:00
commit 1e7300b56e
71 changed files with 241 additions and 1043 deletions

0
ucloud/__init__.py Normal file
View file

12
ucloud/api/README.md Executable file
View file

@ -0,0 +1,12 @@
# ucloud-api
[![Project Status: WIP Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
## Installation
**Make sure you have Python >= 3.5 and Pipenv installed.**
1. Clone the repository and `cd` into it.
2. Run the following commands
- `pipenv install`
- `pipenv shell`
- `python main.py`

3
ucloud/api/__init__.py Normal file
View file

@ -0,0 +1,3 @@
import logging
logger = logging.getLogger(__name__)

53
ucloud/api/common_fields.py Executable file
View file

@ -0,0 +1,53 @@
import os
from ucloud.config import etcd_client, env_vars
class Optional:
pass
class Field:
def __init__(self, _name, _type, _value=None):
self.name = _name
self.value = _value
self.type = _type
self.__errors = []
def validation(self):
return True
def is_valid(self):
if self.value == KeyError:
self.add_error("'{}' field is a required field".format(self.name))
else:
if isinstance(self.value, Optional):
pass
elif not isinstance(self.value, self.type):
self.add_error("Incorrect Type for '{}' field".format(self.name))
else:
self.validation()
if self.__errors:
return False
return True
def get_errors(self):
return self.__errors
def add_error(self, error):
self.__errors.append(error)
class VmUUIDField(Field):
def __init__(self, data):
self.uuid = data.get("uuid", KeyError)
super().__init__("uuid", str, self.uuid)
self.validation = self.vm_uuid_validation
def vm_uuid_validation(self):
r = etcd_client.get(os.path.join(env_vars.get('VM_PREFIX'), self.uuid))
if not r:
self.add_error("VM with uuid {} does not exists".format(self.uuid))

View file

@ -0,0 +1,15 @@
import json
import os
from uuid import uuid4
from ucloud.config import etcd_client, env_vars
data = {
"is_public": True,
"type": "ceph",
"name": "images",
"description": "first ever public image-store",
"attributes": {"list": [], "key": [], "pool": "images"},
}
etcd_client.put(os.path.join(env_vars.get('IMAGE_STORE_PREFIX'), uuid4().hex), json.dumps(data))

164
ucloud/api/helper.py Executable file
View file

@ -0,0 +1,164 @@
import binascii
import ipaddress
import random
import subprocess as sp
import requests
from pyotp import TOTP
from ucloud.config import vm_pool, env_vars
def check_otp(name, realm, token):
try:
data = {
"auth_name": env_vars.get("AUTH_NAME"),
"auth_token": TOTP(env_vars.get("AUTH_SEED")).now(),
"auth_realm": env_vars.get("AUTH_REALM"),
"name": name,
"realm": realm,
"token": token,
}
except binascii.Error:
return 400
response = requests.post(
"{OTP_SERVER}{OTP_VERIFY_ENDPOINT}".format(
OTP_SERVER=env_vars.get("OTP_SERVER", ""),
OTP_VERIFY_ENDPOINT=env_vars.get("OTP_VERIFY_ENDPOINT", "verify/"),
),
json=data,
)
return response.status_code
def resolve_vm_name(name, owner):
"""Return UUID of Virtual Machine of name == name and owner == owner
Input: name of vm, owner of vm.
Output: uuid of vm if found otherwise None
"""
result = next(
filter(
lambda vm: vm.value["owner"] == owner and vm.value["name"] == name,
vm_pool.vms,
),
None,
)
if result:
return result.key.split("/")[-1]
return None
def resolve_image_name(name, etcd_client):
"""Return image uuid given its name and its store
* If the provided name is not in correct format
i.e {store_name}:{image_name} return ValueError
* If no such image found then return KeyError
"""
seperator = ":"
# Ensure, user/program passed valid name that is of type string
try:
store_name_and_image_name = name.split(seperator)
"""
Examples, where it would work and where it would raise exception
"images:alpine" --> ["images", "alpine"]
"images" --> ["images"] it would raise Exception as non enough value to unpack
"images:alpine:meow" --> ["images", "alpine", "meow"] it would raise Exception
as too many values to unpack
"""
store_name, image_name = store_name_and_image_name
except Exception:
raise ValueError("Image name not in correct format i.e {store_name}:{image_name}")
images = etcd_client.get_prefix(env_vars.get('IMAGE_PREFIX'), value_in_json=True)
# Try to find image with name == image_name and store_name == store_name
try:
image = next(filter(lambda im: im.value['name'] == image_name
and im.value['store_name'] == store_name, images))
except StopIteration:
raise KeyError("No image with name {} found.".format(name))
else:
image_uuid = image.key.split('/')[-1]
return image_uuid
def random_bytes(num=6):
return [random.randrange(256) for _ in range(num)]
def generate_mac(uaa=False, multicast=False, oui=None, separator=':', byte_fmt='%02x'):
mac = random_bytes()
if oui:
if type(oui) == str:
oui = [int(chunk) for chunk in oui.split(separator)]
mac = oui + random_bytes(num=6 - len(oui))
else:
if multicast:
mac[0] |= 1 # set bit 0
else:
mac[0] &= ~1 # clear bit 0
if uaa:
mac[0] &= ~(1 << 1) # clear bit 1
else:
mac[0] |= 1 << 1 # set bit 1
return separator.join(byte_fmt % b for b in mac)
def get_ip_addr(mac_address, device):
"""Return IP address of a device provided its mac address / link local address
and the device with which it is connected.
For Example, if we call get_ip_addr(mac_address="52:54:00:12:34:56", device="br0")
the following two scenarios can happen
1. It would return None if we can't be able to find device whose mac_address is equal
to the arg:mac_address or the mentioned arg:device does not exists or the ip address
we found is local.
2. It would return ip_address of device whose mac_address is equal to arg:mac_address
and is connected/neighbor of arg:device
"""
try:
output = sp.check_output(['ip', '-6', 'neigh', 'show', 'dev', device], stderr=sp.PIPE)
except sp.CalledProcessError:
return None
else:
result = []
output = output.strip().decode("utf-8")
output = output.split("\n")
for entry in output:
entry = entry.split()
if entry:
ip = ipaddress.ip_address(entry[0])
mac = entry[2]
if ip.is_global and mac_address == mac:
result.append(ip)
return result
def mac2ipv6(mac, prefix):
# only accept MACs separated by a colon
parts = mac.split(":")
# modify parts to match IPv6 value
parts.insert(3, "ff")
parts.insert(4, "fe")
parts[0] = "%x" % (int(parts[0], 16) ^ 2)
# format output
ipv6_parts = [str(0)] * 4
for i in range(0, len(parts), 2):
ipv6_parts.append("".join(parts[i:i + 2]))
lower_part = ipaddress.IPv6Address(":".join(ipv6_parts))
prefix = ipaddress.IPv6Address(prefix)
return str(prefix + int(lower_part))

517
ucloud/api/main.py Normal file
View file

@ -0,0 +1,517 @@
import json
import pynetbox
from uuid import uuid4
from os.path import join as join_path
from flask import Flask, request
from flask_restful import Resource, Api
from ucloud.common import counters
from ucloud.common.vm import VMStatus
from ucloud.common.request import RequestEntry, RequestType
from ucloud.config import (etcd_client, request_pool, vm_pool, host_pool, env_vars, image_storage_handler)
from . import schemas
from .helper import generate_mac, mac2ipv6
from ucloud.api import logger
app = Flask(__name__)
api = Api(app)
class CreateVM(Resource):
"""API Request to Handle Creation of VM"""
@staticmethod
def post():
data = request.json
validator = schemas.CreateVMSchema(data)
if validator.is_valid():
vm_uuid = uuid4().hex
vm_key = join_path(env_vars.get("VM_PREFIX"), vm_uuid)
specs = {
"cpu": validator.specs["cpu"],
"ram": validator.specs["ram"],
"os-ssd": validator.specs["os-ssd"],
"hdd": validator.specs["hdd"],
}
macs = [generate_mac() for _ in range(len(data["network"]))]
tap_ids = [counters.increment_etcd_counter(etcd_client, "/v1/counter/tap")
for _ in range(len(data["network"]))]
vm_entry = {
"name": data["vm_name"],
"owner": data["name"],
"owner_realm": data["realm"],
"specs": specs,
"hostname": "",
"status": VMStatus.stopped,
"image_uuid": validator.image_uuid,
"log": [],
"vnc_socket": "",
"network": list(zip(data["network"], macs, tap_ids)),
"metadata": {"ssh-keys": []},
}
etcd_client.put(vm_key, vm_entry, value_in_json=True)
# Create ScheduleVM Request
r = RequestEntry.from_scratch(
type=RequestType.ScheduleVM, uuid=vm_uuid,
request_prefix=env_vars.get("REQUEST_PREFIX")
)
request_pool.put(r)
return {"message": "VM Creation Queued"}, 200
return validator.get_errors(), 400
class VmStatus(Resource):
@staticmethod
def get():
data = request.json
validator = schemas.VMStatusSchema(data)
if validator.is_valid():
vm = vm_pool.get(
join_path(env_vars.get("VM_PREFIX"), data["uuid"])
)
vm_value = vm.value.copy()
vm_value["ip"] = []
for network_mac_and_tap in vm.network:
network_name, mac, tap = network_mac_and_tap
network = etcd_client.get(
join_path(
env_vars.get("NETWORK_PREFIX"),
data["name"],
network_name,
),
value_in_json=True,
)
ipv6_addr = network.value.get("ipv6").split("::")[0] + "::"
vm_value["ip"].append(mac2ipv6(mac, ipv6_addr))
vm.value = vm_value
return vm.value
else:
return validator.get_errors(), 400
class CreateImage(Resource):
@staticmethod
def post():
data = request.json
validator = schemas.CreateImageSchema(data)
if validator.is_valid():
file_entry = etcd_client.get(
join_path(env_vars.get("FILE_PREFIX"), data["uuid"])
)
file_entry_value = json.loads(file_entry.value)
image_entry_json = {
"status": "TO_BE_CREATED",
"owner": file_entry_value["owner"],
"filename": file_entry_value["filename"],
"name": data["name"],
"store_name": data["image_store"],
"visibility": "public",
}
etcd_client.put(
join_path(env_vars.get("IMAGE_PREFIX"), data["uuid"]),
json.dumps(image_entry_json),
)
return {"message": "Image queued for creation."}
return validator.get_errors(), 400
class ListPublicImages(Resource):
@staticmethod
def get():
images = etcd_client.get_prefix(
env_vars.get("IMAGE_PREFIX"), value_in_json=True
)
r = {
"images": []
}
for image in images:
image_key = "{}:{}".format(
image.value["store_name"], image.value["name"]
)
r["images"].append(
{"name": image_key, "status": image.value["status"]}
)
return r, 200
class VMAction(Resource):
@staticmethod
def post():
data = request.json
validator = schemas.VmActionSchema(data)
if validator.is_valid():
vm_entry = vm_pool.get(
join_path(env_vars.get("VM_PREFIX"), data["uuid"])
)
action = data["action"]
if action == "start":
action = "schedule"
if action == "delete" and vm_entry.hostname == "":
if image_storage_handler.is_vm_image_exists(vm_entry.uuid):
r_status = image_storage_handler.delete_vm_image(vm_entry.uuid)
if r_status:
etcd_client.client.delete(vm_entry.key)
return {"message": "VM successfully deleted"}
else:
logger.error("Some Error Occurred while deleting VM")
return {"message": "VM deletion unsuccessfull"}
else:
etcd_client.client.delete(vm_entry.key)
return {"message": "VM successfully deleted"}
r = RequestEntry.from_scratch(
type="{}VM".format(action.title()),
uuid=data["uuid"],
hostname=vm_entry.hostname,
request_prefix=env_vars.get("REQUEST_PREFIX")
)
request_pool.put(r)
return {"message": "VM {} Queued".format(action.title())}, 200
else:
return validator.get_errors(), 400
class VMMigration(Resource):
@staticmethod
def post():
data = request.json
validator = schemas.VmMigrationSchema(data)
if validator.is_valid():
vm = vm_pool.get(data["uuid"])
r = RequestEntry.from_scratch(
type=RequestType.ScheduleVM,
uuid=vm.uuid,
destination=join_path(
env_vars.get("HOST_PREFIX"), validator.destination.value
),
migration=True,
request_prefix=env_vars.get("REQUEST_PREFIX")
)
request_pool.put(r)
return {"message": "VM Migration Initialization Queued"}, 200
else:
return validator.get_errors(), 400
class ListUserVM(Resource):
@staticmethod
def get():
data = request.json
validator = schemas.OTPSchema(data)
if validator.is_valid():
vms = etcd_client.get_prefix(
env_vars.get("VM_PREFIX"), value_in_json=True
)
return_vms = []
user_vms = filter(lambda v: v.value["owner"] == data["name"], vms)
for vm in user_vms:
return_vms.append(
{
"name": vm.value["name"],
"vm_uuid": vm.key.split("/")[-1],
"specs": vm.value["specs"],
"status": vm.value["status"],
"hostname": vm.value["hostname"],
# "mac": vm.value["mac"],
"vnc_socket": None
if vm.value.get("vnc_socket", None) is None
else vm.value["vnc_socket"],
}
)
if return_vms:
return {"message": return_vms}, 200
return {"message": "No VM found"}, 404
else:
return validator.get_errors(), 400
class ListUserFiles(Resource):
@staticmethod
def get():
data = request.json
validator = schemas.OTPSchema(data)
if validator.is_valid():
files = etcd_client.get_prefix(
env_vars.get("FILE_PREFIX"), value_in_json=True
)
return_files = []
user_files = list(
filter(lambda f: f.value["owner"] == data["name"], files)
)
for file in user_files:
return_files.append(
{
"filename": file.value["filename"],
"uuid": file.key.split("/")[-1],
}
)
return {"message": return_files}, 200
else:
return validator.get_errors(), 400
class CreateHost(Resource):
@staticmethod
def post():
data = request.json
validator = schemas.CreateHostSchema(data)
if validator.is_valid():
host_key = join_path(env_vars.get("HOST_PREFIX"), uuid4().hex)
host_entry = {
"specs": data["specs"],
"hostname": data["hostname"],
"status": "DEAD",
"last_heartbeat": "",
}
etcd_client.put(host_key, host_entry, value_in_json=True)
return {"message": "Host Created"}, 200
return validator.get_errors(), 400
class ListHost(Resource):
@staticmethod
def get():
hosts = host_pool.hosts
r = {
host.key: {
"status": host.status,
"specs": host.specs,
"hostname": host.hostname,
}
for host in hosts
}
return r, 200
class GetSSHKeys(Resource):
@staticmethod
def get():
data = request.json
validator = schemas.GetSSHSchema(data)
if validator.is_valid():
if not validator.key_name.value:
# {user_prefix}/{realm}/{name}/key/
etcd_key = join_path(
env_vars.get('USER_PREFIX'),
data["realm"],
data["name"],
"key",
)
etcd_entry = etcd_client.get_prefix(
etcd_key, value_in_json=True
)
keys = {
key.key.split("/")[-1]: key.value for key in etcd_entry
}
return {"keys": keys}
else:
# {user_prefix}/{realm}/{name}/key/{key_name}
etcd_key = join_path(
env_vars.get('USER_PREFIX'),
data["realm"],
data["name"],
"key",
data["key_name"],
)
etcd_entry = etcd_client.get(etcd_key, value_in_json=True)
if etcd_entry:
return {
"keys": {
etcd_entry.key.split("/")[-1]: etcd_entry.value
}
}
else:
return {"keys": {}}
else:
return validator.get_errors(), 400
class AddSSHKey(Resource):
@staticmethod
def post():
data = request.json
validator = schemas.AddSSHSchema(data)
if validator.is_valid():
# {user_prefix}/{realm}/{name}/key/{key_name}
etcd_key = join_path(
env_vars.get("USER_PREFIX"),
data["realm"],
data["name"],
"key",
data["key_name"],
)
etcd_entry = etcd_client.get(etcd_key, value_in_json=True)
if etcd_entry:
return {
"message": "Key with name '{}' already exists".format(
data["key_name"]
)
}
else:
# Key Not Found. It implies user' haven't added any key yet.
etcd_client.put(etcd_key, data["key"], value_in_json=True)
return {"message": "Key added successfully"}
else:
return validator.get_errors(), 400
class RemoveSSHKey(Resource):
@staticmethod
def get():
data = request.json
validator = schemas.RemoveSSHSchema(data)
if validator.is_valid():
# {user_prefix}/{realm}/{name}/key/{key_name}
etcd_key = join_path(
env_vars.get("USER_PREFIX"),
data["realm"],
data["name"],
"key",
data["key_name"],
)
etcd_entry = etcd_client.get(etcd_key, value_in_json=True)
if etcd_entry:
etcd_client.client.delete(etcd_key)
return {"message": "Key successfully removed."}
else:
return {
"message": "No Key with name '{}' Exists at all.".format(
data["key_name"]
)
}
else:
return validator.get_errors(), 400
class CreateNetwork(Resource):
@staticmethod
def post():
data = request.json
validator = schemas.CreateNetwork(data)
if validator.is_valid():
network_entry = {
"id": counters.increment_etcd_counter(
etcd_client, "/v1/counter/vxlan"
),
"type": data["type"],
}
if validator.user.value:
nb = pynetbox.api(
url=env_vars.get("NETBOX_URL"),
token=env_vars.get("NETBOX_TOKEN"),
)
nb_prefix = nb.ipam.prefixes.get(
prefix=env_vars.get("PREFIX")
)
prefix = nb_prefix.available_prefixes.create(
data={
"prefix_length": env_vars.get(
"PREFIX_LENGTH", cast=int
),
"description": '{}\'s network "{}"'.format(
data["name"], data["network_name"]
),
"is_pool": True,
}
)
network_entry["ipv6"] = prefix["prefix"]
else:
network_entry["ipv6"] = "fd00::/64"
network_key = join_path(
env_vars.get("NETWORK_PREFIX"),
data["name"],
data["network_name"],
)
etcd_client.put(network_key, network_entry, value_in_json=True)
return {"message": "Network successfully added."}
else:
return validator.get_errors(), 400
class ListUserNetwork(Resource):
@staticmethod
def get():
data = request.json
validator = schemas.OTPSchema(data)
if validator.is_valid():
prefix = join_path(
env_vars.get("NETWORK_PREFIX"), data["name"]
)
networks = etcd_client.get_prefix(prefix, value_in_json=True)
user_networks = []
for net in networks:
net.value["name"] = net.key.split("/")[-1]
user_networks.append(net.value)
return {"networks": user_networks}, 200
else:
return validator.get_errors(), 400
api.add_resource(CreateVM, "/vm/create")
api.add_resource(VmStatus, "/vm/status")
api.add_resource(VMAction, "/vm/action")
api.add_resource(VMMigration, "/vm/migrate")
api.add_resource(CreateImage, "/image/create")
api.add_resource(ListPublicImages, "/image/list-public")
api.add_resource(ListUserVM, "/user/vms")
api.add_resource(ListUserFiles, "/user/files")
api.add_resource(ListUserNetwork, "/user/networks")
api.add_resource(AddSSHKey, "/user/add-ssh")
api.add_resource(RemoveSSHKey, "/user/remove-ssh")
api.add_resource(GetSSHKeys, "/user/get-ssh")
api.add_resource(CreateHost, "/host/create")
api.add_resource(ListHost, "/host/list")
api.add_resource(CreateNetwork, "/network/create")
def main():
image_stores = list(etcd_client.get_prefix(env_vars.get('IMAGE_STORE_PREFIX'), value_in_json=True))
if len(image_stores) == 0:
data = {
"is_public": True,
"type": "ceph",
"name": "images",
"description": "first ever public image-store",
"attributes": {"list": [], "key": [], "pool": "images"},
}
etcd_client.put(join_path(env_vars.get('IMAGE_STORE_PREFIX'), uuid4().hex), json.dumps(data))
app.run(host="::", debug=True)
if __name__ == "__main__":
main()

456
ucloud/api/schemas.py Executable file
View file

@ -0,0 +1,456 @@
"""
This module contain classes thats validates and intercept/modify
data coming from ucloud-cli (user)
It was primarily developed as an alternative to argument parser
of Flask_Restful which is going to be deprecated. I also tried
marshmallow for that purpose but it was an overkill (because it
do validation + serialization + deserialization) and little
inflexible for our purpose.
"""
# TODO: Fix error message when user's mentioned VM (referred by name)
# does not exists.
#
# Currently, it says uuid is a required field.
import json
import os
import bitmath
from ucloud.common.host import HostStatus
from ucloud.common.vm import VMStatus
from ucloud.config import etcd_client, env_vars, vm_pool, host_pool
from . import helper
from .common_fields import Field, VmUUIDField
from .helper import check_otp, resolve_vm_name
class BaseSchema:
def __init__(self, data, fields=None):
_ = data # suppress linter warning
self.__errors = []
if fields is None:
self.fields = []
else:
self.fields = fields
def validation(self):
# custom validation is optional
return True
def is_valid(self):
for field in self.fields:
field.is_valid()
self.add_field_errors(field)
for parent in self.__class__.__bases__:
try:
parent.validation(self)
except AttributeError:
pass
if not self.__errors:
self.validation()
if self.__errors:
return False
return True
def get_errors(self):
return {"message": self.__errors}
def add_field_errors(self, field: Field):
self.__errors += field.get_errors()
def add_error(self, error):
self.__errors.append(error)
class OTPSchema(BaseSchema):
def __init__(self, data: dict, fields=None):
self.name = Field("name", str, data.get("name", KeyError))
self.realm = Field("realm", str, data.get("realm", KeyError))
self.token = Field("token", str, data.get("token", KeyError))
_fields = [self.name, self.realm, self.token]
if fields:
_fields += fields
super().__init__(data=data, fields=_fields)
def validation(self):
if check_otp(self.name.value, self.realm.value, self.token.value) != 200:
self.add_error("Wrong Credentials")
########################## Image Operations ###############################################
class CreateImageSchema(BaseSchema):
def __init__(self, data):
# Fields
self.uuid = Field("uuid", str, data.get("uuid", KeyError))
self.name = Field("name", str, data.get("name", KeyError))
self.image_store = Field("image_store", str, data.get("image_store", KeyError))
# Validations
self.uuid.validation = self.file_uuid_validation
self.image_store.validation = self.image_store_name_validation
# All Fields
fields = [self.uuid, self.name, self.image_store]
super().__init__(data, fields)
def file_uuid_validation(self):
file_entry = etcd_client.get(os.path.join(env_vars.get('FILE_PREFIX'), self.uuid.value))
if file_entry is None:
self.add_error(
"Image File with uuid '{}' Not Found".format(self.uuid.value)
)
def image_store_name_validation(self):
image_stores = list(etcd_client.get_prefix(env_vars.get('IMAGE_STORE_PREFIX')))
image_store = next(
filter(
lambda s: json.loads(s.value)["name"] == self.image_store.value,
image_stores,
),
None,
)
if not image_store:
self.add_error("Store '{}' does not exists".format(self.image_store.value))
# Host Operations
class CreateHostSchema(OTPSchema):
def __init__(self, data):
self.parsed_specs = {}
# Fields
self.specs = Field("specs", dict, data.get("specs", KeyError))
self.hostname = Field("hostname", str, data.get("hostname", KeyError))
# Validation
self.specs.validation = self.specs_validation
fields = [self.hostname, self.specs]
super().__init__(data=data, fields=fields)
def specs_validation(self):
ALLOWED_BASE = 10
_cpu = self.specs.value.get('cpu', KeyError)
_ram = self.specs.value.get('ram', KeyError)
_os_ssd = self.specs.value.get('os-ssd', KeyError)
_hdd = self.specs.value.get('hdd', KeyError)
if KeyError in [_cpu, _ram, _os_ssd, _hdd]:
self.add_error("You must specify CPU, RAM and OS-SSD in your specs")
return None
try:
parsed_ram = bitmath.parse_string_unsafe(_ram)
parsed_os_ssd = bitmath.parse_string_unsafe(_os_ssd)
if parsed_ram.base != ALLOWED_BASE:
self.add_error("Your specified RAM is not in correct units")
if parsed_os_ssd.base != ALLOWED_BASE:
self.add_error("Your specified OS-SSD is not in correct units")
if _cpu < 1:
self.add_error("CPU must be atleast 1")
if parsed_ram < bitmath.GB(1):
self.add_error("RAM must be atleast 1 GB")
if parsed_os_ssd < bitmath.GB(10):
self.add_error("OS-SSD must be atleast 10 GB")
parsed_hdd = []
for hdd in _hdd:
_parsed_hdd = bitmath.parse_string_unsafe(hdd)
if _parsed_hdd.base != ALLOWED_BASE:
self.add_error("Your specified HDD is not in correct units")
break
else:
parsed_hdd.append(str(_parsed_hdd))
except ValueError:
# TODO: Find some good error message
self.add_error("Specs are not correct.")
else:
if self.get_errors():
self.specs = {
'cpu': _cpu,
'ram': str(parsed_ram),
'os-ssd': str(parsed_os_ssd),
'hdd': parsed_hdd
}
def validation(self):
if self.realm.value != "ungleich-admin":
self.add_error("Invalid Credentials/Insufficient Permission")
# VM Operations
class CreateVMSchema(OTPSchema):
def __init__(self, data):
self.parsed_specs = {}
# Fields
self.specs = Field("specs", dict, data.get("specs", KeyError))
self.vm_name = Field("vm_name", str, data.get("vm_name", KeyError))
self.image = Field("image", str, data.get("image", KeyError))
self.network = Field("network", list, data.get("network", KeyError))
# Validation
self.image.validation = self.image_validation
self.vm_name.validation = self.vm_name_validation
self.specs.validation = self.specs_validation
self.network.validation = self.network_validation
fields = [self.vm_name, self.image, self.specs, self.network]
super().__init__(data=data, fields=fields)
def image_validation(self):
try:
image_uuid = helper.resolve_image_name(self.image.value, etcd_client)
except Exception as e:
self.add_error(str(e))
else:
self.image_uuid = image_uuid
def vm_name_validation(self):
if resolve_vm_name(name=self.vm_name.value, owner=self.name.value):
self.add_error(
'VM with same name "{}" already exists'.format(self.vm_name.value)
)
def network_validation(self):
_network = self.network.value
if _network:
for net in _network:
network = etcd_client.get(os.path.join(env_vars.get('NETWORK_PREFIX'),
self.name.value,
net), value_in_json=True)
if not network:
self.add_error("Network with name {} does not exists" \
.format(net))
def specs_validation(self):
ALLOWED_BASE = 10
_cpu = self.specs.value.get('cpu', KeyError)
_ram = self.specs.value.get('ram', KeyError)
_os_ssd = self.specs.value.get('os-ssd', KeyError)
_hdd = self.specs.value.get('hdd', KeyError)
if KeyError in [_cpu, _ram, _os_ssd, _hdd]:
self.add_error("You must specify CPU, RAM and OS-SSD in your specs")
return None
try:
parsed_ram = bitmath.parse_string_unsafe(_ram)
parsed_os_ssd = bitmath.parse_string_unsafe(_os_ssd)
if parsed_ram.base != ALLOWED_BASE:
self.add_error("Your specified RAM is not in correct units")
if parsed_os_ssd.base != ALLOWED_BASE:
self.add_error("Your specified OS-SSD is not in correct units")
if _cpu < 1:
self.add_error("CPU must be atleast 1")
if parsed_ram < bitmath.GB(1):
self.add_error("RAM must be atleast 1 GB")
if parsed_os_ssd < bitmath.GB(1):
self.add_error("OS-SSD must be atleast 1 GB")
parsed_hdd = []
for hdd in _hdd:
_parsed_hdd = bitmath.parse_string_unsafe(hdd)
if _parsed_hdd.base != ALLOWED_BASE:
self.add_error("Your specified HDD is not in correct units")
break
else:
parsed_hdd.append(str(_parsed_hdd))
except ValueError:
# TODO: Find some good error message
self.add_error("Specs are not correct.")
else:
if self.get_errors():
self.specs = {
'cpu': _cpu,
'ram': str(parsed_ram),
'os-ssd': str(parsed_os_ssd),
'hdd': parsed_hdd
}
class VMStatusSchema(OTPSchema):
def __init__(self, data):
data["uuid"] = (
resolve_vm_name(
name=data.get("vm_name", None),
owner=(data.get("in_support_of", None) or data.get("name", None)),
)
or KeyError
)
self.uuid = VmUUIDField(data)
fields = [self.uuid]
super().__init__(data, fields)
def validation(self):
vm = vm_pool.get(self.uuid.value)
if not (
vm.value["owner"] == self.name.value or self.realm.value == "ungleich-admin"
):
self.add_error("Invalid User")
class VmActionSchema(OTPSchema):
def __init__(self, data):
data["uuid"] = (
resolve_vm_name(
name=data.get("vm_name", None),
owner=(data.get("in_support_of", None) or data.get("name", None)),
)
or KeyError
)
self.uuid = VmUUIDField(data)
self.action = Field("action", str, data.get("action", KeyError))
self.action.validation = self.action_validation
_fields = [self.uuid, self.action]
super().__init__(data=data, fields=_fields)
def action_validation(self):
allowed_actions = ["start", "stop", "delete"]
if self.action.value not in allowed_actions:
self.add_error(
"Invalid Action. Allowed Actions are {}".format(allowed_actions)
)
def validation(self):
vm = vm_pool.get(self.uuid.value)
if not (
vm.value["owner"] == self.name.value or self.realm.value == "ungleich-admin"
):
self.add_error("Invalid User")
if (
self.action.value == "start"
and vm.status == VMStatus.running
and vm.hostname != ""
):
self.add_error("VM Already Running")
if self.action.value == "stop":
if vm.status == VMStatus.stopped:
self.add_error("VM Already Stopped")
elif vm.status != VMStatus.running:
self.add_error("Cannot stop non-running VM")
class VmMigrationSchema(OTPSchema):
def __init__(self, data):
data["uuid"] = (
resolve_vm_name(
name=data.get("vm_name", None),
owner=(data.get("in_support_of", None) or data.get("name", None)),
)
or KeyError
)
self.uuid = VmUUIDField(data)
self.destination = Field("destination", str, data.get("destination", KeyError))
self.destination.validation = self.destination_validation
fields = [self.destination]
super().__init__(data=data, fields=fields)
def destination_validation(self):
hostname = self.destination.value
host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
if not host:
self.add_error("No Such Host ({}) exists".format(self.destination.value))
elif host.status != HostStatus.alive:
self.add_error("Destination Host is dead")
else:
self.destination.value = host.key
def validation(self):
vm = vm_pool.get(self.uuid.value)
if not (
vm.value["owner"] == self.name.value or self.realm.value == "ungleich-admin"
):
self.add_error("Invalid User")
if vm.status != VMStatus.running:
self.add_error("Can't migrate non-running VM")
if vm.hostname == os.path.join(env_vars.get('HOST_PREFIX'), self.destination.value):
self.add_error("Destination host couldn't be same as Source Host")
class AddSSHSchema(OTPSchema):
def __init__(self, data):
self.key_name = Field("key_name", str, data.get("key_name", KeyError))
self.key = Field("key", str, data.get("key_name", KeyError))
fields = [self.key_name, self.key]
super().__init__(data=data, fields=fields)
class RemoveSSHSchema(OTPSchema):
def __init__(self, data):
self.key_name = Field("key_name", str, data.get("key_name", KeyError))
fields = [self.key_name]
super().__init__(data=data, fields=fields)
class GetSSHSchema(OTPSchema):
def __init__(self, data):
self.key_name = Field("key_name", str, data.get("key_name", None))
fields = [self.key_name]
super().__init__(data=data, fields=fields)
class CreateNetwork(OTPSchema):
def __init__(self, data):
self.network_name = Field("network_name", str, data.get("network_name", KeyError))
self.type = Field("type", str, data.get("type", KeyError))
self.user = Field("user", bool, bool(data.get("user", False)))
self.network_name.validation = self.network_name_validation
self.type.validation = self.network_type_validation
fields = [self.network_name, self.type, self.user]
super().__init__(data, fields=fields)
def network_name_validation(self):
network = etcd_client.get(os.path.join(env_vars.get('NETWORK_PREFIX'),
self.name.value,
self.network_name.value),
value_in_json=True)
if network:
self.add_error("Network with name {} already exists" \
.format(self.network_name.value))
def network_type_validation(self):
supported_network_types = ["vxlan"]
if self.type.value not in supported_network_types:
self.add_error("Unsupported Network Type. Supported network types are {}".format(supported_network_types))

View file

@ -0,0 +1,3 @@
import logging
logger = logging.getLogger(__name__)

26
ucloud/common/classes.py Normal file
View file

@ -0,0 +1,26 @@
from etcd3_wrapper import EtcdEntry
class SpecificEtcdEntryBase:
def __init__(self, e: EtcdEntry):
self.key = e.key
for k in e.value.keys():
self.__setattr__(k, e.value[k])
def original_keys(self):
r = dict(self.__dict__)
if "key" in r:
del r["key"]
return r
@property
def value(self):
return self.original_keys()
@value.setter
def value(self, v):
self.__dict__ = v
def __repr__(self):
return str(dict(self.__dict__))

21
ucloud/common/counters.py Normal file
View file

@ -0,0 +1,21 @@
from etcd3_wrapper import Etcd3Wrapper
def increment_etcd_counter(etcd_client: Etcd3Wrapper, key):
kv = etcd_client.get(key)
if kv:
counter = int(kv.value)
counter = counter + 1
else:
counter = 1
etcd_client.put(key, str(counter))
return counter
def get_etcd_counter(etcd_client: Etcd3Wrapper, key):
kv = etcd_client.get(key)
if kv:
return int(kv.value)
return None

54
ucloud/common/helpers.py Normal file
View file

@ -0,0 +1,54 @@
import logging
import socket
import requests
import json
from ipaddress import ip_address
from os.path import join as join_path
def create_package_loggers(packages, base_path, mode="a"):
loggers = {}
for pkg in packages:
logger = logging.getLogger(pkg)
logger_handler = logging.FileHandler(
join_path(base_path, "{}.txt".format(pkg)),
mode=mode
)
logger.setLevel(logging.DEBUG)
logger_handler.setFormatter(logging.Formatter(fmt="%(asctime)s: %(levelname)s - %(message)s",
datefmt="%d-%b-%y %H:%M:%S"))
logger.addHandler(logger_handler)
loggers[pkg] = logger
# TODO: Should be removed as soon as migration
# mechanism is finalized inside ucloud
def get_ipv4_address():
# If host is connected to internet
# Return IPv4 address of machine
# Otherwise, return 127.0.0.1
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
try:
s.connect(("8.8.8.8", 80))
except socket.timeout:
address = "127.0.0.1"
except Exception as e:
logging.getLogger().exception(e)
address = "127.0.0.1"
else:
address = s.getsockname()[0]
return address
def get_ipv6_address():
try:
r = requests.get("https://api6.ipify.org?format=json")
content = json.loads(r.content.decode("utf-8"))
ip = ip_address(content["ip"]).exploded
except Exception as e:
logging.exception(e)
else:
return ip

67
ucloud/common/host.py Normal file
View file

@ -0,0 +1,67 @@
import time
from datetime import datetime
from os.path import join
from typing import List
from .classes import SpecificEtcdEntryBase
class HostStatus:
"""Possible Statuses of ucloud host."""
alive = "ALIVE"
dead = "DEAD"
class HostEntry(SpecificEtcdEntryBase):
"""Represents Host Entry Structure and its supporting methods."""
def __init__(self, e):
self.specs = None # type: dict
self.hostname = None # type: str
self.status = None # type: str
self.last_heartbeat = None # type: str
super().__init__(e)
def update_heartbeat(self):
self.status = HostStatus.alive
self.last_heartbeat = time.strftime("%Y-%m-%d %H:%M:%S")
def is_alive(self):
last_heartbeat = datetime.strptime(self.last_heartbeat, "%Y-%m-%d %H:%M:%S")
delta = datetime.now() - last_heartbeat
if delta.total_seconds() > 60:
return False
return True
def declare_dead(self):
self.status = HostStatus.dead
self.last_heartbeat = time.strftime("%Y-%m-%d %H:%M:%S")
class HostPool:
def __init__(self, etcd_client, host_prefix):
self.client = etcd_client
self.prefix = host_prefix
@property
def hosts(self) -> List[HostEntry]:
_hosts = self.client.get_prefix(self.prefix, value_in_json=True)
return [HostEntry(host) for host in _hosts]
def get(self, key):
if not key.startswith(self.prefix):
key = join(self.prefix, key)
v = self.client.get(key, value_in_json=True)
if v:
return HostEntry(v)
return None
def put(self, obj: HostEntry):
self.client.put(obj.key, obj.value, value_in_json=True)
def by_status(self, status, _hosts=None):
if _hosts is None:
_hosts = self.hosts
return list(filter(lambda x: x.status == status, _hosts))

46
ucloud/common/request.py Normal file
View file

@ -0,0 +1,46 @@
import json
from os.path import join
from uuid import uuid4
from etcd3_wrapper.etcd3_wrapper import PsuedoEtcdEntry
from .classes import SpecificEtcdEntryBase
class RequestType:
CreateVM = "CreateVM"
ScheduleVM = "ScheduleVM"
StartVM = "StartVM"
StopVM = "StopVM"
InitVMMigration = "InitVMMigration"
TransferVM = "TransferVM"
DeleteVM = "DeleteVM"
class RequestEntry(SpecificEtcdEntryBase):
def __init__(self, e):
self.type = None # type: str
self.migration = None # type: bool
self.destination = None # type: str
self.uuid = None # type: str
self.hostname = None # type: str
super().__init__(e)
@classmethod
def from_scratch(cls, request_prefix, **kwargs):
e = PsuedoEtcdEntry(join(request_prefix, uuid4().hex),
value=json.dumps(kwargs).encode("utf-8"), value_in_json=True)
return cls(e)
class RequestPool:
def __init__(self, etcd_client, request_prefix):
self.client = etcd_client
self.prefix = request_prefix
def put(self, obj: RequestEntry):
if not obj.key.startswith(self.prefix):
obj.key = join(self.prefix, obj.key)
self.client.put(obj.key, obj.value, value_in_json=True)

View file

@ -0,0 +1,158 @@
import shutil
import subprocess as sp
import os
import stat
from abc import ABC
from . import logger
from os.path import join as join_path
class ImageStorageHandler(ABC):
def __init__(self, image_base, vm_base):
self.image_base = image_base
self.vm_base = vm_base
def import_image(self, image_src, image_dest, protect=False):
"""Put an image at the destination
:param src: An Image file
:param dest: A path where :param src: is to be put.
:param protect: If protect is true then the dest is protect (readonly etc)
The obj must exist on filesystem.
"""
raise NotImplementedError()
def make_vm_image(self, image_path, path):
"""Copy image from src to dest
:param src: A path
:param dest: A path
src and destination must be on same storage system i.e both on file system or both on CEPH etc.
"""
raise NotImplementedError()
def resize_vm_image(self, path, size):
"""Resize image located at :param path:
:param path: The file which is to be resized
:param size: Size must be in Megabytes
"""
raise NotImplementedError()
def delete_vm_image(self, path):
raise NotImplementedError()
def execute_command(self, command, report=True):
command = list(map(str, command))
try:
output = sp.check_output(command, stderr=sp.PIPE)
except Exception as e:
if report:
print(e)
logger.exception(e)
return False
return True
def vm_path_string(self, path):
raise NotImplementedError()
def qemu_path_string(self, path):
raise NotImplementedError()
def is_vm_image_exists(self, path):
raise NotImplementedError()
class FileSystemBasedImageStorageHandler(ImageStorageHandler):
def import_image(self, src, dest, protect=False):
dest = join_path(self.image_base, dest)
try:
shutil.copy(src, dest)
if protect:
os.chmod(dest, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
except Exception as e:
logger.exception(e)
return False
return True
def make_vm_image(self, src, dest):
src = join_path(self.image_base, src)
dest = join_path(self.vm_base, dest)
try:
shutil.copy(src, dest)
except Exception as e:
logger.exception(e)
return False
return True
def resize_vm_image(self, path, size):
path = join_path(self.vm_base, path)
command = ["qemu-img", "resize", "-f", "raw", path, "{}M".format(size)]
if self.execute_command(command):
return True
else:
self.delete_vm_image(path)
return False
def delete_vm_image(self, path):
path = join_path(self.vm_base, path)
try:
os.remove(path)
except Exception as e:
logger.exception(e)
return False
return True
def vm_path_string(self, path):
return join_path(self.vm_base, path)
def qemu_path_string(self, path):
return self.vm_path_string(path)
def is_vm_image_exists(self, path):
path = join_path(self.vm_base, path)
command = ["ls", path]
return self.execute_command(command, report=False)
class CEPHBasedImageStorageHandler(ImageStorageHandler):
def import_image(self, src, dest, protect=False):
dest = join_path(self.image_base, dest)
command = ["rbd", "import", src, dest]
if protect:
snap_create_command = ["rbd", "snap", "create", "{}@protected".format(dest)]
snap_protect_command = ["rbd", "snap", "protect", "{}@protected".format(dest)]
return self.execute_command(command) and self.execute_command(snap_create_command) and\
self.execute_command(snap_protect_command)
return self.execute_command(command)
def make_vm_image(self, src, dest):
src = join_path(self.image_base, src)
dest = join_path(self.vm_base, dest)
command = ["rbd", "clone", "{}@protected".format(src), dest]
return self.execute_command(command)
def resize_vm_image(self, path, size):
path = join_path(self.vm_base, path)
command = ["rbd", "resize", path, "--size", size]
return self.execute_command(command)
def delete_vm_image(self, path):
path = join_path(self.vm_base, path)
command = ["rbd", "rm", path]
return self.execute_command(command)
def vm_path_string(self, path):
return join_path(self.vm_base, path)
def qemu_path_string(self, path):
return "rbd:{}".format(self.vm_path_string(path))
def is_vm_image_exists(self, path):
path = join_path(self.vm_base, path)
command = ["rbd", "info", path]
return self.execute_command(command, report=False)

90
ucloud/common/vm.py Normal file
View file

@ -0,0 +1,90 @@
from contextlib import contextmanager
from datetime import datetime
from os.path import join
from .classes import SpecificEtcdEntryBase
class VMStatus:
stopped = "STOPPED" # After requested_shutdown
killed = "KILLED" # either host died or vm died itself
running = "RUNNING"
error = "ERROR" # An error occurred that cannot be resolved automatically
class VMEntry(SpecificEtcdEntryBase):
def __init__(self, e):
self.owner = None # type: str
self.specs = None # type: dict
self.hostname = None # type: str
self.status = None # type: str
self.image_uuid = None # type: str
self.log = None # type: list
self.in_migration = None # type: bool
super().__init__(e)
@property
def uuid(self):
return self.key.split("/")[-1]
def declare_killed(self):
self.hostname = ""
self.in_migration = False
if self.status == VMStatus.running:
self.status = VMStatus.killed
def declare_stopped(self):
self.hostname = ""
self.in_migration = False
self.status = VMStatus.stopped
def add_log(self, msg):
self.log = self.log[:5]
self.log.append("{} - {}".format(datetime.now().isoformat(), msg))
class VmPool:
def __init__(self, etcd_client, vm_prefix):
self.client = etcd_client
self.prefix = vm_prefix
@property
def vms(self):
_vms = self.client.get_prefix(self.prefix, value_in_json=True)
return [VMEntry(vm) for vm in _vms]
def by_host(self, host, _vms=None):
if _vms is None:
_vms = self.vms
return list(filter(lambda x: x.hostname == host, _vms))
def by_status(self, status, _vms=None):
if _vms is None:
_vms = self.vms
return list(filter(lambda x: x.status == status, _vms))
def except_status(self, status, _vms=None):
if _vms is None:
_vms = self.vms
return list(filter(lambda x: x.status != status, _vms))
def get(self, key):
if not key.startswith(self.prefix):
key = join(self.prefix, key)
v = self.client.get(key, value_in_json=True)
if v:
return VMEntry(v)
return None
def put(self, obj: VMEntry):
self.client.put(obj.key, obj.value, value_in_json=True)
@contextmanager
def get_put(self, key) -> VMEntry:
# Updates object at key on exit
obj = self.get(key)
yield obj
if obj:
self.put(obj)

36
ucloud/config.py Normal file
View file

@ -0,0 +1,36 @@
from etcd3_wrapper import Etcd3Wrapper
from ucloud.common.host import HostPool
from ucloud.common.request import RequestPool
from ucloud.common.vm import VmPool
from ucloud.common.storage_handlers import FileSystemBasedImageStorageHandler, CEPHBasedImageStorageHandler
from decouple import Config, RepositoryEnv
env_vars = Config(RepositoryEnv('/etc/ucloud/ucloud.conf'))
etcd_wrapper_args = ()
etcd_wrapper_kwargs = {
'host': env_vars.get('ETCD_URL', 'localhost'),
'port': env_vars.get('ETCD_PORT', 2379),
'ca_cert': env_vars.get('CA_CERT', None),
'cert_cert': env_vars.get('CERT_CERT', None),
'cert_key': env_vars.get('CERT_KEY', None)
}
etcd_client = Etcd3Wrapper(*etcd_wrapper_args, **etcd_wrapper_kwargs)
host_pool = HostPool(etcd_client, env_vars.get('HOST_PREFIX'))
vm_pool = VmPool(etcd_client, env_vars.get('VM_PREFIX'))
request_pool = RequestPool(etcd_client, env_vars.get('REQUEST_PREFIX'))
running_vms = []
__storage_backend = env_vars.get("STORAGE_BACKEND")
if __storage_backend == "filesystem":
image_storage_handler = FileSystemBasedImageStorageHandler(vm_base=env_vars.get("VM_DIR"),
image_base=env_vars.get("IMAGE_DIR"))
elif __storage_backend == "ceph":
image_storage_handler = CEPHBasedImageStorageHandler(vm_base="ssd", image_base="ssd")
else:
raise Exception("Unknown Image Storage Handler")

22
ucloud/docs/Makefile Normal file
View file

@ -0,0 +1,22 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source/
BUILDDIR = build/
DESTINATION=root@staticweb.ungleich.ch:/home/services/www/ungleichstatic/staticcms.ungleich.ch/www/ucloud/
.PHONY: all build clean
publish: build permissions
rsync -av $(BUILDDIR) $(DESTINATION)
permissions: build
find $(BUILDDIR) -type f -exec chmod 0644 {} \;
find $(BUILDDIR) -type d -exec chmod 0755 {} \;
build:
$(SPHINXBUILD) "$(SOURCEDIR)" "$(BUILDDIR)"

0
ucloud/docs/__init__.py Normal file
View file

View file

View file

@ -0,0 +1,53 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = 'ucloud'
copyright = '2019, ungleich'
author = 'ungleich'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx_rtd_theme',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

View file

@ -0,0 +1,44 @@
graph LR
style ucloud fill:#FFD2FC
style cron fill:#FFF696
style infrastructure fill:#BDF0FF
subgraph ucloud[ucloud]
ucloud-cli[CLI]-->ucloud-api[API]
ucloud-api-->ucloud-scheduler[Scheduler]
ucloud-api-->ucloud-imagescanner[Image Scanner]
ucloud-api-->ucloud-host[Host]
ucloud-scheduler-->ucloud-host
ucloud-host-->need-networking{VM need Networking}
need-networking-->|Yes| networking-scripts
need-networking-->|No| VM[Virtual Machine]
need-networking-->|SLAAC?| radvd
networking-scripts-->VM
networking-scripts--Create Networks Devices-->networking-scripts
subgraph cron[Cron Jobs]
ucloud-imagescanner
ucloud-filescanner[File Scanner]
ucloud-filescanner--Track User files-->ucloud-filescanner
end
subgraph infrastructure[Infrastructure]
radvd
etcd
networking-scripts[Networking Scripts]
ucloud-imagescanner-->image-store
image-store{Image Store}
image-store-->|CEPH| ceph
image-store-->|FILE| file-system
ceph[CEPH]
file-system[File System]
end
subgraph virtual-machine[Virtual Machine]
VM
VM-->ucloud-init
end
subgraph metadata-group[Metadata Server]
metadata-->ucloud-init
ucloud-init<-->metadata
end
end

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 37 KiB

View file

@ -0,0 +1,27 @@
.. ucloud documentation master file, created by
sphinx-quickstart on Mon Nov 11 19:08:16 2019.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to ucloud's documentation!
==================================
.. toctree::
:maxdepth: 2
:caption: Contents:
introduction/introduction
introduction/installation
usage/usage-for-admins
usage/usage-for-users
usage/how-to-create-an-os-image-for-ucloud
theory/summary
misc/todo
troubleshooting/installation-troubleshooting
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View file

@ -0,0 +1,296 @@
Installation
============
This guides includes two type of installation
* File System as Image Storage + Level 2 Network without IPAM and Routing
* CEPH as Image Storage + Level 2 Network with automatic IPAM and Routing
(using Router Advertisement + Netbox)
The guide will explicitly mention a section/subsection if it is exclusive to any
one of the above mentioned scenario.
.. note::
The instructions assumes the following things
* User is **root**.
* Base Directory is :file:`/root/`.
Alpine
------
.. note::
Python Wheel (Binary) Packages does not support Alpine Linux as it is
using musl libc instead of glibc. Therefore, expect longer installation
times than other linux distributions.
Enable Edge Repos, Update and Upgrade
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. warning::
The below commands would overwrite your repositories sources and
upgrade all packages and their dependencies to match those available
in edge repos. **So, be warned**
.. code-block:: sh
:linenos:
cat > /etc/apk/repositories << EOF
http://dl-cdn.alpinelinux.org/alpine/edge/main
http://dl-cdn.alpinelinux.org/alpine/edge/community
http://dl-cdn.alpinelinux.org/alpine/edge/testing
EOF
apk update
apk upgrade
reboot
Install Dependencies
~~~~~~~~~~~~~~~~~~~~
.. note::
The installation and configuration of a production grade etcd cluster
is out of scope of this manual. So, we will install etcd with default
configuration.
.. code-block:: sh
:linenos:
apk add git python3 alpine-sdk python3-dev etcd etcd-ctl openntpd \
libffi-dev openssl-dev make py3-protobuf py3-tempita chrony
pip3 install pipenv
**Install QEMU (For Filesystem based Installation)**
.. code-block:: sh
apk add qemu qemu-system-x86_64 qemu-img
**Install QEMU/CEPH/radvd (For CEPH based Installation)**
.. code-block:: sh
$(git clone https://code.ungleich.ch/ahmedbilal/qemu-with-rbd-alpine.git && cd qemu-with-rbd-alpine && apk add apks/*.apk --allow-untrusted)
apk add ceph radvd
Syncronize Date/Time
~~~~~~~~~~~~~~~~~~~~
.. code-block:: sh
:linenos:
service chronyd start
rc-update add chronyd
Start etcd and enable it
~~~~~~~~~~~~~~~~~~~~~~~~
.. note::
The following :command:`curl` statement shouldn't be run once
etcd is fixed in alpine repos.
.. code-block:: sh
:linenos:
curl https://raw.githubusercontent.com/etcd-io/etcd/release-3.4/etcd.conf.yml.sample -o /etc/etcd/conf.yml
service etcd start
rc-update add etcd
Install uotp
~~~~~~~~~~~~
.. code-block:: sh
:linenos:
git clone https://code.ungleich.ch/ungleich-public/uotp.git
cd uotp
mv .env.sample .env
pipenv --three --site-packages
pipenv install
pipenv run python app.py
Run :code:`$(cd scripts && pipenv run python get-admin.py)` to get
admin seed. A sample output
.. code-block:: json
{
"seed": "FYTVQ72A2CJJ4TB4",
"realm": ["ungleich-admin"]
}
Now, run :code:`pipenv run python scripts/create-auth.py FYTVQ72A2CJJ4TB4`
(Replace **FYTVQ72A2CJJ4TB4** with your admin seed obtained in previous step).
A sample output is as below. It shows seed of auth.
.. code-block:: json
{
"message": "Account Created",
"name": "auth",
"realm": ["ungleich-auth"],
"seed": "XZLTUMX26TRAZOXC"
}
.. note::
Please note both **admin** and **auth** seeds as we would need them in setting up ucloud.
Install and configure ucloud
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: sh
:linenos:
git clone https://code.ungleich.ch/ucloud/ucloud.git
cd ucloud
pipenv --three --site-packages
pipenv install
**Filesystem based Installation**
You just need to update **AUTH_SEED** in the below code to match your auth's seed.
.. code-block:: sh
:linenos:
mkdir /etc/ucloud
cat > /etc/ucloud/ucloud.conf << EOF
AUTH_NAME=auth
AUTH_SEED=XZLTUMX26TRAZOXC
AUTH_REALM=ungleich-auth
REALM_ALLOWED = ["ungleich-admin", "ungleich-user"]
OTP_SERVER="http://127.0.0.1:8000/"
ETCD_URL=localhost
STORAGE_BACKEND=filesystem
BASE_DIR=/var/www
IMAGE_DIR=/var/image
VM_DIR=/var/vm
VM_PREFIX=/v1/vm/
HOST_PREFIX=/v1/host/
REQUEST_PREFIX=/v1/request/
FILE_PREFIX=/v1/file/
IMAGE_PREFIX=/v1/image/
IMAGE_STORE_PREFIX=/v1/image_store/
USER_PREFIX=/v1/user/
NETWORK_PREFIX=/v1/network/
ssh_username=meow
ssh_pkey="~/.ssh/id_rsa"
VXLAN_PHY_DEV="eth0"
EOF
**CEPH based Installation**
You need to update the following
* **AUTH_SEED**
* **NETBOX_URL**
* **NETBOX_TOKEN**
* **PREFIX**
* **PREFIX_LENGTH**
.. code-block:: sh
:linenos:
mkdir /etc/ucloud
cat > /etc/ucloud/ucloud.conf << EOF
AUTH_NAME=auth
AUTH_SEED=XZLTUMX26TRAZOXC
AUTH_REALM=ungleich-auth
REALM_ALLOWED = ["ungleich-admin", "ungleich-user"]
OTP_SERVER="http://127.0.0.1:8000/"
ETCD_URL=localhost
STORAGE_BACKEND=ceph
BASE_DIR=/var/www
IMAGE_DIR=/var/image
VM_DIR=/var/vm
VM_PREFIX=/v1/vm/
HOST_PREFIX=/v1/host/
REQUEST_PREFIX=/v1/request/
FILE_PREFIX=/v1/file/
IMAGE_PREFIX=/v1/image/
IMAGE_STORE_PREFIX=/v1/image_store/
USER_PREFIX=/v1/user/
NETWORK_PREFIX=/v1/network/
ssh_username=meow
ssh_pkey="~/.ssh/id_rsa"
VXLAN_PHY_DEV="eth0"
NETBOX_URL="<url-for-your-netbox-installation>"
NETBOX_TOKEN="netbox-token"
PREFIX="your-prefix"
PREFIX_LENGTH="64"
EOF
Install and configure ucloud-cli
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: sh
:linenos:
git clone https://code.ungleich.ch/ucloud/ucloud-cli.git
cd ucloud-cli
pipenv --three --site-packages
pipenv install
cat > ~/.ucloud.conf << EOF
UCLOUD_API_SERVER=http://localhost:5000
EOF
mkdir /var/www/
**Only for Filesystem Based Installation**
.. code-block:: sh
mkdir /var/image/
mkdir /var/vm/
Environment Variables and aliases
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To ease usage of ucloud and its various components put the following in
your shell profile e.g *~/.profile*
.. code-block:: sh
export OTP_NAME=admin
export OTP_REALM=ungleich-admin
export OTP_SEED=FYTVQ72A2CJJ4TB4
alias ucloud='cd /root/ucloud/ && pipenv run python ucloud.py'
alias ucloud-cli='cd /root/ucloud-cli/ && pipenv run python ucloud-cli.py'
alias uotp='cd /root/uotp/ && pipenv run python app.py'
and run :code:`source ~/.profile`

View file

@ -0,0 +1,34 @@
What is ucloud?
===============
**Open** + **Simple** + **Easy to hack** + **IPv6 First**
ucloud is an easy to use cloud management system.
It is an alternative to OpenStack, OpenNebula or Cloudstack.
ucloud is the first cloud management system that puts IPv6 first. ucloud also has an integral ordering process that we missed in existing solutions.
Tech Stack
----------
* Python 3 as main language.
* Flask for APIs.
* JSON for specifications.
* QEMU (+ kvm acceleration) as hypervisor.
* etcd for key/value storage (specifically all metadata e.g Virtual Machine Specifications, Networks Specifications, Images Specifications etc.).
* CEPH for image storage.
* uotp for user authentication.
* netbox for IPAM.
* radvd for Router Advertisement.
Components
----------
* API
* Scheduler
* Host
* File Scanner
* Image Scanner
* Metadata Server
* VM Init Scripts (dubbed as ucloud-init)

View file

@ -0,0 +1,32 @@
TODO
====
Security
--------
* **Check Authentication:** Nico reported that some endpoints
even work without providing token. (e.g ListUserVM)
Refactoring/Feature
-------------------
* Put overrides for **IMAGE_BASE**, **VM_BASE** in **ImageStorageHandler**.
* Expose more details in ListUserFiles.
* Throw KeyError instead of returning None when some key is not found in etcd.
* Create Network Manager
* That would handle tasks like up/down an interface
* Create VXLANs, Bridges, TAPs.
* Remove them when they are no longer used.
Reliability
-----------
* What to do if some command hangs forever? e.g CEPH commands
:code:`rbd ls ssd` etc. hangs forever if CEPH isn't running
or not responding.
* What to do if etcd goes down?
Misc.
-----
* Put "Always use only one StorageHandler"

View file

@ -0,0 +1,98 @@
Summary
=======
.. image:: /images/ucloud.svg
.. code-block::
<cli>
|
|
|
+-------------------------<api>
| |
| |```````````````|```````````````|
| | | |
| <file_scanner> <scheduler> <image_scanner>
| |
| |
+-------------------------<host>
|
|
|
Virtual Machine------<init>------<metadata>
**ucloud-cli** interact with **ucloud-api** to do the following operations:
- Create/Delete/Start/Stop/Migrate/Probe (Status of) Virtual Machines
- Create/Delete Networks
- Add/Get/Delete SSH Keys
- Create OS Image out of a file (tracked by file_scanner)
- List User's files/networks/vms
- Add Host
ucloud can currently stores OS-Images on
* File System
* `CEPH <https://ceph.io/>`_
**ucloud-api** in turns creates appropriate Requests which are taken
by suitable components of ucloud. For Example, if user uses ucloud-cli
to create a VM, **ucloud-api** would create a **ScheduleVMRequest** containing
things like pointer to VM's entry which have specs, networking
configuration of VMs.
**ucloud-scheduler** accepts requests for VM's scheduling and
migration. It finds a host from a list of available host on which
the incoming VM can run and schedules it on that host.
**ucloud-host** runs on host servers i.e servers that
actually runs virtual machines, accepts requests
intended only for them. It creates/delete/start/stop/migrate
virtual machines. It also arrange network resources needed for the
incoming VM.
**ucloud-filescanner** keep tracks of user's files which would be needed
later for creating OS Images.
**ucloud-imagescanner** converts images files from qcow2 format to raw
format which would then be imported into image store.
* In case of **File System**, the converted image would be copied to
:file:`/var/image/` or the path referred by :envvar:`IMAGE_PATH`
environement variable mentioned in :file:`/etc/ucloud/ucloud.conf`.
* In case of **CEPH**, the converted image would be imported into
specific pool (it depends on the image store in which the image
belongs) of CEPH Block Storage.
**ucloud-metadata** provides metadata which is used to contextualize
VMs. When, the VM is created, it is just clone (duplicate) of OS
image from which it is created. So, to differentiate between my
VM and your VM, the VM need to be contextualized. This works
like the following
.. note::
Actually, ucloud-init makes the GET request. You can also try it
yourself using curl but ucloud-init does that for yourself.
* VM make a GET requests http://metadata which resolves to actual
address of metadata server. The metadata server looks at the IPv6
Address of the requester and extracts the MAC Address which is possible
because the IPv6 address is
`IPv6 EUI-64 <https://community.cisco.com/t5/networking-documents/understanding-ipv6-eui-64-bit-address/ta-p/3116953>`_.
Metadata use this MAC address to find the actual VM to which it belongs
and its owner, ssh-keys and much more. Then, metadata return these
details back to the calling VM in JSON format. These details are
then used be the **ucloud-init** which is explained next.
**ucloud-init** gets the metadata from **ucloud-metadata** to contextualize
the VM. Specifically, it gets owner's ssh keys (or any other keys the
owner of VM added to authorized keys for this VM) and put them to ssh
server's (installed on VM) authorized keys so that owner can access
the VM using ssh. It also install softwares that are needed for correct
behavior of VM e.g rdnssd (needed for `SLAAC <https://en.wikipedia.org/wiki/IPv6#Stateless_address_autoconfiguration_(SLAAC)>`_).

View file

@ -0,0 +1,24 @@
Installation Troubleshooting
============================
etcd doesn't start
------------------
.. code-block:: sh
[root@archlinux ~]# systemctl start etcd
Job for etcd.service failed because the control process exited with error code.
See "systemctl status etcd.service" and "journalctl -xe" for details
possible solution
~~~~~~~~~~~~~~~~~
Try :code:`cat /etc/hosts` if its output contain the following
.. code-block:: sh
127.0.0.1 localhost.localdomain localhost
::1 localhost localhost.localdomain
then unfortunately, we can't help you. But, if it doesn't contain the
above you can put the above in :file:`/etc/hosts` to fix the issue.

View file

@ -0,0 +1,53 @@
How to create VM images for ucloud
==================================
Overview
---------
ucloud tries to be least invasise towards VMs and only require
strictly necessary changes for running in a virtualised
environment. This includes configurations for:
* Configuring the network
* Managing access via ssh keys
* Resizing the attached disk(s)
Network configuration
---------------------
All VMs in ucloud are required to support IPv6. The primary network
configuration is always done using SLAAC. A VM thus needs only to be
configured to
* accept router advertisements on all network interfaces
* use the router advertisements to configure the network interfaces
* accept the DNS entries from the router advertisements
Configuring SSH keys
--------------------
To be able to access the VM, ucloud support provisioning SSH keys.
To accept ssh keys in your VM, request the URL
*http://metadata/ssh_keys*. Add the content to the appropriate user's
**authorized_keys** file. Below you find sample code to accomplish
this task:
.. code-block:: sh
tmp=$(mktemp)
curl -s http://metadata/ssk_keys > "$tmp"
touch ~/.ssh/authorized_keys # ensure it exists
cat ~/.ssh/authorized_keys >> "$tmp"
sort "$tmp" | uniq > ~/.ssh/authorized_keys
Disk resize
-----------
In virtualised environments, the disk sizes might grow. The operating
system should detect disks that are bigger than the existing partition
table and resize accordingly. This task is os specific.
ucloud does not support shrinking disks due to the complexity and
intra OS dependencies.

View file

@ -0,0 +1,155 @@
Usage Guide For Administrators
==============================
Start API
----------
.. code-block:: sh
ucloud api
Host Creation
-------------
Currently, we don't have any host (that runs virtual machines).
So, we need to create it by executing the following command
.. code-block:: sh
ucloud-cli host create --hostname ungleich.ch --cpu 32 --ram '32GB' --os-ssd '32GB'
You should see something like the following
.. code-block:: json
{
"message": "Host Created"
}
Start Scheduler
---------------
Scheduler is responsible for scheduling VMs on appropriate host.
.. code-block:: sh
ucloud scheduler
Start Host
----------
Host is responsible for handling the following actions
* Start VM.
* Stop VM.
* Create VM.
* Delete VM.
* Migrate VM.
* Manage Network Resources needed by VMs.
It uses a hypervisor such as QEMU to perform these actions.
To start host we created earlier, execute the following command
.. code-block:: sh
ucloud host ungleich.ch
Create OS Image
---------------
Create ucloud-init ready OS image (Optional)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This step is optional if you just want to test ucloud. However, sooner or later
you want to create OS images with ucloud-init to properly
contexualize VMs.
1. Start a VM with OS image on which you want to install ucloud-init
2. Execute the following command on the started VM
.. code-block:: sh
apk add git
git clone https://code.ungleich.ch/ucloud/ucloud-init.git
cd ucloud-init
sh ./install.sh
3. Congratulations. Your image is now ucloud-init ready.
Upload Sample OS Image
~~~~~~~~~~~~~~~~~~~~~~
Execute the following to get the sample OS image file.
.. code-block:: sh
mkdir /var/www/admin
(cd /var/www/admin && wget https://cloud.ungleich.ch/s/qTb5dFYW5ii8KsD/download)
Run File Scanner and Image Scanner
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Currently, our uploaded file *alpine-untouched.qcow2* is not tracked by ucloud. We can only make
images from tracked files. So, we need to track the file by running File Scanner
.. code-block:: sh
ucloud filescanner
File Scanner would run, scan your uploaded image and track it. You can check whether your image
is successfully tracked by executing the :code:`ucloud-cli user files`, It will return something like the following
.. _list-user-files:
.. code-block:: json
{
"message": [
{
"filename": "alpine-untouched.qcow2",
"uuid": "3f75bd20-45d6-4013-89c4-7fceaedc8dda"
}
]
}
Our file is now being tracked by ucloud. Lets create an OS image using the uploaded file.
An image belongs to an image store. There are two types of store
* Public Image Store
* Private Image Store (Not Implemented Yet)
.. note::
**Quick Quiz** Have we created an image store yet?
The answer is **No, we haven't**. Creating a sample image store is very easy.
Just execute the following command
.. code-block:: sh
(cd ~/ucloud && pipenv run python api/create_image_store.py)
An image store (with name = "images") would be created. Now, we are fully ready for creating our
very own image. Executing the following command to create image using the file uploaded earlier
.. code-block:: sh
ucloud-cli image create-from-file --name alpine --uuid 3f75bd20-45d6-4013-89c4-7fceaedc8dda --image-store-name images
Please note that your **uuid** would be different. See :ref:`List of user files <list-user-files>`.
Now, ucloud have received our request to create an image from file. We have to run Image Scanner to make the image.
.. code-block:: sh
ucloud imagescanner
To make sure, that our image is create run :code:`ucloud-cli image list --public`. You would get
output something like the following
.. code-block:: json
{
"images": [
{
"name": "images:alpine",
"status": "CREATED"
}
]
}

View file

@ -0,0 +1,119 @@
Usage Guide For End Users
=========================
Create VM
---------
The following command would create a Virtual Machine (name: meow)
with following specs
* CPU: 1
* RAM: 1GB
* OS-SSD: 4GB
* OS: Alpine Linux
.. code-block:: sh
ucloud-cli vm create --vm-name meow --cpu 1 --ram '1gb' --os-ssd '4gb' --image images:alpine
.. _how-to-check-vm-status:
Check VM Status
---------------
.. code-block:: sh
ucloud-cli vm status --vm-name meow
.. code-block:: json
{
"hostname": "/v1/host/74c21c332f664972bf5078e8de080eea",
"image_uuid": "3f75bd20-45d6-4013-89c4-7fceaedc8dda",
"in_migration": null,
"log": [
"2019-11-12T09:11:09.800798 - Started successfully"
],
"metadata": {
"ssh-keys": []
},
"name": "meow",
"network": [],
"owner": "admin",
"owner_realm": "ungleich-admin",
"specs": {
"cpu": 1,
"hdd": [],
"os-ssd": "4.0 GB",
"ram": "1.0 GB"
},
"status": "RUNNING",
"vnc_socket": "/tmp/tmpj1k6sdo_"
}
Connect to VM using VNC
-----------------------
We would need **socat** utility and a remote desktop client
e.g Remmina, KRDC etc. We can get the vnc socket path by getting
its status, see :ref:`how-to-check-vm-status`.
.. code-block:: sh
socat TCP-LISTEN:1234,reuseaddr,fork UNIX-CLIENT:/tmp/tmpj1k6sdo_
Then, launch your remote desktop client and connect to vnc://localhost:1234.
Create Network
--------------
Layer 2 Network with sample IPv6 range fd00::/64 (without IPAM and routing)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: sh
ucloud-cli network create --network-name mynet --network-type vxlan
Layer 2 Network with /64 network with automatic IPAM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: sh
ucloud-cli network create --network-name mynet --network-type vxlan --user True
Attach Network to VM
--------------------
Currently, user can only attach network to his/her VM at
the time of creation. A sample command to create VM with
a network is as follow
.. code-block:: sh
ucloud-cli vm create --vm-name meow2 --cpu 1 --ram '1gb' --os-ssd '4gb' --image images:alpine --network mynet
.. _get-list-of-hosts:
Get List of Hosts
-----------------
.. code-block:: sh
ucloud-cli host list
Migrate VM
----------
.. code-block:: sh
ucloud-cli vm migrate --vm-name meow --destination server1.place10
.. option:: --destination
The name of destination host. You can find a list of host
using :ref:`get-list-of-hosts`

View file

@ -0,0 +1,3 @@
import logging
logger = logging.getLogger(__name__)

126
ucloud/filescanner/main.py Executable file
View file

@ -0,0 +1,126 @@
import glob
import os
import pathlib
import subprocess as sp
import time
from uuid import uuid4
from ucloud.filescanner import logger
from ucloud.config import env_vars, etcd_client
def getxattr(file, attr):
"""Get specified user extended attribute (arg:attr) of a file (arg:file)"""
try:
attr = "user." + attr
value = sp.check_output(['getfattr', file,
'--name', attr,
'--only-values',
'--absolute-names'], stderr=sp.DEVNULL)
value = value.decode("utf-8")
except sp.CalledProcessError as e:
logger.exception(e)
value = None
return value
def setxattr(file, attr, value):
"""Set specified user extended attribute (arg:attr) equal to (arg:value)
of a file (arg:file)"""
attr = "user." + attr
sp.check_output(['setfattr', file,
'--name', attr,
'--value', str(value)])
def sha512sum(file: str):
"""Use sha512sum utility to compute sha512 sum of arg:file
IF arg:file does not exists:
raise FileNotFoundError exception
ELSE IF sum successfully computer:
return computed sha512 sum
ELSE:
return None
"""
if not isinstance(file, str): raise TypeError
try:
output = sp.check_output(["sha512sum", file], stderr=sp.PIPE)
except sp.CalledProcessError as e:
error = e.stderr.decode("utf-8")
if "No such file or directory" in error:
raise FileNotFoundError from None
else:
output = output.decode("utf-8").strip()
output = output.split(" ")
return output[0]
return None
try:
sp.check_output(['which', 'getfattr'])
sp.check_output(['which', 'setfattr'])
except Exception as e:
logger.exception(e)
print('Make sure you have getfattr and setfattr available')
exit(1)
def main():
BASE_DIR = env_vars.get("BASE_DIR")
FILE_PREFIX = env_vars.get("FILE_PREFIX")
# Recursively Get All Files and Folder below BASE_DIR
files = glob.glob("{}/**".format(BASE_DIR), recursive=True)
# Retain only Files
files = list(filter(os.path.isfile, files))
untracked_files = list(
filter(lambda f: not bool(getxattr(f, "user.utracked")), files)
)
tracked_files = list(
filter(lambda f: f not in untracked_files, files)
)
for file in untracked_files:
file_id = uuid4()
# Get Username
owner = pathlib.Path(file).parts[3]
# Get Creation Date of File
# Here, we are assuming that ctime is creation time
# which is mostly not true.
creation_date = time.ctime(os.stat(file).st_ctime)
# Get File Size
size = os.path.getsize(file)
# Compute sha512 sum
sha_sum = sha512sum(file)
# File Path excluding base and username
file_path = pathlib.Path(file).parts[4:]
file_path = os.path.join(*file_path)
# Create Entry
entry_key = os.path.join(FILE_PREFIX, str(file_id))
entry_value = {
"filename": file_path,
"owner": owner,
"sha512sum": sha_sum,
"creation_date": creation_date,
"size": size
}
print("Tracking {}".format(file))
# Insert Entry
etcd_client.put(entry_key, entry_value, value_in_json=True)
setxattr(file, "user.utracked", True)
if __name__ == "__main__":
main()

7
ucloud/hack/README.org Normal file
View file

@ -0,0 +1,7 @@
This directory contains unfinishe hacks / inspirations
* firewalling / networking in ucloud
** automatically route a network per VM - /64?
** nft: one chain per VM on each vm host (?)
*** might have scaling issues?
** firewall rules on each VM host
- mac filtering:

View file

@ -0,0 +1 @@
HOSTNAME=server1.place10

77
ucloud/hack/nftables.conf Normal file
View file

@ -0,0 +1,77 @@
flush ruleset
table bridge filter {
chain prerouting {
type filter hook prerouting priority 0;
policy accept;
ibrname br100 jump netpublic
}
chain netpublic {
icmpv6 type {nd-router-solicit, nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert, nd-redirect } log
}
}
table ip6 filter {
chain forward {
type filter hook forward priority 0;
# this would be nice...
policy drop;
ct state established,related accept;
}
chain prerouting {
type filter hook prerouting priority 0;
policy accept;
# not supporting in here!
iifname vmXXXX jump vmXXXX
iifname vmYYYY jump vmYYYY
iifname brXX jump brXX
iifname vxlan100 jump vxlan100
iifname br100 jump br100
}
# 1. Rules per VM (names: vmXXXXX?
# 2. Rules per network (names: vxlanXXXX, what about non vxlan?)
# 3. Rules per bridge:
# vxlanXX is inside brXX
# This is effectively a network filter
# 4. Kill all malicous traffic:
# - router advertisements from VMs in which they should not announce RAs
chain vxlan100 {
icmpv6 type {nd-router-solicit, nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert, nd-redirect } log
}
chain br100 {
icmpv6 type {nd-router-solicit, nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert, nd-redirect } log
}
chain netpublic {
# drop router advertisements that don't come from us
iifname != vxlanpublic icmpv6 type {nd-router-solicit, nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert, nd-redirect } drop
# icmpv6 type {nd-router-solicit, nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert, nd-redirect } drop
}
# This vlan
chain brXX {
ip6 saddr != 2001:db8:1::/64 drop;
}
chain vmXXXX {
ether saddr != 00:0f:54:0c:11:04 drop;
}
chain vmYYYY {
ether saddr != 00:0f:54:0c:11:05 drop;
}
}

View file

@ -0,0 +1,8 @@
#!/sbin/openrc-run
name="$RC_SVCNAME"
pidfile="/var/run/${name}.pid"
command="$(which pipenv)"
command_args="run python ucloud.py api"
command_background="true"
directory="/root/ucloud"

View file

@ -0,0 +1,8 @@
#!/sbin/openrc-run
name="$RC_SVCNAME"
pidfile="/var/run/${name}.pid"
command="$(which pipenv)"
command_args="run python ucloud.py host ${HOSTNAME}"
command_background="true"
directory="/root/ucloud"

View file

@ -0,0 +1,8 @@
#!/sbin/openrc-run
name="$RC_SVCNAME"
pidfile="/var/run/${name}.pid"
command="$(which pipenv)"
command_args="run python ucloud.py metadata"
command_background="true"
directory="/root/ucloud"

View file

@ -0,0 +1,8 @@
#!/sbin/openrc-run
name="$RC_SVCNAME"
pidfile="/var/run/${name}.pid"
command="$(which pipenv)"
command_args="run python ucloud.py scheduler"
command_background="true"
directory="/root/ucloud"

3
ucloud/host/__init__.py Normal file
View file

@ -0,0 +1,3 @@
import logging
logger = logging.getLogger(__name__)

13
ucloud/host/helper.py Normal file
View file

@ -0,0 +1,13 @@
import socket
from contextlib import closing
def find_free_port():
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
try:
s.bind(('', 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
except Exception:
return None
else:
return s.getsockname()[1]

143
ucloud/host/main.py Executable file
View file

@ -0,0 +1,143 @@
import argparse
import multiprocessing as mp
import time
from etcd3_wrapper import Etcd3Wrapper
from ucloud.common.request import RequestEntry, RequestType
from ucloud.config import (vm_pool, request_pool,
etcd_client, running_vms,
etcd_wrapper_args, etcd_wrapper_kwargs,
HostPool, env_vars)
from .helper import find_free_port
from . import virtualmachine
from host import logger
def update_heartbeat(hostname):
"""Update Last HeartBeat Time for :param hostname: in etcd"""
client = Etcd3Wrapper(*etcd_wrapper_args, **etcd_wrapper_kwargs)
host_pool = HostPool(client, env_vars.get('HOST_PREFIX'))
this_host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
while True:
this_host.update_heartbeat()
host_pool.put(this_host)
time.sleep(10)
def maintenance(host):
# To capture vm running according to running_vms list
# This is to capture successful migration of a VM.
# Suppose, this host is running "vm1" and user initiated
# request to migrate this "vm1" to some other host. On,
# successful migration the destination host would set
# the vm hostname to itself. Thus, we are checking
# whether this host vm is successfully migrated. If yes
# then we shutdown "vm1" on this host.
to_be_removed = []
for running_vm in running_vms:
with vm_pool.get_put(running_vm.key) as vm_entry:
if vm_entry.hostname != host.key and not vm_entry.in_migration:
running_vm.handle.shutdown()
logger.info("VM migration not completed successfully.")
to_be_removed.append(running_vm)
for r in to_be_removed:
running_vms.remove(r)
# To check vm running according to etcd entries
alleged_running_vms = vm_pool.by_status("RUNNING", vm_pool.by_host(host.key))
for vm_entry in alleged_running_vms:
_vm = virtualmachine.get_vm(running_vms, vm_entry.key)
# Whether, the allegedly running vm is in our
# running_vms list or not if it is said to be
# running on this host but it is not then we
# need to shut it down
# This is to capture poweroff/shutdown of a VM
# initiated by user inside VM. OR crash of VM by some
# user running process
if (_vm and not _vm.handle.is_running()) or not _vm:
logger.debug("_vm = %s, is_running() = %s" % (_vm, _vm.handle.is_running()))
vm_entry.add_log("""{} is not running but is said to be running.
So, shutting it down and declare it killed""".format(vm_entry.key))
vm_entry.declare_killed()
vm_pool.put(vm_entry)
if _vm:
running_vms.remove(_vm)
def main(hostname):
heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,))
host_pool = HostPool(etcd_client, env_vars.get('HOST_PREFIX'))
host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
assert host is not None, "No such host with name = {}".format(hostname)
try:
heartbeat_updating_process.start()
except Exception as e:
logger.info("No Need To Go Further. Our heartbeat updating mechanism is not working")
logger.exception(e)
exit(-1)
logger.info("%s Session Started %s", '*' * 5, '*' * 5)
# It is seen that under heavy load, timeout event doesn't come
# in a predictive manner (which is intentional because we give
# higher priority to customer's requests) which delays heart
# beat update which in turn misunderstood by scheduler that the
# host is dead when it is actually alive. So, to ensure that we
# update the heart beat in a predictive manner we start Heart
# beat updating mechanism in separated thread
for events_iterator in [
etcd_client.get_prefix(env_vars.get('REQUEST_PREFIX'), value_in_json=True),
etcd_client.watch_prefix(env_vars.get('REQUEST_PREFIX'), timeout=10, value_in_json=True),
]:
for request_event in events_iterator:
request_event = RequestEntry(request_event)
if request_event.type == "TIMEOUT":
maintenance(host)
continue
# If the event is directed toward me OR I am destination of a InitVMMigration
if request_event.hostname == host.key or request_event.destination == host.key:
logger.debug("VM Request: %s", request_event)
request_pool.client.client.delete(request_event.key)
vm_entry = vm_pool.get(request_event.uuid)
if vm_entry:
if request_event.type == RequestType.StartVM:
virtualmachine.start(vm_entry)
elif request_event.type == RequestType.StopVM:
virtualmachine.stop(vm_entry)
elif request_event.type == RequestType.DeleteVM:
virtualmachine.delete(vm_entry)
elif request_event.type == RequestType.InitVMMigration:
virtualmachine.start(vm_entry, host.key, find_free_port())
elif request_event.type == RequestType.TransferVM:
virtualmachine.transfer(request_event)
else:
logger.info("VM Entry missing")
logger.info("Running VMs %s", running_vms)
if __name__ == "__main__":
argparser = argparse.ArgumentParser()
argparser.add_argument("hostname", help="Name of this host. e.g /v1/host/1")
args = argparser.parse_args()
mp.set_start_method('spawn')
main(args.hostname)

537
ucloud/host/qmp/__init__.py Executable file
View file

@ -0,0 +1,537 @@
# QEMU library
#
# Copyright (C) 2015-2016 Red Hat Inc.
# Copyright (C) 2012 IBM Corp.
#
# Authors:
# Fam Zheng <famz@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.
#
# Based on qmp.py.
#
import errno
import logging
import os
import shutil
import socket
import subprocess
import tempfile
from . import qmp
LOG = logging.getLogger(__name__)
# Mapping host architecture to any additional architectures it can
# support which often includes its 32 bit cousin.
ADDITIONAL_ARCHES = {
"x86_64": "i386",
"aarch64": "armhf"
}
def kvm_available(target_arch=None):
host_arch = os.uname()[4]
if target_arch and target_arch != host_arch:
if target_arch != ADDITIONAL_ARCHES.get(host_arch):
return False
return os.access("/dev/kvm", os.R_OK | os.W_OK)
class QEMUMachineError(Exception):
"""
Exception called when an error in QEMUMachine happens.
"""
class QEMUMachineAddDeviceError(QEMUMachineError):
"""
Exception raised when a request to add a device can not be fulfilled
The failures are caused by limitations, lack of information or conflicting
requests on the QEMUMachine methods. This exception does not represent
failures reported by the QEMU binary itself.
"""
class MonitorResponseError(qmp.QMPError):
"""
Represents erroneous QMP monitor reply
"""
def __init__(self, reply):
try:
desc = reply["error"]["desc"]
except KeyError:
desc = reply
super(MonitorResponseError, self).__init__(desc)
self.reply = reply
class QEMUMachine(object):
"""
A QEMU VM
Use this object as a context manager to ensure the QEMU process terminates::
with VM(binary) as vm:
...
# vm is guaranteed to be shut down here
"""
def __init__(self, binary, args=None, wrapper=None, name=None,
test_dir="/var/tmp", monitor_address=None,
socket_scm_helper=None):
'''
Initialize a QEMUMachine
@param binary: path to the qemu binary
@param args: list of extra arguments
@param wrapper: list of arguments used as prefix to qemu binary
@param name: prefix for socket and log file names (default: qemu-PID)
@param test_dir: where to create socket and log file
@param monitor_address: address for QMP monitor
@param socket_scm_helper: helper program, required for send_fd_scm()
@note: Qemu process is not started until launch() is used.
'''
if args is None:
args = []
if wrapper is None:
wrapper = []
if name is None:
name = "qemu-%d" % os.getpid()
self._name = name
self._monitor_address = monitor_address
self._vm_monitor = None
self._qemu_log_path = None
self._qemu_log_file = None
self._popen = None
self._binary = binary
self._args = list(args) # Force copy args in case we modify them
self._wrapper = wrapper
self._events = []
self._iolog = None
self._socket_scm_helper = socket_scm_helper
self._qmp = None
self._qemu_full_args = None
self._test_dir = test_dir
self._temp_dir = None
self._launched = False
self._machine = None
self._console_set = False
self._console_device_type = None
self._console_address = None
self._console_socket = None
# just in case logging wasn't configured by the main script:
logging.basicConfig(level=logging.DEBUG)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.shutdown()
return False
# This can be used to add an unused monitor instance.
def add_monitor_null(self):
self._args.append('-monitor')
self._args.append('null')
def add_fd(self, fd, fdset, opaque, opts=''):
"""
Pass a file descriptor to the VM
"""
options = ['fd=%d' % fd,
'set=%d' % fdset,
'opaque=%s' % opaque]
if opts:
options.append(opts)
# This did not exist before 3.4, but since then it is
# mandatory for our purpose
if hasattr(os, 'set_inheritable'):
os.set_inheritable(fd, True)
self._args.append('-add-fd')
self._args.append(','.join(options))
return self
# Exactly one of fd and file_path must be given.
# (If it is file_path, the helper will open that file and pass its
# own fd)
def send_fd_scm(self, fd=None, file_path=None):
# In iotest.py, the qmp should always use unix socket.
assert self._qmp.is_scm_available()
if self._socket_scm_helper is None:
raise QEMUMachineError("No path to socket_scm_helper set")
if not os.path.exists(self._socket_scm_helper):
raise QEMUMachineError("%s does not exist" %
self._socket_scm_helper)
# This did not exist before 3.4, but since then it is
# mandatory for our purpose
if hasattr(os, 'set_inheritable'):
os.set_inheritable(self._qmp.get_sock_fd(), True)
if fd is not None:
os.set_inheritable(fd, True)
fd_param = ["%s" % self._socket_scm_helper,
"%d" % self._qmp.get_sock_fd()]
if file_path is not None:
assert fd is None
fd_param.append(file_path)
else:
assert fd is not None
fd_param.append(str(fd))
devnull = open(os.path.devnull, 'rb')
proc = subprocess.Popen(fd_param, stdin=devnull, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, close_fds=False)
output = proc.communicate()[0]
if output:
LOG.debug(output)
return proc.returncode
@staticmethod
def _remove_if_exists(path):
"""
Remove file object at path if it exists
"""
try:
os.remove(path)
except OSError as exception:
if exception.errno == errno.ENOENT:
return
raise
def is_running(self):
return self._popen is not None and self._popen.poll() is None
def exitcode(self):
if self._popen is None:
return None
return self._popen.poll()
def get_pid(self):
if not self.is_running():
return None
return self._popen.pid
def _load_io_log(self):
if self._qemu_log_path is not None:
with open(self._qemu_log_path, "r") as iolog:
self._iolog = iolog.read()
def _base_args(self):
if isinstance(self._monitor_address, tuple):
moncdev = "socket,id=mon,host=%s,port=%s" % (
self._monitor_address[0],
self._monitor_address[1])
else:
moncdev = 'socket,id=mon,path=%s' % self._vm_monitor
args = ['-chardev', moncdev,
'-mon', 'chardev=mon,mode=control']
if self._machine is not None:
args.extend(['-machine', self._machine])
if self._console_set:
self._console_address = os.path.join(self._temp_dir,
self._name + "-console.sock")
chardev = ('socket,id=console,path=%s,server,nowait' %
self._console_address)
args.extend(['-chardev', chardev])
if self._console_device_type is None:
args.extend(['-serial', 'chardev:console'])
else:
device = '%s,chardev=console' % self._console_device_type
args.extend(['-device', device])
return args
def _pre_launch(self):
self._temp_dir = tempfile.mkdtemp(dir=self._test_dir)
if self._monitor_address is not None:
self._vm_monitor = self._monitor_address
else:
self._vm_monitor = os.path.join(self._temp_dir,
self._name + "-monitor.sock")
self._qemu_log_path = os.path.join(self._temp_dir, self._name + ".log")
self._qemu_log_file = open(self._qemu_log_path, 'wb')
self._qmp = qmp.QEMUMonitorProtocol(self._vm_monitor,
server=True)
def _post_launch(self):
self._qmp.accept()
def _post_shutdown(self):
if self._qemu_log_file is not None:
self._qemu_log_file.close()
self._qemu_log_file = None
self._qemu_log_path = None
if self._console_socket is not None:
self._console_socket.close()
self._console_socket = None
if self._temp_dir is not None:
shutil.rmtree(self._temp_dir)
self._temp_dir = None
def launch(self):
"""
Launch the VM and make sure we cleanup and expose the
command line/output in case of exception
"""
if self._launched:
raise QEMUMachineError('VM already launched')
self._iolog = None
self._qemu_full_args = None
try:
self._launch()
self._launched = True
except:
self.shutdown()
LOG.debug('Error launching VM')
if self._qemu_full_args:
LOG.debug('Command: %r', ' '.join(self._qemu_full_args))
if self._iolog:
LOG.debug('Output: %r', self._iolog)
raise Exception(self._iolog)
raise
def _launch(self):
"""
Launch the VM and establish a QMP connection
"""
devnull = open(os.path.devnull, 'rb')
self._pre_launch()
self._qemu_full_args = (self._wrapper + [self._binary] +
self._base_args() + self._args)
LOG.debug('VM launch command: %r', ' '.join(self._qemu_full_args))
self._popen = subprocess.Popen(self._qemu_full_args,
stdin=devnull,
stdout=self._qemu_log_file,
stderr=subprocess.STDOUT,
shell=False,
close_fds=False)
self._post_launch()
def wait(self):
"""
Wait for the VM to power off
"""
self._popen.wait()
self._qmp.close()
self._load_io_log()
self._post_shutdown()
def shutdown(self):
"""
Terminate the VM and clean up
"""
if self.is_running():
try:
self._qmp.cmd('quit')
self._qmp.close()
except:
self._popen.kill()
self._popen.wait()
self._load_io_log()
self._post_shutdown()
exitcode = self.exitcode()
if exitcode is not None and exitcode < 0:
msg = 'qemu received signal %i: %s'
if self._qemu_full_args:
command = ' '.join(self._qemu_full_args)
else:
command = ''
LOG.warn(msg, -exitcode, command)
self._launched = False
def qmp(self, cmd, conv_keys=True, **args):
"""
Invoke a QMP command and return the response dict
"""
qmp_args = dict()
for key, value in args.items():
if conv_keys:
qmp_args[key.replace('_', '-')] = value
else:
qmp_args[key] = value
return self._qmp.cmd(cmd, args=qmp_args)
def command(self, cmd, conv_keys=True, **args):
"""
Invoke a QMP command.
On success return the response dict.
On failure raise an exception.
"""
reply = self.qmp(cmd, conv_keys, **args)
if reply is None:
raise qmp.QMPError("Monitor is closed")
if "error" in reply:
raise MonitorResponseError(reply)
return reply["return"]
def get_qmp_event(self, wait=False):
"""
Poll for one queued QMP events and return it
"""
if len(self._events) > 0:
return self._events.pop(0)
return self._qmp.pull_event(wait=wait)
def get_qmp_events(self, wait=False):
"""
Poll for queued QMP events and return a list of dicts
"""
events = self._qmp.get_events(wait=wait)
events.extend(self._events)
del self._events[:]
self._qmp.clear_events()
return events
@staticmethod
def event_match(event, match=None):
"""
Check if an event matches optional match criteria.
The match criteria takes the form of a matching subdict. The event is
checked to be a superset of the subdict, recursively, with matching
values whenever the subdict values are not None.
This has a limitation that you cannot explicitly check for None values.
Examples, with the subdict queries on the left:
- None matches any object.
- {"foo": None} matches {"foo": {"bar": 1}}
- {"foo": None} matches {"foo": 5}
- {"foo": {"abc": None}} does not match {"foo": {"bar": 1}}
- {"foo": {"rab": 2}} matches {"foo": {"bar": 1, "rab": 2}}
"""
if match is None:
return True
try:
for key in match:
if key in event:
if not QEMUMachine.event_match(event[key], match[key]):
return False
else:
return False
return True
except TypeError:
# either match or event wasn't iterable (not a dict)
return match == event
def event_wait(self, name, timeout=60.0, match=None):
"""
event_wait waits for and returns a named event from QMP with a timeout.
name: The event to wait for.
timeout: QEMUMonitorProtocol.pull_event timeout parameter.
match: Optional match criteria. See event_match for details.
"""
return self.events_wait([(name, match)], timeout)
def events_wait(self, events, timeout=60.0):
"""
events_wait waits for and returns a named event from QMP with a timeout.
events: a sequence of (name, match_criteria) tuples.
The match criteria are optional and may be None.
See event_match for details.
timeout: QEMUMonitorProtocol.pull_event timeout parameter.
"""
def _match(event):
for name, match in events:
if (event['event'] == name and
self.event_match(event, match)):
return True
return False
# Search cached events
for event in self._events:
if _match(event):
self._events.remove(event)
return event
# Poll for new events
while True:
event = self._qmp.pull_event(wait=timeout)
if _match(event):
return event
self._events.append(event)
return None
def get_log(self):
"""
After self.shutdown or failed qemu execution, this returns the output
of the qemu process.
"""
return self._iolog
def add_args(self, *args):
"""
Adds to the list of extra arguments to be given to the QEMU binary
"""
self._args.extend(args)
def set_machine(self, machine_type):
"""
Sets the machine type
If set, the machine type will be added to the base arguments
of the resulting QEMU command line.
"""
self._machine = machine_type
def set_console(self, device_type=None):
"""
Sets the device type for a console device
If set, the console device and a backing character device will
be added to the base arguments of the resulting QEMU command
line.
This is a convenience method that will either use the provided
device type, or default to a "-serial chardev:console" command
line argument.
The actual setting of command line arguments will be be done at
machine launch time, as it depends on the temporary directory
to be created.
@param device_type: the device type, such as "isa-serial". If
None is given (the default value) a "-serial
chardev:console" command line argument will
be used instead, resorting to the machine's
default device type.
"""
self._console_set = True
self._console_device_type = device_type
@property
def console_socket(self):
"""
Returns a socket connected to the console
"""
if self._console_socket is None:
self._console_socket = socket.socket(socket.AF_UNIX,
socket.SOCK_STREAM)
self._console_socket.connect(self._console_address)
return self._console_socket

255
ucloud/host/qmp/qmp.py Executable file
View file

@ -0,0 +1,255 @@
# QEMU Monitor Protocol Python class
#
# Copyright (C) 2009, 2010 Red Hat Inc.
#
# Authors:
# Luiz Capitulino <lcapitulino@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.
import errno
import json
import logging
import socket
class QMPError(Exception):
pass
class QMPConnectError(QMPError):
pass
class QMPCapabilitiesError(QMPError):
pass
class QMPTimeoutError(QMPError):
pass
class QEMUMonitorProtocol(object):
#: Logger object for debugging messages
logger = logging.getLogger('QMP')
#: Socket's error class
error = socket.error
#: Socket's timeout
timeout = socket.timeout
def __init__(self, address, server=False):
"""
Create a QEMUMonitorProtocol class.
@param address: QEMU address, can be either a unix socket path (string)
or a tuple in the form ( address, port ) for a TCP
connection
@param server: server mode listens on the socket (bool)
@raise socket.error on socket connection errors
@note No connection is established, this is done by the connect() or
accept() methods
"""
self.__events = []
self.__address = address
self.__sock = self.__get_sock()
self.__sockfile = None
if server:
self.__sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self.__sock.bind(self.__address)
self.__sock.listen(1)
def __get_sock(self):
if isinstance(self.__address, tuple):
family = socket.AF_INET
else:
family = socket.AF_UNIX
return socket.socket(family, socket.SOCK_STREAM)
def __negotiate_capabilities(self):
greeting = self.__json_read()
if greeting is None or "QMP" not in greeting:
raise QMPConnectError
# Greeting seems ok, negotiate capabilities
resp = self.cmd('qmp_capabilities')
if "return" in resp:
return greeting
raise QMPCapabilitiesError
def __json_read(self, only_event=False):
while True:
data = self.__sockfile.readline()
if not data:
return
resp = json.loads(data)
if 'event' in resp:
self.logger.debug("<<< %s", resp)
self.__events.append(resp)
if not only_event:
continue
return resp
def __get_events(self, wait=False):
"""
Check for new events in the stream and cache them in __events.
@param wait (bool): block until an event is available.
@param wait (float): If wait is a float, treat it as a timeout value.
@raise QMPTimeoutError: If a timeout float is provided and the timeout
period elapses.
@raise QMPConnectError: If wait is True but no events could be
retrieved or if some other error occurred.
"""
# Check for new events regardless and pull them into the cache:
self.__sock.setblocking(0)
try:
self.__json_read()
except socket.error as err:
if err[0] == errno.EAGAIN:
# No data available
pass
self.__sock.setblocking(1)
# Wait for new events, if needed.
# if wait is 0.0, this means "no wait" and is also implicitly false.
if not self.__events and wait:
if isinstance(wait, float):
self.__sock.settimeout(wait)
try:
ret = self.__json_read(only_event=True)
except socket.timeout:
raise QMPTimeoutError("Timeout waiting for event")
except:
raise QMPConnectError("Error while reading from socket")
if ret is None:
raise QMPConnectError("Error while reading from socket")
self.__sock.settimeout(None)
def connect(self, negotiate=True):
"""
Connect to the QMP Monitor and perform capabilities negotiation.
@return QMP greeting dict
@raise socket.error on socket connection errors
@raise QMPConnectError if the greeting is not received
@raise QMPCapabilitiesError if fails to negotiate capabilities
"""
self.__sock.connect(self.__address)
self.__sockfile = self.__sock.makefile()
if negotiate:
return self.__negotiate_capabilities()
def accept(self):
"""
Await connection from QMP Monitor and perform capabilities negotiation.
@return QMP greeting dict
@raise socket.error on socket connection errors
@raise QMPConnectError if the greeting is not received
@raise QMPCapabilitiesError if fails to negotiate capabilities
"""
self.__sock.settimeout(15)
self.__sock, _ = self.__sock.accept()
self.__sockfile = self.__sock.makefile()
return self.__negotiate_capabilities()
def cmd_obj(self, qmp_cmd):
"""
Send a QMP command to the QMP Monitor.
@param qmp_cmd: QMP command to be sent as a Python dict
@return QMP response as a Python dict or None if the connection has
been closed
"""
self.logger.debug(">>> %s", qmp_cmd)
try:
self.__sock.sendall(json.dumps(qmp_cmd).encode('utf-8'))
except socket.error as err:
if err[0] == errno.EPIPE:
return
raise socket.error(err)
resp = self.__json_read()
self.logger.debug("<<< %s", resp)
return resp
def cmd(self, name, args=None, cmd_id=None):
"""
Build a QMP command and send it to the QMP Monitor.
@param name: command name (string)
@param args: command arguments (dict)
@param cmd_id: command id (dict, list, string or int)
"""
qmp_cmd = {'execute': name}
if args:
qmp_cmd['arguments'] = args
if cmd_id:
qmp_cmd['id'] = cmd_id
return self.cmd_obj(qmp_cmd)
def command(self, cmd, **kwds):
"""
Build and send a QMP command to the monitor, report errors if any
"""
ret = self.cmd(cmd, kwds)
if "error" in ret:
raise Exception(ret['error']['desc'])
return ret['return']
def pull_event(self, wait=False):
"""
Pulls a single event.
@param wait (bool): block until an event is available.
@param wait (float): If wait is a float, treat it as a timeout value.
@raise QMPTimeoutError: If a timeout float is provided and the timeout
period elapses.
@raise QMPConnectError: If wait is True but no events could be
retrieved or if some other error occurred.
@return The first available QMP event, or None.
"""
self.__get_events(wait)
if self.__events:
return self.__events.pop(0)
return None
def get_events(self, wait=False):
"""
Get a list of available QMP events.
@param wait (bool): block until an event is available.
@param wait (float): If wait is a float, treat it as a timeout value.
@raise QMPTimeoutError: If a timeout float is provided and the timeout
period elapses.
@raise QMPConnectError: If wait is True but no events could be
retrieved or if some other error occurred.
@return The list of available QMP events.
"""
self.__get_events(wait)
return self.__events
def clear_events(self):
"""
Clear current list of pending events.
"""
self.__events = []
def close(self):
self.__sock.close()
self.__sockfile.close()
def settimeout(self, timeout):
self.__sock.settimeout(timeout)
def get_sock_fd(self):
return self.__sock.fileno()
def is_scm_available(self):
return self.__sock.family == socket.AF_UNIX

337
ucloud/host/virtualmachine.py Executable file
View file

@ -0,0 +1,337 @@
# QEMU Manual
# https://qemu.weilnetz.de/doc/qemu-doc.html
# For QEMU Monitor Protocol Commands Information, See
# https://qemu.weilnetz.de/doc/qemu-doc.html#pcsys_005fmonitor
import os
import random
import subprocess as sp
import tempfile
import time
from functools import wraps
from string import Template
from typing import Union
from os.path import join as join_path
import bitmath
import sshtunnel
from ucloud.common.helpers import get_ipv6_address
from ucloud.common.request import RequestEntry, RequestType
from ucloud.common.vm import VMEntry, VMStatus
from ucloud.config import etcd_client, request_pool, running_vms, vm_pool, env_vars, image_storage_handler
from . import qmp
from host import logger
class VM:
def __init__(self, key, handle, vnc_socket_file):
self.key = key # type: str
self.handle = handle # type: qmp.QEMUMachine
self.vnc_socket_file = vnc_socket_file # type: tempfile.NamedTemporaryFile
def __repr__(self):
return "VM({})".format(self.key)
def create_dev(script, _id, dev, ip=None):
command = [script, _id, dev]
if ip:
command.append(ip)
try:
output = sp.check_output(command, stderr=sp.PIPE)
except Exception as e:
print(e.stderr)
return None
else:
return output.decode("utf-8").strip()
def create_vxlan_br_tap(_id, _dev, tap_id, ip=None):
network_script_base = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'network')
vxlan = create_dev(script=os.path.join(network_script_base, 'create-vxlan.sh'),
_id=_id, dev=_dev)
if vxlan:
bridge = create_dev(script=os.path.join(network_script_base, 'create-bridge.sh'),
_id=_id, dev=vxlan, ip=ip)
if bridge:
tap = create_dev(script=os.path.join(network_script_base, 'create-tap.sh'),
_id=str(tap_id), dev=bridge)
if tap:
return tap
def random_bytes(num=6):
return [random.randrange(256) for _ in range(num)]
def generate_mac(uaa=False, multicast=False, oui=None, separator=':', byte_fmt='%02x'):
mac = random_bytes()
if oui:
if type(oui) == str:
oui = [int(chunk) for chunk in oui.split(separator)]
mac = oui + random_bytes(num=6 - len(oui))
else:
if multicast:
mac[0] |= 1 # set bit 0
else:
mac[0] &= ~1 # clear bit 0
if uaa:
mac[0] &= ~(1 << 1) # clear bit 1
else:
mac[0] |= 1 << 1 # set bit 1
return separator.join(byte_fmt % b for b in mac)
def update_radvd_conf(etcd_client):
network_script_base = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'network')
networks = {
net.value['ipv6']: net.value['id']
for net in etcd_client.get_prefix('/v1/network/', value_in_json=True)
if net.value.get('ipv6')
}
radvd_template = open(os.path.join(network_script_base,
'radvd-template.conf'), 'r').read()
radvd_template = Template(radvd_template)
content = [radvd_template.safe_substitute(bridge='br{}'.format(networks[net]),
prefix=net)
for net in networks if networks.get(net)]
with open('/etc/radvd.conf', 'w') as radvd_conf:
radvd_conf.writelines(content)
try:
sp.check_output(['systemctl', 'restart', 'radvd'])
except Exception:
sp.check_output(['service', 'radvd', 'restart'])
def get_start_command_args(vm_entry, vnc_sock_filename: str, migration=False, migration_port=None):
threads_per_core = 1
vm_memory = int(bitmath.parse_string_unsafe(vm_entry.specs["ram"]).to_MB())
vm_cpus = int(vm_entry.specs["cpu"])
vm_uuid = vm_entry.uuid
vm_networks = vm_entry.network
command = "-name {}_{}".format(vm_entry.owner, vm_entry.name)
command += " -drive file={},format=raw,if=virtio,cache=none".format(
image_storage_handler.qemu_path_string(vm_uuid)
)
command += " -device virtio-rng-pci -vnc unix:{}".format(vnc_sock_filename)
command += " -m {} -smp cores={},threads={}".format(
vm_memory, vm_cpus, threads_per_core
)
if migration:
command += " -incoming tcp:[::]:{}".format(migration_port)
tap = None
for network_mac_and_tap in vm_networks:
network_name, mac, tap = network_mac_and_tap
_key = os.path.join(env_vars.get('NETWORK_PREFIX'), vm_entry.owner, network_name)
network = etcd_client.get(_key, value_in_json=True)
network_type = network.value["type"]
network_id = str(network.value["id"])
network_ipv6 = network.value["ipv6"]
if network_type == "vxlan":
tap = create_vxlan_br_tap(_id=network_id,
_dev=env_vars.get("VXLAN_PHY_DEV"),
tap_id=tap,
ip=network_ipv6)
update_radvd_conf(etcd_client)
command += " -netdev tap,id=vmnet{net_id},ifname={tap},script=no,downscript=no" \
" -device virtio-net-pci,netdev=vmnet{net_id},mac={mac}" \
.format(tap=tap, net_id=network_id, mac=mac)
return command.split(" ")
def create_vm_object(vm_entry, migration=False, migration_port=None):
# NOTE: If migration suddenly stop working, having different
# VNC unix filename on source and destination host can
# be a possible cause of it.
# REQUIREMENT: Use Unix Socket instead of TCP Port for VNC
vnc_sock_file = tempfile.NamedTemporaryFile()
qemu_args = get_start_command_args(
vm_entry=vm_entry,
vnc_sock_filename=vnc_sock_file.name,
migration=migration,
migration_port=migration_port,
)
qemu_machine = qmp.QEMUMachine("/usr/bin/qemu-system-x86_64", args=qemu_args)
return VM(vm_entry.key, qemu_machine, vnc_sock_file)
def get_vm(vm_list: list, vm_key) -> Union[VM, None]:
return next((vm for vm in vm_list if vm.key == vm_key), None)
def need_running_vm(func):
@wraps(func)
def wrapper(e):
vm = get_vm(running_vms, e.key)
if vm:
try:
status = vm.handle.command("query-status")
logger.debug("VM Status Check - %s", status)
except Exception as exception:
logger.info("%s failed - VM %s %s", func.__name__, e, exception)
else:
return func(e)
return None
else:
logger.info("%s failed because VM %s is not running", func.__name__, e.key)
return None
return wrapper
def create(vm_entry: VMEntry):
if image_storage_handler.is_vm_image_exists(vm_entry.uuid):
# File Already exists. No Problem Continue
logger.debug("Image for vm %s exists", vm_entry.uuid)
else:
vm_hdd = int(bitmath.parse_string_unsafe(vm_entry.specs["os-ssd"]).to_MB())
if image_storage_handler.make_vm_image(src=vm_entry.image_uuid, dest=vm_entry.uuid):
if not image_storage_handler.resize_vm_image(path=vm_entry.uuid, size=vm_hdd):
vm_entry.status = VMStatus.error
else:
logger.info("New VM Created")
def start(vm_entry: VMEntry, destination_host_key=None, migration_port=None):
_vm = get_vm(running_vms, vm_entry.key)
# VM already running. No need to proceed further.
if _vm:
logger.info("VM %s already running" % vm_entry.uuid)
return
else:
logger.info("Trying to start %s" % vm_entry.uuid)
if destination_host_key:
launch_vm(vm_entry, migration=True, migration_port=migration_port,
destination_host_key=destination_host_key)
else:
create(vm_entry)
launch_vm(vm_entry)
@need_running_vm
def stop(vm_entry):
vm = get_vm(running_vms, vm_entry.key)
vm.handle.shutdown()
if not vm.handle.is_running():
vm_entry.add_log("Shutdown successfully")
vm_entry.declare_stopped()
vm_pool.put(vm_entry)
running_vms.remove(vm)
def delete(vm_entry):
logger.info("Deleting VM | %s", vm_entry)
stop(vm_entry)
if image_storage_handler.is_vm_image_exists(vm_entry.uuid):
r_status = image_storage_handler.delete_vm_image(vm_entry.uuid)
if r_status:
etcd_client.client.delete(vm_entry.key)
else:
etcd_client.client.delete(vm_entry.key)
def transfer(request_event):
# This function would run on source host i.e host on which the vm
# is running initially. This host would be responsible for transferring
# vm state to destination host.
_host, _port = request_event.parameters["host"], request_event.parameters["port"]
_uuid = request_event.uuid
_destination = request_event.destination_host_key
vm = get_vm(running_vms, join_path(env_vars.get('VM_PREFIX'), _uuid))
if vm:
tunnel = sshtunnel.SSHTunnelForwarder(
_host,
ssh_username=env_vars.get("ssh_username"),
ssh_pkey=env_vars.get("ssh_pkey"),
remote_bind_address=("127.0.0.1", _port),
ssh_proxy_enabled=True,
ssh_proxy=(_host, 22)
)
try:
tunnel.start()
except sshtunnel.BaseSSHTunnelForwarderError:
logger.exception("Couldn't establish connection to (%s, 22)", _host)
else:
vm.handle.command(
"migrate", uri="tcp:0.0.0.0:{}".format(tunnel.local_bind_port)
)
status = vm.handle.command("query-migrate")["status"]
while status not in ["failed", "completed"]:
time.sleep(2)
status = vm.handle.command("query-migrate")["status"]
with vm_pool.get_put(request_event.uuid) as source_vm:
if status == "failed":
source_vm.add_log("Migration Failed")
elif status == "completed":
# If VM is successfully migrated then shutdown the VM
# on this host and update hostname to destination host key
source_vm.add_log("Successfully migrated")
source_vm.hostname = _destination
running_vms.remove(vm)
vm.handle.shutdown()
source_vm.in_migration = False # VM transfer finished
finally:
tunnel.close()
def launch_vm(vm_entry, migration=False, migration_port=None, destination_host_key=None):
logger.info("Starting %s" % vm_entry.key)
vm = create_vm_object(vm_entry, migration=migration, migration_port=migration_port)
try:
vm.handle.launch()
except Exception:
logger.exception("Error Occured while starting VM")
vm.handle.shutdown()
if migration:
# We don't care whether MachineError or any other error occurred
pass
else:
# Error during typical launch of a vm
vm.handle.shutdown()
vm_entry.declare_killed()
vm_pool.put(vm_entry)
else:
vm_entry.vnc_socket = vm.vnc_socket_file.name
running_vms.append(vm)
if migration:
vm_entry.in_migration = True
r = RequestEntry.from_scratch(
type=RequestType.TransferVM,
hostname=vm_entry.hostname,
parameters={"host": get_ipv6_address(), "port": migration_port},
uuid=vm_entry.uuid,
destination_host_key=destination_host_key,
request_prefix=env_vars.get("REQUEST_PREFIX")
)
request_pool.put(r)
else:
# Typical launching of a vm
vm_entry.status = VMStatus.running
vm_entry.add_log("Started successfully")
vm_pool.put(vm_entry)

View file

@ -0,0 +1,3 @@
import logging
logger = logging.getLogger(__name__)

78
ucloud/imagescanner/main.py Executable file
View file

@ -0,0 +1,78 @@
import json
import os
import subprocess
from os.path import join as join_path
from ucloud.config import etcd_client, env_vars, image_storage_handler
from ucloud.imagescanner import logger
def qemu_img_type(path):
qemu_img_info_command = ["qemu-img", "info", "--output", "json", path]
try:
qemu_img_info = subprocess.check_output(qemu_img_info_command)
except Exception as e:
logger.exception(e)
return None
else:
qemu_img_info = json.loads(qemu_img_info.decode("utf-8"))
return qemu_img_info["format"]
def main():
# We want to get images entries that requests images to be created
images = etcd_client.get_prefix(env_vars.get('IMAGE_PREFIX'), value_in_json=True)
images_to_be_created = list(filter(lambda im: im.value['status'] == 'TO_BE_CREATED', images))
for image in images_to_be_created:
try:
image_uuid = image.key.split('/')[-1]
image_owner = image.value['owner']
image_filename = image.value['filename']
image_store_name = image.value['store_name']
image_full_path = join_path(env_vars.get('BASE_DIR'), image_owner, image_filename)
image_stores = etcd_client.get_prefix(env_vars.get('IMAGE_STORE_PREFIX'), value_in_json=True)
user_image_store = next(filter(
lambda s, store_name=image_store_name: s.value["name"] == store_name,
image_stores
))
image_store_pool = user_image_store.value['attributes']['pool']
except Exception as e:
logger.exception(e)
else:
# At least our basic data is available
qemu_img_convert_command = ["qemu-img", "convert", "-f", "qcow2",
"-O", "raw", image_full_path, "image.raw"]
if qemu_img_type(image_full_path) == "qcow2":
try:
# Convert .qcow2 to .raw
subprocess.check_output(qemu_img_convert_command)
except Exception as e:
logger.exception(e)
else:
# Import and Protect
r_status = image_storage_handler.import_image(src="image.raw",
dest=image_uuid,
protect=True)
if r_status:
# Everything is successfully done
image.value["status"] = "CREATED"
etcd_client.put(image.key, json.dumps(image.value))
else:
# The user provided image is either not found or of invalid format
image.value["status"] = "INVALID_IMAGE"
etcd_client.put(image.key, json.dumps(image.value))
try:
os.remove("image.raw")
except Exception:
pass
if __name__ == "__main__":
main()

View file

91
ucloud/metadata/main.py Normal file
View file

@ -0,0 +1,91 @@
import os
from flask import Flask, request
from flask_restful import Resource, Api
from ucloud.config import etcd_client, env_vars, vm_pool
app = Flask(__name__)
api = Api(app)
def get_vm_entry(mac_addr):
return next(filter(lambda vm: mac_addr in list(zip(*vm.network))[1], vm_pool.vms), None)
# https://stackoverflow.com/questions/37140846/how-to-convert-ipv6-link-local-address-to-mac-address-in-python
def ipv62mac(ipv6):
# remove subnet info if given
subnet_index = ipv6.find('/')
if subnet_index != -1:
ipv6 = ipv6[:subnet_index]
ipv6_parts = ipv6.split(':')
mac_parts = list()
for ipv6_part in ipv6_parts[-4:]:
while len(ipv6_part) < 4:
ipv6_part = '0' + ipv6_part
mac_parts.append(ipv6_part[:2])
mac_parts.append(ipv6_part[-2:])
# modify parts to match MAC value
mac_parts[0] = '%02x' % (int(mac_parts[0], 16) ^ 2)
del mac_parts[4]
del mac_parts[3]
return ':'.join(mac_parts)
class Root(Resource):
@staticmethod
def get():
data = get_vm_entry(ipv62mac(request.remote_addr))
if not data:
return {'message': 'Metadata for such VM does not exists.'}, 404
else:
# {env_vars.get('USER_PREFIX')}/{realm}/{name}/key
etcd_key = os.path.join(env_vars.get('USER_PREFIX'), data.value['owner_realm'],
data.value['owner'], 'key')
etcd_entry = etcd_client.get_prefix(etcd_key, value_in_json=True)
user_personal_ssh_keys = [key.value for key in etcd_entry]
data.value['metadata']['ssh-keys'] += user_personal_ssh_keys
return data.value['metadata'], 200
@staticmethod
def post():
return {'message': 'Previous Implementation is deprecated.'}
# data = etcd_client.get("/v1/metadata/{}".format(request.remote_addr), value_in_json=True)
# print(data)
# if data:
# for k in request.json:
# if k not in data.value:
# data.value[k] = request.json[k]
# if k.endswith("-list"):
# data.value[k] = [request.json[k]]
# else:
# if k.endswith("-list"):
# data.value[k].append(request.json[k])
# else:
# data.value[k] = request.json[k]
# etcd_client.put("/v1/metadata/{}".format(request.remote_addr),
# data.value, value_in_json=True)
# else:
# data = {}
# for k in request.json:
# data[k] = request.json[k]
# if k.endswith("-list"):
# data[k] = [request.json[k]]
# etcd_client.put("/v1/metadata/{}".format(request.remote_addr),
# data, value_in_json=True)
api.add_resource(Root, '/')
def main():
app.run(debug=True, host="::", port="80")
if __name__ == '__main__':
main()

195
ucloud/network/README Normal file
View file

@ -0,0 +1,195 @@
The network base - experimental
We want to have 1 "main" network for convience.
We want to be able to create networks automatically, once a new
customer is created -> need hooks!
Mapping:
- each network is a "virtual" network. We use vxlan by default, but
could be any technology!
- we need a counter for vxlan mappings / network IDs -> cannot use
Model in etcd:
/v1/networks/
Tests
see
https://vincent.bernat.ch/en/blog/2017-vxlan-linux
# local 2001:db8:1::1 \
netid=100
dev=wlp2s0
dev=wlp0s20f3
ip -6 link add vxlan${netid} type vxlan \
id ${netid} \
dstport 4789 \
group ff05::${netid} \
dev ${dev} \
ttl 5
[root@diamond ~]# ip addr add 2a0a:e5c0:5::1/48 dev vxlan100
root@manager:~/.ssh# ip addr add 2a0a:e5c0:5::2/48 dev vxlan100
root@manager:~/.ssh# ping -c3 2a0a:e5c0:5::1
PING 2a0a:e5c0:5::1(2a0a:e5c0:5::1) 56 data bytes
64 bytes from 2a0a:e5c0:5::1: icmp_seq=1 ttl=64 time=15.6 ms
64 bytes from 2a0a:e5c0:5::1: icmp_seq=2 ttl=64 time=30.3 ms
64 bytes from 2a0a:e5c0:5::1: icmp_seq=3 ttl=64 time=84.4 ms
--- 2a0a:e5c0:5::1 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2003ms
rtt min/avg/max/mdev = 15.580/43.437/84.417/29.594 ms
--> work even via wifi
--------------------------------------------------------------------------------
Creating a network:
1) part of the initialisation / demo data (?)
We should probably provide some demo sets that can easily be used.
2) manual/hook based request
- hosts might have different network interfaces (?)
-> this will make things very tricky -> don't support it
- endpoint needs only support
--------------------------------------------------------------------------------
IPAM
IP address management (IPAM) is related to networks, but needs to be
decoupled to allow pure L2 networks.
From a customer point of view, we probably want to do something like:
- ORDERING an IPv6 network can include creating a virtual network and
an IPAM service
Maybe "orders" should always be the first class citizen and ucloud
internally "hooks" or binds things together.
--------------------------------------------------------------------------------
testing / hacking:
- starting etcd as storage
[18:07] diamond:~% etcdctl put /v1/network/200 "{ some_network }"
OK
[18:08] diamond:~% etcdctl watch -w=json --prefix /v1/network
{"Header":{"cluster_id":14841639068965178418,"member_id":10276657743932975437,"revision":6,"raft_term":2},"Events":[{"kv":{"key":"L3YxL25ldHdvcmsvMjAw","create_revision":5,"mod_revision":6,"version":2,"value":"eyBzb21lX25ldHdvcmsgfQ=="}}],"CompactRevision":0,"Canceled":false,"Created":false}
--------------------------------------------------------------------------------
Flow for using and creating networks:
- a network is created -> entry in etcd is created
-> we need to keep a counter/lock so that 2 processes don't create
the same network [Ahmed]
-> nothing to be done on the hosts
- a VM using a network is created
- a VM using a network is scheduled to some host
- the local "spawn a VM" process needs to check whether there is a
vxlan interface existing -> if no, create it before creating the VM.
-> if no, also create the bridge
-> possibly adjusting the MTU (??)
-> both names should be in hexadecimal (i.e. brff01 or vxlanff01)
--> this way they are consistent with the multicast ipv6 address
--> attention, ip -6 link ... id XXX expects DECIMAL input
--------------------------------------------------------------------------------
If we also supply IPAM:
- ipam needs to be created *after* the network is created
- ipam is likely to be coupled to netbox (?)
--> we need a "get next /64 prefix" function
- when an ipam service is created in etcd, we need to create a new
radvd instance on all routers (this will be a different service on
BSDs)
- we will need to create a new vxlan device on the routers
- we need to create a new / modify radvd.conf
- only after all of the routers reloaded radvd the ipam service is
available!
--------------------------------------------------------------------------------
If the user requests an IPv4 VM:
- we need to get the next free IPv4 address (again, netbox?)
- we need to create a mapping entry on the routers for NAT64
--> this requires the VM to be in a network with IPAM
--> we always assume that the VM embeds itself using EUI64
--------------------------------------------------------------------------------
mac address handling!
Example
--------------------------------------------------------------------------------
TODOs
- create-vxlan-on-dev.sh -> the multicast group
needs to be ff05:: +int(vxlan_id)
--------------------------------------------------------------------------------
Python hints:
>>> vxlan_id = 3400
>>> b = ipaddress.IPv6Network("ff05::/16")
>>> b[vxlan_id]
IPv6Address('ff05::d48')
we need / should assign hex values for vxlan ids in etcd!
--> easier to read
>>> b[0x3400]
IPv6Address('ff05::3400')
--------------------------------------------------------------------------------
Bridge names are limited to 15 characters
Maximum/highest number of vxlan:
>>> 2**24
16777216
>>> (2**25)-1
33554431
>>> b[33554431]
IPv6Address('ff05::1ff:ffff')
Last interface:
br1ffffff
vxlan1ffffff
root@manager:~/ucloud/network# ip -6 link add vxlan1ffffff type vxlan id 33554431 dstport 4789 group ff05::1ff:ffff dev wlp2s0 ttl 5
Error: argument "33554431" is wrong: invalid id
root@manager:~/ucloud/network# ip -6 link add vxlanffffff type vxlan id 16777215 dstport 4789 group ff05::ff:ffff dev wlp2s0 ttl 5
# id needs to be decimal
root@manager:~# ip -6 link add vxlanff01 type vxlan id ff01 dstport 4789 group ff05::ff01 dev ttl 5
Error: argument "ff01" is wrong: invalid id
root@manager:~# ip -6 link add vxlanff01 type vxlan id 65281 dstport 4789 group ff05::ff01 dev wlp2s0 ttl 5

View file

24
ucloud/network/create-bridge.sh Executable file
View file

@ -0,0 +1,24 @@
#!/bin/sh
if [ $# -ne 3 ]; then
echo "$0 brid dev ip"
echo "f.g. $0 100 vxlan100 fd00:/64"
echo "Missing arguments" >&2
exit 1
fi
brid=$1; shift
dev=$1; shift
ip=$1; shift
bridge=br${brid}
sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
if ! ip link show $bridge > /dev/null 2> /dev/null; then
ip link add name $bridge type bridge
ip link set $bridge up
ip link set $dev master $bridge
ip address add $ip dev $bridge
fi
echo $bridge

22
ucloud/network/create-tap.sh Executable file
View file

@ -0,0 +1,22 @@
#!/bin/sh
if [ $# -ne 2 ]; then
echo "$0 tapid dev"
echo "f.g. $0 100 br100"
echo "Missing arguments" >&2
exit 1
fi
tapid=$1; shift
bridge=$1; shift
vxlan=vxlan${tapid}
tap=tap${tapid}
if ! ip link show $tap > /dev/null 2> /dev/null; then
ip tuntap add $tap mode tap user `whoami`
ip link set $tap up
sleep 0.5s
ip link set $tap master $bridge
fi
echo $tap

26
ucloud/network/create-vxlan.sh Executable file
View file

@ -0,0 +1,26 @@
#!/bin/sh
if [ $# -ne 2 ]; then
echo "$0 vxlanid dev"
echo "f.i. $0 100 eno1"
echo "Missing arguments" >&2
exit 1
fi
netid=$1; shift
dev=$1; shift
vxlan=vxlan${netid}
if ! ip link show $vxlan > /dev/null 2> /dev/null; then
ip -6 link add $vxlan type vxlan \
id $netid \
dstport 4789 \
group ff05::$netid \
dev $dev \
ttl 5
ip link set $dev up
ip link set $vxlan up
fi
echo $vxlan

View file

@ -0,0 +1,13 @@
interface $bridge
{
AdvSendAdvert on;
MinRtrAdvInterval 3;
MaxRtrAdvInterval 5;
AdvDefaultLifetime 10;
prefix $prefix { };
RDNSS 2a0a:e5c0:2:1::5 2a0a:e5c0:2:1::6 { AdvRDNSSLifetime 6000; };
DNSSL place6.ungleich.ch { AdvDNSSLLifetime 6000; } ;
};

33
ucloud/sanity_checks.py Normal file
View file

@ -0,0 +1,33 @@
import sys
import subprocess as sp
from os.path import isdir
from ucloud.config import env_vars
def check():
#########################
# ucloud-image-scanner #
#########################
if env_vars.get('STORAGE_BACKEND') == 'filesystem' and not isdir(env_vars.get('IMAGE_DIR')):
print("You have set STORAGE_BACKEND to filesystem. So,"
"the {} must exists. But, it don't".format(env_vars.get('IMAGE_DIR')))
sys.exit(1)
try:
sp.check_output(['which', 'qemu-img'])
except Exception:
print("qemu-img missing")
sys.exit(1)
###############
# ucloud-host #
###############
if env_vars.get('STORAGE_BACKEND') == 'filesystem' and not isdir(env_vars.get('VM_DIR')):
print("You have set STORAGE_BACKEND to filesystem. So, the vm directory mentioned"
" in .env file must exists. But, it don't.")
sys.exit(1)
if __name__ == "__main__":
check()

View file

@ -0,0 +1,3 @@
import logging
logger = logging.getLogger(__name__)

113
ucloud/scheduler/helper.py Executable file
View file

@ -0,0 +1,113 @@
from collections import Counter
from functools import reduce
import bitmath
from ucloud.common.host import HostStatus
from ucloud.common.request import RequestEntry, RequestType
from ucloud.common.vm import VMStatus
from ucloud.config import vm_pool, host_pool, request_pool, env_vars
def accumulated_specs(vms_specs):
if not vms_specs:
return {}
return reduce((lambda x, y: Counter(x) + Counter(y)), vms_specs)
def remaining_resources(host_specs, vms_specs):
# Return remaining resources host_specs - vms
_vms_specs = Counter(vms_specs)
_remaining = Counter(host_specs)
for component in _vms_specs:
if isinstance(_vms_specs[component], str):
_vms_specs[component] = int(bitmath.parse_string_unsafe(_vms_specs[component]).to_MB())
elif isinstance(_vms_specs[component], list):
_vms_specs[component] = map(lambda x: int(bitmath.parse_string_unsafe(x).to_MB()), _vms_specs[component])
_vms_specs[component] = reduce(lambda x, y: x + y, _vms_specs[component], 0)
for component in _remaining:
if isinstance(_remaining[component], str):
_remaining[component] = int(bitmath.parse_string_unsafe(_remaining[component]).to_MB())
elif isinstance(_remaining[component], list):
_remaining[component] = map(lambda x: int(bitmath.parse_string_unsafe(x).to_MB()), _remaining[component])
_remaining[component] = reduce(lambda x, y: x + y, _remaining[component], 0)
_remaining.subtract(_vms_specs)
return _remaining
class NoSuitableHostFound(Exception):
"""Exception when no host found that can host a VM."""
def get_suitable_host(vm_specs, hosts=None):
if hosts is None:
hosts = host_pool.by_status(HostStatus.alive)
for host in hosts:
# Filter them by host_name
vms = vm_pool.by_host(host.key)
# Filter them by status
vms = vm_pool.by_status(VMStatus.running, vms)
running_vms_specs = [vm.specs for vm in vms]
# Accumulate all of their combined specs
running_vms_accumulated_specs = accumulated_specs(running_vms_specs)
# Find out remaining resources after
# host_specs - already running vm_specs
remaining = remaining_resources(host.specs, running_vms_accumulated_specs)
# Find out remaining - new_vm_specs
remaining = remaining_resources(remaining, vm_specs)
if all(map(lambda x: x >= 0, remaining.values())):
return host.key
raise NoSuitableHostFound
def dead_host_detection():
# Bring out your dead! - Monty Python and the Holy Grail
hosts = host_pool.by_status(HostStatus.alive)
dead_hosts_keys = []
for host in hosts:
# Only check those who claims to be alive
if host.status == HostStatus.alive:
if not host.is_alive():
dead_hosts_keys.append(host.key)
return dead_hosts_keys
def dead_host_mitigation(dead_hosts_keys):
for host_key in dead_hosts_keys:
host = host_pool.get(host_key)
host.declare_dead()
vms_hosted_on_dead_host = vm_pool.by_host(host_key)
for vm in vms_hosted_on_dead_host:
vm.declare_killed()
vm_pool.put(vm)
host_pool.put(host)
def assign_host(vm):
vm.hostname = get_suitable_host(vm.specs)
vm_pool.put(vm)
r = RequestEntry.from_scratch(type=RequestType.StartVM,
uuid=vm.uuid,
hostname=vm.hostname,
request_prefix=env_vars.get("REQUEST_PREFIX"))
request_pool.put(r)
vm.log.append("VM scheduled for starting")
return vm.hostname

93
ucloud/scheduler/main.py Executable file
View file

@ -0,0 +1,93 @@
# TODO
# 1. send an email to an email address defined by env['admin-email']
# if resources are finished
# 2. Introduce a status endpoint of the scheduler -
# maybe expose a prometheus compatible output
from ucloud.common.request import RequestEntry, RequestType
from ucloud.config import etcd_client
from ucloud.config import host_pool, request_pool, vm_pool, env_vars
from .helper import (get_suitable_host, dead_host_mitigation, dead_host_detection,
assign_host, NoSuitableHostFound)
from ucloud.scheduler import logger
def main():
logger.info("%s SESSION STARTED %s", '*' * 5, '*' * 5)
pending_vms = []
for request_iterator in [
etcd_client.get_prefix(env_vars.get('REQUEST_PREFIX'), value_in_json=True),
etcd_client.watch_prefix(env_vars.get('REQUEST_PREFIX'), timeout=5, value_in_json=True),
]:
for request_event in request_iterator:
request_entry = RequestEntry(request_event)
# Never Run time critical mechanism inside timeout
# mechanism because timeout mechanism only comes
# when no other event is happening. It means under
# heavy load there would not be a timeout event.
if request_entry.type == "TIMEOUT":
# Detect hosts that are dead and set their status
# to "DEAD", and their VMs' status to "KILLED"
dead_hosts = dead_host_detection()
if dead_hosts:
logger.debug("Dead hosts: %s", dead_hosts)
dead_host_mitigation(dead_hosts)
# If there are VMs that weren't assigned a host
# because there wasn't a host available which
# meets requirement of that VM then we would
# create a new ScheduleVM request for that VM
# on our behalf.
while pending_vms:
pending_vm_entry = pending_vms.pop()
r = RequestEntry.from_scratch(type="ScheduleVM",
uuid=pending_vm_entry.uuid,
hostname=pending_vm_entry.hostname,
request_prefix=env_vars.get("REQUEST_PREFIX"))
request_pool.put(r)
elif request_entry.type == RequestType.ScheduleVM:
logger.debug("%s, %s", request_entry.key, request_entry.value)
vm_entry = vm_pool.get(request_entry.uuid)
if vm_entry is None:
logger.info("Trying to act on {} but it is deleted".format(request_entry.uuid))
continue
etcd_client.client.delete(request_entry.key) # consume Request
# If the Request is about a VM which is labelled as "migration"
# and has a destination
if hasattr(request_entry, "migration") and request_entry.migration \
and hasattr(request_entry, "destination") and request_entry.destination:
try:
get_suitable_host(vm_specs=vm_entry.specs,
hosts=[host_pool.get(request_entry.destination)])
except NoSuitableHostFound:
logger.info("Requested destination host doesn't have enough capacity"
"to hold %s" % vm_entry.uuid)
else:
r = RequestEntry.from_scratch(type=RequestType.InitVMMigration,
uuid=request_entry.uuid,
destination=request_entry.destination,
request_prefix=env_vars.get("REQUEST_PREFIX"))
request_pool.put(r)
# If the Request is about a VM that just want to get started/created
else:
# assign_host only returns None when we couldn't be able to assign
# a host to a VM because of resource constraints
try:
assign_host(vm_entry)
except NoSuitableHostFound:
vm_entry.add_log("Can't schedule VM. No Resource Left.")
vm_pool.put(vm_entry)
pending_vms.append(vm_entry)
logger.info("No Resource Left. Emailing admin....")
if __name__ == "__main__":
main()

View file

View file

@ -0,0 +1,210 @@
import json
import multiprocessing
import sys
import unittest
from datetime import datetime
from os.path import dirname
BASE_DIR = dirname(dirname(__file__))
sys.path.insert(0, BASE_DIR)
from main import (
accumulated_specs,
remaining_resources,
VmPool,
main,
)
from ucloud.config import etcd_client
class TestFunctions(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.client = etcd_client
cls.host_prefix = "/test/host"
cls.vm_prefix = "/test/vm"
# These deletion could also be in
# tearDown() but it is more appropriate here
# as it enable us to check the ETCD store
# even after test is run
cls.client.client.delete_prefix(cls.host_prefix)
cls.client.client.delete_prefix(cls.vm_prefix)
cls.create_hosts(cls)
cls.create_vms(cls)
cls.p = multiprocessing.Process(
target=main, args=[cls.vm_prefix, cls.host_prefix]
)
cls.p.start()
@classmethod
def tearDownClass(cls):
cls.p.terminate()
def create_hosts(self):
host1 = {
"cpu": 32,
"ram": 128,
"hdd": 1024,
"sdd": 0,
"status": "ALIVE",
"last_heartbeat": datetime.utcnow().isoformat(),
}
host2 = {
"cpu": 16,
"ram": 64,
"hdd": 512,
"sdd": 0,
"status": "ALIVE",
"last_heartbeat": datetime.utcnow().isoformat(),
}
host3 = {
"cpu": 16,
"ram": 32,
"hdd": 256,
"sdd": 256,
"status": "ALIVE",
"last_heartbeat": datetime.utcnow().isoformat(),
}
with self.client.client.lock("lock"):
self.client.put(f"{self.host_prefix}/1", host1, value_in_json=True)
self.client.put(f"{self.host_prefix}/2", host2, value_in_json=True)
self.client.put(f"{self.host_prefix}/3", host3, value_in_json=True)
def create_vms(self):
vm1 = json.dumps(
{
"owner": "meow",
"specs": {"cpu": 4, "ram": 8, "hdd": 100, "sdd": 256},
"hostname": "",
"status": "REQUESTED_NEW",
}
)
vm2 = json.dumps(
{
"owner": "meow",
"specs": {"cpu": 16, "ram": 64, "hdd": 512, "sdd": 0},
"hostname": "",
"status": "REQUESTED_NEW",
}
)
vm3 = json.dumps(
{
"owner": "meow",
"specs": {"cpu": 16, "ram": 32, "hdd": 128, "sdd": 0},
"hostname": "",
"status": "REQUESTED_NEW",
}
)
vm4 = json.dumps(
{
"owner": "meow",
"specs": {"cpu": 16, "ram": 64, "hdd": 512, "sdd": 0},
"hostname": "",
"status": "REQUESTED_NEW",
}
)
vm5 = json.dumps(
{
"owner": "meow",
"specs": {"cpu": 2, "ram": 2, "hdd": 10, "sdd": 0},
"hostname": "",
"status": "REQUESTED_NEW",
}
)
vm6 = json.dumps(
{
"owner": "meow",
"specs": {"cpu": 10, "ram": 22, "hdd": 146, "sdd": 0},
"hostname": "",
"status": "REQUESTED_NEW",
}
)
vm7 = json.dumps(
{
"owner": "meow",
"specs": {"cpu": 10, "ram": 22, "hdd": 146, "sdd": 0},
"hostname": "",
"status": "REQUESTED_NEW",
}
)
self.client.put(f"{self.vm_prefix}/1", vm1)
self.client.put(f"{self.vm_prefix}/2", vm2)
self.client.put(f"{self.vm_prefix}/3", vm3)
self.client.put(f"{self.vm_prefix}/4", vm4)
self.client.put(f"{self.vm_prefix}/5", vm5)
self.client.put(f"{self.vm_prefix}/6", vm6)
self.client.put(f"{self.vm_prefix}/7", vm7)
def test_accumulated_specs(self):
vms = [
{"ssd": 10, "cpu": 4, "ram": 8},
{"hdd": 10, "cpu": 4, "ram": 8},
{"cpu": 8, "ram": 32},
]
self.assertEqual(
accumulated_specs(vms), {"ssd": 10, "cpu": 16, "ram": 48, "hdd": 10}
)
def test_remaining_resources(self):
host_specs = {"ssd": 10, "cpu": 16, "ram": 48, "hdd": 10}
vms_specs = {"ssd": 10, "cpu": 32, "ram": 12, "hdd": 0}
resultant_specs = {"ssd": 0, "cpu": -16, "ram": 36, "hdd": 10}
self.assertEqual(remaining_resources(host_specs, vms_specs),
resultant_specs)
def test_vmpool(self):
self.p.join(1)
vm_pool = VmPool(self.client, self.vm_prefix)
# vm_pool by host
actual = vm_pool.by_host(vm_pool.vms, f"{self.host_prefix}/3")
ground_truth = [
(
f"{self.vm_prefix}/1",
{
"owner": "meow",
"specs": {"cpu": 4, "ram": 8, "hdd": 100, "sdd": 256},
"hostname": f"{self.host_prefix}/3",
"status": "SCHEDULED_DEPLOY",
},
)
]
self.assertEqual(actual[0], ground_truth[0])
# vm_pool by status
actual = vm_pool.by_status(vm_pool.vms, "REQUESTED_NEW")
ground_truth = [
(
f"{self.vm_prefix}/7",
{
"owner": "meow",
"specs": {"cpu": 10, "ram": 22, "hdd": 146, "sdd": 0},
"hostname": "",
"status": "REQUESTED_NEW",
},
)
]
self.assertEqual(actual[0], ground_truth[0])
# vm_pool by except status
actual = vm_pool.except_status(vm_pool.vms, "SCHEDULED_DEPLOY")
ground_truth = [
(
f"{self.vm_prefix}/7",
{
"owner": "meow",
"specs": {"cpu": 10, "ram": 22, "hdd": 146, "sdd": 0},
"hostname": "",
"status": "REQUESTED_NEW",
},
)
]
self.assertEqual(actual[0], ground_truth[0])
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,75 @@
import sys
import unittest
from datetime import datetime
from os.path import dirname
BASE_DIR = dirname(dirname(__file__))
sys.path.insert(0, BASE_DIR)
from main import (
dead_host_detection,
dead_host_mitigation,
config
)
class TestDeadHostMechanism(unittest.TestCase):
def setUp(self):
self.client = config.etcd_client
self.host_prefix = "/test/host"
self.vm_prefix = "/test/vm"
self.client.client.delete_prefix(self.host_prefix)
self.client.client.delete_prefix(self.vm_prefix)
self.create_hosts()
def create_hosts(self):
host1 = {
"cpu": 32,
"ram": 128,
"hdd": 1024,
"sdd": 0,
"status": "ALIVE",
"last_heartbeat": datetime.utcnow().isoformat(),
}
host2 = {
"cpu": 16,
"ram": 64,
"hdd": 512,
"sdd": 0,
"status": "ALIVE",
"last_heartbeat": datetime(2011, 1, 1).isoformat(),
}
host3 = {"cpu": 16, "ram": 32, "hdd": 256, "sdd": 256}
host4 = {
"cpu": 16,
"ram": 32,
"hdd": 256,
"sdd": 256,
"status": "DEAD",
"last_heartbeat": datetime(2011, 1, 1).isoformat(),
}
with self.client.client.lock("lock"):
self.client.put(f"{self.host_prefix}/1", host1, value_in_json=True)
self.client.put(f"{self.host_prefix}/2", host2, value_in_json=True)
self.client.put(f"{self.host_prefix}/3", host3, value_in_json=True)
self.client.put(f"{self.host_prefix}/4", host4, value_in_json=True)
def test_dead_host_detection(self):
hosts = self.client.get_prefix(self.host_prefix, value_in_json=True)
deads = dead_host_detection(hosts)
self.assertEqual(deads, ["/test/host/2", "/test/host/3"])
return deads
def test_dead_host_mitigation(self):
deads = self.test_dead_host_detection()
dead_host_mitigation(self.client, deads)
hosts = self.client.get_prefix(self.host_prefix, value_in_json=True)
deads = dead_host_detection(hosts)
self.assertEqual(deads, [])
if __name__ == "__main__":
unittest.main()