ucloud-scheduler/main.py

89 lines
4.1 KiB
Python
Raw Normal View History

2019-06-25 16:39:29 +05:00
# TODO
# 1. send an email to an email address defined by env['admin-email']
2019-06-30 21:30:17 +05:00
#    when resources are exhausted
2019-09-05 13:37:59 +05:00
# 2. Introduce a status endpoint of the scheduler -
2019-06-30 21:30:17 +05:00
# maybe expose a prometheus compatible output
2019-06-25 16:39:29 +05:00
import logging
2019-06-25 16:39:29 +05:00
2019-09-12 22:31:39 +05:00
from ucloud_common.request import RequestEntry, RequestType
2019-09-03 18:06:41 +02:00
from config import etcd_client as client
2019-09-05 13:37:59 +05:00
from config import (host_pool, request_pool, vm_pool, request_prefix)
from helper import (get_suitable_host, dead_host_mitigation, dead_host_detection,
2019-09-05 15:16:26 +05:00
assign_host, NoSuitableHostFound)
2019-06-25 16:39:29 +05:00
2019-09-05 13:37:59 +05:00
def _requeue_pending_vms(pending_vms):
    """Create a fresh ScheduleVM request for every VM in *pending_vms*.

    The list is drained in place (last appended entry first), so each
    pending VM is re-queued exactly once per call.
    """
    while pending_vms:
        pending_vm_entry = pending_vms.pop()
        # Use the RequestType constant (not a bare string) so the re-queued
        # request is guaranteed to match the ScheduleVM dispatch in main().
        r = RequestEntry.from_scratch(type=RequestType.ScheduleVM,
                                      uuid=pending_vm_entry.uuid,
                                      hostname=pending_vm_entry.hostname)
        request_pool.put(r)


def _handle_timeout(pending_vms):
    """Run the housekeeping that is triggered by a TIMEOUT event.

    Runs dead host detection/mitigation and then re-queues the VMs that
    could not be scheduled earlier.
    """
    # Detect hosts that are dead and set their status
    # to "DEAD", and their VMs' status to "KILLED"
    logging.debug("TIMEOUT event occured")
    dead_hosts = dead_host_detection()
    logging.debug("Dead hosts: %s", dead_hosts)
    dead_host_mitigation(dead_hosts)

    # If there are VMs that weren't assigned a host because no available
    # host met their requirements, create a new ScheduleVM request for
    # each of them on our behalf.
    _requeue_pending_vms(pending_vms)


def _handle_schedule_vm(request_entry, pending_vms):
    """Process one ScheduleVM request.

    The request is consumed (deleted from etcd) first. A request that
    carries both a truthy ``migration`` and ``destination`` attribute is
    turned into an InitVMMigration request if the destination host is
    suitable; any other request gets a host assigned, and the VM is added
    to *pending_vms* when no suitable host exists.
    """
    vm_entry = vm_pool.get(request_entry.uuid)
    client.client.delete(request_entry.key)  # consume Request

    # The attributes are optional on a request, hence getattr with a default.
    is_migration = (getattr(request_entry, "migration", None)
                    and getattr(request_entry, "destination", None))

    if is_migration:
        # The Request is about a VM which is labelled as "migration"
        # and has a destination: only check that the requested
        # destination can hold the VM (the returned host is not needed).
        try:
            get_suitable_host(vm_specs=vm_entry.specs,
                              hosts=[host_pool.get(request_entry.destination)])
        except NoSuitableHostFound:
            logging.info("Requested destination host doesn't have enough capacity "
                         "to hold %s", vm_entry.uuid)
        else:
            r = RequestEntry.from_scratch(type=RequestType.InitVMMigration,
                                          uuid=request_entry.uuid,
                                          destination=request_entry.destination)
            request_pool.put(r)
        return

    # The Request is about a VM that just wants to get started/created.
    # assign_host raises NoSuitableHostFound when no host can be assigned
    # to the VM because of resource constraints.
    try:
        assign_host(vm_entry)
    except NoSuitableHostFound:
        vm_entry.log.append("Can't schedule VM. No Resource Left.")
        vm_pool.put(vm_entry)

        # Remember the VM so that it is retried on the next TIMEOUT event.
        pending_vms.append(vm_entry)
        # NOTE: only a log line is written here; no email is sent yet
        # (see the TODO at the top of this file).
        logging.info("No Resource Left. Emailing admin....")


def main():
    """Scheduler loop: process existing requests, then watch for new ones.

    First iterates over the requests already stored under
    ``request_prefix`` and then over the events delivered by the watch on
    the same prefix, dispatching TIMEOUT and ScheduleVM events. Other
    request types are ignored.
    """
    # VMs that could not be scheduled yet; retried on TIMEOUT events.
    pending_vms = []

    for request_iterator in [
        client.get_prefix(request_prefix, value_in_json=True),
        client.watch_prefix(request_prefix, timeout=5, value_in_json=True),
    ]:
        for request_event in request_iterator:
            request_entry = RequestEntry(request_event)
            logging.debug("%s, %s", request_entry.key, request_entry.value)

            # Never run a time critical mechanism inside the timeout
            # mechanism, because a timeout only occurs when no other
            # event is happening. It means under heavy load there
            # would not be a timeout event.
            if request_entry._type == "TIMEOUT":
                _handle_timeout(pending_vms)
            elif request_entry._type == RequestType.ScheduleVM:
                _handle_schedule_vm(request_entry, pending_vms)
2019-06-30 21:30:17 +05:00
2019-09-12 22:31:39 +05:00
# Mark the beginning of a scheduler session in the log, then start the
# scheduling loop.
_BANNER = '*' * 5
logging.info("%s SESSION STARTED %s", _BANNER, _BANNER)
main()