uncloud/ucloud/scheduler/main.py

96 lines
3.4 KiB
Python
Executable File

# TODO
# 1. send an email to an email address defined by env['admin-email']
# if resources are finished
# 2. Introduce a status endpoint of the scheduler -
# maybe expose a prometheus compatible output
from ucloud.common.request import RequestEntry, RequestType
from ucloud.shared import shared
from ucloud.settings import settings
from .helper import (
get_suitable_host,
dead_host_mitigation,
dead_host_detection,
assign_host,
NoSuitableHostFound,
)
from . import logger
def main():
pending_vms = []
for request_iterator in [
shared.etcd_client.get_prefix(
settings["etcd"]["request_prefix"], value_in_json=True
),
shared.etcd_client.watch_prefix(
settings["etcd"]["request_prefix"],
timeout=5,
value_in_json=True,
),
]:
for request_event in request_iterator:
request_entry = RequestEntry(request_event)
# Never Run time critical mechanism inside timeout
# mechanism because timeout mechanism only comes
# when no other event is happening. It means under
# heavy load there would not be a timeout event.
if request_entry.type == "TIMEOUT":
# Detect hosts that are dead and set their status
# to "DEAD", and their VMs' status to "KILLED"
dead_hosts = dead_host_detection()
if dead_hosts:
logger.debug("Dead hosts: %s", dead_hosts)
dead_host_mitigation(dead_hosts)
# If there are VMs that weren't assigned a host
# because there wasn't a host available which
# meets requirement of that VM then we would
# create a new ScheduleVM request for that VM
# on our behalf.
while pending_vms:
pending_vm_entry = pending_vms.pop()
r = RequestEntry.from_scratch(
type="ScheduleVM",
uuid=pending_vm_entry.uuid,
hostname=pending_vm_entry.hostname,
request_prefix=settings["etcd"][
"request_prefix"
],
)
shared.request_pool.put(r)
elif request_entry.type == RequestType.ScheduleVM:
logger.debug(
"%s, %s", request_entry.key, request_entry.value
)
vm_entry = shared.vm_pool.get(request_entry.uuid)
if vm_entry is None:
logger.info(
"Trying to act on {} but it is deleted".format(
request_entry.uuid
)
)
continue
shared.etcd_client.client.delete(
request_entry.key
) # consume Request
try:
assign_host(vm_entry)
except NoSuitableHostFound:
vm_entry.add_log(
"Can't schedule VM. No Resource Left."
)
shared.vm_pool.put(vm_entry)
pending_vms.append(vm_entry)
logger.info("No Resource Left. Emailing admin....")
if __name__ == "__main__":
main()