uncloud-mravi/uncloud_etcd_based/uncloud/scheduler/main.py

52 lines
2.4 KiB
Python
Raw Normal View History

# TODO
# 1. send an email to an email address defined by env['admin-email']
# if resources are finished
# 2. Introduce a status endpoint of the scheduler -
# maybe expose a prometheus compatible output
2020-01-03 13:38:59 +00:00
import argparse
from uncloud.common.request import RequestEntry, RequestType
from uncloud.common.shared import shared
from uncloud.scheduler import logger
from uncloud.scheduler.helper import (dead_host_mitigation, dead_host_detection,
assign_host, NoSuitableHostFound)
2020-01-03 13:38:59 +00:00
arg_parser = argparse.ArgumentParser('scheduler', add_help=False)
def main(arguments):
# The below while True is neccessary for gracefully handling leadership transfer and temporary
# unavailability in etcd. Why does it work? It works because the get_prefix,watch_prefix return
# iter([]) that is iterator of empty list on exception (that occur due to above mentioned reasons)
# which ends the loop immediately. So, having it inside infinite loop we try again and again to
# get prefix until either success or deamon death comes.
while True:
for request_iterator in [
shared.etcd_client.get_prefix(shared.settings['etcd']['request_prefix'], value_in_json=True,
raise_exception=False),
shared.etcd_client.watch_prefix(shared.settings['etcd']['request_prefix'], value_in_json=True,
raise_exception=False),
]:
for request_event in request_iterator:
dead_host_mitigation(dead_host_detection())
request_entry = RequestEntry(request_event)
if request_entry.type == RequestType.ScheduleVM:
logger.debug('%s, %s', request_entry.key, request_entry.value)
vm_entry = shared.vm_pool.get(request_entry.uuid)
if vm_entry is None:
logger.info('Trying to act on {} but it is deleted'.format(request_entry.uuid))
continue
shared.etcd_client.client.delete(request_entry.key) # consume Request
try:
assign_host(vm_entry)
except NoSuitableHostFound:
vm_entry.add_log('Can\'t schedule VM. No Resource Left.')
shared.vm_pool.put(vm_entry)
logger.info('No Resource Left. Emailing admin....')