# uncloud-mravi/uncloud/scheduler/main.py

# TODO
#  1. send an email to an email address defined by env['admin-email']
#     if resources are finished
#  2. Introduce a status endpoint of the scheduler -
#     maybe expose a prometheus compatible output

import argparse

from uncloud.common.request import RequestEntry, RequestType
from uncloud.common.shared import shared
from uncloud.scheduler import logger
from uncloud.scheduler.helper import (dead_host_mitigation, dead_host_detection,
                                      assign_host, NoSuitableHostFound)

# Command-line parser for the scheduler component. The program name is
# 'scheduler' and argparse's automatic -h/--help option is switched off.
arg_parser = argparse.ArgumentParser(prog='scheduler', add_help=False)


def main(arguments):
    """Run the scheduler loop: consume ScheduleVM requests and assign hosts.

    Loops forever. Each pass iterates first over the requests returned by
    ``get_prefix`` and then over the events delivered by ``watch_prefix`` for
    the configured request prefix. For every event, dead-host detection and
    mitigation run first; then, if the request is of type
    ``RequestType.ScheduleVM``, the request key is deleted from etcd and
    ``assign_host`` is called for the referenced VM. Requests of any other
    type are ignored by this function.

    :param arguments: not referenced anywhere in the visible body of this
        function.
    """
    # The outer `while True` is necessary for gracefully handling leadership
    # transfer and temporary unavailability of etcd. It works because
    # get_prefix/watch_prefix return iter([]) (an iterator over an empty list)
    # when an exception occurs for the reasons above, which ends the inner
    # loops immediately. Wrapping them in an infinite loop means we retry
    # again and again until we either succeed or the daemon dies.
    # NOTE(review): no delay between retries is visible here; if both helpers
    # return empty iterators immediately this looks like a tight retry loop --
    # confirm whether the etcd client wrapper throttles.
    while True:
        # Both calls are made up front while the list literal is built; the
        # results are then consumed in order (get_prefix first, watch second).
        for request_iterator in [
            shared.etcd_client.get_prefix(shared.settings['etcd']['request_prefix'], value_in_json=True,
                                          raise_exception=False),
            shared.etcd_client.watch_prefix(shared.settings['etcd']['request_prefix'], value_in_json=True,
                                            raise_exception=False),
        ]:
            for request_event in request_iterator:
                # Runs on every event, regardless of the request type.
                dead_host_mitigation(dead_host_detection())
                request_entry = RequestEntry(request_event)

                if request_entry.type == RequestType.ScheduleVM:
                    logger.debug('%s, %s', request_entry.key, request_entry.value)

                    vm_entry = shared.vm_pool.get(request_entry.uuid)
                    if vm_entry is None:
                        # The VM is not in the pool: skip this request. Note
                        # that the request key is NOT deleted on this path.
                        logger.info('Trying to act on {} but it is deleted'.format(request_entry.uuid))
                        continue

                    # The request is removed before scheduling is attempted,
                    # so it is consumed whether or not assign_host succeeds.
                    shared.etcd_client.client.delete(request_entry.key)  # consume Request

                    try:
                        assign_host(vm_entry)
                    except NoSuitableHostFound:
                        # Record the failure on the VM entry and write it back.
                        vm_entry.add_log('Can\'t schedule VM. No Resource Left.')
                        shared.vm_pool.put(vm_entry)

                        # NOTE(review): the message announces an email, but no
                        # email is sent in the visible code (see the TODO at
                        # the top of the file).
                        logger.info('No Resource Left. Emailing admin....')