"""Scheduler entry point: processes requests found under the etcd request prefix."""

# TODO
# 1. Send an email to the address defined by env['admin-email']
#    if resources are finished.
# 2. Introduce a status endpoint of the scheduler -
#    maybe expose a Prometheus-compatible output.

import argparse

from uncloud.common.request import RequestEntry, RequestType
from uncloud.shared import shared
from uncloud.settings import settings
from .helper import (
    dead_host_mitigation,
    dead_host_detection,
    assign_host,
    NoSuitableHostFound,
)
from . import logger

arg_parser = argparse.ArgumentParser('scheduler', add_help=False)


def _handle_timeout():
    """Run dead-host detection and, if any hosts are reported, mitigation."""
    # Detect hosts that are dead and set their status
    # to "DEAD", and their VMs' status to "KILLED"
    dead_hosts = dead_host_detection()
    if dead_hosts:
        logger.debug("Dead hosts: %s", dead_hosts)
        dead_host_mitigation(dead_hosts)


def _handle_schedule_vm(request_entry):
    """Consume one ScheduleVM request and try to assign its VM to a host.

    Args:
        request_entry: The ``RequestEntry`` built from the etcd event; its
            ``key``, ``value`` and ``uuid`` attributes are read here.
    """
    logger.debug("%s, %s", request_entry.key, request_entry.value)

    vm_entry = shared.vm_pool.get(request_entry.uuid)
    if vm_entry is None:
        # The request is left in etcd in this case; only requests whose VM
        # still exists are deleted below.
        logger.info(
            "Trying to act on %s but it is deleted", request_entry.uuid
        )
        return

    # Consume the request before scheduling is attempted.
    shared.etcd_client.client.delete(request_entry.key)

    try:
        assign_host(vm_entry)
    except NoSuitableHostFound:
        vm_entry.add_log("Can't schedule VM. No Resource Left.")
        shared.vm_pool.put(vm_entry)

        # NOTE: nothing in this module sends an email yet; see the TODO at
        # the top of the file.
        logger.info("No Resource Left. Emailing admin....")


def main(debug=False):
    """Process scheduler requests from etcd.

    Iterates first over the result of ``get_prefix`` and then over the
    result of ``watch_prefix`` for the configured request prefix, and
    dispatches each event by its request type.

    Args:
        debug: Accepted for interface compatibility; not used in this
            function.
    """
    request_prefix = settings["etcd"]["request_prefix"]

    # Both iterators are created up front, before any event is processed.
    # Presumably the first yields the requests already stored and the second
    # streams later changes -- confirm against the etcd wrapper.
    request_iterators = [
        shared.etcd_client.get_prefix(request_prefix, value_in_json=True),
        shared.etcd_client.watch_prefix(
            request_prefix,
            timeout=5,
            value_in_json=True,
        ),
    ]

    for request_iterator in request_iterators:
        for request_event in request_iterator:
            request_entry = RequestEntry(request_event)

            # Never run a time-critical mechanism inside the timeout
            # branch: a timeout event only arrives when no other event
            # is happening, so under heavy load there would be no
            # timeout event at all.
            if request_entry.type == "TIMEOUT":
                _handle_timeout()
            elif request_entry.type == RequestType.ScheduleVM:
                _handle_schedule_vm(request_entry)


if __name__ == "__main__":
    main()