2019-10-25 06:42:40 +00:00
|
|
|
# TODO
|
|
|
|
# 1. send an email to an email address defined by env['admin-email']
|
|
|
|
# if resources are exhausted
|
|
|
|
# 2. Introduce a status endpoint of the scheduler -
|
|
|
|
# maybe expose a Prometheus-compatible output
|
|
|
|
|
2020-01-03 13:38:59 +00:00
|
|
|
import argparse
|
|
|
|
|
2020-01-06 07:25:59 +00:00
|
|
|
from uncloud.common.settings import settings
|
2020-01-08 19:40:05 +00:00
|
|
|
from uncloud.common.request import RequestEntry, RequestType
|
|
|
|
from uncloud.common.shared import shared
|
|
|
|
from uncloud.scheduler import logger
|
|
|
|
from uncloud.scheduler.helper import (dead_host_mitigation, dead_host_detection,
|
|
|
|
assign_host, NoSuitableHostFound)
|
2019-10-25 06:42:40 +00:00
|
|
|
|
2020-01-03 13:38:59 +00:00
|
|
|
# Command-line parser for the scheduler; the automatic -h/--help option is disabled.
arg_parser = argparse.ArgumentParser(prog='scheduler', add_help=False)
|
|
|
|
|
2019-10-25 06:42:40 +00:00
|
|
|
|
2019-12-31 13:22:44 +00:00
|
|
|
def main(debug=False):
    """Run the scheduler loop, handling ScheduleVM requests read from etcd.

    For every request event obtained from ``get_prefix`` and then
    ``watch_prefix`` on ``settings['etcd']['request_prefix']``:

    1. ``dead_host_mitigation(dead_host_detection())`` is run.
    2. The event is wrapped in a ``RequestEntry``; anything that is not a
       ``RequestType.ScheduleVM`` request is ignored.
    3. The VM is looked up in ``shared.vm_pool``. If it no longer exists the
       request is skipped (and is NOT deleted from etcd).
    4. Otherwise the request key is deleted from etcd and ``assign_host`` is
       called. On ``NoSuitableHostFound`` a log line is added to the VM
       entry and the entry is written back to the pool.

    The loop is infinite; the function only exits through an unhandled
    exception.

    :param debug: accepted for interface compatibility; not used in this
        function.
    """
    # The below while True is necessary for gracefully handling leadership transfer and temporary
    # unavailability in etcd. Why does it work? It works because the get_prefix, watch_prefix return
    # iter([]) that is iterator of empty list on exception (that occur due to above mentioned reasons)
    # which ends the loop immediately. So, having it inside infinite loop we try again and again to
    # get prefix until either success or daemon death comes.
    # NOTE(review): there is no delay between retries in this function - confirm that the
    # etcd wrapper itself throttles, otherwise this spins while etcd is unavailable.
    while True:
        # Both calls are made up front (the list literal is evaluated eagerly),
        # before the first iterator is consumed.
        for request_iterator in [
            shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True,
                                          raise_exception=False),
            shared.etcd_client.watch_prefix(settings['etcd']['request_prefix'], value_in_json=True,
                                            raise_exception=False),
        ]:
            for request_event in request_iterator:
                # Host health is re-checked before every request, whatever its type.
                dead_host_mitigation(dead_host_detection())
                request_entry = RequestEntry(request_event)

                # Only VM scheduling requests are handled by this loop.
                if request_entry.type != RequestType.ScheduleVM:
                    continue

                logger.debug('%s, %s', request_entry.key, request_entry.value)

                vm_entry = shared.vm_pool.get(request_entry.uuid)
                if vm_entry is None:
                    # Lazy %-style arguments, consistent with the debug call above.
                    logger.info('Trying to act on %s but it is deleted', request_entry.uuid)
                    continue

                shared.etcd_client.client.delete(request_entry.key)  # consume Request

                try:
                    assign_host(vm_entry)
                except NoSuitableHostFound:
                    vm_entry.add_log('Can\'t schedule VM. No Resource Left.')
                    shared.vm_pool.put(vm_entry)

                    # NOTE: no email is actually sent here yet; only this log line
                    # is emitted (see the TODO at the top of the file).
                    logger.info('No Resource Left. Emailing admin....')
|
|
|
|
|
|
|
|
|
|
|
|
# Start the scheduler loop when this file is executed as a script.
if __name__ == '__main__':
    main()
|