forked from uncloud/uncloud
1. mp.set_start_method('spawn') commented out from scripts/uncloud
2. uncloud.shared moved under uncloud.common
3. Refactoring in etcd_wrapper e.g timeout mechanism removed and few other things
4. uncloud-{scheduler,host} now better handle etcd events in their block state (waiting for requests to come)
This commit is contained in:
parent
f8f790e7fc
commit
48efcdf08c
17 changed files with 136 additions and 173 deletions
|
|
@ -6,59 +6,51 @@
|
|||
|
||||
import argparse
|
||||
|
||||
from uncloud.common.request import RequestEntry, RequestType
|
||||
from uncloud.shared import shared
|
||||
from uncloud.common.settings import settings
|
||||
from .helper import (dead_host_mitigation, dead_host_detection, assign_host, NoSuitableHostFound)
|
||||
from . import logger
|
||||
from uncloud.common.request import RequestEntry, RequestType
|
||||
from uncloud.common.shared import shared
|
||||
from uncloud.scheduler import logger
|
||||
from uncloud.scheduler.helper import (dead_host_mitigation, dead_host_detection,
|
||||
assign_host, NoSuitableHostFound)
|
||||
|
||||
arg_parser = argparse.ArgumentParser('scheduler', add_help=False)
|
||||
|
||||
|
||||
def main(debug=False):
|
||||
for request_iterator in [
|
||||
shared.etcd_client.get_prefix(
|
||||
settings["etcd"]["request_prefix"], value_in_json=True
|
||||
),
|
||||
shared.etcd_client.watch_prefix(
|
||||
settings["etcd"]["request_prefix"],
|
||||
timeout=5,
|
||||
value_in_json=True,
|
||||
),
|
||||
]:
|
||||
for request_event in request_iterator:
|
||||
request_entry = RequestEntry(request_event)
|
||||
# Never Run time critical mechanism inside timeout
|
||||
# mechanism because timeout mechanism only comes
|
||||
# when no other event is happening. It means under
|
||||
# heavy load there would not be a timeout event.
|
||||
if request_entry.type == "TIMEOUT":
|
||||
# The below while True is neccessary for gracefully handling leadership transfer and temporary
|
||||
# unavailability in etcd. Why does it work? It works because the get_prefix,watch_prefix return
|
||||
# iter([]) that is iterator of empty list on exception (that occur due to above mentioned reasons)
|
||||
# which ends the loop immediately. So, having it inside infinite loop we try again and again to
|
||||
# get prefix until either success or deamon death comes.
|
||||
while True:
|
||||
for request_iterator in [
|
||||
shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True,
|
||||
raise_exception=False),
|
||||
shared.etcd_client.watch_prefix(settings['etcd']['request_prefix'], value_in_json=True,
|
||||
raise_exception=False),
|
||||
]:
|
||||
for request_event in request_iterator:
|
||||
dead_host_mitigation(dead_host_detection())
|
||||
request_entry = RequestEntry(request_event)
|
||||
|
||||
# Detect hosts that are dead and set their status
|
||||
# to "DEAD", and their VMs' status to "KILLED"
|
||||
dead_hosts = dead_host_detection()
|
||||
if dead_hosts:
|
||||
logger.debug("Dead hosts: %s", dead_hosts)
|
||||
dead_host_mitigation(dead_hosts)
|
||||
if request_entry.type == RequestType.ScheduleVM:
|
||||
logger.debug('%s, %s', request_entry.key, request_entry.value)
|
||||
|
||||
elif request_entry.type == RequestType.ScheduleVM:
|
||||
logger.debug("%s, %s", request_entry.key, request_entry.value)
|
||||
vm_entry = shared.vm_pool.get(request_entry.uuid)
|
||||
if vm_entry is None:
|
||||
logger.info('Trying to act on {} but it is deleted'.format(request_entry.uuid))
|
||||
continue
|
||||
|
||||
vm_entry = shared.vm_pool.get(request_entry.uuid)
|
||||
if vm_entry is None:
|
||||
logger.info("Trying to act on {} but it is deleted".format(request_entry.uuid))
|
||||
continue
|
||||
shared.etcd_client.client.delete(request_entry.key) # consume Request
|
||||
|
||||
shared.etcd_client.client.delete(request_entry.key) # consume Request
|
||||
try:
|
||||
assign_host(vm_entry)
|
||||
except NoSuitableHostFound:
|
||||
vm_entry.add_log('Can\'t schedule VM. No Resource Left.')
|
||||
shared.vm_pool.put(vm_entry)
|
||||
|
||||
try:
|
||||
assign_host(vm_entry)
|
||||
except NoSuitableHostFound:
|
||||
vm_entry.add_log("Can't schedule VM. No Resource Left.")
|
||||
shared.vm_pool.put(vm_entry)
|
||||
|
||||
logger.info("No Resource Left. Emailing admin....")
|
||||
logger.info('No Resource Left. Emailing admin....')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue