2019-10-25 06:42:40 +00:00
|
|
|
# TODO
|
|
|
|
# 1. send an email to an email address defined by env['admin-email']
|
|
|
|
# if resources are exhausted
|
|
|
|
# 2. Introduce a status endpoint of the scheduler -
|
|
|
|
# maybe expose a Prometheus-compatible output
|
|
|
|
|
2019-12-03 10:40:41 +00:00
|
|
|
from ucloud.common.request import RequestEntry, RequestType
|
2019-12-22 07:26:48 +00:00
|
|
|
from ucloud.shared import shared
|
|
|
|
from ucloud.settings import settings
|
2019-12-30 09:35:07 +00:00
|
|
|
from .helper import (
|
|
|
|
get_suitable_host,
|
|
|
|
dead_host_mitigation,
|
|
|
|
dead_host_detection,
|
|
|
|
assign_host,
|
|
|
|
NoSuitableHostFound,
|
|
|
|
)
|
2019-12-03 11:49:10 +00:00
|
|
|
from . import logger
|
2019-10-25 06:42:40 +00:00
|
|
|
|
|
|
|
|
|
|
|
def _requeue_pending_vms(pending_vms):
    """Create a fresh ScheduleVM request for every VM in *pending_vms*.

    The list is drained in place (entries are popped from the end), and
    one request per entry is put into ``shared.request_pool``.

    Args:
        pending_vms: list of VM entries that could not be assigned a host
            earlier; it is empty when this function returns.
    """
    while pending_vms:
        pending_vm_entry = pending_vms.pop()
        request = RequestEntry.from_scratch(
            type="ScheduleVM",
            uuid=pending_vm_entry.uuid,
            hostname=pending_vm_entry.hostname,
            request_prefix=settings["etcd"]["request_prefix"],
        )
        shared.request_pool.put(request)


def _schedule_vm(request_entry, pending_vms):
    """Handle a single ScheduleVM request.

    Looks up the VM named by the request, consumes the request from etcd
    and tries to assign a host. When no suitable host exists, the failure
    is logged on the VM entry and the VM is appended to *pending_vms* so
    that it is retried later.

    Args:
        request_entry: the ``RequestEntry`` of type ScheduleVM to process.
        pending_vms: list collecting VM entries that still need a host;
            mutated in place.
    """
    logger.debug("%s, %s", request_entry.key, request_entry.value)

    vm_entry = shared.vm_pool.get(request_entry.uuid)
    if vm_entry is None:
        # NOTE(review): the request is left in etcd in this case (it is
        # only deleted below, once the VM is known to exist) - confirm
        # that this is intended.
        logger.info(
            "Trying to act on %s but it is deleted", request_entry.uuid
        )
        return

    shared.etcd_client.client.delete(request_entry.key)  # consume Request

    try:
        assign_host(vm_entry)
    except NoSuitableHostFound:
        vm_entry.add_log("Can't schedule VM. No Resource Left.")
        shared.vm_pool.put(vm_entry)

        # Remember the VM so that a new ScheduleVM request is issued for
        # it on a later TIMEOUT event.
        pending_vms.append(vm_entry)
        # The message text is kept as-is, although no email is sent from
        # this code path.
        logger.info("No Resource Left. Emailing admin....")


def main():
    """Run the scheduler loop.

    First processes every request already stored under the configured
    etcd request prefix, then keeps watching that prefix for new events.
    ScheduleVM requests are scheduled as they arrive; TIMEOUT events are
    used for housekeeping (dead host handling and retrying VMs that could
    not be scheduled before).
    """
    pending_vms = []

    for request_iterator in [
        shared.etcd_client.get_prefix(
            settings["etcd"]["request_prefix"], value_in_json=True
        ),
        shared.etcd_client.watch_prefix(
            settings["etcd"]["request_prefix"],
            timeout=5,
            value_in_json=True,
        ),
    ]:
        for request_event in request_iterator:
            request_entry = RequestEntry(request_event)

            # Never run a time-critical mechanism inside the timeout
            # branch: a timeout event only occurs when no other event is
            # happening, so under heavy load there would be no timeout
            # event at all.
            if request_entry.type == "TIMEOUT":
                # Detect hosts that are dead and set their status
                # to "DEAD", and their VMs' status to "KILLED"
                dead_hosts = dead_host_detection()
                if dead_hosts:
                    logger.debug("Dead hosts: %s", dead_hosts)
                    dead_host_mitigation(dead_hosts)

                # If there are VMs that weren't assigned a host because
                # no available host met their requirements, create a new
                # ScheduleVM request for each of them on our own behalf.
                _requeue_pending_vms(pending_vms)

            elif request_entry.type == RequestType.ScheduleVM:
                _schedule_vm(request_entry, pending_vms)
|
2019-10-25 06:42:40 +00:00
|
|
|
|
|
|
|
|
2019-11-02 15:42:24 +00:00
|
|
|
# Start the scheduler loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|