uncloud/scheduler/main.py
meow 93dee1c9fc New Features + Refactoring
1. Users can now use an image name instead of an image UUID when creating a VM.
   For example, a user can now create an Alpine VM using the following
   command
   ```shell
   ucloud-cli vm create --vm-name myvm --cpu 2 --ram '2GB' \
       --os-ssd '10GB' --image images:alpine
   ```
2. Instead of directly running code, code is now placed under a function
   main and is called using the following code
   ```python
   if __name__ == "__main__":
       main()
   ```
3. multiprocessing (Process) is now used instead of threading (Thread) to
   update the heartbeat of the host.
4. The IP address of a VM is now included in the VM's status, which is
   retrieved by the following command
   ```shell
   ucloud-cli vm status --vm-name myvm
   ```
2019-11-02 20:42:24 +05:00

90 lines
4.1 KiB
Python
Executable file

# TODO
# 1. Send an email to the address defined by env['admin-email']
#    when resources are exhausted.
# 2. Introduce a status endpoint for the scheduler -
#    maybe expose a Prometheus-compatible output.
import logging
from ucloud_common.request import RequestEntry, RequestType
from config import etcd_client as client
from config import (host_pool, request_pool, vm_pool, request_prefix)
from helper import (get_suitable_host, dead_host_mitigation, dead_host_detection,
assign_host, NoSuitableHostFound)
def _handle_timeout(pending_vms):
    """Run the housekeeping that is tied to a TIMEOUT event.

    Never run time-critical work in here: a TIMEOUT event is only
    delivered when no other event is happening, so under heavy load
    there may be no TIMEOUT event at all.

    Args:
        pending_vms: list of VM entries that could not be assigned a
            host earlier. It is drained in place; a fresh ScheduleVM
            request is put into ``request_pool`` for every entry.
    """
    # Detect hosts that are dead and set their status
    # to "DEAD", and their VMs' status to "KILLED"
    logging.debug("TIMEOUT event occured")
    dead_hosts = dead_host_detection()
    logging.debug("Dead hosts: %s", dead_hosts)
    dead_host_mitigation(dead_hosts)

    # VMs that were not assigned a host because no available host met
    # their requirements get a new ScheduleVM request created on their
    # behalf, so that scheduling is retried.
    while pending_vms:
        pending_vm_entry = pending_vms.pop()
        r = RequestEntry.from_scratch(type="ScheduleVM",
                                      uuid=pending_vm_entry.uuid,
                                      hostname=pending_vm_entry.hostname)
        request_pool.put(r)


def _handle_schedule_vm(request_entry, pending_vms):
    """Consume one ScheduleVM request and act on it.

    A request flagged as a migration with a destination is checked
    against that destination host; on success an InitVMMigration
    request is queued. Any other request tries to assign a host to the
    VM, and the VM is remembered in ``pending_vms`` when that fails.

    Args:
        request_entry: the ScheduleVM request read from etcd.
        pending_vms: list collecting VM entries that could not be
            scheduled; appended to in place.
    """
    vm_entry = vm_pool.get(request_entry.uuid)
    client.client.delete(request_entry.key)  # consume Request

    # "migration" and "destination" are optional attributes of a request,
    # so a missing attribute is treated like a falsy one.
    is_migration = getattr(request_entry, "migration", None)
    destination = getattr(request_entry, "destination", None)

    # The Request is about a VM which is labelled as "migration"
    # and has a destination
    if is_migration and destination:
        try:
            get_suitable_host(vm_specs=vm_entry.specs,
                              hosts=[host_pool.get(destination)])
        except NoSuitableHostFound:
            # The trailing space matters: the two literals are joined
            # by implicit concatenation.
            logging.info("Requested destination host doesn't have enough capacity "
                         "to hold %s", vm_entry.uuid)
        else:
            r = RequestEntry.from_scratch(type=RequestType.InitVMMigration,
                                          uuid=request_entry.uuid,
                                          destination=destination)
            request_pool.put(r)
        return

    # The Request is about a VM that just wants to get started/created.
    # assign_host raises NoSuitableHostFound when no host could be
    # assigned to the VM.
    try:
        assign_host(vm_entry)
    except NoSuitableHostFound:
        vm_entry.log.append("Can't schedule VM. No Resource Left.")
        vm_pool.put(vm_entry)

        # Retried on the next TIMEOUT event (see _handle_timeout).
        pending_vms.append(vm_entry)
        # NOTE: nothing is emailed here yet; this is only a log line
        # (see the TODO at the top of the file).
        logging.info("No Resource Left. Emailing admin....")


def main():
    """Run the scheduler loop.

    First processes every request already stored under
    ``request_prefix``, then watches that prefix for new events
    (including TIMEOUT events produced by the watch's ``timeout=5``).
    TIMEOUT events trigger dead-host handling and re-queue pending VMs;
    ScheduleVM requests are consumed and dispatched. Requests of any
    other type are ignored.
    """
    logging.info("%s SESSION STARTED %s", '*' * 5, '*' * 5)

    # VMs that could not be scheduled yet; retried on TIMEOUT events.
    pending_vms = []

    for request_iterator in [
        client.get_prefix(request_prefix, value_in_json=True),
        client.watch_prefix(request_prefix, timeout=5, value_in_json=True),
    ]:
        for request_event in request_iterator:
            request_entry = RequestEntry(request_event)
            logging.debug("%s, %s", request_entry.key, request_entry.value)

            if request_entry.type == "TIMEOUT":
                _handle_timeout(pending_vms)
            elif request_entry.type == RequestType.ScheduleVM:
                _handle_schedule_vm(request_entry, pending_vms)
# Start the scheduler loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()