1. mp.set_start_method('spawn') commented out in scripts/uncloud
2. uncloud.shared moved under uncloud.common
3. Refactoring in etcd_wrapper, e.g. the timeout mechanism was removed, along with a few other changes
4. uncloud-{scheduler,host} now better handle etcd events in their blocked state (while waiting for requests to arrive)
	
	
This commit is contained in:
		
					parent
					
						
							
								f8f790e7fc
							
						
					
				
			
			
				commit
				
					
						48efcdf08c
					
				
			
		
					 17 changed files with 136 additions and 173 deletions
				
			
		|  | @ -45,7 +45,7 @@ if __name__ == '__main__': | |||
|         # i.e inheriting few things from parent process etcd3 module | ||||
|         # errors out, so the following command configure multiprocessing | ||||
|         # module to not inherit anything from parent. | ||||
|         mp.set_start_method('spawn') | ||||
|         # mp.set_start_method('spawn') | ||||
|         arguments = vars(args) | ||||
|         try: | ||||
|             name = arguments.pop('command') | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| import os | ||||
| 
 | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.common.settings import settings | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -3,7 +3,7 @@ import os | |||
| 
 | ||||
| from uuid import uuid4 | ||||
| 
 | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.common.settings import settings | ||||
| 
 | ||||
| data = { | ||||
|  |  | |||
|  | @ -1,13 +1,12 @@ | |||
| import binascii | ||||
| import ipaddress | ||||
| import random | ||||
| import subprocess as sp | ||||
| import logging | ||||
| import requests | ||||
| 
 | ||||
| from pyotp import TOTP | ||||
| 
 | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.common.settings import settings | ||||
| 
 | ||||
| logger = logging.getLogger(__name__) | ||||
|  |  | |||
|  | @ -10,11 +10,12 @@ from flask import Flask, request | |||
| from flask_restful import Resource, Api | ||||
| from werkzeug.exceptions import HTTPException | ||||
| 
 | ||||
| from uncloud.common.shared import shared | ||||
| 
 | ||||
| from uncloud.common import counters | ||||
| from uncloud.common.vm import VMStatus | ||||
| from uncloud.common.request import RequestEntry, RequestType | ||||
| from uncloud.common.settings import settings | ||||
| from uncloud.shared import shared | ||||
| from . import schemas | ||||
| from .helper import generate_mac, mac2ipv6 | ||||
| from uncloud import UncloudException | ||||
|  |  | |||
|  | @ -21,7 +21,7 @@ import bitmath | |||
| 
 | ||||
| from uncloud.common.host import HostStatus | ||||
| from uncloud.common.vm import VMStatus | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.common.settings import settings | ||||
| from . import helper, logger | ||||
| from .common_fields import Field, VmUUIDField | ||||
|  |  | |||
|  | @ -1,24 +1,21 @@ | |||
| import etcd3 | ||||
| import json | ||||
| import queue | ||||
| import copy | ||||
| from uncloud import UncloudException | ||||
| 
 | ||||
| from collections import namedtuple | ||||
| from functools import wraps | ||||
| 
 | ||||
| from . import logger | ||||
| 
 | ||||
| PseudoEtcdMeta = namedtuple("PseudoEtcdMeta", ["key"]) | ||||
| from uncloud import UncloudException | ||||
| from uncloud.common import logger | ||||
| 
 | ||||
| 
 | ||||
| class EtcdEntry: | ||||
|     # key: str | ||||
|     # value: str | ||||
| 
 | ||||
|     def __init__(self, meta, value, value_in_json=False): | ||||
|         self.key = meta.key.decode("utf-8") | ||||
|         self.value = value.decode("utf-8") | ||||
|     def __init__(self, meta_or_key, value, value_in_json=False): | ||||
|         if hasattr(meta_or_key, 'key'): | ||||
|             # if meta has attr 'key' then get it | ||||
|             self.key = meta_or_key.key.decode('utf-8') | ||||
|         else: | ||||
|             # otherwise meta is the 'key' | ||||
|             self.key = meta_or_key | ||||
|         self.value = value.decode('utf-8') | ||||
| 
 | ||||
|         if value_in_json: | ||||
|             self.value = json.loads(self.value) | ||||
|  | @ -29,18 +26,12 @@ def readable_errors(func): | |||
|     def wrapper(*args, **kwargs): | ||||
|         try: | ||||
|             return func(*args, **kwargs) | ||||
|         except etcd3.exceptions.ConnectionFailedError as err: | ||||
|             raise UncloudException( | ||||
|                 "Cannot connect to etcd: is etcd running as configured in uncloud.conf?" | ||||
|             ) | ||||
|         except etcd3.exceptions.ConnectionFailedError: | ||||
|             raise UncloudException('Cannot connect to etcd: is etcd running as configured in uncloud.conf?') | ||||
|         except etcd3.exceptions.ConnectionTimeoutError as err: | ||||
|             raise etcd3.exceptions.ConnectionTimeoutError( | ||||
|                 "etcd connection timeout." | ||||
|             ) from err | ||||
|             raise etcd3.exceptions.ConnectionTimeoutError('etcd connection timeout.') from err | ||||
|         except Exception: | ||||
|             logger.exception( | ||||
|                 "Some etcd error occured. See syslog for details." | ||||
|             ) | ||||
|             logger.exception('Some etcd error occured. See syslog for details.') | ||||
| 
 | ||||
|     return wrapper | ||||
| 
 | ||||
|  | @ -64,55 +55,39 @@ class Etcd3Wrapper: | |||
|             _value = json.dumps(_value) | ||||
| 
 | ||||
|         if not isinstance(_key, str): | ||||
|             _key = _key.decode("utf-8") | ||||
|             _key = _key.decode('utf-8') | ||||
| 
 | ||||
|         return self.client.put(_key, _value, **kwargs) | ||||
| 
 | ||||
|     @readable_errors | ||||
|     def get_prefix(self, *args, value_in_json=False, **kwargs): | ||||
|         r = self.client.get_prefix(*args, **kwargs) | ||||
|         for entry in r: | ||||
|             e = EtcdEntry(*entry[::-1], value_in_json=value_in_json) | ||||
|             if e.value: | ||||
|                 yield e | ||||
|     def get_prefix(self, *args, value_in_json=False, raise_exception=True, **kwargs): | ||||
|         try: | ||||
|             event_iterator = self.client.get_prefix(*args, **kwargs) | ||||
|             for e in event_iterator: | ||||
|                 yield EtcdEntry(*e[::-1], value_in_json=value_in_json) | ||||
|         except Exception as err: | ||||
|             if raise_exception: | ||||
|                 raise Exception('Exception in etcd_wrapper.get_prefix') from err | ||||
|             else: | ||||
|                 logger.exception('Error in etcd_wrapper') | ||||
|                 return iter([]) | ||||
| 
 | ||||
|     @readable_errors | ||||
|     def watch_prefix(self, key, timeout=0, value_in_json=False): | ||||
|         timeout_event = EtcdEntry( | ||||
|             PseudoEtcdMeta(key=b"TIMEOUT"), | ||||
|             value=str.encode( | ||||
|                 json.dumps({"status": "TIMEOUT", "type": "TIMEOUT"}) | ||||
|             ), | ||||
|             value_in_json=value_in_json, | ||||
|         ) | ||||
| 
 | ||||
|         event_queue = queue.Queue() | ||||
| 
 | ||||
|         def add_event_to_queue(event): | ||||
|             if hasattr(event, "events"): | ||||
|                 for e in event.events: | ||||
|                     if e.value: | ||||
|                         event_queue.put( | ||||
|                             EtcdEntry( | ||||
|                                 e, e.value, value_in_json=value_in_json | ||||
|                             ) | ||||
|                         ) | ||||
| 
 | ||||
|         self.client.add_watch_prefix_callback(key, add_event_to_queue) | ||||
| 
 | ||||
|         while True: | ||||
|             try: | ||||
|                 while True: | ||||
|                     v = event_queue.get(timeout=timeout) | ||||
|                     yield v | ||||
|             except queue.Empty: | ||||
|                 event_queue.put(copy.deepcopy(timeout_event)) | ||||
| 
 | ||||
| 
 | ||||
| class PsuedoEtcdEntry(EtcdEntry): | ||||
|     def __init__(self, key, value, value_in_json=False): | ||||
|         super().__init__( | ||||
|             PseudoEtcdMeta(key=key.encode("utf-8")), | ||||
|             value, | ||||
|             value_in_json=value_in_json, | ||||
|         ) | ||||
|     def watch_prefix(self, key, raise_exception=True, value_in_json=False): | ||||
|         try: | ||||
|             event_iterator, cancel = self.client.watch_prefix(key) | ||||
|             for e in event_iterator: | ||||
|                 if hasattr(e, '_event'): | ||||
|                     e = e._event | ||||
|                     if e.type == e.PUT: | ||||
|                         yield EtcdEntry(e.kv.key, e.kv.value, value_in_json=value_in_json) | ||||
|         except Exception as err: | ||||
|             if raise_exception: | ||||
|                 raise Exception('Exception in etcd_wrapper.get_prefix') from err | ||||
|             else: | ||||
|                 logger.exception('Error in etcd_wrapper.watch_prefix') | ||||
|                 try: | ||||
|                     cancel() | ||||
|                 except Exception: | ||||
|                     pass | ||||
|                 return iter([]) | ||||
|  |  | |||
|  | @ -2,8 +2,8 @@ import json | |||
| from os.path import join | ||||
| from uuid import uuid4 | ||||
| 
 | ||||
| from .etcd_wrapper import PsuedoEtcdEntry | ||||
| from .classes import SpecificEtcdEntryBase | ||||
| from uncloud.common.etcd_wrapper import EtcdEntry | ||||
| from uncloud.common.classes import SpecificEtcdEntryBase | ||||
| 
 | ||||
| 
 | ||||
| class RequestType: | ||||
|  | @ -29,11 +29,8 @@ class RequestEntry(SpecificEtcdEntryBase): | |||
| 
 | ||||
|     @classmethod | ||||
|     def from_scratch(cls, request_prefix, **kwargs): | ||||
|         e = PsuedoEtcdEntry( | ||||
|             join(request_prefix, uuid4().hex), | ||||
|             value=json.dumps(kwargs).encode("utf-8"), | ||||
|             value_in_json=True, | ||||
|         ) | ||||
|         e = EtcdEntry(meta_or_key=join(request_prefix, uuid4().hex), | ||||
|                       value=json.dumps(kwargs).encode('utf-8'), value_in_json=True) | ||||
|         return cls(e) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -2,7 +2,7 @@ import os | |||
| import argparse | ||||
| 
 | ||||
| from uncloud.common.settings import settings | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| 
 | ||||
| arg_parser = argparse.ArgumentParser('configure', add_help=False) | ||||
| configure_subparsers = arg_parser.add_subparsers(dest='subcommand') | ||||
|  |  | |||
|  | @ -10,8 +10,7 @@ from uuid import uuid4 | |||
| 
 | ||||
| from . import logger | ||||
| from uncloud.common.settings import settings | ||||
| from uncloud.shared import shared | ||||
| 
 | ||||
| from uncloud.common.shared import shared | ||||
| 
 | ||||
| arg_parser = argparse.ArgumentParser('filescanner', add_help=False) | ||||
| arg_parser.add_argument('--hostname', required=True) | ||||
|  |  | |||
|  | @ -5,7 +5,7 @@ import time | |||
| from uuid import uuid4 | ||||
| 
 | ||||
| from uncloud.common.request import RequestEntry, RequestType | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.common.settings import settings | ||||
| from uncloud.common.vm import VMStatus | ||||
| from uncloud.vmm import VMM | ||||
|  | @ -72,52 +72,52 @@ def main(hostname, debug=False): | |||
|     except Exception as e: | ||||
|         raise Exception('uncloud-host heartbeat updating mechanism is not working') from e | ||||
| 
 | ||||
|     for events_iterator in [ | ||||
|         shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True), | ||||
|         shared.etcd_client.watch_prefix(settings['etcd']['request_prefix'], timeout=10, value_in_json=True) | ||||
|     ]: | ||||
|         for request_event in events_iterator: | ||||
|             request_event = RequestEntry(request_event) | ||||
|     # The below while True is necessary for gracefully handling leadership transfer and temporary | ||||
|     # unavailability in etcd. Why does it work? It works because get_prefix and watch_prefix return | ||||
|     # iter([]), that is, an iterator over an empty list, on exception (which occurs for the above-mentioned | ||||
|     # reasons), which ends the loop immediately. So, having it inside an infinite loop, we try again and | ||||
|     # again to get the prefix until either success or daemon death comes. | ||||
|     while True: | ||||
|         for events_iterator in [ | ||||
|             shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True, | ||||
|                                           raise_exception=False), | ||||
|             shared.etcd_client.watch_prefix(settings['etcd']['request_prefix'], value_in_json=True, | ||||
|                                             raise_exception=False) | ||||
|         ]: | ||||
|             for request_event in events_iterator: | ||||
|                 request_event = RequestEntry(request_event) | ||||
| 
 | ||||
|             if request_event.type == 'TIMEOUT': | ||||
|                 maintenance(host.key) | ||||
| 
 | ||||
|             elif request_event.hostname == host.key: | ||||
|                 logger.debug('VM Request: %s on Host %s', request_event, host.hostname) | ||||
|                 shared.request_pool.client.client.delete(request_event.key) | ||||
|                 vm_entry = shared.etcd_client.get( | ||||
|                     join_path(settings['etcd']['vm_prefix'], request_event.uuid) | ||||
|                 ) | ||||
|                 logger.debug('VM hostname: {}'.format(vm_entry.value)) | ||||
|                 vm = virtualmachine.VM(vm_entry) | ||||
|                 if request_event.type == RequestType.StartVM: | ||||
|                     vm.start() | ||||
|                 if request_event.hostname == host.key: | ||||
|                     logger.debug('VM Request: %s on Host %s', request_event, host.hostname) | ||||
| 
 | ||||
|                 elif request_event.type == RequestType.StopVM: | ||||
|                     vm.stop() | ||||
|                     shared.request_pool.client.client.delete(request_event.key) | ||||
|                     vm_entry = shared.etcd_client.get( | ||||
|                         join_path(settings['etcd']['vm_prefix'], request_event.uuid) | ||||
|                     ) | ||||
| 
 | ||||
|                 elif request_event.type == RequestType.DeleteVM: | ||||
|                     vm.delete() | ||||
|                     logger.debug('VM hostname: {}'.format(vm_entry.value)) | ||||
| 
 | ||||
|                 elif request_event.type == RequestType.InitVMMigration: | ||||
|                     vm.start(destination_host_key=host.key) | ||||
|                     vm = virtualmachine.VM(vm_entry) | ||||
|                     if request_event.type == RequestType.StartVM: | ||||
|                         vm.start() | ||||
| 
 | ||||
|                 elif request_event.type == RequestType.TransferVM: | ||||
|                     destination_host = host_pool.get(request_event.destination_host_key) | ||||
|                     if destination_host: | ||||
|                         vm.migrate( | ||||
|                             destination_host=destination_host.hostname, | ||||
|                             destination_sock_path=request_event.destination_sock_path, | ||||
|                         ) | ||||
|                     else: | ||||
|                         logger.error('Host %s not found!', request_event.destination_host_key) | ||||
|                     elif request_event.type == RequestType.StopVM: | ||||
|                         vm.stop() | ||||
| 
 | ||||
|                     elif request_event.type == RequestType.DeleteVM: | ||||
|                         vm.delete() | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     argparser = argparse.ArgumentParser() | ||||
|     argparser.add_argument( | ||||
|         'hostname', help='Name of this host. e.g uncloud1.ungleich.ch' | ||||
|     ) | ||||
|     args = argparser.parse_args() | ||||
|     mp.set_start_method('spawn') | ||||
|     main(args.hostname) | ||||
|                     elif request_event.type == RequestType.InitVMMigration: | ||||
|                         vm.start(destination_host_key=host.key) | ||||
| 
 | ||||
|                     elif request_event.type == RequestType.TransferVM: | ||||
|                         destination_host = host_pool.get(request_event.destination_host_key) | ||||
|                         if destination_host: | ||||
|                             vm.migrate( | ||||
|                                 destination_host=destination_host.hostname, | ||||
|                                 destination_sock_path=request_event.destination_sock_path, | ||||
|                             ) | ||||
|                         else: | ||||
|                             logger.error('Host %s not found!', request_event.destination_host_key) | ||||
|  |  | |||
|  | @ -16,7 +16,7 @@ from uncloud.common.vm import VMStatus, declare_stopped | |||
| from uncloud.common.network import create_dev, delete_network_interface | ||||
| from uncloud.common.schemas import VMSchema, NetworkSchema | ||||
| from uncloud.host import logger | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.common.settings import settings | ||||
| from uncloud.vmm import VMM | ||||
| 
 | ||||
|  |  | |||
|  | @ -5,7 +5,7 @@ import subprocess as sp | |||
| 
 | ||||
| from os.path import join as join_path | ||||
| from uncloud.common.settings import settings | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.imagescanner import logger | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -6,7 +6,7 @@ from flask_restful import Resource, Api | |||
| from werkzeug.exceptions import HTTPException | ||||
| 
 | ||||
| from uncloud.common.settings import settings | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| 
 | ||||
| app = Flask(__name__) | ||||
| api = Api(app) | ||||
|  |  | |||
|  | @ -6,7 +6,7 @@ import bitmath | |||
| from uncloud.common.host import HostStatus | ||||
| from uncloud.common.request import RequestEntry, RequestType | ||||
| from uncloud.common.vm import VMStatus | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.common.settings import settings | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -6,59 +6,51 @@ | |||
| 
 | ||||
| import argparse | ||||
| 
 | ||||
| from uncloud.common.request import RequestEntry, RequestType | ||||
| from uncloud.shared import shared | ||||
| from uncloud.common.settings import settings | ||||
| from .helper import (dead_host_mitigation, dead_host_detection, assign_host, NoSuitableHostFound) | ||||
| from . import logger | ||||
| from uncloud.common.request import RequestEntry, RequestType | ||||
| from uncloud.common.shared import shared | ||||
| from uncloud.scheduler import logger | ||||
| from uncloud.scheduler.helper import (dead_host_mitigation, dead_host_detection, | ||||
|                                       assign_host, NoSuitableHostFound) | ||||
| 
 | ||||
| arg_parser = argparse.ArgumentParser('scheduler', add_help=False) | ||||
| 
 | ||||
| 
 | ||||
| def main(debug=False): | ||||
|     for request_iterator in [ | ||||
|         shared.etcd_client.get_prefix( | ||||
|             settings["etcd"]["request_prefix"], value_in_json=True | ||||
|         ), | ||||
|         shared.etcd_client.watch_prefix( | ||||
|             settings["etcd"]["request_prefix"], | ||||
|             timeout=5, | ||||
|             value_in_json=True, | ||||
|         ), | ||||
|     ]: | ||||
|         for request_event in request_iterator: | ||||
|             request_entry = RequestEntry(request_event) | ||||
|             # Never Run time critical mechanism inside timeout | ||||
|             # mechanism because timeout mechanism only comes | ||||
|             # when no other event is happening. It means under | ||||
|             # heavy load there would not be a timeout event. | ||||
|             if request_entry.type == "TIMEOUT": | ||||
|     # The below while True is necessary for gracefully handling leadership transfer and temporary | ||||
|     # unavailability in etcd. Why does it work? It works because get_prefix and watch_prefix return | ||||
|     # iter([]), that is, an iterator over an empty list, on exception (which occurs for the above-mentioned | ||||
|     # reasons), which ends the loop immediately. So, having it inside an infinite loop, we try again and | ||||
|     # again to get the prefix until either success or daemon death comes. | ||||
|     while True: | ||||
|         for request_iterator in [ | ||||
|             shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True, | ||||
|                                           raise_exception=False), | ||||
|             shared.etcd_client.watch_prefix(settings['etcd']['request_prefix'], value_in_json=True, | ||||
|                                             raise_exception=False), | ||||
|         ]: | ||||
|             for request_event in request_iterator: | ||||
|                 dead_host_mitigation(dead_host_detection()) | ||||
|                 request_entry = RequestEntry(request_event) | ||||
| 
 | ||||
|                 # Detect hosts that are dead and set their status | ||||
|                 # to "DEAD", and their VMs' status to "KILLED" | ||||
|                 dead_hosts = dead_host_detection() | ||||
|                 if dead_hosts: | ||||
|                     logger.debug("Dead hosts: %s", dead_hosts) | ||||
|                     dead_host_mitigation(dead_hosts) | ||||
|                 if request_entry.type == RequestType.ScheduleVM: | ||||
|                     logger.debug('%s, %s', request_entry.key, request_entry.value) | ||||
| 
 | ||||
|             elif request_entry.type == RequestType.ScheduleVM: | ||||
|                 logger.debug("%s, %s", request_entry.key, request_entry.value) | ||||
|                     vm_entry = shared.vm_pool.get(request_entry.uuid) | ||||
|                     if vm_entry is None: | ||||
|                         logger.info('Trying to act on {} but it is deleted'.format(request_entry.uuid)) | ||||
|                         continue | ||||
| 
 | ||||
|                 vm_entry = shared.vm_pool.get(request_entry.uuid) | ||||
|                 if vm_entry is None: | ||||
|                     logger.info("Trying to act on {} but it is deleted".format(request_entry.uuid)) | ||||
|                     continue | ||||
|                     shared.etcd_client.client.delete(request_entry.key)  # consume Request | ||||
| 
 | ||||
|                 shared.etcd_client.client.delete(request_entry.key)  # consume Request | ||||
|                     try: | ||||
|                         assign_host(vm_entry) | ||||
|                     except NoSuitableHostFound: | ||||
|                         vm_entry.add_log('Can\'t schedule VM. No Resource Left.') | ||||
|                         shared.vm_pool.put(vm_entry) | ||||
| 
 | ||||
|                 try: | ||||
|                     assign_host(vm_entry) | ||||
|                 except NoSuitableHostFound: | ||||
|                     vm_entry.add_log("Can't schedule VM. No Resource Left.") | ||||
|                     shared.vm_pool.put(vm_entry) | ||||
| 
 | ||||
|                     logger.info("No Resource Left. Emailing admin....") | ||||
|                         logger.info('No Resource Left. Emailing admin....') | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue