From f980cdb4649f5d805bd939eb9596456e916f8768 Mon Sep 17 00:00:00 2001 From: meow Date: Sun, 29 Dec 2019 23:14:39 +0500 Subject: [PATCH] Better error handling, Efforts to run non-root with occasional sudo --- scripts/ucloud | 39 +++++++++++++++++---------------- ucloud/common/etcd_wrapper.py | 14 +++++++----- ucloud/common/logging.py | 11 ++++++---- ucloud/common/network.py | 15 ++++++++----- ucloud/host/main.py | 6 ++--- ucloud/settings/__init__.py | 11 ++++++++-- ucloud/vmm/__init__.py | 41 +++++++++++++++++++++++++++++------ 7 files changed, 90 insertions(+), 47 deletions(-) diff --git a/scripts/ucloud b/scripts/ucloud index 3ddbb5a..9d05118 100755 --- a/scripts/ucloud +++ b/scripts/ucloud @@ -1,5 +1,4 @@ #!/usr/bin/env python3 - import argparse import logging import importlib @@ -8,13 +7,12 @@ import sys from logging.handlers import SysLogHandler -from ucloud.configure.main import configure_parser from ucloud.common.logging import NoTracebackStreamHandler +from ucloud.configure.main import configure_parser def exception_hook(exc_type, exc_value, exc_traceback): - logger = logging.getLogger(__name__) - logger.error( + logging.getLogger(__name__).error( 'Uncaught exception', exc_info=(exc_type, exc_value, exc_traceback) ) @@ -22,7 +20,25 @@ def exception_hook(exc_type, exc_value, exc_traceback): sys.excepthook = exception_hook + if __name__ == '__main__': + # Setting up root logger + logger = logging.getLogger() + logger.setLevel(logging.INFO) + + syslog_handler = SysLogHandler(address='/dev/log') + syslog_handler.setLevel(logging.DEBUG) + syslog_formatter = logging.Formatter('%(pathname)s:%(lineno)d -- %(levelname)-8s %(message)s') + syslog_handler.setFormatter(syslog_formatter) + + stream_handler = NoTracebackStreamHandler() + stream_handler.setLevel(logging.INFO) + stream_formatter = logging.Formatter('%(message)s') + stream_handler.setFormatter(stream_formatter) + + logger.addHandler(syslog_handler) + logger.addHandler(stream_handler) + arg_parser = argparse.ArgumentParser() subparsers = arg_parser.add_subparsers(dest="command") @@ -46,21 +62,6 @@ if __name__ == '__main__': if not args.command: arg_parser.print_help() else: - # Setting up root logger - logger = logging.getLogger('ucloud') - - syslog_handler = SysLogHandler(address='/dev/log') - syslog_handler.setLevel(logging.DEBUG) - syslog_formatter = logging.Formatter('%(pathname)s:%(lineno)d -- %(levelname)-8s %(message)s') - syslog_handler.setFormatter(syslog_formatter) - - stream_handler = NoTracebackStreamHandler() - stream_handler.setLevel(logging.WARNING) - stream_formatter = logging.Formatter('%(message)s') - stream_handler.setFormatter(stream_formatter) - - logger.addHandler(syslog_handler) - logger.addHandler(stream_handler) # if we start etcd in seperate process with default settings # i.e inheriting few things from parent process etcd3 module diff --git a/ucloud/common/etcd_wrapper.py b/ucloud/common/etcd_wrapper.py index eecf4c7..5f464e1 100644 --- a/ucloud/common/etcd_wrapper.py +++ b/ucloud/common/etcd_wrapper.py @@ -29,15 +29,16 @@ def readable_errors(func): try: return func(*args, **kwargs) except etcd3.exceptions.ConnectionFailedError as err: - raise etcd3.exceptions.ConnectionFailedError('etcd connection failed') from err + raise etcd3.exceptions.ConnectionFailedError('etcd connection failed.') from err except etcd3.exceptions.ConnectionTimeoutError as err: - raise etcd3.exceptions.ConnectionTimeoutError('etcd connection timeout') from err + raise etcd3.exceptions.ConnectionTimeoutError('etcd connection timeout.') from err except Exception: - logger.exception('Some etcd error occurred') + logger.exception('Some etcd error occured. See syslog for details.') return wrapper class Etcd3Wrapper: + @readable_errors def __init__(self, *args, **kwargs): self.client = etcd3.client(*args, **kwargs) @@ -77,9 +78,10 @@ class Etcd3Wrapper: event_queue = queue.Queue() def add_event_to_queue(event): - for e in event.events: - if e.value: - event_queue.put(EtcdEntry(e, e.value, value_in_json=value_in_json)) + if hasattr(event, 'events'): + for e in event.events: + if e.value: + event_queue.put(EtcdEntry(e, e.value, value_in_json=value_in_json)) self.client.add_watch_prefix_callback(key, add_event_to_queue) diff --git a/ucloud/common/logging.py b/ucloud/common/logging.py index 945f473..ba1e59d 100644 --- a/ucloud/common/logging.py +++ b/ucloud/common/logging.py @@ -7,14 +7,17 @@ class NoTracebackStreamHandler(logging.StreamHandler): info, cache = record.exc_info, record.exc_text record.exc_info, record.exc_text = None, None - if record.levelname == 'WARNING': - color = colorama.Fore.YELLOW - elif record.levelname in ['ERROR', 'EXCEPTION']: + if record.levelname in ['WARNING', 'WARN']: + color = colorama.Fore.LIGHTYELLOW_EX + elif record.levelname == 'ERROR': color = colorama.Fore.LIGHTRED_EX elif record.levelname == 'INFO': - color = colorama.Fore.LIGHTBLUE_EX + color = colorama.Fore.LIGHTGREEN_EX + elif record.levelname == 'CRITICAL': + color = colorama.Fore.LIGHTCYAN_EX else: color = colorama.Fore.WHITE + try: print(color, end='', flush=True) super().handle(record) diff --git a/ucloud/common/network.py b/ucloud/common/network.py index 629e92a..1503446 100644 --- a/ucloud/common/network.py +++ b/ucloud/common/network.py @@ -30,14 +30,14 @@ def generate_mac(uaa=False, multicast=False, oui=None, separator=':', byte_fmt=' def create_dev(script, _id, dev, ip=None): - command = [script, str(_id), dev] + command = ['sudo', '-p', 'Enter password to create network devices for vm: ', + script, str(_id), dev] if ip: command.append(ip) try: output = sp.check_output(command, stderr=sp.PIPE) - except Exception as e: + except Exception: logger.exception('Creation of interface %s failed.', dev) - print(e) return None else: return output.decode('utf-8').strip() @@ -45,9 +45,14 @@ def create_dev(script, _id, dev, ip=None): def delete_network_interface(iface): try: - sp.check_output(['ip', 'link', 'del', iface]) + sp.check_output( + [ + 'sudo', '-p', 'Enter password to remove {} network device: '.format(iface), + 'ip', 'link', 'del', iface + ], stderr=sp.PIPE + ) except Exception: - logger.exception('Interface Deletion failed') + logger.exception('Interface %s Deletion failed', iface) def find_free_port(): diff --git a/ucloud/host/main.py b/ucloud/host/main.py index 8a7dbe7..b5aeee3 100755 --- a/ucloud/host/main.py +++ b/ucloud/host/main.py @@ -14,10 +14,8 @@ from . import virtualmachine, logger def update_heartbeat(hostname): """Update Last HeartBeat Time for :param hostname: in etcd""" - host_pool = shared.host_pool this_host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None) - while True: this_host.update_heartbeat() host_pool.put(this_host) @@ -43,7 +41,7 @@ def main(hostname): heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,)) heartbeat_updating_process.start() except Exception as e: - raise e.__class__('ucloud-host heartbeat updating mechanism is not working') from e + raise Exception('ucloud-host heartbeat updating mechanism is not working') from e for events_iterator in [ shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True), @@ -87,7 +85,7 @@ def main(hostname): if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("hostname", help="Name of this host. e.g /v1/host/1") + argparser.add_argument("hostname", help="Name of this host. e.g uncloud1.ungleich.ch") args = argparser.parse_args() mp.set_start_method('spawn') main(args.hostname) diff --git a/ucloud/settings/__init__.py b/ucloud/settings/__init__.py index e589485..f9b358e 100644 --- a/ucloud/settings/__init__.py +++ b/ucloud/settings/__init__.py @@ -47,8 +47,15 @@ class Settings(object): } except configparser.Error as err: raise configparser.Error('{} in config file {}'.format(err.message, self.config_file)) from err - - return Etcd3Wrapper(*args, **kwargs) + else: + try: + wrapper = Etcd3Wrapper(*args, **kwargs) + except Exception as err: + logger.error('etcd connection not successfull. Please check your config file.' + '\nDetails: %s\netcd connection parameters: %s', err, kwargs) + sys.exit(1) + else: + return wrapper def read_internal_values(self): self.config_parser.read_dict({ diff --git a/ucloud/vmm/__init__.py b/ucloud/vmm/__init__.py index 1291da4..f85d7a3 100644 --- a/ucloud/vmm/__init__.py +++ b/ucloud/vmm/__init__.py @@ -91,6 +91,14 @@ class VMM: self.vmm_backend = vmm_backend self.socket_dir = os.path.join(self.vmm_backend, 'sock') + if not os.path.isdir(self.vmm_backend): + logger.info('{} does not exists. Creating it...'.format(self.vmm_backend)) + os.makedirs(self.vmm_backend, exist_ok=True) + + if not os.path.isdir(self.socket_dir): + logger.info('{} does not exists. Creating it...'.format(self.socket_dir)) + os.makedirs(self.socket_dir, exist_ok=True) + def is_running(self, uuid): sock_path = os.path.join(self.vmm_backend, uuid) try: @@ -99,8 +107,8 @@ class VMM: recv = sock.recv(4096) except Exception as err: # unix sock doesn't exists or it is closed - logger.info('VM %s sock either don\' exists or it is closed.', uuid, - 'It mean VM is stopped.', exc_info=err) + logger.debug('VM {} sock either don\' exists or it is closed. It mean VM is stopped.'.format(uuid), + exc_info=err) else: # if we receive greetings from qmp it mean VM is running if len(recv) > 0: @@ -120,16 +128,34 @@ class VMM: if self.is_running(uuid): logger.warning('Cannot start VM. It is already running.') else: - qmp_arg = ('-qmp', 'unix:{}/{},server,nowait'.format(self.vmm_backend, uuid)) + qmp_arg = ('-qmp', 'unix:{},server,nowait'.format(join_path(self.vmm_backend, uuid))) vnc_arg = ('-vnc', 'unix:{}'.format(tempfile.NamedTemporaryFile().name)) - command = [self.qemu_path, *args, *qmp_arg, *migration_args, *vnc_arg, '-daemonize'] + command = ['sudo', '-p', 'Enter password to start VM {}: '.format(uuid), + self.qemu_path, *args, *qmp_arg, *migration_args, *vnc_arg, '-daemonize'] try: sp.check_output(command, stderr=sp.PIPE) except sp.CalledProcessError as err: logger.exception('Error occurred while starting VM.\nDetail %s', err.stderr.decode('utf-8')) else: - time.sleep(2) + with suppress(sp.CalledProcessError): + sp.check_output([ + 'sudo', '-p', + 'Enter password to correct permission for uncloud-vmm\'s directory', + 'chmod', '-R', 'o=rwx,g=rwx', self.vmm_backend + ]) + + # TODO: Find some good way to check whether the virtual machine is up and + # running without relying on non-guarenteed ways. + for _ in range(10): + time.sleep(2) + status = self.get_status(uuid) + if status in ['running', 'inmigrate']: + return status + logger.warning('Timeout on VM\'s status. Shutting down VM %s', uuid) + self.stop(uuid) + # TODO: What should we do more. VM can still continue to run in background. + # If we have pid of vm we can kill it using OS. def execute_command(self, uuid, command, **kwargs): # execute_command -> sucess?, output @@ -141,12 +167,12 @@ class VMM: } sock_handle.sendall(json.dumps(command_to_execute).encode('utf-8')) output = file_handle.readline() - except Exception as err: + except Exception: logger.exception('Error occurred while executing command and getting valid output from qmp') else: try: output = json.loads(output) - except: + except Exception: logger.exception('QMP Output isn\'t valid JSON. %s', output) else: return 'return' in output, output @@ -161,6 +187,7 @@ class VMM: if success: return output['return']['status'] else: + # TODO: Think about this for a little more return 'STOPPED' def discover(self):