forked from uncloud/uncloud
Better error handling, Efforts to run non-root with occasional sudo
This commit is contained in:
parent
808271f3e0
commit
f980cdb464
7 changed files with 90 additions and 47 deletions
|
@ -1,5 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import importlib
|
||||
|
@ -8,13 +7,12 @@ import sys
|
|||
|
||||
from logging.handlers import SysLogHandler
|
||||
|
||||
from ucloud.configure.main import configure_parser
|
||||
from ucloud.common.logging import NoTracebackStreamHandler
|
||||
from ucloud.configure.main import configure_parser
|
||||
|
||||
|
||||
def exception_hook(exc_type, exc_value, exc_traceback):
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.error(
|
||||
logging.getLogger(__name__).error(
|
||||
'Uncaught exception',
|
||||
exc_info=(exc_type, exc_value, exc_traceback)
|
||||
)
|
||||
|
@ -22,7 +20,25 @@ def exception_hook(exc_type, exc_value, exc_traceback):
|
|||
|
||||
sys.excepthook = exception_hook
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Setting up root logger
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
syslog_handler = SysLogHandler(address='/dev/log')
|
||||
syslog_handler.setLevel(logging.DEBUG)
|
||||
syslog_formatter = logging.Formatter('%(pathname)s:%(lineno)d -- %(levelname)-8s %(message)s')
|
||||
syslog_handler.setFormatter(syslog_formatter)
|
||||
|
||||
stream_handler = NoTracebackStreamHandler()
|
||||
stream_handler.setLevel(logging.INFO)
|
||||
stream_formatter = logging.Formatter('%(message)s')
|
||||
stream_handler.setFormatter(stream_formatter)
|
||||
|
||||
logger.addHandler(syslog_handler)
|
||||
logger.addHandler(stream_handler)
|
||||
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
subparsers = arg_parser.add_subparsers(dest="command")
|
||||
|
||||
|
@ -46,21 +62,6 @@ if __name__ == '__main__':
|
|||
if not args.command:
|
||||
arg_parser.print_help()
|
||||
else:
|
||||
# Setting up root logger
|
||||
logger = logging.getLogger('ucloud')
|
||||
|
||||
syslog_handler = SysLogHandler(address='/dev/log')
|
||||
syslog_handler.setLevel(logging.DEBUG)
|
||||
syslog_formatter = logging.Formatter('%(pathname)s:%(lineno)d -- %(levelname)-8s %(message)s')
|
||||
syslog_handler.setFormatter(syslog_formatter)
|
||||
|
||||
stream_handler = NoTracebackStreamHandler()
|
||||
stream_handler.setLevel(logging.WARNING)
|
||||
stream_formatter = logging.Formatter('%(message)s')
|
||||
stream_handler.setFormatter(stream_formatter)
|
||||
|
||||
logger.addHandler(syslog_handler)
|
||||
logger.addHandler(stream_handler)
|
||||
|
||||
# if we start etcd in seperate process with default settings
|
||||
# i.e inheriting few things from parent process etcd3 module
|
||||
|
|
|
@ -29,15 +29,16 @@ def readable_errors(func):
|
|||
try:
|
||||
return func(*args, **kwargs)
|
||||
except etcd3.exceptions.ConnectionFailedError as err:
|
||||
raise etcd3.exceptions.ConnectionFailedError('etcd connection failed') from err
|
||||
raise etcd3.exceptions.ConnectionFailedError('etcd connection failed.') from err
|
||||
except etcd3.exceptions.ConnectionTimeoutError as err:
|
||||
raise etcd3.exceptions.ConnectionTimeoutError('etcd connection timeout') from err
|
||||
raise etcd3.exceptions.ConnectionTimeoutError('etcd connection timeout.') from err
|
||||
except Exception:
|
||||
logger.exception('Some etcd error occurred')
|
||||
logger.exception('Some etcd error occured. See syslog for details.')
|
||||
return wrapper
|
||||
|
||||
|
||||
class Etcd3Wrapper:
|
||||
@readable_errors
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.client = etcd3.client(*args, **kwargs)
|
||||
|
||||
|
@ -77,6 +78,7 @@ class Etcd3Wrapper:
|
|||
event_queue = queue.Queue()
|
||||
|
||||
def add_event_to_queue(event):
|
||||
if hasattr(event, 'events'):
|
||||
for e in event.events:
|
||||
if e.value:
|
||||
event_queue.put(EtcdEntry(e, e.value, value_in_json=value_in_json))
|
||||
|
|
|
@ -7,14 +7,17 @@ class NoTracebackStreamHandler(logging.StreamHandler):
|
|||
info, cache = record.exc_info, record.exc_text
|
||||
record.exc_info, record.exc_text = None, None
|
||||
|
||||
if record.levelname == 'WARNING':
|
||||
color = colorama.Fore.YELLOW
|
||||
elif record.levelname in ['ERROR', 'EXCEPTION']:
|
||||
if record.levelname in ['WARNING', 'WARN']:
|
||||
color = colorama.Fore.LIGHTYELLOW_EX
|
||||
elif record.levelname == 'ERROR':
|
||||
color = colorama.Fore.LIGHTRED_EX
|
||||
elif record.levelname == 'INFO':
|
||||
color = colorama.Fore.LIGHTBLUE_EX
|
||||
color = colorama.Fore.LIGHTGREEN_EX
|
||||
elif record.levelname == 'CRITICAL':
|
||||
color = colorama.Fore.LIGHTCYAN_EX
|
||||
else:
|
||||
color = colorama.Fore.WHITE
|
||||
|
||||
try:
|
||||
print(color, end='', flush=True)
|
||||
super().handle(record)
|
||||
|
|
|
@ -30,14 +30,14 @@ def generate_mac(uaa=False, multicast=False, oui=None, separator=':', byte_fmt='
|
|||
|
||||
|
||||
def create_dev(script, _id, dev, ip=None):
|
||||
command = [script, str(_id), dev]
|
||||
command = ['sudo', '-p', 'Enter password to create network devices for vm: ',
|
||||
script, str(_id), dev]
|
||||
if ip:
|
||||
command.append(ip)
|
||||
try:
|
||||
output = sp.check_output(command, stderr=sp.PIPE)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception('Creation of interface %s failed.', dev)
|
||||
print(e)
|
||||
return None
|
||||
else:
|
||||
return output.decode('utf-8').strip()
|
||||
|
@ -45,9 +45,14 @@ def create_dev(script, _id, dev, ip=None):
|
|||
|
||||
def delete_network_interface(iface):
|
||||
try:
|
||||
sp.check_output(['ip', 'link', 'del', iface])
|
||||
sp.check_output(
|
||||
[
|
||||
'sudo', '-p', 'Enter password to remove {} network device: '.format(iface),
|
||||
'ip', 'link', 'del', iface
|
||||
], stderr=sp.PIPE
|
||||
)
|
||||
except Exception:
|
||||
logger.exception('Interface Deletion failed')
|
||||
logger.exception('Interface %s Deletion failed', iface)
|
||||
|
||||
|
||||
def find_free_port():
|
||||
|
|
|
@ -14,10 +14,8 @@ from . import virtualmachine, logger
|
|||
|
||||
def update_heartbeat(hostname):
|
||||
"""Update Last HeartBeat Time for :param hostname: in etcd"""
|
||||
|
||||
host_pool = shared.host_pool
|
||||
this_host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
|
||||
|
||||
while True:
|
||||
this_host.update_heartbeat()
|
||||
host_pool.put(this_host)
|
||||
|
@ -43,7 +41,7 @@ def main(hostname):
|
|||
heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,))
|
||||
heartbeat_updating_process.start()
|
||||
except Exception as e:
|
||||
raise e.__class__('ucloud-host heartbeat updating mechanism is not working') from e
|
||||
raise Exception('ucloud-host heartbeat updating mechanism is not working') from e
|
||||
|
||||
for events_iterator in [
|
||||
shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True),
|
||||
|
@ -87,7 +85,7 @@ def main(hostname):
|
|||
|
||||
if __name__ == "__main__":
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument("hostname", help="Name of this host. e.g /v1/host/1")
|
||||
argparser.add_argument("hostname", help="Name of this host. e.g uncloud1.ungleich.ch")
|
||||
args = argparser.parse_args()
|
||||
mp.set_start_method('spawn')
|
||||
main(args.hostname)
|
||||
|
|
|
@ -47,8 +47,15 @@ class Settings(object):
|
|||
}
|
||||
except configparser.Error as err:
|
||||
raise configparser.Error('{} in config file {}'.format(err.message, self.config_file)) from err
|
||||
|
||||
return Etcd3Wrapper(*args, **kwargs)
|
||||
else:
|
||||
try:
|
||||
wrapper = Etcd3Wrapper(*args, **kwargs)
|
||||
except Exception as err:
|
||||
logger.error('etcd connection not successfull. Please check your config file.'
|
||||
'\nDetails: %s\netcd connection parameters: %s', err, kwargs)
|
||||
sys.exit(1)
|
||||
else:
|
||||
return wrapper
|
||||
|
||||
def read_internal_values(self):
|
||||
self.config_parser.read_dict({
|
||||
|
|
|
@ -91,6 +91,14 @@ class VMM:
|
|||
self.vmm_backend = vmm_backend
|
||||
self.socket_dir = os.path.join(self.vmm_backend, 'sock')
|
||||
|
||||
if not os.path.isdir(self.vmm_backend):
|
||||
logger.info('{} does not exists. Creating it...'.format(self.vmm_backend))
|
||||
os.makedirs(self.vmm_backend, exist_ok=True)
|
||||
|
||||
if not os.path.isdir(self.socket_dir):
|
||||
logger.info('{} does not exists. Creating it...'.format(self.socket_dir))
|
||||
os.makedirs(self.socket_dir, exist_ok=True)
|
||||
|
||||
def is_running(self, uuid):
|
||||
sock_path = os.path.join(self.vmm_backend, uuid)
|
||||
try:
|
||||
|
@ -99,8 +107,8 @@ class VMM:
|
|||
recv = sock.recv(4096)
|
||||
except Exception as err:
|
||||
# unix sock doesn't exists or it is closed
|
||||
logger.info('VM %s sock either don\' exists or it is closed.', uuid,
|
||||
'It mean VM is stopped.', exc_info=err)
|
||||
logger.debug('VM {} sock either don\' exists or it is closed. It mean VM is stopped.'.format(uuid),
|
||||
exc_info=err)
|
||||
else:
|
||||
# if we receive greetings from qmp it mean VM is running
|
||||
if len(recv) > 0:
|
||||
|
@ -120,16 +128,34 @@ class VMM:
|
|||
if self.is_running(uuid):
|
||||
logger.warning('Cannot start VM. It is already running.')
|
||||
else:
|
||||
qmp_arg = ('-qmp', 'unix:{}/{},server,nowait'.format(self.vmm_backend, uuid))
|
||||
qmp_arg = ('-qmp', 'unix:{},server,nowait'.format(join_path(self.vmm_backend, uuid)))
|
||||
vnc_arg = ('-vnc', 'unix:{}'.format(tempfile.NamedTemporaryFile().name))
|
||||
|
||||
command = [self.qemu_path, *args, *qmp_arg, *migration_args, *vnc_arg, '-daemonize']
|
||||
command = ['sudo', '-p', 'Enter password to start VM {}: '.format(uuid),
|
||||
self.qemu_path, *args, *qmp_arg, *migration_args, *vnc_arg, '-daemonize']
|
||||
try:
|
||||
sp.check_output(command, stderr=sp.PIPE)
|
||||
except sp.CalledProcessError as err:
|
||||
logger.exception('Error occurred while starting VM.\nDetail %s', err.stderr.decode('utf-8'))
|
||||
else:
|
||||
with suppress(sp.CalledProcessError):
|
||||
sp.check_output([
|
||||
'sudo', '-p',
|
||||
'Enter password to correct permission for uncloud-vmm\'s directory',
|
||||
'chmod', '-R', 'o=rwx,g=rwx', self.vmm_backend
|
||||
])
|
||||
|
||||
# TODO: Find some good way to check whether the virtual machine is up and
|
||||
# running without relying on non-guarenteed ways.
|
||||
for _ in range(10):
|
||||
time.sleep(2)
|
||||
status = self.get_status(uuid)
|
||||
if status in ['running', 'inmigrate']:
|
||||
return status
|
||||
logger.warning('Timeout on VM\'s status. Shutting down VM %s', uuid)
|
||||
self.stop(uuid)
|
||||
# TODO: What should we do more. VM can still continue to run in background.
|
||||
# If we have pid of vm we can kill it using OS.
|
||||
|
||||
def execute_command(self, uuid, command, **kwargs):
|
||||
# execute_command -> sucess?, output
|
||||
|
@ -141,12 +167,12 @@ class VMM:
|
|||
}
|
||||
sock_handle.sendall(json.dumps(command_to_execute).encode('utf-8'))
|
||||
output = file_handle.readline()
|
||||
except Exception as err:
|
||||
except Exception:
|
||||
logger.exception('Error occurred while executing command and getting valid output from qmp')
|
||||
else:
|
||||
try:
|
||||
output = json.loads(output)
|
||||
except:
|
||||
except Exception:
|
||||
logger.exception('QMP Output isn\'t valid JSON. %s', output)
|
||||
else:
|
||||
return 'return' in output, output
|
||||
|
@ -161,6 +187,7 @@ class VMM:
|
|||
if success:
|
||||
return output['return']['status']
|
||||
else:
|
||||
# TODO: Think about this for a little more
|
||||
return 'STOPPED'
|
||||
|
||||
def discover(self):
|
||||
|
|
Loading…
Reference in a new issue