forked from uncloud/uncloud
Better error handling, Efforts to run non-root with occasional sudo
This commit is contained in:
parent
808271f3e0
commit
f980cdb464
7 changed files with 90 additions and 47 deletions
|
@ -1,5 +1,4 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import importlib
|
import importlib
|
||||||
|
@ -8,13 +7,12 @@ import sys
|
||||||
|
|
||||||
from logging.handlers import SysLogHandler
|
from logging.handlers import SysLogHandler
|
||||||
|
|
||||||
from ucloud.configure.main import configure_parser
|
|
||||||
from ucloud.common.logging import NoTracebackStreamHandler
|
from ucloud.common.logging import NoTracebackStreamHandler
|
||||||
|
from ucloud.configure.main import configure_parser
|
||||||
|
|
||||||
|
|
||||||
def exception_hook(exc_type, exc_value, exc_traceback):
|
def exception_hook(exc_type, exc_value, exc_traceback):
|
||||||
logger = logging.getLogger(__name__)
|
logging.getLogger(__name__).error(
|
||||||
logger.error(
|
|
||||||
'Uncaught exception',
|
'Uncaught exception',
|
||||||
exc_info=(exc_type, exc_value, exc_traceback)
|
exc_info=(exc_type, exc_value, exc_traceback)
|
||||||
)
|
)
|
||||||
|
@ -22,7 +20,25 @@ def exception_hook(exc_type, exc_value, exc_traceback):
|
||||||
|
|
||||||
sys.excepthook = exception_hook
|
sys.excepthook = exception_hook
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
# Setting up root logger
|
||||||
|
logger = logging.getLogger()
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
syslog_handler = SysLogHandler(address='/dev/log')
|
||||||
|
syslog_handler.setLevel(logging.DEBUG)
|
||||||
|
syslog_formatter = logging.Formatter('%(pathname)s:%(lineno)d -- %(levelname)-8s %(message)s')
|
||||||
|
syslog_handler.setFormatter(syslog_formatter)
|
||||||
|
|
||||||
|
stream_handler = NoTracebackStreamHandler()
|
||||||
|
stream_handler.setLevel(logging.INFO)
|
||||||
|
stream_formatter = logging.Formatter('%(message)s')
|
||||||
|
stream_handler.setFormatter(stream_formatter)
|
||||||
|
|
||||||
|
logger.addHandler(syslog_handler)
|
||||||
|
logger.addHandler(stream_handler)
|
||||||
|
|
||||||
arg_parser = argparse.ArgumentParser()
|
arg_parser = argparse.ArgumentParser()
|
||||||
subparsers = arg_parser.add_subparsers(dest="command")
|
subparsers = arg_parser.add_subparsers(dest="command")
|
||||||
|
|
||||||
|
@ -46,21 +62,6 @@ if __name__ == '__main__':
|
||||||
if not args.command:
|
if not args.command:
|
||||||
arg_parser.print_help()
|
arg_parser.print_help()
|
||||||
else:
|
else:
|
||||||
# Setting up root logger
|
|
||||||
logger = logging.getLogger('ucloud')
|
|
||||||
|
|
||||||
syslog_handler = SysLogHandler(address='/dev/log')
|
|
||||||
syslog_handler.setLevel(logging.DEBUG)
|
|
||||||
syslog_formatter = logging.Formatter('%(pathname)s:%(lineno)d -- %(levelname)-8s %(message)s')
|
|
||||||
syslog_handler.setFormatter(syslog_formatter)
|
|
||||||
|
|
||||||
stream_handler = NoTracebackStreamHandler()
|
|
||||||
stream_handler.setLevel(logging.WARNING)
|
|
||||||
stream_formatter = logging.Formatter('%(message)s')
|
|
||||||
stream_handler.setFormatter(stream_formatter)
|
|
||||||
|
|
||||||
logger.addHandler(syslog_handler)
|
|
||||||
logger.addHandler(stream_handler)
|
|
||||||
|
|
||||||
# if we start etcd in separate process with default settings
|
# if we start etcd in separate process with default settings
|
||||||
# i.e inheriting few things from parent process etcd3 module
|
# i.e inheriting few things from parent process etcd3 module
|
||||||
|
|
|
@ -29,15 +29,16 @@ def readable_errors(func):
|
||||||
try:
|
try:
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
except etcd3.exceptions.ConnectionFailedError as err:
|
except etcd3.exceptions.ConnectionFailedError as err:
|
||||||
raise etcd3.exceptions.ConnectionFailedError('etcd connection failed') from err
|
raise etcd3.exceptions.ConnectionFailedError('etcd connection failed.') from err
|
||||||
except etcd3.exceptions.ConnectionTimeoutError as err:
|
except etcd3.exceptions.ConnectionTimeoutError as err:
|
||||||
raise etcd3.exceptions.ConnectionTimeoutError('etcd connection timeout') from err
|
raise etcd3.exceptions.ConnectionTimeoutError('etcd connection timeout.') from err
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception('Some etcd error occurred')
|
logger.exception('Some etcd error occured. See syslog for details.')
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
class Etcd3Wrapper:
|
class Etcd3Wrapper:
|
||||||
|
@readable_errors
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
self.client = etcd3.client(*args, **kwargs)
|
self.client = etcd3.client(*args, **kwargs)
|
||||||
|
|
||||||
|
@ -77,9 +78,10 @@ class Etcd3Wrapper:
|
||||||
event_queue = queue.Queue()
|
event_queue = queue.Queue()
|
||||||
|
|
||||||
def add_event_to_queue(event):
|
def add_event_to_queue(event):
|
||||||
for e in event.events:
|
if hasattr(event, 'events'):
|
||||||
if e.value:
|
for e in event.events:
|
||||||
event_queue.put(EtcdEntry(e, e.value, value_in_json=value_in_json))
|
if e.value:
|
||||||
|
event_queue.put(EtcdEntry(e, e.value, value_in_json=value_in_json))
|
||||||
|
|
||||||
self.client.add_watch_prefix_callback(key, add_event_to_queue)
|
self.client.add_watch_prefix_callback(key, add_event_to_queue)
|
||||||
|
|
||||||
|
|
|
@ -7,14 +7,17 @@ class NoTracebackStreamHandler(logging.StreamHandler):
|
||||||
info, cache = record.exc_info, record.exc_text
|
info, cache = record.exc_info, record.exc_text
|
||||||
record.exc_info, record.exc_text = None, None
|
record.exc_info, record.exc_text = None, None
|
||||||
|
|
||||||
if record.levelname == 'WARNING':
|
if record.levelname in ['WARNING', 'WARN']:
|
||||||
color = colorama.Fore.YELLOW
|
color = colorama.Fore.LIGHTYELLOW_EX
|
||||||
elif record.levelname in ['ERROR', 'EXCEPTION']:
|
elif record.levelname == 'ERROR':
|
||||||
color = colorama.Fore.LIGHTRED_EX
|
color = colorama.Fore.LIGHTRED_EX
|
||||||
elif record.levelname == 'INFO':
|
elif record.levelname == 'INFO':
|
||||||
color = colorama.Fore.LIGHTBLUE_EX
|
color = colorama.Fore.LIGHTGREEN_EX
|
||||||
|
elif record.levelname == 'CRITICAL':
|
||||||
|
color = colorama.Fore.LIGHTCYAN_EX
|
||||||
else:
|
else:
|
||||||
color = colorama.Fore.WHITE
|
color = colorama.Fore.WHITE
|
||||||
|
|
||||||
try:
|
try:
|
||||||
print(color, end='', flush=True)
|
print(color, end='', flush=True)
|
||||||
super().handle(record)
|
super().handle(record)
|
||||||
|
|
|
@ -30,14 +30,14 @@ def generate_mac(uaa=False, multicast=False, oui=None, separator=':', byte_fmt='
|
||||||
|
|
||||||
|
|
||||||
def create_dev(script, _id, dev, ip=None):
|
def create_dev(script, _id, dev, ip=None):
|
||||||
command = [script, str(_id), dev]
|
command = ['sudo', '-p', 'Enter password to create network devices for vm: ',
|
||||||
|
script, str(_id), dev]
|
||||||
if ip:
|
if ip:
|
||||||
command.append(ip)
|
command.append(ip)
|
||||||
try:
|
try:
|
||||||
output = sp.check_output(command, stderr=sp.PIPE)
|
output = sp.check_output(command, stderr=sp.PIPE)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logger.exception('Creation of interface %s failed.', dev)
|
logger.exception('Creation of interface %s failed.', dev)
|
||||||
print(e)
|
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
return output.decode('utf-8').strip()
|
return output.decode('utf-8').strip()
|
||||||
|
@ -45,9 +45,14 @@ def create_dev(script, _id, dev, ip=None):
|
||||||
|
|
||||||
def delete_network_interface(iface):
|
def delete_network_interface(iface):
|
||||||
try:
|
try:
|
||||||
sp.check_output(['ip', 'link', 'del', iface])
|
sp.check_output(
|
||||||
|
[
|
||||||
|
'sudo', '-p', 'Enter password to remove {} network device: '.format(iface),
|
||||||
|
'ip', 'link', 'del', iface
|
||||||
|
], stderr=sp.PIPE
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception('Interface Deletion failed')
|
logger.exception('Interface %s Deletion failed', iface)
|
||||||
|
|
||||||
|
|
||||||
def find_free_port():
|
def find_free_port():
|
||||||
|
|
|
@ -14,10 +14,8 @@ from . import virtualmachine, logger
|
||||||
|
|
||||||
def update_heartbeat(hostname):
|
def update_heartbeat(hostname):
|
||||||
"""Update Last HeartBeat Time for :param hostname: in etcd"""
|
"""Update Last HeartBeat Time for :param hostname: in etcd"""
|
||||||
|
|
||||||
host_pool = shared.host_pool
|
host_pool = shared.host_pool
|
||||||
this_host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
|
this_host = next(filter(lambda h: h.hostname == hostname, host_pool.hosts), None)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
this_host.update_heartbeat()
|
this_host.update_heartbeat()
|
||||||
host_pool.put(this_host)
|
host_pool.put(this_host)
|
||||||
|
@ -43,7 +41,7 @@ def main(hostname):
|
||||||
heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,))
|
heartbeat_updating_process = mp.Process(target=update_heartbeat, args=(hostname,))
|
||||||
heartbeat_updating_process.start()
|
heartbeat_updating_process.start()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise e.__class__('ucloud-host heartbeat updating mechanism is not working') from e
|
raise Exception('ucloud-host heartbeat updating mechanism is not working') from e
|
||||||
|
|
||||||
for events_iterator in [
|
for events_iterator in [
|
||||||
shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True),
|
shared.etcd_client.get_prefix(settings['etcd']['request_prefix'], value_in_json=True),
|
||||||
|
@ -87,7 +85,7 @@ def main(hostname):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
argparser = argparse.ArgumentParser()
|
argparser = argparse.ArgumentParser()
|
||||||
argparser.add_argument("hostname", help="Name of this host. e.g /v1/host/1")
|
argparser.add_argument("hostname", help="Name of this host. e.g uncloud1.ungleich.ch")
|
||||||
args = argparser.parse_args()
|
args = argparser.parse_args()
|
||||||
mp.set_start_method('spawn')
|
mp.set_start_method('spawn')
|
||||||
main(args.hostname)
|
main(args.hostname)
|
||||||
|
|
|
@ -47,8 +47,15 @@ class Settings(object):
|
||||||
}
|
}
|
||||||
except configparser.Error as err:
|
except configparser.Error as err:
|
||||||
raise configparser.Error('{} in config file {}'.format(err.message, self.config_file)) from err
|
raise configparser.Error('{} in config file {}'.format(err.message, self.config_file)) from err
|
||||||
|
else:
|
||||||
return Etcd3Wrapper(*args, **kwargs)
|
try:
|
||||||
|
wrapper = Etcd3Wrapper(*args, **kwargs)
|
||||||
|
except Exception as err:
|
||||||
|
logger.error('etcd connection not successfull. Please check your config file.'
|
||||||
|
'\nDetails: %s\netcd connection parameters: %s', err, kwargs)
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
return wrapper
|
||||||
|
|
||||||
def read_internal_values(self):
|
def read_internal_values(self):
|
||||||
self.config_parser.read_dict({
|
self.config_parser.read_dict({
|
||||||
|
|
|
@ -91,6 +91,14 @@ class VMM:
|
||||||
self.vmm_backend = vmm_backend
|
self.vmm_backend = vmm_backend
|
||||||
self.socket_dir = os.path.join(self.vmm_backend, 'sock')
|
self.socket_dir = os.path.join(self.vmm_backend, 'sock')
|
||||||
|
|
||||||
|
if not os.path.isdir(self.vmm_backend):
|
||||||
|
logger.info('{} does not exists. Creating it...'.format(self.vmm_backend))
|
||||||
|
os.makedirs(self.vmm_backend, exist_ok=True)
|
||||||
|
|
||||||
|
if not os.path.isdir(self.socket_dir):
|
||||||
|
logger.info('{} does not exists. Creating it...'.format(self.socket_dir))
|
||||||
|
os.makedirs(self.socket_dir, exist_ok=True)
|
||||||
|
|
||||||
def is_running(self, uuid):
|
def is_running(self, uuid):
|
||||||
sock_path = os.path.join(self.vmm_backend, uuid)
|
sock_path = os.path.join(self.vmm_backend, uuid)
|
||||||
try:
|
try:
|
||||||
|
@ -99,8 +107,8 @@ class VMM:
|
||||||
recv = sock.recv(4096)
|
recv = sock.recv(4096)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
# unix sock doesn't exist or it is closed
|
# unix sock doesn't exist or it is closed
|
||||||
logger.info('VM %s sock either don\' exists or it is closed.', uuid,
|
logger.debug('VM {} sock either don\' exists or it is closed. It mean VM is stopped.'.format(uuid),
|
||||||
'It mean VM is stopped.', exc_info=err)
|
exc_info=err)
|
||||||
else:
|
else:
|
||||||
# if we receive greetings from qmp it means the VM is running
|
# if we receive greetings from qmp it means the VM is running
|
||||||
if len(recv) > 0:
|
if len(recv) > 0:
|
||||||
|
@ -120,16 +128,34 @@ class VMM:
|
||||||
if self.is_running(uuid):
|
if self.is_running(uuid):
|
||||||
logger.warning('Cannot start VM. It is already running.')
|
logger.warning('Cannot start VM. It is already running.')
|
||||||
else:
|
else:
|
||||||
qmp_arg = ('-qmp', 'unix:{}/{},server,nowait'.format(self.vmm_backend, uuid))
|
qmp_arg = ('-qmp', 'unix:{},server,nowait'.format(join_path(self.vmm_backend, uuid)))
|
||||||
vnc_arg = ('-vnc', 'unix:{}'.format(tempfile.NamedTemporaryFile().name))
|
vnc_arg = ('-vnc', 'unix:{}'.format(tempfile.NamedTemporaryFile().name))
|
||||||
|
|
||||||
command = [self.qemu_path, *args, *qmp_arg, *migration_args, *vnc_arg, '-daemonize']
|
command = ['sudo', '-p', 'Enter password to start VM {}: '.format(uuid),
|
||||||
|
self.qemu_path, *args, *qmp_arg, *migration_args, *vnc_arg, '-daemonize']
|
||||||
try:
|
try:
|
||||||
sp.check_output(command, stderr=sp.PIPE)
|
sp.check_output(command, stderr=sp.PIPE)
|
||||||
except sp.CalledProcessError as err:
|
except sp.CalledProcessError as err:
|
||||||
logger.exception('Error occurred while starting VM.\nDetail %s', err.stderr.decode('utf-8'))
|
logger.exception('Error occurred while starting VM.\nDetail %s', err.stderr.decode('utf-8'))
|
||||||
else:
|
else:
|
||||||
time.sleep(2)
|
with suppress(sp.CalledProcessError):
|
||||||
|
sp.check_output([
|
||||||
|
'sudo', '-p',
|
||||||
|
'Enter password to correct permission for uncloud-vmm\'s directory',
|
||||||
|
'chmod', '-R', 'o=rwx,g=rwx', self.vmm_backend
|
||||||
|
])
|
||||||
|
|
||||||
|
# TODO: Find some good way to check whether the virtual machine is up and
|
||||||
|
# running without relying on non-guaranteed ways.
|
||||||
|
for _ in range(10):
|
||||||
|
time.sleep(2)
|
||||||
|
status = self.get_status(uuid)
|
||||||
|
if status in ['running', 'inmigrate']:
|
||||||
|
return status
|
||||||
|
logger.warning('Timeout on VM\'s status. Shutting down VM %s', uuid)
|
||||||
|
self.stop(uuid)
|
||||||
|
# TODO: What more should we do? The VM can still continue to run in the background.
|
||||||
|
# If we have pid of vm we can kill it using OS.
|
||||||
|
|
||||||
def execute_command(self, uuid, command, **kwargs):
|
def execute_command(self, uuid, command, **kwargs):
|
||||||
# execute_command -> success?, output
|
# execute_command -> success?, output
|
||||||
|
@ -141,12 +167,12 @@ class VMM:
|
||||||
}
|
}
|
||||||
sock_handle.sendall(json.dumps(command_to_execute).encode('utf-8'))
|
sock_handle.sendall(json.dumps(command_to_execute).encode('utf-8'))
|
||||||
output = file_handle.readline()
|
output = file_handle.readline()
|
||||||
except Exception as err:
|
except Exception:
|
||||||
logger.exception('Error occurred while executing command and getting valid output from qmp')
|
logger.exception('Error occurred while executing command and getting valid output from qmp')
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
output = json.loads(output)
|
output = json.loads(output)
|
||||||
except:
|
except Exception:
|
||||||
logger.exception('QMP Output isn\'t valid JSON. %s', output)
|
logger.exception('QMP Output isn\'t valid JSON. %s', output)
|
||||||
else:
|
else:
|
||||||
return 'return' in output, output
|
return 'return' in output, output
|
||||||
|
@ -161,6 +187,7 @@ class VMM:
|
||||||
if success:
|
if success:
|
||||||
return output['return']['status']
|
return output['return']['status']
|
||||||
else:
|
else:
|
||||||
|
# TODO: Think about this for a little more
|
||||||
return 'STOPPED'
|
return 'STOPPED'
|
||||||
|
|
||||||
def discover(self):
|
def discover(self):
|
||||||
|
|
Loading…
Reference in a new issue