Merge pull request #507 from darko-poljak/the-good-the-bad-and-the-ugly
The good the bad and the ugly - object prepare/run parallelization
This commit is contained in:
commit
e774fc2b2a
6 changed files with 185 additions and 36 deletions
102
cdist/config.py
102
cdist/config.py
|
@ -27,6 +27,8 @@ import time
|
|||
import itertools
|
||||
import tempfile
|
||||
import socket
|
||||
import multiprocessing
|
||||
from cdist.mputil import mp_pool_run
|
||||
import atexit
|
||||
import shutil
|
||||
|
||||
|
@ -48,7 +50,7 @@ class Config(object):
|
|||
|
||||
self.local = local
|
||||
self.remote = remote
|
||||
self.log = logging.getLogger(self.local.target_host[0])
|
||||
self._open_logger()
|
||||
self.dry_run = dry_run
|
||||
self.jobs = jobs
|
||||
|
||||
|
@ -123,7 +125,6 @@ class Config(object):
|
|||
@classmethod
|
||||
def commandline(cls, args):
|
||||
"""Configure remote system"""
|
||||
import multiprocessing
|
||||
|
||||
# FIXME: Refactor relict - remove later
|
||||
log = logging.getLogger("cdist")
|
||||
|
@ -278,6 +279,14 @@ class Config(object):
|
|||
Iterate over the objects once - helper method for
|
||||
iterate_until_finished
|
||||
"""
|
||||
if self.jobs:
|
||||
objects_changed = self._iterate_once_parallel()
|
||||
else:
|
||||
objects_changed = self._iterate_once_sequential()
|
||||
return objects_changed
|
||||
|
||||
def _iterate_once_sequential(self):
|
||||
self.log.info("Iteration in sequential mode")
|
||||
objects_changed = False
|
||||
|
||||
for cdist_object in self.object_list():
|
||||
|
@ -303,6 +312,95 @@ class Config(object):
|
|||
|
||||
return objects_changed
|
||||
|
||||
def _iterate_once_parallel(self):
|
||||
self.log.info("Iteration in parallel mode in {} jobs".format(
|
||||
self.jobs))
|
||||
objects_changed = False
|
||||
|
||||
cargo = []
|
||||
for cdist_object in self.object_list():
|
||||
if cdist_object.requirements_unfinished(cdist_object.requirements):
|
||||
"""We cannot do anything for this poor object"""
|
||||
continue
|
||||
|
||||
if cdist_object.state == core.CdistObject.STATE_UNDEF:
|
||||
"""Prepare the virgin object"""
|
||||
|
||||
# self.object_prepare(cdist_object)
|
||||
# objects_changed = True
|
||||
cargo.append(cdist_object)
|
||||
|
||||
n = len(cargo)
|
||||
if n == 1:
|
||||
self.log.debug("Only one object, preparing sequentially")
|
||||
self.object_prepare(cargo[0])
|
||||
objects_changed = True
|
||||
elif cargo:
|
||||
self.log.debug("Multiprocessing start method is {}".format(
|
||||
multiprocessing.get_start_method()))
|
||||
self.log.debug(("Starting multiprocessing Pool for {} parallel "
|
||||
"objects preparation".format(n)))
|
||||
args = [
|
||||
(c, ) for c in cargo
|
||||
]
|
||||
mp_pool_run(self.object_prepare, args, jobs=self.jobs)
|
||||
self.log.debug(("Multiprocessing for parallel object "
|
||||
"preparation finished"))
|
||||
objects_changed = True
|
||||
|
||||
del cargo[:]
|
||||
for cdist_object in self.object_list():
|
||||
if cdist_object.requirements_unfinished(cdist_object.requirements):
|
||||
"""We cannot do anything for this poor object"""
|
||||
continue
|
||||
|
||||
if cdist_object.state == core.CdistObject.STATE_PREPARED:
|
||||
if cdist_object.requirements_unfinished(
|
||||
cdist_object.autorequire):
|
||||
"""The previous step created objects we depend on -
|
||||
wait for them
|
||||
"""
|
||||
continue
|
||||
|
||||
# self.object_run(cdist_object)
|
||||
# objects_changed = True
|
||||
cargo.append(cdist_object)
|
||||
|
||||
n = len(cargo)
|
||||
if n == 1:
|
||||
self.log.debug("Only one object, running sequentially")
|
||||
self.object_run(cargo[0])
|
||||
objects_changed = True
|
||||
elif cargo:
|
||||
self.log.debug("Multiprocessing start method is {}".format(
|
||||
multiprocessing.get_start_method()))
|
||||
self.log.debug(("Starting multiprocessing Pool for {} parallel "
|
||||
"object run".format(n)))
|
||||
args = [
|
||||
(c, ) for c in cargo
|
||||
]
|
||||
mp_pool_run(self.object_run, args, jobs=self.jobs)
|
||||
self.log.debug(("Multiprocessing for parallel object "
|
||||
"run finished"))
|
||||
objects_changed = True
|
||||
|
||||
return objects_changed
|
||||
|
||||
def _open_logger(self):
|
||||
self.log = logging.getLogger(self.local.target_host[0])
|
||||
|
||||
# logger is not pickable, so remove it when we pickle
|
||||
def __getstate__(self):
|
||||
state = self.__dict__.copy()
|
||||
if 'log' in state:
|
||||
del state['log']
|
||||
return state
|
||||
|
||||
# recreate logger when we unpickle
|
||||
def __setstate__(self, state):
|
||||
self.__dict__.update(state)
|
||||
self._open_logger()
|
||||
|
||||
def iterate_until_finished(self):
|
||||
"""
|
||||
Go through all objects and solve them
|
||||
|
|
|
@ -79,6 +79,9 @@ class CdistType(object):
|
|||
|
||||
_instances = {}
|
||||
|
||||
def __getnewargs__(self):
|
||||
return self.base_path, self.name
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
"""only one instance of each named type may exist"""
|
||||
# name is second argument
|
||||
|
|
|
@ -24,8 +24,7 @@ import logging
|
|||
import os
|
||||
import glob
|
||||
import multiprocessing
|
||||
|
||||
import cdist
|
||||
from cdist.mputil import mp_pool_run
|
||||
|
||||
'''
|
||||
common:
|
||||
|
@ -121,18 +120,12 @@ class Explorer(object):
|
|||
multiprocessing.get_start_method()))
|
||||
self.log.debug(("Starting multiprocessing Pool for global "
|
||||
"explorers run"))
|
||||
with multiprocessing.Pool(self.jobs) as pool:
|
||||
self.log.debug("Starting async for global explorer run")
|
||||
results = [
|
||||
pool.apply_async(self._run_global_explorer, (e, out_path,))
|
||||
for e in self.list_global_explorer_names()
|
||||
]
|
||||
|
||||
self.log.debug("Waiting async results for global explorer runs")
|
||||
for r in results:
|
||||
r.get() # self._run_global_explorer returns None
|
||||
self.log.debug(("Multiprocessing run for global explorers "
|
||||
"finished"))
|
||||
args = [
|
||||
(e, out_path, ) for e in self.list_global_explorer_names()
|
||||
]
|
||||
mp_pool_run(self._run_global_explorer, args, jobs=self.jobs)
|
||||
self.log.debug(("Multiprocessing run for global explorers "
|
||||
"finished"))
|
||||
|
||||
# logger is not pickable, so remove it when we pickle
|
||||
def __getstate__(self):
|
||||
|
|
|
@ -98,7 +98,7 @@ class Manifest(object):
|
|||
self.target_host = target_host
|
||||
self.local = local
|
||||
|
||||
self.log = logging.getLogger(self.target_host[0])
|
||||
self._open_logger()
|
||||
|
||||
self.env = {
|
||||
'PATH': "%s:%s" % (self.local.bin_path, os.environ['PATH']),
|
||||
|
@ -114,6 +114,21 @@ class Manifest(object):
|
|||
if self.log.getEffectiveLevel() == logging.DEBUG:
|
||||
self.env.update({'__cdist_debug': "yes"})
|
||||
|
||||
def _open_logger(self):
|
||||
self.log = logging.getLogger(self.target_host[0])
|
||||
|
||||
# logger is not pickable, so remove it when we pickle
|
||||
def __getstate__(self):
|
||||
state = self.__dict__.copy()
|
||||
if 'log' in state:
|
||||
del state['log']
|
||||
return state
|
||||
|
||||
# recreate logger when we unpickle
|
||||
def __setstate__(self, state):
|
||||
self.__dict__.update(state)
|
||||
self._open_logger()
|
||||
|
||||
def env_initial_manifest(self, initial_manifest):
|
||||
env = os.environ.copy()
|
||||
env.update(self.env)
|
||||
|
|
|
@ -31,6 +31,7 @@ import multiprocessing
|
|||
import cdist
|
||||
import cdist.exec.util as exec_util
|
||||
import cdist.util.ipaddr as ipaddr
|
||||
from cdist.mputil import mp_pool_run
|
||||
|
||||
|
||||
def _wrap_addr(addr):
|
||||
|
@ -152,25 +153,16 @@ class Remote(object):
|
|||
multiprocessing.get_start_method()))
|
||||
self.log.debug(("Starting multiprocessing Pool for parallel "
|
||||
"remote transfer"))
|
||||
with multiprocessing.Pool(jobs) as pool:
|
||||
self.log.debug("Starting async for parallel transfer")
|
||||
commands = []
|
||||
for f in glob.glob1(source, '*'):
|
||||
command = self._copy.split()
|
||||
path = os.path.join(source, f)
|
||||
command.extend([path, '{0}:{1}'.format(
|
||||
_wrap_addr(self.target_host[0]), destination)])
|
||||
commands.append(command)
|
||||
results = [
|
||||
pool.apply_async(self._run_command, (cmd,))
|
||||
for cmd in commands
|
||||
]
|
||||
|
||||
self.log.debug("Waiting async results for parallel transfer")
|
||||
for r in results:
|
||||
r.get() # self._run_command returns None
|
||||
self.log.debug(("Multiprocessing for parallel transfer "
|
||||
"finished"))
|
||||
args = []
|
||||
for f in glob.glob1(source, '*'):
|
||||
command = self._copy.split()
|
||||
path = os.path.join(source, f)
|
||||
command.extend([path, '{0}:{1}'.format(
|
||||
_wrap_addr(self.target_host[0]), destination)])
|
||||
args.append((command, ))
|
||||
mp_pool_run(self._run_command, args, jobs=jobs)
|
||||
self.log.debug(("Multiprocessing for parallel transfer "
|
||||
"finished"))
|
||||
|
||||
def run_script(self, script, env=None, return_output=False):
|
||||
"""Run the given script with the given environment on the remote side.
|
||||
|
|
48
cdist/mputil.py
Normal file
48
cdist/mputil.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# 2016 Darko Poljak (darko.poljak at gmail.com)
|
||||
#
|
||||
# This file is part of cdist.
|
||||
#
|
||||
# cdist is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# cdist is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with cdist. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
#
|
||||
|
||||
|
||||
import multiprocessing
|
||||
import itertools
|
||||
|
||||
|
||||
def mp_pool_run(func, args=None, kwds=None, jobs=multiprocessing.cpu_count()):
|
||||
""" Run func using multiprocessing.Pool with jobs jobs and supplied
|
||||
iterable of args and kwds with one entry for each parallel func
|
||||
instance.
|
||||
Return list of results.
|
||||
"""
|
||||
if args and kwds:
|
||||
fargs = zip(args, kwds)
|
||||
elif args:
|
||||
fargs = zip(args, itertools.repeat({}))
|
||||
elif kwds:
|
||||
fargs = zip(itertools.repeat(()), kwds)
|
||||
else:
|
||||
return [func(), ]
|
||||
|
||||
with multiprocessing.Pool(jobs) as pool:
|
||||
results = [
|
||||
pool.apply_async(func, a, k)
|
||||
for a, k in fargs
|
||||
]
|
||||
retval = [r.get() for r in results]
|
||||
return retval
|
Loading…
Reference in a new issue