transfer all files of a directory at once

instead of calling the copy command once per file in a directory (eg a
type explorer dir), call the copy command only once with all files of
the directory.

batch copy saves 20% of dry-run time on my test manifest in sequential
mode, and 15% in parallel mode.
This commit is contained in:
dhivael 2018-09-21 23:17:41 +02:00
parent d6952543a7
commit c021cd15cd

View file

@ -203,47 +203,21 @@ class Remote(object):
os.remove(tarpath)
used_archiving = True
if not used_archiving:
if jobs:
self._transfer_dir_parallel(source, destination, jobs)
else:
self._transfer_dir_sequential(source, destination)
self._transfer_dir(source, destination)
elif jobs:
raise cdist.Error("Source {} is not a directory".format(source))
else:
self._transfer_file(source, destination)
def _transfer_dir_commands(self, source, destination):
for f in glob.glob1(source, '*'):
def _transfer_dir(self, source, destination):
command = self._copy.split()
for f in glob.glob1(source, '*'):
path = os.path.join(source, f)
command.extend([path, '{0}:{1}'.format(
command.extend([path])
command.extend(['{0}:{1}'.format(
_wrap_addr(self.target_host[0]), destination)])
yield command
def _transfer_dir_sequential(self, source, destination):
for command in self._transfer_dir_commands(source, destination):
self._run_command(command)
def _transfer_dir_parallel(self, source, destination, jobs):
"""Transfer a directory to the remote side in parallel mode."""
self.log.debug("Remote transfer in {} parallel jobs".format(
jobs))
self.log.trace("Multiprocessing start method is {}".format(
multiprocessing.get_start_method()))
self.log.trace(("Starting multiprocessing Pool for parallel "
"remote transfer"))
args = [
(command, )
for command in self._transfer_dir_commands(source, destination)
]
if len(args) == 1:
self.log.debug("Only one dir entry, transfering sequentially")
self._run_command(args[0])
else:
mp_pool_run(self._run_command, args, jobs=jobs)
self.log.trace(("Multiprocessing for parallel transfer "
"finished"))
def run_script(self, script, env=None, return_output=False, stdout=None,
stderr=None):
"""Run the given script with the given environment on the remote side.