Introduce -j option and deprecate -p option.

Darko Poljak 2016-07-21 12:15:37 +02:00
parent 8fbb7ddf27
commit 902a7d667e
4 changed files with 118 additions and 14 deletions
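
In practice the new flag is used like the deprecated -p, optionally followed by a job limit. A hedged usage sketch (the script name and the source names are borrowed from the ps listing in the documentation further down; adjust to the local setup):

    # back up three sources, but run at most two of them at the same time
    ccollect.sh daily -j 2 ddba034 ddba045 ddba046

    # no limit given: behaves like the old -p and starts all sources at once
    ccollect.sh daily -a -j

    # the old spelling still works, but now prints a deprecation warning
    ccollect.sh daily -a -p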

ccollect (107 changes)

@@ -20,6 +20,11 @@
 # Initially written for SyGroup (www.sygroup.ch)
 # Date: Mon Nov 14 11:45:11 CET 2005
 
+# Simulate ccollect without actually performing any backup;
+# the conf directory needs to be specified.
+# Usually used for debugging/testing locking and parallel execution.
+SIMULATE="1"
+
 # Error upon expanding unset variables:
 set -u
@@ -41,6 +46,8 @@ CPOSTEXEC="${CDEFAULTS}/post_exec"
 CMARKER=".ccollect-marker"
 
 export TMP="$(mktemp "/tmp/${__myname}.XXXXXX")"
+CONTROL_PIPE="/tmp/${__myname}-control-pipe"
+
 VERSION="1.0"
 RELEASE="2015-10-10"
 HALF_VERSION="ccollect ${VERSION}"
@@ -59,7 +66,10 @@ SDATE="date +%s"
 # Locking is done for each source so that only one instance per source
 # can run.
 #
-LOCKDIR="${CSOURCES}"
+# Use the CCOLLECT_CONF directory for lock files.
+# This directory can be set arbitrarily, so it can be made writable for
+# the user executing ccollect.
+LOCKDIR="${CCOLLECT_CONF}"
 # printf pattern: ccollect_<source>.lock
 LOCKFILE_PATTERN="ccollect_%s.lock"
 LOCKFD=4
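
Because the lock files now live in the configuration directory rather than in the sources directory, whoever runs ccollect needs write access to ${CCOLLECT_CONF}. A minimal sketch of an unprivileged setup (the paths are illustrative only, not mandated by the commit):

    #!/bin/sh
    # per-user configuration tree that the invoking user can write to
    export CCOLLECT_CONF="${HOME}/ccollect/conf"
    mkdir -p "${CCOLLECT_CONF}/sources" "${CCOLLECT_CONF}/defaults"

    # lock files named ccollect_<source>.lock are then created inside
    # ${CCOLLECT_CONF} while the corresponding source is being backed up
    ccollect.sh daily -a -j 2
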
@@ -119,6 +129,7 @@ fi
 # unset values
 #
 PARALLEL=""
+MAX_JOBS=""
 USE_ALL=""
 
 #
@@ -187,7 +198,9 @@ ${__myname}: [args] <interval name> <sources to backup>
 
    -h, --help:      Show this help screen
    -a, --all:       Backup all sources specified in ${CSOURCES}
-   -p, --parallel:  Parallelise backup processes
+   -j [max], --jobs [max]:  Specify the number of jobs to run simultaneously.
+                    If max is not specified, all jobs are parallelised.
+   -p, --parallel:  Parallelise backup processes (deprecated from 2.0)
    -v, --verbose:   Be very verbose (uses set -x)
    -V, --version:   Print version information
@@ -219,7 +232,24 @@ while [ "$#" -ge 1 ]; do
         USE_ALL=1
         ;;
      -p|--parallel)
+        _techo "Warning: -p, --parallel option is deprecated," \
+           "use -j, --jobs instead."
         PARALLEL=1
+        MAX_JOBS=""
+        ;;
+     -j|--jobs)
+        PARALLEL=1
+        if [ "$#" -ge 2 ]
+        then
+           case "$2" in
+              -*)
+                 ;;
+              *)
+                 MAX_JOBS=$2
+                 shift
+                 ;;
+           esac
+        fi
         ;;
      -v|--verbose)
         set -x
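
The -j handler implements an optional option-argument in plain POSIX sh: a following word is taken as the limit only if one exists and does not look like another option; the inner shift plus the loop's normal shift then consume both words. A standalone sketch of the same pattern (option and variable names simply mirror the diff):

    #!/bin/sh
    # usage: ./parse.sh [-j [max]] [other arguments ...]
    PARALLEL="" MAX_JOBS=""
    while [ "$#" -ge 1 ]; do
        case "$1" in
            -j|--jobs)
                PARALLEL=1
                # consume a following word as the limit unless it is another option
                if [ "$#" -ge 2 ]; then
                    case "$2" in
                        -*) ;;                       # next word is an option, keep MAX_JOBS empty
                        *)  MAX_JOBS="$2"; shift ;;  # take the limit and drop it from "$@"
                    esac
                fi
                ;;
            *)
                echo "other argument: $1"
                ;;
        esac
        shift
    done
    echo "PARALLEL='${PARALLEL}' MAX_JOBS='${MAX_JOBS}'"

One consequence of this pattern: a bare -j immediately followed by a source name swallows that name as the limit, so either give an explicit number or place -j after the sources (or combine it with -a).
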
@@ -242,6 +272,14 @@ while [ "$#" -ge 1 ]; do
    shift
 done
 
+# check that MAX_JOBS is a natural number > 0
+# (an empty string means: run all jobs in parallel)
+echo "${MAX_JOBS}" | awk '/^$/ { exit 0 } /^[1-9][0-9]*$/ { exit 0 } { exit 1 }'
+if [ "$?" -ne 0 ]
+then
+   _exit_err "Invalid max jobs value \"${MAX_JOBS}\""
+fi
+
 #
 # Setup interval
 #
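
The awk one-liner accepts an empty value (meaning "no limit") or a positive decimal integer without a leading zero, and signals anything else through its exit status. A small self-test of the same expression, with sample values chosen only for illustration:

    #!/bin/sh
    # exit 0 for "" or a positive integer such as 1, 8 or 42; exit 1 otherwise
    is_valid_max_jobs() {
        echo "$1" | awk '/^$/ { exit 0 } /^[1-9][0-9]*$/ { exit 0 } { exit 1 }'
    }

    for value in "" 1 8 42 0 -3 2.5 abc 007; do
        if is_valid_max_jobs "${value}"; then
            echo "accepted: '${value}'"
        else
            echo "rejected: '${value}'"
        fi
    done

Empty input matches the first rule, a well-formed count the second, and everything else (zero, negative, fractional, non-numeric, zero-padded) falls through to exit 1.
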
@@ -310,6 +348,26 @@ fi
 #
 # Let's do the backup - here begins the real stuff
 #
+
+# in PARALLEL mode:
+#  * create the control pipe
+#  * determine the number of jobs to start at once
+if [ "${PARALLEL}" ]; then
+   mkfifo "${CONTROL_PIPE}"
+   # fd 5 is tied to the control pipe
+   eval "exec 5<>${CONTROL_PIPE}"
+   TRAPFUNC="${TRAPFUNC}; rm -f \"${CONTROL_PIPE}\""
+   trap "${TRAPFUNC}" 0 1 2 15
+
+   # determine how many parallel jobs to prestart
+   if [ "${MAX_JOBS}" ] && [ "${MAX_JOBS}" -le "${no_sources}" ]
+   then
+      prestart="${MAX_JOBS}"
+   else
+      prestart=0
+   fi
+fi
+
 source_no=0
 while [ "${source_no}" -lt "${no_sources}" ]; do
    #
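
The FIFO opened read-write on fd 5 is the channel through which children report completion. Opening it with <> matters: a read-only open would block until a writer appeared, and reads would return end-of-file as soon as the last writer closed the pipe. A minimal sketch of just the setup and teardown, assuming /tmp is writable and the path is free:

    #!/bin/sh
    CONTROL_PIPE="/tmp/example-control-pipe"

    rm -f "${CONTROL_PIPE}"            # mkfifo fails if the path already exists
    mkfifo "${CONTROL_PIPE}"
    exec 5<>"${CONTROL_PIPE}"          # fd 5 stays open for reading and writing

    # remove the pipe again on exit and on the usual signals
    trap 'rm -f "${CONTROL_PIPE}"' 0 1 2 15

    printf '\n' >&5                    # what a child does when it finishes
    read line <&5                      # what the parent does before starting the next child
    echo "consumed one completion token"
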
@@ -322,8 +380,35 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
    # Start ourself, if we want parallel execution
    #
    if [ "${PARALLEL}" ]; then
+      if [ "${SIMULATE}" ]
+      then
+         # sleep so that awk's srand, seeded from the current time, differs per child
+         sleep 1
+      fi
+
+      if [ ! "${MAX_JOBS}" ]
+      then
+         # run all in parallel
          "$0" "${INTERVAL}" "${name}" &
          continue
+      elif [ "${prestart}" -gt 0 ]
+      then
+         # run a prestart child while any are still pending
+         { "$0" "${INTERVAL}" "${name}"; printf '\n' >&5; } &
+         prestart=$((${prestart} - 1))
+         continue
+      else
+         # each time a child finishes we get a line from the pipe
+         # and only then launch another child
+         while read line
+         do
+            { "$0" "${INTERVAL}" "${name}"; printf '\n' >&5; } &
+            # get out of this loop so we can continue with the main loop
+            # for the next source
+            break
+         done <&5
+         continue
+      fi
    fi
 
    #
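
Taken together this behaves like a counting semaphore: up to max children are started immediately, every child writes one newline to the pipe when it finishes, and the parent reads exactly one newline before launching each further child. A self-contained sketch of the same scheduling pattern, with sleep standing in for the per-source backup (the job list, limit and pipe path are made up for the demo):

    #!/bin/sh
    set -u

    CONTROL_PIPE="/tmp/throttle-demo-pipe"
    MAX_JOBS=2
    JOBS="a b c d e"                   # stand-ins for the backup sources

    rm -f "${CONTROL_PIPE}"
    mkfifo "${CONTROL_PIPE}"
    exec 5<>"${CONTROL_PIPE}"
    trap 'rm -f "${CONTROL_PIPE}"' 0 1 2 15

    prestart="${MAX_JOBS}"
    for job in ${JOBS}; do
        if [ "${prestart}" -gt 0 ]; then
            # the first MAX_JOBS children start without waiting
            { echo "start ${job}"; sleep 2; echo "done  ${job}"; printf '\n' >&5; } &
            prestart=$((prestart - 1))
        else
            # block until some child reports completion, then start the next one
            read line <&5
            { echo "start ${job}"; sleep 2; echo "done  ${job}"; printf '\n' >&5; } &
        fi
    done

    # be a good parent: collect all children, then drop the pipe
    wait
    rm -f "${CONTROL_PIPE}"

In the sketch the parent only ever blocks on the pipe after at least one child has been started, so the read is always eventually satisfied.
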
@@ -366,9 +451,20 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
    # with error message.
    #
    lock "${name}"
 
+   if [ "${SIMULATE}" ]
+   then
+      rand_low=3
+      rand_high=10
+      sleep_time=$(echo '' | awk "{srand(); print int(rand()*($rand_high - $rand_low)) + $rand_low;}")
+      _techo "simulating backup for ${name}: ${sleep_time} secs ..."
+      sleep $sleep_time
+      unlock "${name}"
+      _techo "Finished backup."
+      break
+   fi
+
    # redefine trap to also unlock (rm lockfile)
-   TRAPFUNC="${TRAPFUNC} && unlock \"${name}\""
+   TRAPFUNC="${TRAPFUNC}; unlock \"${name}\""
    trap "${TRAPFUNC}" 1 2 15
 
    #
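
The simulated backup sleeps for a pseudo-random time computed by awk: with rand() in [0,1), int(rand() * (high - low)) + low yields an integer between low and high - 1, i.e. 3 to 9 seconds here. The one-second sleep added in the parallel branch above exists because srand() with no argument seeds from the current time, so children started within the same second would otherwise pick identical durations. A standalone check of the expression (variable names copied from the diff):

    #!/bin/sh
    rand_low=3
    rand_high=10

    # srand() seeds from the current time; rand() then returns a value in [0,1)
    sleep_time=$(echo '' | awk "{srand(); print int(rand()*($rand_high - $rand_low)) + $rand_low;}")

    echo "would simulate a backup of ${sleep_time} seconds"   # always 3..9
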
@@ -620,15 +716,19 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
       seconds="$((${full_seconds} % 60))"
 
       _techo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)"
+
+      unlock "${name}"
    ) | add_name
 done
 
 #
 # Be a good parent and wait for our children, if they are running wild parallel
+# After all children have finished, remove the control pipe.
 #
 if [ "${PARALLEL}" ]; then
    _techo "Waiting for children to complete..."
    wait
+   rm -f "${CONTROL_PIPE}"
 fi
 
 #
@@ -645,5 +745,4 @@
 fi
 
 rm -f "${TMP}"
-unlock "${name}"
 _techo "Finished"


@@ -1189,12 +1189,12 @@ rsync -av -H --delete /mnt/archiv/ "$DDIR/archiv/"
 -------------------------------------------------------------------------
 
-Processes running when doing ccollect -p
+Processes running when doing ccollect -j
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Truncated output from `ps axuwwwf`:
 
 -------------------------------------------------------------------------
-S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily -p ddba034 ddba045 ddba046 ddba047 ddba049 ddna010 ddna011
+S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily -j ddba034 ddba045 ddba046 ddba047 ddba049 ddna010 ddna011
 S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily ddba034
 S+ 11:40 0:00 | | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily ddba034
 R+ 11:40 23:40 | | | | | \_ rsync -a --delete --numeric-ids --relative --delete-excluded --link-dest=/home/server/backup/ddba034


@@ -1,3 +1,4 @@
+* Introduce -j option for max parallel jobs, deprecate -p (Darko Poljak)
 * Add locking (Darko Poljak)
 * Fix source-is-up check (Nikita Koshikov)
 * Fix some minor command line parsing issues (Nico Schottelius)


@@ -26,14 +26,18 @@ texinfo or html).
 OPTIONS
 -------
 
+-a, --all::
+   Backup all sources specified in /etc/ccollect/sources
+
 -h, --help::
    Show the help screen
 
--p, --parallel::
-   Parallelise backup processes
+-j [max], --jobs [max]::
+   Specify the number of jobs to run simultaneously.
+   If max is not specified, all jobs are parallelised.
 
--a, --all::
-   Backup all sources specified in /etc/ccollect/sources
+-p, --parallel::
+   Parallelise backup processes (deprecated from 2.0)
 
 -v, --verbose::
    Be very verbose (uses set -x)