Introduce -j option and deprecate -p option.

This commit is contained in:
Darko Poljak 2016-07-21 12:15:37 +02:00
parent 8fbb7ddf27
commit 902a7d667e
4 changed files with 118 additions and 14 deletions

115
ccollect
View file

@ -20,6 +20,11 @@
# Initially written for SyGroup (www.sygroup.ch)
# Date: Mon Nov 14 11:45:11 CET 2005
# Simulate ccollect without actually performing any backup;
# the conf directory needs to be specified.
# Usually used for debugging/testing locking and parallel execution.
# Off by default so that real backups are performed; enable it from the
# environment, e.g. SIMULATE=1 ccollect ...
SIMULATE="${SIMULATE:-}"
# Error upon expanding unset variables:
set -u
@ -41,6 +46,8 @@ CPOSTEXEC="${CDEFAULTS}/post_exec"
CMARKER=".ccollect-marker"
export TMP="$(mktemp "/tmp/${__myname}.XXXXXX")"
# Control pipe used in parallel mode. The process id is part of the name
# so that several ccollect runs working on different sources at the same
# time each get their own pipe instead of sharing (and removing) one.
CONTROL_PIPE="/tmp/${__myname}-control-pipe.$$"
VERSION="1.0"
RELEASE="2015-10-10"
HALF_VERSION="ccollect ${VERSION}"
@ -59,7 +66,10 @@ SDATE="date +%s"
# Locking is done for each source so that only one instance per source
# can run.
#
LOCKDIR="${CSOURCES}"
# Use CCOLLECT_CONF directory for lock files.
# This directory can be chosen freely, so it can be one that is writable
# by the user executing ccollect.
LOCKDIR="${CCOLLECT_CONF}"
# printf pattern: ccollect_<source>.lock
LOCKFILE_PATTERN="ccollect_%s.lock"
LOCKFD=4
@ -119,6 +129,7 @@ fi
# unset values
#
PARALLEL=""
MAX_JOBS=""
USE_ALL=""
#
@ -185,9 +196,11 @@ ${__myname}: [args] <interval name> <sources to backup>
ccollect creates (pseudo) incremental backups
-h, --help: Show this help screen
-a, --all: Backup all sources specified in ${CSOURCES}
-p, --parallel: Parallelise backup processes
-h, --help: Show this help screen
-a, --all: Backup all sources specified in ${CSOURCES}
-j [max], --jobs [max] Specifies the number of jobs to run simultaneously.
If max is not specified then parallelise all jobs.
-p, --parallel: Parallelise backup processes (deprecated from 2.0)
-v, --verbose: Be very verbose (uses set -x)
-V, --version: Print version information
@ -219,7 +232,24 @@ while [ "$#" -ge 1 ]; do
USE_ALL=1
;;
-p|--parallel)
_techo "Warning: -p, --parallel option is deprecated," \
"use -j, --jobs instead."
PARALLEL=1
MAX_JOBS=""
;;
-j|--jobs)
    # [max] is optional: the next argument is consumed as the job limit
    # only when it consists of digits only. Anything else (another
    # option, the interval name, a source name) is left for the
    # argument loop, so "-j daily source" keeps working.
    # The value itself is validated after option parsing.
    PARALLEL=1
    if [ "$#" -ge 2 ]
    then
        case "$2" in
            ""|*[!0-9]*)
                ;;
            *)
                MAX_JOBS=$2
                shift
                ;;
        esac
    fi
    ;;
-v|--verbose)
set -x
@ -242,6 +272,14 @@ while [ "$#" -ge 1 ]; do
shift
done
# check that MAX_JOBS is natural number > 0
# empty string means run all in parallel
#
# _is_valid_max_jobs <value>
# Returns 0 if <value> is empty or a decimal number without a leading
# zero, 1 otherwise. Only shell pattern matching is used: the value is
# never handed to echo, and a multi-line value cannot pass just because
# its first line looks valid.
_is_valid_max_jobs()
{
    case "$1" in
        "")
            return 0
            ;;
        0*|*[!0-9]*)
            return 1
            ;;
        *)
            return 0
            ;;
    esac
}

if ! _is_valid_max_jobs "${MAX_JOBS}"
then
    _exit_err "Invalid max jobs value \"${MAX_JOBS}\""
fi
#
# Setup interval
#
@ -310,6 +348,26 @@ fi
#
# Let's do the backup - here begins the real stuff
#
# in PARALLEL mode:
# * create control pipe
# * determine number of jobs to start at once

# _prestart_count <max jobs> <number of sources>
# Print how many children are started up front, before the main loop
# begins waiting on the control pipe for a finished child:
#   * 0 if <max jobs> is empty (no limit; the pipe is not consulted)
#   * otherwise the smaller of <max jobs> and <number of sources>
# The cap matters: with a limit set but nothing prestarted, the main
# loop would block reading the control pipe although no child exists
# that could ever write to it.
_prestart_count()
{
    if [ -z "$1" ]
    then
        printf '%s\n' 0
    elif [ "$1" -le "$2" ]
    then
        printf '%s\n' "$1"
    else
        printf '%s\n' "$2"
    fi
}

if [ "${PARALLEL}" ]; then
    mkfifo "${CONTROL_PIPE}"
    # fd 5 is tied to control pipe (opened for reading and writing)
    exec 5<>"${CONTROL_PIPE}"
    # remove the pipe again when the script exits or is interrupted
    TRAPFUNC="${TRAPFUNC}; rm -f \"${CONTROL_PIPE}\""
    trap "${TRAPFUNC}" 0 1 2 15
    # determine how many parallel jobs to prestart
    prestart="$(_prestart_count "${MAX_JOBS}" "${no_sources}")"
fi
source_no=0
while [ "${source_no}" -lt "${no_sources}" ]; do
#
@ -322,8 +380,35 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
# Start ourself, if we want parallel execution
#
if [ "${PARALLEL}" ]; then
"$0" "${INTERVAL}" "${name}" &
continue
if [ "${SIMULATE}" ]
then
# wait a second so that awk's srand(), which is seeded from the current
# time, gives each simulated child a different seed
sleep 1
fi
if [ ! "${MAX_JOBS}" ]
then
# run all in parallel
"$0" "${INTERVAL}" "${name}" &
continue
elif [ "${prestart}" -gt 0 ]
then
# run prestart child if pending
{ "$0" "${INTERVAL}" "${name}"; printf '\n' >&5; } &
prestart=$((${prestart} - 1))
continue
else
# each time a child finishes we get a line from the pipe
# and then launch another child
while read line
do
{ "$0" "${INTERVAL}" "${name}"; printf '\n' >&5; } &
# get out of the loop so we can continue with the main loop
# for the next source
break
done <&5
continue
fi
fi
#
@ -366,9 +451,20 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
# with error message.
#
lock "${name}"
# Simulation mode: no backup is made. Sleep for a random number of
# seconds, release the lock for this source and stop here.
if [ -n "${SIMULATE}" ]; then
    rand_low=3
    rand_high=10
    # srand() without an argument seeds from the current time
    sleep_time="$(awk -v lo="${rand_low}" -v hi="${rand_high}" \
        'BEGIN { srand(); print int(rand() * (hi - lo)) + lo; }')"
    _techo "simulating backup for ${name}: ${sleep_time} secs ..."
    sleep "${sleep_time}"
    unlock "${name}"
    _techo "Finished backup."
    break
fi
# redefine trap to also unlock (rm lockfile)
TRAPFUNC="${TRAPFUNC} && unlock \"${name}\""
TRAPFUNC="${TRAPFUNC}; unlock \"${name}\""
trap "${TRAPFUNC}" 1 2 15
#
@ -620,15 +716,19 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
seconds="$((${full_seconds} % 60))"
_techo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)"
unlock "${name}"
) | add_name
done
#
# Parallel runs only: block until every background child has exited,
# and only then delete the control pipe.
#
if [ -n "${PARALLEL}" ]
then
    _techo "Waiting for children to complete..."
    wait
    rm -f "${CONTROL_PIPE}"
fi
#
@ -645,5 +745,4 @@ if [ -x "${CPOSTEXEC}" ]; then
fi
rm -f "${TMP}"
unlock "${name}"
_techo "Finished"

View file

@ -1189,12 +1189,12 @@ rsync -av -H --delete /mnt/archiv/ "$DDIR/archiv/"
-------------------------------------------------------------------------
Processes running when doing ccollect -p
Processes running when doing ccollect -j
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Truncated output from `ps axuwwwf`:
-------------------------------------------------------------------------
S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily -p ddba034 ddba045 ddba046 ddba047 ddba049 ddna010 ddna011
S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily -j ddba034 ddba045 ddba046 ddba047 ddba049 ddna010 ddna011
S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily ddba034
S+ 11:40 0:00 | | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily ddba034
R+ 11:40 23:40 | | | | | \_ rsync -a --delete --numeric-ids --relative --delete-excluded --link-dest=/home/server/backup/ddba034

View file

@ -1,3 +1,4 @@
* Introduce -j option for max parallel jobs, deprecate -p (Darko Poljak)
* Add locking (Darko Poljak)
* Fix source-is-up check (Nikita Koshikov)
* Fix some minor command line parsing issues (Nico Schottelius)

View file

@ -26,14 +26,18 @@ texinfo or html).
OPTIONS
-------
-a, --all::
Backup all sources specified in /etc/ccollect/sources
-h, --help::
Show the help screen
-p, --parallel::
Parallelise backup processes
-j [max], --jobs [max]::
Specifies the number of jobs to run simultaneously.
If max is not specified then parallelise all jobs.
-a, --all::
Backup all sources specified in /etc/ccollect/sources
-p, --parallel::
Parallelise backup processes (deprecated from 2.0)
-v, --verbose::
Be very verbose (uses set -x)