From 902a7d667ed41c2142083a0a64a6c0fe17c36d22 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 21 Jul 2016 12:15:37 +0200 Subject: [PATCH] Introduce -j option and deprecate -p option. --- ccollect | 115 +++++++++++++++++++++++++++++++++++++++--- doc/ccollect.text | 4 +- doc/changes/next | 1 + doc/man/ccollect.text | 12 +++-- 4 files changed, 118 insertions(+), 14 deletions(-) diff --git a/ccollect b/ccollect index 20498b2..de62aa6 100755 --- a/ccollect +++ b/ccollect @@ -20,6 +20,11 @@ # Initially written for SyGroup (www.sygroup.ch) # Date: Mon Nov 14 11:45:11 CET 2005 +# Simulate ccollect without actually performing any backup; +# conf directory needs to be specified. Off by default: run with SIMULATE=1 +# in the environment when debugging/testing locking and parallel execution. +SIMULATE="${SIMULATE:-}" + # Error upon expanding unset variables: set -u @@ -41,6 +46,8 @@ CPOSTEXEC="${CDEFAULTS}/post_exec" CMARKER=".ccollect-marker" export TMP="$(mktemp "/tmp/${__myname}.XXXXXX")" +CONTROL_PIPE="/tmp/${__myname}-control-pipe" + VERSION="1.0" RELEASE="2015-10-10" HALF_VERSION="ccollect ${VERSION}" @@ -59,7 +66,10 @@ SDATE="date +%s" # Locking is done for each source so that only one instance per source # can run. # -LOCKDIR="${CSOURCES}" +# Use CCOLLECT_CONF directory for lock files. +# This directory can be set arbitrarily so that it is writable for the user +# executing ccollect. +LOCKDIR="${CCOLLECT_CONF}" # printf pattern: ccollect_.lock LOCKFILE_PATTERN="ccollect_%s.lock" LOCKFD=4 @@ -119,6 +129,7 @@ fi # unset values # PARALLEL="" +MAX_JOBS="" USE_ALL="" # @@ -185,9 +196,11 @@ ${__myname}: [args] ccollect creates (pseudo) incremental backups - -h, --help: Show this help screen - -a, --all: Backup all sources specified in ${CSOURCES} - -p, --parallel: Parallelise backup processes + -h, --help: Show this help screen + -a, --all: Backup all sources specified in ${CSOURCES} + -j [max], --jobs [max] Specifies the number of jobs to run simultaneously. + If max is not specified then parallelise all jobs. 
+ -p, --parallel: Parallelise backup processes (deprecated from 2.0) -v, --verbose: Be very verbose (uses set -x) -V, --version: Print version information @@ -219,7 +232,24 @@ while [ "$#" -ge 1 ]; do USE_ALL=1 ;; -p|--parallel) + _techo "Warning: -p, --parallel option is deprecated," \ + "use -j, --jobs instead." PARALLEL=1 + MAX_JOBS="" + ;; + -j|--jobs) + PARALLEL=1 + if [ "$#" -ge 2 ] + then + case "$2" in + ''|*[!0-9]*) # not a job count: leave it for regular argument parsing + ;; + *) + MAX_JOBS=$2 + shift + ;; + esac + fi ;; -v|--verbose) set -x @@ -242,6 +272,14 @@ while [ "$#" -ge 1 ]; do shift done +# check that MAX_JOBS is natural number > 0 +# empty string means run all in parallel +echo "${MAX_JOBS}" | awk '/^$/ { exit 0 } /^[1-9][0-9]*$/ { exit 0 } { exit 1 }' +if [ "$?" -ne 0 ] +then + _exit_err "Invalid max jobs value \"${MAX_JOBS}\"" +fi + # # Setup interval # @@ -310,6 +348,26 @@ fi # # Let's do the backup - here begins the real stuff # + +# in PARALLEL mode: +# * create control pipe +# * determine number of jobs to start at once +if [ "${PARALLEL}" ]; then + mkfifo "${CONTROL_PIPE}" + # fd 5 is tied to control pipe + eval "exec 5<>${CONTROL_PIPE}" + TRAPFUNC="${TRAPFUNC}; rm -f \"${CONTROL_PIPE}\"" + trap "${TRAPFUNC}" 0 1 2 15 + + # determine how many parallel jobs to prestart: min(MAX_JOBS, no_sources) + if [ ! "${MAX_JOBS}" ] + then + prestart=0 + else + prestart=$((${MAX_JOBS} < ${no_sources} ? ${MAX_JOBS} : ${no_sources})) + fi +fi + source_no=0 while [ "${source_no}" -lt "${no_sources}" ]; do # @@ -322,8 +380,35 @@ while [ "${source_no}" -lt "${no_sources}" ]; do # Start ourself, if we want parallel execution # if [ "${PARALLEL}" ]; then - "$0" "${INTERVAL}" "${name}" & - continue + if [ "${SIMULATE}" ] + then + # give some time to awk's srand initialized by curr time + sleep 1 + fi + + if [ ! 
"${MAX_JOBS}" ] + then + # run all in parallel + "$0" "${INTERVAL}" "${name}" & + continue + elif [ "${prestart}" -gt 0 ] + then + # run prestart child if pending + { "$0" "${INTERVAL}" "${name}"; printf '\n' >&5; } & + prestart=$((${prestart} - 1)) + continue + else + # each time a child finishes we get a line from the pipe + # and then launch another child + while read line + do + { "$0" "${INTERVAL}" "${name}"; printf '\n' >&5; } & + # get out of loop so we can continue with main loop + # for next source + break + done <&5 + continue + fi fi # @@ -366,9 +451,20 @@ while [ "${source_no}" -lt "${no_sources}" ]; do # with error message. # lock "${name}" + if [ "${SIMULATE}" ] + then + rand_low=3 + rand_high=10 + sleep_time=$(echo '' | awk "{srand(); print int(rand()*($rand_high - $rand_low)) + $rand_low;}") + _techo "simulating backup for ${name}: ${sleep_time} secs ..." + sleep "${sleep_time}" + unlock "${name}" + _techo "Finished backup." + break + fi # redefine trap to also unlock (rm lockfile) - TRAPFUNC="${TRAPFUNC} && unlock \"${name}\"" + TRAPFUNC="${TRAPFUNC}; unlock \"${name}\"" trap "${TRAPFUNC}" 1 2 15 # @@ -620,15 +716,19 @@ while [ "${source_no}" -lt "${no_sources}" ]; do seconds="$((${full_seconds} % 60))" _techo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)" + + unlock "${name}" ) | add_name done # # Be a good parent and wait for our children, if they are running wild parallel +# After all children are finished then remove control pipe. # if [ "${PARALLEL}" ]; then _techo "Waiting for children to complete..." 
wait + rm -f "${CONTROL_PIPE}" fi # @@ -645,5 +745,4 @@ if [ -x "${CPOSTEXEC}" ]; then fi rm -f "${TMP}" -unlock "${name}" _techo "Finished" diff --git a/doc/ccollect.text b/doc/ccollect.text index 4c54191..474c491 100644 --- a/doc/ccollect.text +++ b/doc/ccollect.text @@ -1189,12 +1189,12 @@ rsync -av -H --delete /mnt/archiv/ "$DDIR/archiv/" ------------------------------------------------------------------------- -Processes running when doing ccollect -p +Processes running when doing ccollect -j ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Truncated output from `ps axuwwwf`: ------------------------------------------------------------------------- - S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily -p ddba034 ddba045 ddba046 ddba047 ddba049 ddna010 ddna011 + S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily -j ddba034 ddba045 ddba046 ddba047 ddba049 ddna010 ddna011 S+ 11:40 0:00 | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily ddba034 S+ 11:40 0:00 | | | | \_ /bin/sh /usr/local/bin/ccollect.sh daily ddba034 R+ 11:40 23:40 | | | | | \_ rsync -a --delete --numeric-ids --relative --delete-excluded --link-dest=/home/server/backup/ddba034 diff --git a/doc/changes/next b/doc/changes/next index 05c1a37..2fee139 100644 --- a/doc/changes/next +++ b/doc/changes/next @@ -1,3 +1,4 @@ + * Introduce -j option for max parallel jobs, deprecate -p (Darko Poljak) * Add locking (Darko Poljak) * Fix source-is-up check (Nikita Koshikov) * Fix some minor command line parsing issues (Nico Schottelius) diff --git a/doc/man/ccollect.text b/doc/man/ccollect.text index 84c1e37..9fc9086 100644 --- a/doc/man/ccollect.text +++ b/doc/man/ccollect.text @@ -26,14 +26,18 @@ texinfo or html). OPTIONS ------- +-a, --all:: + Backup all sources specified in /etc/ccollect/sources + -h, --help:: Show the help screen --p, --parallel:: - Parallelise backup processes +-j [max], --jobs [max]:: + Specifies the number of jobs to run simultaneously. 
+ If max is not specified then parallelise all jobs. --a, --all:: - Backup all sources specified in /etc/ccollect/sources +-p, --parallel:: + Parallelise backup processes (deprecated from 2.0) -v, --verbose:: Be very verbose (uses set -x)