#!/bin/sh # # 2005-2013 Nico Schottelius (nico-ccollect at schottelius.org) # 2016-2019 Darko Poljak (darko.poljak at gmail.com) # # This file is part of ccollect. # # ccollect is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # ccollect is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with ccollect. If not, see . # # Initially written for SyGroup (www.sygroup.ch) # Date: Mon Nov 14 11:45:11 CET 2005 # Error upon expanding unset variables: set -u # # Standard variables (stolen from cconf) # __mydir="${0%/*}" __abs_mydir="$(cd "$__mydir" && pwd -P)" __myname=${0##*/} # # where to find our configuration and temporary file # CCOLLECT_CONF="${CCOLLECT_CONF:-/etc/ccollect}" CSOURCES="${CCOLLECT_CONF}/sources" CDEFAULTS="${CCOLLECT_CONF}/defaults" CPREEXEC="${CDEFAULTS}/pre_exec" CPOSTEXEC="${CDEFAULTS}/post_exec" CMARKER=".ccollect-marker" TMP="$(mktemp "/tmp/${__myname}.XXXXXX")" export TMP CONTROL_PIPE="/tmp/${__myname}-control-pipe" VERSION="2.9" RELEASE="2020-05-25" HALF_VERSION="ccollect ${VERSION}" FULL_VERSION="ccollect ${VERSION} (${RELEASE})" # # CDATE: how we use it for naming of the archives # DDATE: how the user should see it in our output (DISPLAY) # CDATE="date +%Y%m%d-%H%M" DDATE="date +%Y-%m-%d-%H:%M:%S" SDATE="date +%s" # # LOCKING: use flock if available, otherwise mkdir # Locking is done for each source so that only one instance per source # can run. # # Use CCOLLECT_CONF directory for lock files. # This directory can be set arbitrary so it is writable for user # executing ccollect. LOCKDIR="${CCOLLECT_CONF}" # printf pattern: ccollect_.lock LOCKFILE_PATTERN="ccollect_%s.lock" LOCKFD=4 # # locking functions using flock # lock_flock() { # $1 = source to backup # shellcheck disable=SC2059 lockfile="${LOCKDIR}/$(printf "${LOCKFILE_PATTERN}" "$1")" eval "exec ${LOCKFD}> '${lockfile}'" flock -n ${LOCKFD} && return 0 || return 1 } unlock_flock() { # $1 = source to backup # shellcheck disable=SC2059 lockfile="${LOCKDIR}/$(printf "${LOCKFILE_PATTERN}" "$1")" eval "exec ${LOCKFD}>&-" rm -f "${lockfile}" } # # locking functions using mkdir (mkdir is atomic) # lock_mkdir() { # $1 = source to backup # shellcheck disable=SC2059 lockfile="${LOCKDIR}/$(printf "${LOCKFILE_PATTERN}" "$1")" mkdir "${lockfile}" && return 0 || return 1 } unlock_mkdir() { # $1 = source to backup # shellcheck disable=SC2059 lockfile="${LOCKDIR}/$(printf "${LOCKFILE_PATTERN}" "$1")" rmdir "${lockfile}" } # # determine locking tool: flock or mkdir # if command -v flock > /dev/null 2>&1 then lockf="lock_flock" unlockf="unlock_flock" else lockf="lock_mkdir" unlockf="unlock_mkdir" fi # # unset values # PARALLEL="" MAX_JOBS="" USE_ALL="" LOGFILE="" SYSLOG="" # e - only errors, a - all output LOGLEVEL="a" LOGONLYERRORS="" # # catch signals # TRAPFUNC="rm -f \"${TMP}\"" # shellcheck disable=SC2064 trap "${TRAPFUNC}" 1 2 15 # # Functions # # check if we are running interactive or non-interactive # see: http://www.tldp.org/LDP/abs/html/intandnonint.html _is_interactive() { [ -t 0 ] || [ -p /dev/stdin ] } # # ssh-"feature": we cannot do '... read ...; ssh ...; < file', # because ssh reads stdin! -n does not work -> does not ask for password # Also allow deletion for files without the given suffix # delete_from_file() { file="$1"; shift suffix="" # It will be set, if deleting incomplete backups. [ $# -eq 1 ] && suffix="$1" && shift # dirs for deletion will be moved to this trash dir inside destination dir # - for fast mv operation trash="$(mktemp -d ".trash.XXXXXX")" while read -r to_remove; do mv "${to_remove}" "${trash}" || _exit_err "Moving ${to_remove} to ${trash} failed." set -- "$@" "${to_remove}" if [ "${suffix}" ]; then to_remove_no_suffix="$(echo "${to_remove}" | sed "s/$suffix\$//")" mv "${to_remove_no_suffix}" "${trash}" || _exit_err "Moving ${to_remove_no_suffix} to ${trash} failed." set -- "$@" "${to_remove_no_suffix}" fi done < "${file}" _techo "Removing $* in ${trash}..." empty_dir=".empty-dir" mkdir "${empty_dir}" || _exit_err "Empty directory ${empty_dir} cannot be created." [ "${VVERBOSE}" ] && echo "Starting: rsync -a --delete ${empty_dir} ${trash}" # rsync needs ending slash for directory content rsync -a --delete "${empty_dir}/" "${trash}/" || _exit_err "Removing $* failed." rmdir "${trash}" || _exit_err "Removing ${trash} directory failed" rmdir "${empty_dir}" || _exit_err "Removing ${empty_dir} directory failed" _techo "Removing $* in ${trash} finished." } display_version() { echo "${FULL_VERSION}" exit 0 } usage() { cat << eof ${__myname}: [args] ccollect creates (pseudo) incremental backups -h, --help: Show this help screen -a, --all: Backup all sources specified in ${CSOURCES} -e, --errors: Log only errors -j [max], --jobs [max] Specifies the number of jobs to run simultaneously. If max is not specified then parallelise all jobs. -l FILE, --logfile FILE Log to specified file -p, --parallel: Parallelise backup processes (deprecated from 2.0) -s, --syslog: Log to syslog with tag ccollect -v, --verbose: Be very verbose (uses set -x) -V, --version: Print version information This is version ${VERSION} released on ${RELEASE}. Retrieve latest ccollect at http://www.nico.schottelius.org/software/ccollect/ eof exit 0 } # locking functions lock() { "${lockf}" "$@" || _exit_err \ "Only one instance of ${__myname} for source \"$1\" can run at one time." } unlock() { "${unlockf}" "$@" } # time displaying echo # stdout version _techo_stdout() { echo "$(${DDATE}): $*" } # syslog version _techo_syslog() { logger -t ccollect "$@" } # specified file version _techo_file() { _techo_stdout "$@" >> "${LOGFILE}" } # determine _techo version before parsing options if _is_interactive then _techof="_techo_stdout" else _techof="_techo_syslog" fi # _techo with determined _techo version _techo() { if [ "${LOGLEVEL}" = "a" ] then # name is exported before calling this function # shellcheck disable=SC2154 set -- ${name:+"[${name}]"} "$@" "${_techof}" "$@" fi } _techo_err() { _techo "Error: $*" } _exit_err() { _techo_err "$@" rm -f "${TMP}" exit 1 } # # Parse options # while [ "$#" -ge 1 ]; do case "$1" in -a|--all) USE_ALL=1 ;; -p|--parallel) _techo "Warning: -p, --parallel option is deprecated," \ "use -j, --jobs instead." PARALLEL=1 MAX_JOBS="" ;; -j|--jobs) PARALLEL=1 if [ "$#" -ge 2 ] then case "$2" in -*) ;; *) MAX_JOBS=$2 shift ;; esac fi ;; -e|--errors) LOGONLYERRORS="1" ;; -l|--logfile) if [ "$#" -ge 2 ] then case "$2" in -*) _exit_err "Missing log file" ;; *) LOGFILE="$2" shift ;; esac else _exit_err "Missing log file" fi ;; -s|--syslog) SYSLOG="1" ;; -v|--verbose) set -x ;; -V|--version) display_version ;; --) # ignore the -- itself shift break ;; -h|--help|-*) usage ;; *) break ;; esac shift done # determine _techo version and logging level after parsing options if [ "${LOGFILE}" ] then _techof="_techo_file" LOGLEVEL="a" elif _is_interactive then if [ "${SYSLOG}" ] then _techof="_techo_syslog" LOGLEVEL="a" else _techof="_techo_stdout" LOGLEVEL="e" fi else _techof="_techo_syslog" LOGLEVEL="a" fi if [ "${LOGFILE}" ] || [ "${SYSLOG}" ] then if [ "${LOGONLYERRORS}" ] then LOGLEVEL="e" fi fi # check that MAX_JOBS is natural number > 0 # empty string means run all in parallel if ! echo "${MAX_JOBS}" | grep -q -E '^[1-9][0-9]*$|^$' then _exit_err "Invalid max jobs value \"${MAX_JOBS}\"" fi # # Setup interval # if [ $# -ge 1 ]; then export INTERVAL="$1" shift else usage fi # # Check for configuraton directory # [ -d "${CCOLLECT_CONF}" ] || _exit_err "No configuration found in " \ "\"${CCOLLECT_CONF}\" (is \$CCOLLECT_CONF properly set?)" # # Create (portable!) source "array" # export no_sources=0 if [ "${USE_ALL}" = 1 ]; then # # Get sources from source configuration # ( cd "${CSOURCES}" && ls -1 > "${TMP}" ) || \ _exit_err "Listing of sources failed. Aborting." while read -r tmp; do eval export "source_${no_sources}=\"${tmp}\"" no_sources=$((no_sources + 1)) done < "${TMP}" else # # Get sources from command line # while [ "$#" -ge 1 ]; do eval "arg=\"\$1\"" shift # arg is assigned in the eval above # shellcheck disable=SC2154 eval export "source_${no_sources}=\"${arg}\"" no_sources="$((no_sources + 1))" done fi # # Need at least ONE source to backup # if [ "${no_sources}" -lt 1 ]; then usage else _techo "${HALF_VERSION}: Beginning backup using interval ${INTERVAL}" fi # # Look for pre-exec command (general) # if [ -x "${CPREEXEC}" ]; then _techo "Executing ${CPREEXEC} ..." "${CPREEXEC}"; ret=$? _techo "Finished ${CPREEXEC} (return code: ${ret})." [ "${ret}" -eq 0 ] || _exit_err "${CPREEXEC} failed. Aborting" fi ################################################################################ # # Let's do the backup - here begins the real stuff # # in PARALLEL mode: # * create control pipe # * determine number of jobs to start at once if [ "${PARALLEL}" ]; then mkfifo "${CONTROL_PIPE}" # fd 5 is tied to control pipe eval "exec 5<>'${CONTROL_PIPE}'" TRAPFUNC="${TRAPFUNC}; rm -f \"${CONTROL_PIPE}\"" # shellcheck disable=SC2064 trap "${TRAPFUNC}" 0 1 2 15 # determine how much parallel jobs to prestart if [ "${MAX_JOBS}" ] then if [ "${MAX_JOBS}" -le "${no_sources}" ] then prestart="${MAX_JOBS}" else prestart="${no_sources}" fi else prestart=0 fi fi source_no=0 while [ "${source_no}" -lt "${no_sources}" ]; do # # Get current source # eval export name=\"\$source_${source_no}\" source_no=$((source_no + 1)) # # Start ourself, if we want parallel execution # if [ "${PARALLEL}" ]; then if [ ! "${MAX_JOBS}" ] then # run all in parallel "$0" "${INTERVAL}" "${name}" & continue elif [ "${prestart}" -gt 0 ] then # run prestart child if pending { "$0" "${INTERVAL}" "${name}"; printf '\n' >&5; } & prestart=$((prestart - 1)) continue else # each time a child finishes we get a line from the pipe # and then launch another child while read -r line do { "$0" "${INTERVAL}" "${name}"; printf '\n' >&5; } & # get out of loop so we can contnue with main loop # for next source break done <&5 continue fi fi # # Start subshell for easy log editing # ( backup="${CSOURCES}/${name}" c_source="${backup}/source" c_dest="${backup}/destination" c_pre_exec="${backup}/pre_exec" c_post_exec="${backup}/post_exec" # # Stderr to stdout, so we can produce nice logs # exec 2>&1 # # Record start of backup: internal and for the user # begin_s="$(${SDATE})" _techo "Beginning to backup" # # Standard configuration checks # if [ ! -e "${backup}" ]; then _exit_err "Source \"${backup}\" does not exist." fi # # Configuration _must_ be a directory (cconfig style) # if [ ! -d "${backup}" ]; then _exit_err "\"${backup}\" is not a cconfig-directory. Skipping." fi # # Acquire lock for source. If lock cannot be acquired, lock will exit # with error message. # lock "${name}" # redefine trap to also unlock (rm lockfile) TRAPFUNC="${TRAPFUNC}; unlock \"${name}\"" # shellcheck disable=SC2064 trap "${TRAPFUNC}" 1 2 15 # # First execute pre_exec, which may generate destination or other parameters # if [ -x "${c_pre_exec}" ]; then _techo "Executing ${c_pre_exec} ..." "${c_pre_exec}"; ret="$?" _techo "Finished ${c_pre_exec} (return code ${ret})." [ "${ret}" -eq 0 ] || _exit_err "${c_pre_exec} failed. Skipping." fi # # Read source configuration # for opt in verbose very_verbose summary exclude rsync_options \ delete_incomplete rsync_failure_codes \ mtime quiet_if_down ; do if [ -f "${backup}/${opt}" ] || [ -f "${backup}/no_${opt}" ]; then eval "c_$opt=\"${backup}/$opt\"" else eval "c_$opt=\"${CDEFAULTS}/$opt\"" fi done # # Interval definition: First try source specific, fallback to default # c_interval="$(cat "${backup}/intervals/${INTERVAL}" 2>/dev/null)" if [ -z "${c_interval}" ]; then c_interval="$(cat "${CDEFAULTS}/intervals/${INTERVAL}" 2>/dev/null)" if [ -z "${c_interval}" ]; then _exit_err "No definition for interval \"${INTERVAL}\" found. Skipping." fi fi # # Sort by ctime (default) or mtime (configuration option) # # variable is assigned using eval # shellcheck disable=SC2154 if [ -f "${c_mtime}" ] ; then TSORT="t" else TSORT="tc" fi # # Source configuration checks # if [ ! -f "${c_source}" ]; then _exit_err "Source description \"${c_source}\" is not a file. Skipping." else source=$(cat "${c_source}"); ret="$?" if [ "${ret}" -ne 0 ]; then _exit_err "Source ${c_source} is not readable. Skipping." fi fi # # Destination is a path # if [ ! -f "${c_dest}" ]; then _exit_err "Destination ${c_dest} is not a file. Skipping." else ddir="$(cat "${c_dest}")"; ret="$?" if [ "${ret}" -ne 0 ]; then _exit_err "Destination ${c_dest} is not readable. Skipping." fi fi # # Parameters: ccollect defaults, configuration options, user options # # # Rsync standard options (archive will be added after is-up-check) # set -- "$@" "--delete" "--numeric-ids" "--relative" \ "--delete-excluded" "--sparse" # # Exclude list # # variable is assigned using eval # shellcheck disable=SC2154 if [ -f "${c_exclude}" ]; then set -- "$@" "--exclude-from=${c_exclude}" fi # # Output a summary # # variable is assigned using eval # shellcheck disable=SC2154 if [ -f "${c_summary}" ]; then set -- "$@" "--stats" fi # # Verbosity for rsync, rm, and mkdir # VVERBOSE="" # variable is assigned using eval # shellcheck disable=SC2154 if [ -f "${c_very_verbose}" ]; then set -- "$@" "-vv" VVERBOSE="-v" elif [ -f "${c_verbose}" ]; then set -- "$@" "-v" fi # # Extra options for rsync provided by the user # # variable is assigned using eval # shellcheck disable=SC2154 if [ -f "${c_rsync_options}" ]; then while read -r line; do # Trim line. ln=$(echo "${line}" | awk '{$1=$1;print;}') # Only if ln is non zero length string. # # If ln is empty then rsync '' DEST evaluates # to transfer current directory to DEST which would # with specific options destroy DEST content. if [ -n "${ln}" ] then set -- "$@" "${ln}" fi done < "${c_rsync_options}" fi # # Check: source is up and accepting connections (before deleting old backups!) # if ! rsync "$@" "${source}" >/dev/null 2>"${TMP}" ; then # variable is assigned using eval # shellcheck disable=SC2154 if [ ! -f "${c_quiet_if_down}" ]; then cat "${TMP}" fi _exit_err "Source ${source} is not readable. Skipping." fi # # Add --archive for real backup (looks nice in front) # set -- "--archive" "$@" # # Check: destination exists? # cd "${ddir}" || _exit_err "Cannot change to ${ddir}. Skipping." # # Check incomplete backups (needs echo to remove newlines) # # shellcheck disable=SC2010 ls -1 | grep "${CMARKER}\$" > "${TMP}"; ret=$? if [ "$ret" -eq 0 ]; then _techo "Incomplete backups: $(cat "${TMP}")" # variable is assigned using eval # shellcheck disable=SC2154 if [ -f "${c_delete_incomplete}" ]; then delete_from_file "${TMP}" "${CMARKER}" & fi fi # # Include current time in name, not the time when we began to remove above # destination_name="${INTERVAL}.$(${CDATE}).$$-${source_no}" export destination_name destination_dir="${ddir}/${destination_name}" export destination_dir # # Check: maximum number of backups is reached? # # shellcheck disable=SC2010 count="$(ls -1 | grep -c "^${INTERVAL}\\.")" _techo "Existing backups: ${count} Total keeping backups: ${c_interval}" if [ "${count}" -ge "${c_interval}" ]; then # Use oldest directory as new backup destination directory. # It need not to be deleted, rsync will sync its content. # shellcheck disable=SC2010 oldest_bak=$(ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n 1 || \ _exit_err "Listing oldest backup failed") _techo "Using ${oldest_bak} for destination dir ${destination_dir}" if mv "${oldest_bak}" "${destination_dir}"; then # Touch dest dir so it is not sorted wrong in listings below. ls_rm_exclude=$(basename "${destination_dir}") # We have something to remove only if count > interval. remove="$((count - c_interval))" else _techo_err "Renaming oldest backup ${oldest_bak} to ${destination_dir} failed, removing it." remove="$((count - c_interval + 1))" ls_rm_exclude="" fi if [ "${remove}" -gt 0 ]; then _techo "Removing ${remove} backup(s)..." if [ -z "${ls_rm_exclude}" ]; then # shellcheck disable=SC2010 ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n "${remove}" > "${TMP}" || \ _exit_err "Listing old backups failed" else # shellcheck disable=SC2010 ls -${TSORT}1r | grep -v "${ls_rm_exclude}" | grep "^${INTERVAL}\\." | head -n "${remove}" > "${TMP}" || \ _exit_err "Listing old backups failed" fi delete_from_file "${TMP}" & fi fi # # Check for backup directory to clone from: Always clone from the latest one! # Exclude destination_dir from listing, it can be touched reused and renamed # oldest existing destination directory. # dest_dir_name=$(basename "${destination_dir}") # shellcheck disable=SC2010 last_dir="$(ls -${TSORT}p1 | grep '/$' | grep -v "${dest_dir_name}" | head -n 1)" || \ _exit_err "Failed to list contents of ${ddir}." # # Clone from old backup, if existing # if [ "${last_dir}" ]; then set -- "$@" "--link-dest=${ddir}/${last_dir}" _techo "Hard linking from ${last_dir}" fi # # Mark backup running and go back to original directory # touch "${destination_dir}${CMARKER}" cd "${__abs_mydir}" || _exit_err "Cannot go back to ${__abs_mydir}." # # the rsync part # _techo "Transferring files..." rsync "$@" "${source}" "${destination_dir}"; ret=$? _techo "Finished backup (rsync return code: $ret)." # # export rsync return code, might be useful in post_exec # export rsync_return_code=$ret # # Set modification time (mtime) to current time, if sorting by mtime is enabled # [ -f "$c_mtime" ] && touch "${destination_dir}" # # Check if rsync exit code indicates failure. # fail="" # variable is assigned using eval # shellcheck disable=SC2154 if [ -f "$c_rsync_failure_codes" ]; then while read -r code ; do if [ "$ret" = "$code" ]; then fail=1 fi done <"${c_rsync_failure_codes}" fi # # Remove marking here unless rsync failed. # if [ -z "$fail" ]; then rm "${destination_dir}${CMARKER}" || \ _exit_err "Removing ${destination_dir}${CMARKER} failed." if [ "${ret}" -ne 0 ]; then _techo "Warning: rsync exited non-zero, the backup may be broken (see rsync errors)." fi else _techo "Warning: rsync failed with return code $ret." fi # # Create symlink to newest backup # # shellcheck disable=SC2010 latest_dir="$(ls -${TSORT}p1 "${ddir}" | grep '/$' | head -n 1)" || \ _exit_err "Failed to list content of ${ddir}." ln -snf "${ddir}${latest_dir}" "${ddir}current" || \ _exit_err "Failed to create 'current' symlink." # # post_exec # if [ -x "${c_post_exec}" ]; then _techo "Executing ${c_post_exec} ..." "${c_post_exec}"; ret=$? _techo "Finished ${c_post_exec}." if [ "${ret}" -ne 0 ]; then _exit_err "${c_post_exec} failed." fi fi # # Time calculation # end_s="$(${SDATE})" full_seconds="$((end_s - begin_s))" hours="$((full_seconds / 3600))" minutes="$(((full_seconds % 3600) / 60))" seconds="$((full_seconds % 60))" _techo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)" unlock "${name}" # wait for children (doing delete_from_file) if any still running wait ) || exit done # # Be a good parent and wait for our children, if they are running wild parallel # After all children are finished then remove control pipe. # if [ "${PARALLEL}" ]; then _techo "Waiting for children to complete..." wait rm -f "${CONTROL_PIPE}" fi # # Look for post-exec command (general) # if [ -x "${CPOSTEXEC}" ]; then _techo "Executing ${CPOSTEXEC} ..." "${CPOSTEXEC}"; ret=$? _techo "Finished ${CPOSTEXEC} (return code: ${ret})." if [ "${ret}" -ne 0 ]; then _techo "${CPOSTEXEC} failed." fi fi rm -f "${TMP}" _techo "Finished"