ccollect/ccollect.sh

600 lines
14 KiB
Bash
Raw Normal View History

2005-11-17 11:33:49 +00:00
#!/bin/sh
#
# 2005-2009 Nico Schottelius (nico-ccollect at schottelius.org)
#
2008-03-07 22:03:55 +00:00
# This file is part of ccollect.
#
# ccollect is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
2008-03-07 22:03:55 +00:00
# ccollect is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
2008-03-07 22:03:55 +00:00
# You should have received a copy of the GNU General Public License
# along with ccollect. If not, see <http://www.gnu.org/licenses/>.
#
2007-08-14 12:33:22 +00:00
# Initially written for SyGroup (www.sygroup.ch)
2005-11-17 11:33:49 +00:00
# Date: Mon Nov 14 11:45:11 CET 2005
# Error upon expanding unset variables:
set -u
#
# Standard variables (stolen from cconf)
#
# Resolve the start directory and the location/name of this script.
# When $0 contains no slash (e.g. started as "sh ccollect.sh"), "${0%/*}"
# would yield the script name itself instead of a directory, so use "."
# in that case. An empty result (script directly below /) maps to "/".
__pwd="$(pwd -P)"
case "$0" in
   */*) __mydir="${0%/*}" ;;
   *)   __mydir="." ;;
esac
__abs_mydir="$(cd "${__mydir:-/}" && pwd -P)"
__myname="${0##*/}"
__abs_myname="${__abs_mydir}/${__myname}"
#
2005-12-06 14:55:07 +00:00
# where to find our configuration and temporary file
#
CCOLLECT_CONF="${CCOLLECT_CONF:-/etc/ccollect}"
CSOURCES="${CCOLLECT_CONF}/sources"
CDEFAULTS="${CCOLLECT_CONF}/defaults"
2007-08-16 18:17:54 +00:00
CPREEXEC="${CDEFAULTS}/pre_exec"
CPOSTEXEC="${CDEFAULTS}/post_exec"
CMARKER=".ccollect-marker"
# Scratch file used throughout the run; exported so child processes see it.
# Assign first and export afterwards, otherwise the status of "export"
# would hide a failing mktemp.
TMP="$(mktemp "/tmp/${__myname}.XXXXXX")" || {
   echo "${__myname}: cannot create temporary file" >&2
   exit 1
}
export TMP
VERSION="0.8.1"
RELEASE="2009-11-XX"
2007-08-16 18:17:54 +00:00
HALF_VERSION="ccollect ${VERSION}"
FULL_VERSION="ccollect ${VERSION} (${RELEASE})"
#
# CDATE: how we use it for naming of the archives
# DDATE: how the user should see it in our output (DISPLAY)
#
2007-08-16 18:17:54 +00:00
CDATE="date +%Y%m%d-%H%M"
2007-08-14 12:33:22 +00:00
DDATE="date +%Y-%m-%d-%H:%M:%S"
SDATE="date +%s"
#
# unset values
#
PARALLEL=""
USE_ALL=""
2005-12-06 14:55:07 +00:00
#
# catch signals
#
2007-08-16 18:17:54 +00:00
# On HUP/INT/TERM remove the scratch file and stop. Without the explicit
# exit the shell would resume the script after running the handler, with
# its temporary file already gone.
trap 'rm -f "${TMP}"; exit 1' 1 2 15
2005-11-17 11:33:49 +00:00
2007-01-20 15:41:38 +00:00
#
# Functions
#
# time displaying echo
_techo()
{
   # Print "<timestamp>: <message>" on stdout.
   # Globals:   DDATE (read) - command line that prints the timestamp
   # Arguments: the message words, joined with single spaces ("$*")
   echo "$(${DDATE}): $*"
}
# exit on error
2007-01-20 15:41:38 +00:00
_exit_err()
{
   # Log the reason (all arguments) through _techo, remove the scratch
   # file ${TMP} and terminate the current (sub)shell with status 1.
   _techo "$@"
   rm -f "${TMP}"
   exit 1
}
add_name()
{
   # Filter: prefix every line read from stdin with "[<name>] ".
   # Globals: name (read) - the source currently being processed
   #
   # The name is handed to awk through the environment instead of being
   # pasted into the program text, so quotes or backslashes in it can
   # neither break nor alter the awk program.
   name="${name}" awk '{ print "[" ENVIRON["name"] "] " $0 }'
}
#
# Prepend "ssh ${remote_host}", if backing up to a remote host
#
2008-03-14 05:50:24 +00:00
pcmd()
{
   # Run the given command line: through "ssh ${remote_host}" when a remote
   # host is set, locally otherwise.
   # Globals: remote_host (read)
   # Returns: the exit status of the executed command
   if [ "${remote_host:-}" ]; then
      set -- "ssh" "${remote_host}" "$@"
   fi
   "$@"
}
#
# ssh-"feature": we cannot do '... read ...; ssh ...; < file',
# because ssh reads stdin! -n does not work -> does not ask for password
# Also allow deletion of the matching files without the given suffix
#
delete_from_file()
{
   # Remove every path listed (one per line) in the file "$1" with a single
   # "rm -rf", run via pcmd.
   # Arguments: $1 - file containing the paths to remove
   #            $2 - optional suffix; when given, each listed path is
   #                 removed together with the same path minus that suffix
   # Globals:   VVERBOSE (read); sets file, suffix, to_remove
   # Exits through _exit_err when the removal fails.
   file="$1"; shift
   suffix="" # It will be set, if deleting incomplete backups.
   [ $# -eq 1 ] && suffix="$1" && shift

   # Collect all paths as positional parameters first and call pcmd only
   # after the list has been read completely (ssh would read our stdin).
   while IFS= read -r to_remove || [ -n "${to_remove}" ]; do
      [ -n "${to_remove}" ] || continue
      set -- "$@" "${to_remove}"
      if [ "${suffix}" ]; then
         # Literal suffix strip: no word splitting, no regex metacharacters.
         set -- "$@" "${to_remove%"${suffix}"}"
      fi
   done < "${file}"

   # Nothing listed, nothing to remove.
   [ $# -gt 0 ] || return 0

   _techo "Removing $* ..."
   [ "${VVERBOSE}" ] && echo rm "$@"
   pcmd rm -rf "$@" || _exit_err "Removing $* failed."
}
2007-08-17 22:14:10 +00:00
display_version()
{
   # Print the full version string and exit successfully.
   # Globals: FULL_VERSION (read), TMP (read)
   # The scratch file ${TMP} is removed first so that asking for the
   # version does not leave a temporary file behind.
   echo "${FULL_VERSION}"
   rm -f "${TMP}"
   exit 0
}
2005-12-06 12:45:37 +00:00
usage()
{
   # Print the help text on stdout and exit with status 0.
   # Globals: __myname, CSOURCES, VERSION, RELEASE, TMP (all read)
   # The scratch file ${TMP} is removed before leaving so that showing the
   # help does not leave a temporary file behind.
   cat << eof
${__myname}: [args] <interval name> <sources to backup>
ccollect creates (pseudo) incremental backups
-h, --help: Show this help screen
-a, --all: Backup all sources specified in ${CSOURCES}
-p, --parallel: Parallelise backup processes
-v, --verbose: Be very verbose (uses set -x)
-V, --version: Print version information
This is version ${VERSION}, released on ${RELEASE}
(the first version was written on 2005-12-05 by Nico Schottelius).
Retrieve latest ccollect at http://www.nico.schottelius.org/software/ccollect/
eof
   rm -f "${TMP}"
   exit 0
}
#
# Parse options
#
# Consume leading options; parsing stops at "--" (which is dropped) or at
# the first word that is not an option. Unknown options show the usage.
until [ "$#" -lt 1 ]; do
   case "$1" in
      --)
         # ignore the -- itself
         shift
         break
         ;;
      -a|--all)       USE_ALL=1 ;;
      -p|--parallel)  PARALLEL=1 ;;
      -v|--verbose)   set -x ;;
      -V|--version)   display_version ;;
      -h|--help)      usage ;;
      -*)             usage ;;
      *)              break ;;
   esac
   shift
done
#
# Setup interval
#
if [ $# -ge 1 ]; then
export INTERVAL="$1"
shift
else
usage
fi
#
# Check for configuration directory
#
# Abort when ${CCOLLECT_CONF} is not a directory. The message is passed as
# two arguments of one _exit_err call; nothing may stand between the
# continued lines.
[ -d "${CCOLLECT_CONF}" ] || _exit_err "No configuration found in " \
   "\"${CCOLLECT_CONF}\" (is \$CCOLLECT_CONF properly set?)"
2005-12-06 14:35:29 +00:00
#
# Create (portable!) source "array"
2005-12-06 14:35:29 +00:00
#
# The names end up in the exported variables source_0, source_1, ...;
# no_sources holds how many there are.
export no_sources=0
if [ "${USE_ALL}" = 1 ]; then
   #
   # Get sources from source configuration: every entry of ${CSOURCES}
   #
   ( cd "${CSOURCES}" && ls -1 > "${TMP}" ) || \
      _exit_err "Listing of sources failed. Aborting."

   while IFS= read -r tmp || [ -n "${tmp}" ]; do
      # The value is expanded while eval performs the assignment and is
      # never pasted into the evaluated text, so quotes, "$" or backticks
      # in a name cannot be executed or break the assignment.
      eval "source_${no_sources}=\"\${tmp}\""
      export "source_${no_sources}"
      no_sources=$((${no_sources}+1))
   done < "${TMP}"
else
   #
   # Get sources from command line
   #
   while [ "$#" -ge 1 ]; do
      arg="$1"; shift
      eval "source_${no_sources}=\"\${arg}\""
      export "source_${no_sources}"
      no_sources=$((${no_sources}+1))
   done
fi
#
# Need at least ONE source to backup
#
if [ "${no_sources}" -lt 1 ]; then
usage
else
_techo "${HALF_VERSION}: Beginning backup using interval ${INTERVAL}"
fi
#
# Look for pre-exec command (general)
#
if [ -x "${CPREEXEC}" ]; then
2007-08-16 18:17:54 +00:00
_techo "Executing ${CPREEXEC} ..."
"${CPREEXEC}"; ret=$?
2007-08-16 19:37:39 +00:00
_techo "Finished ${CPREEXEC} (return code: ${ret})."
2007-08-16 19:37:39 +00:00
[ "${ret}" -eq 0 ] || _exit_err "${CPREEXEC} failed. Aborting"
fi
################################################################################
#
# Let's do the backup - here begins the real stuff
#
i=0
2007-08-16 18:17:54 +00:00
while [ "${i}" -lt "${no_sources}" ]; do
#
2006-04-29 10:58:57 +00:00
# Get current source
#
2006-11-14 23:01:19 +00:00
eval name=\"\$source_${i}\"
2007-08-16 18:17:54 +00:00
i=$((${i}+1))
export name
#
# Start ourself, if we want parallel execution
#
if [ "${PARALLEL}" ]; then
"$0" "${INTERVAL}" "${name}" &
continue
fi
#
# Start subshell for easy log editing
#
(
backup="${CSOURCES}/${name}"
#
# Stderr to stdout, so we can produce nice logs
#
exec 2>&1
#
# Record start of backup: internal and for the user
#
begin_s="$(${SDATE})"
_techo "Beginning to backup"
#
# Standard configuration checks
#
if [ ! -e "${backup}" ]; then
_exit_err "Source does not exist."
fi
#
# Configuration _must_ be a directory (cconfig style)
#
if [ ! -d "${backup}" ]; then
_exit_err "\"${backup}\" is not a cconfig-directory. Skipping."
fi
#
# Read / create configuration
#
c_source="${backup}/source"
c_dest="${backup}/destination"
c_pre_exec="${backup}/pre_exec"
c_post_exec="${backup}/post_exec"
# For every option choose the per-source file when it (or its "no_<opt>"
# counterpart) exists in ${backup}, otherwise the file of the same name in
# ${CDEFAULTS}. The chosen path is stored in the variable c_<opt>.
for opt in verbose very_verbose summary exclude rsync_options \
           delete_incomplete remote_host rsync_failure_codes \
           mtime quiet_if_down ; do
   if [ -f "${backup}/${opt}" ] || [ -f "${backup}/no_${opt}" ]; then
      # The path is expanded by eval itself instead of being pasted into
      # the evaluated text, so special characters in it stay literal.
      eval "c_${opt}=\"\${backup}/\${opt}\""
   else
      eval "c_${opt}=\"\${CDEFAULTS}/\${opt}\""
   fi
done
2007-08-16 18:31:30 +00:00
#
# Sort by ctime (default) or mtime (configuration option)
#
if [ -f "${c_mtime}" ] ; then
TSORT="t"
else
TSORT="tc"
fi
2007-08-16 18:31:30 +00:00
#
# First execute pre_exec, which may generate destination or other parameters
#
2007-03-13 07:47:04 +00:00
if [ -x "${c_pre_exec}" ]; then
_techo "Executing ${c_pre_exec} ..."
2007-03-13 07:47:04 +00:00
"${c_pre_exec}"; ret="$?"
_techo "Finished ${c_pre_exec} (return code ${ret})."
[ "${ret}" -eq 0 ] || _exit_err "${c_pre_exec} failed. Skipping."
fi
#
# Source configuration checks
#
2007-03-13 07:47:04 +00:00
if [ ! -f "${c_source}" ]; then
_exit_err "Source description \"${c_source}\" is not a file. Skipping."
else
2008-03-14 05:50:24 +00:00
source=$(cat "${c_source}"); ret="$?"
if [ "${ret}" -ne 0 ]; then
_exit_err "Source ${c_source} is not readable. Skipping."
fi
fi
#
2008-03-14 05:50:24 +00:00
# Destination is a path
#
2008-03-04 23:39:17 +00:00
if [ ! -f "${c_dest}" ]; then
_exit_err "Destination ${c_dest} is not a file. Skipping."
else
ddir="$(cat "${c_dest}")"; ret="$?"
2008-03-04 23:39:17 +00:00
if [ "${ret}" -ne 0 ]; then
_exit_err "Destination ${c_dest} is not readable. Skipping."
fi
fi
2008-03-14 05:50:24 +00:00
#
# Set pre-cmd, if we backup to a remote host.
2008-03-14 05:50:24 +00:00
#
if [ -f "${c_remote_host}" ]; then
remote_host="$(cat "${c_remote_host}")"; ret="$?"
2008-03-14 05:50:24 +00:00
if [ "${ret}" -ne 0 ]; then
_exit_err "Remote host file ${c_remote_host} is unreadable. Skipping."
2008-03-14 05:50:24 +00:00
fi
destination="${remote_host}:${ddir}"
else
remote_host=""
2008-03-14 05:50:24 +00:00
destination="${ddir}"
fi
export remote_host
2008-03-14 05:50:24 +00:00
#
# Parameters: ccollect defaults, configuration options, user options
#
2007-08-14 13:40:23 +00:00
#
# Rsync standard options
2007-08-14 13:40:23 +00:00
#
2007-08-16 18:08:23 +00:00
# Start the rsync argument list from scratch. Positional parameters that
# are still left over from the command line of the script (for example
# extra words given together with --all) must not end up as rsync arguments.
set -- "--archive" "--delete" "--numeric-ids" "--relative" \
       "--delete-excluded" "--sparse"
2007-08-16 18:08:23 +00:00
#
# Exclude list
#
if [ -f "${c_exclude}" ]; then
2007-08-16 18:17:54 +00:00
set -- "$@" "--exclude-from=${c_exclude}"
fi
2006-10-25 12:48:48 +00:00
#
2006-01-24 20:46:49 +00:00
# Output a summary
#
2007-03-13 07:47:04 +00:00
if [ -f "${c_summary}" ]; then
2007-08-14 13:40:23 +00:00
set -- "$@" "--stats"
2006-01-24 20:46:49 +00:00
fi
2006-10-25 12:48:48 +00:00
#
# Verbosity for rsync, rm, and mkdir
#
VVERBOSE=""
if [ -f "${c_very_verbose}" ]; then
2007-08-14 13:40:23 +00:00
set -- "$@" "-vv"
VVERBOSE="-v"
2007-08-14 13:40:23 +00:00
elif [ -f "${c_verbose}" ]; then
set -- "$@" "-v"
2006-03-19 00:10:58 +00:00
fi
2006-10-25 12:48:48 +00:00
#
# Extra options for rsync provided by the user
#
# Each non-empty line of the rsync_options file becomes one rsync argument.
if [ -f "${c_rsync_options}" ]; then
   # -r keeps backslashes literal; the "|| [ -n ... ]" part also accepts a
   # last line without trailing newline. Blank lines are skipped so that
   # rsync never receives an empty argument.
   while read -r line || [ -n "${line}" ]; do
      [ -n "${line}" ] || continue
      set -- "$@" "${line}"
   done < "${c_rsync_options}"
fi
2006-10-25 12:48:48 +00:00
2007-08-16 18:08:23 +00:00
#
# Check: source is up and accepting connections (before deleting old backups!)
#
if ! rsync "$@" "${source}" >/dev/null 2>"${TMP}" ; then
if [ ! -f "${c_quiet_if_down}" ]; then
cat "${TMP}"
fi
_exit_err "Source ${source} is not readable. Skipping."
fi
#
# Check: destination exists?
#
( pcmd cd "${ddir}" ) || _exit_err "Cannot change to ${ddir}. Skipping."
#
# Check: incomplete backups? (needs echo to remove newlines)
2007-08-16 18:08:23 +00:00
#
# *.marker: not possible, creates an error, if no *.marker exists
# -> catch return value
pcmd ls -d1 "${ddir}/"*"${CMARKER}" > "${TMP}" 2>/dev/null; ret=$?
if [ "${ret}" -eq 0 ]; then
_techo "Incomplete backups: $(echo $(cat "${TMP}"))"
if [ -f "${c_delete_incomplete}" ]; then
delete_from_file "${TMP}" "${CMARKER}"
fi
fi
#
# Interval definition: First try source specific, fallback to default
#
c_interval="$(cat "${backup}/intervals/${INTERVAL}" 2>/dev/null)"
if [ -z "${c_interval}" ]; then
c_interval="$(cat "${CDEFAULTS}/intervals/${INTERVAL}" 2>/dev/null)"
if [ -z "${c_interval}" ]; then
_exit_err "No definition for interval \"${INTERVAL}\" found. Skipping."
fi
fi
#
# Check: maximum number of backups is reached?
#
# Count the directories of this interval below ${ddir}; sed strips the
# leading blanks some wc implementations print. The three parts form one
# pipeline, so nothing may stand between the continued lines.
count="$(pcmd ls -d1 "${ddir}/${INTERVAL}."*"/" | wc -l \
   | sed 's/^ *//g')" || _exit_err "Counting backups failed"
2007-08-16 18:41:03 +00:00
_techo "Existing backups: ${count} Total keeping backups: ${c_interval}"
2007-08-16 18:41:03 +00:00
if [ "${count}" -ge "${c_interval}" ]; then
substract="$((${c_interval} - 1))"
remove="$((${count} - ${substract}))"
2007-08-16 19:26:56 +00:00
_techo "Removing ${remove} backup(s)..."
2007-08-16 18:41:03 +00:00
pcmd ls -${TSORT}d1r "${ddir}/${INTERVAL}."*"/" |
head -n "${remove}" > "${TMP}" || \
_exit_err "Listing old backups failed"
2007-08-16 19:37:39 +00:00
delete_from_file "${TMP}"
2007-08-16 18:41:03 +00:00
fi
2007-08-16 18:08:23 +00:00
#
2007-08-17 21:31:45 +00:00
# Check for backup directory to clone from: Always clone from the latest one!
#
# List ${ddir} newest first (ordering selected by ${TSORT}) with directories
# marked by a trailing "/", then take the first directory as the one to
# clone from. The listing goes through ${TMP} so that a failing ls is
# actually noticed; inside a pipeline only the status of head would count.
pcmd ls -${TSORT}p1 "${ddir}" > "${TMP}" || \
   _exit_err "Failed to list contents of ${ddir}."
last_dir="$(grep '/$' "${TMP}" | head -n 1)"
#
2007-08-16 19:37:39 +00:00
# clone from old backup, if existing
#
if [ "${last_dir}" ]; then
2008-03-17 10:44:20 +00:00
set -- "$@" "--link-dest=${ddir}/${last_dir}"
_techo "Hard linking from ${last_dir}"
fi
# set time when we really begin to backup, not when we began to remove above
export destination_name="${INTERVAL}.$(${CDATE}).$$"
export destination_dir="${ddir}/${destination_name}"
export destination_full="${destination}/${destination_name}"
2006-10-25 12:48:48 +00:00
# give some info
_techo "Beginning to backup, this may take some time..."
2007-08-16 19:45:02 +00:00
_techo "Creating ${destination_dir} ..."
[ "${VVERBOSE}" ] && echo "mkdir ${destination_dir}"
pcmd mkdir "${destination_dir}" || \
_exit_err "Creating ${destination_dir} failed. Skipping."
#
# added marking in 0.6 (and remove it, if successful later)
#
pcmd touch "${destination_dir}${CMARKER}"
2005-12-06 16:08:38 +00:00
#
# the rsync part
#
2007-08-14 13:40:23 +00:00
_techo "Transferring files..."
rsync "$@" "${source}" "${destination_full}"; ret=$?
_techo "Finished backup (rsync return code: $ret)."
2006-10-25 13:16:37 +00:00
#
# Set modification time (mtime) to current time, if sorting by mtime is enabled
2006-10-25 13:16:37 +00:00
#
[ -f "$c_mtime" ] && pcmd touch "${destination_dir}"
2007-08-16 18:08:23 +00:00
2006-10-25 13:16:37 +00:00
#
# Check if rsync exit code indicates failure.
2006-10-25 13:16:37 +00:00
#
# fail is set to 1 when rsync's exit status ${ret} equals one of the codes
# listed, one per line, in the rsync_failure_codes file; otherwise it stays
# empty.
fail=""
if [ -f "${c_rsync_failure_codes}" ]; then
   # "|| [ -n ... ]" also honours a last code without trailing newline
   while read -r code || [ -n "${code}" ]; do
      if [ "${ret}" = "${code}" ]; then
         fail=1
      fi
   done < "${c_rsync_failure_codes}"
fi
2007-08-16 18:08:23 +00:00
#
# Remove marking here unless rsync failed.
#
if [ -z "$fail" ]; then
pcmd rm "${destination_dir}${CMARKER}" || \
_exit_err "Removing ${destination_dir}${CMARKER} failed."
if [ "${ret}" -ne 0 ]; then
_techo "Warning: rsync exited non-zero, the backup may be broken (see rsync errors)."
fi
else
_techo "Warning: rsync failed with return code $ret."
fi
#
# post_exec
#
2007-08-16 18:08:23 +00:00
if [ -x "${c_post_exec}" ]; then
_techo "Executing ${c_post_exec} ..."
"${c_post_exec}"; ret=$?
2007-08-16 18:08:23 +00:00
_techo "Finished ${c_post_exec}."
2006-03-19 00:13:38 +00:00
if [ "${ret}" -ne 0 ]; then
2007-08-16 18:08:23 +00:00
_exit_err "${c_post_exec} failed."
2006-03-19 00:13:38 +00:00
fi
fi
2007-08-16 18:08:23 +00:00
# Calculation
end_s="$(${SDATE})"
full_seconds="$((${end_s} - ${begin_s}))"
hours="$((${full_seconds} / 3600))"
seconds="$((${full_seconds} - (${hours} * 3600)))"
minutes="$((${seconds} / 60))"
seconds="$((${seconds} - (${minutes} * 60)))"
2007-08-16 18:08:23 +00:00
_techo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)"
) | add_name
done
2005-12-06 14:55:07 +00:00
2005-12-06 16:08:38 +00:00
#
# Be a good parent and wait for our children, if they are running wild parallel
#
2007-08-16 18:08:23 +00:00
if [ "${PARALLEL}" ]; then
2007-08-16 19:37:39 +00:00
_techo "Waiting for children to complete..."
2005-12-06 16:08:38 +00:00
wait
fi
#
# Look for post-exec command (general)
#
2007-08-16 18:08:23 +00:00
if [ -x "${CPOSTEXEC}" ]; then
_techo "Executing ${CPOSTEXEC} ..."
2007-08-16 19:37:39 +00:00
"${CPOSTEXEC}"; ret=$?
_techo "Finished ${CPOSTEXEC} (return code: ${ret})."
2006-10-25 12:48:48 +00:00
if [ "${ret}" -ne 0 ]; then
2007-08-16 19:46:02 +00:00
_techo "${CPOSTEXEC} failed."
2006-03-19 00:13:38 +00:00
fi
fi
2007-08-16 18:08:23 +00:00
rm -f "${TMP}"
_techo "Finished"