ccollect/ccollect.sh
Nico Schottelius e917546ad1 introduce old backup matching
But this is definitely the WRONG way.

We cannot, we need not and we MUST NOT try to catch dumb users.

Instead we are the program, we may give the rules. And if we add a fancy
feature and the user is too dumb to use it, it is HER/HIS fault.

Just to remind myself:

   Add use of other backup intervals as source for this interval
   with the following behaviour:

   - locate all possible intervals in
      * default configuration
      * source specific configuration
   - locate all backups for each interval, record only the timestamp:
      * taeglich.2007-01-23-0042
         -> possible_old_sources += taeglich
         -> taeglich_last=2007-01-23-0042

         - NO READ NEED for this! other method:
      * sort by time, ALL possible backup:
         - does posix-ls support this? CHECK!
         - ( cd $C_SOURCE/destination/;
             ls --sort-by-time "$interval1".* "$interval2".* |
             tail -n1 )
         - catches all possible intervals
   - add optional behaviour:
      * prefer other interval (newer) over this one
2007-01-17 09:39:23 +01:00

536 lines
11 KiB
Bash
Executable file

#!/bin/sh
# Nico Schottelius
# written for SyGroup (www.sygroup.ch)
# Date: Mon Nov 14 11:45:11 CET 2005
# Last Modified: (See ls -l or git)
#
# where to find our configuration and temporary file
#
# CCOLLECT_CONF may be preset in the environment; the other locations
# are derived from it.
#
CCOLLECT_CONF=${CCOLLECT_CONF:-/etc/ccollect}
CSOURCES=$CCOLLECT_CONF/sources
CDEFAULTS=$CCOLLECT_CONF/defaults
CPREEXEC="$CDEFAULTS/pre_exec"
CPOSTEXEC="$CDEFAULTS/post_exec"
#
# $0 is quoted: an invocation path containing blanks would otherwise be
# split into several arguments for basename and mktemp.
# The temporary file is needed for the source listing (--all) and for the
# removal of old backups, so do not continue without it.
#
TMP=$(mktemp "/tmp/$(basename "$0").XXXXXX")
if [ $? -ne 0 ] || [ -z "$TMP" ]; then
   echo "Cannot create temporary file. Aborting." >&2
   exit 1
fi
VERSION=0.5.2
RELEASE="2006-XXXXX"
HALF_VERSION="ccollect $VERSION"
FULL_VERSION="ccollect $VERSION ($RELEASE)"
#
# Date + Markup
# CDATE: how we use it for naming of the archives
# DDATE: how the user should see it in our output
# MDATE: how to match (shell expression) the date
# MSUFFIX: MDATE followed by a dot and an arbitrary rest
#
# CDATE and DDATE hold the command to run, not its output; they are
# meant to be used as $($CDATE) and $($DDATE).
#
CDATE="date +%Y-%m-%d-%H%M"
DDATE="date"
# FIXME: match only numbers!
MDATE="*-*-*-*"
MSUFFIX="${MDATE}.*"
#
# For future releases...
# Date matching:
#
# [1-9][0-9][0-9][0-9] - allow years from 1000 until 9999 - the human race will
# die earlier than this value is reached
#
# -
# [0-1][0-9] ...
#
#
#
# unset parallel execution
#
PARALLEL=
#
# catch signals: on HUP (1), INT (2) and TERM (15) the temporary file
# is removed
#
trap "rm -f \"${TMP}\"" HUP INT TERM
#
# add_name: prefix every line read from stdin with "[$name] "
#
# Reads:  name (name of the source that is currently being backed up)
# Input:  arbitrary text on stdin
# Output: the same lines on stdout, each one prefixed
#
add_name()
{
   # The prefix is handed over through the environment and not pasted
   # into the program text: this way characters like ':', '&' or '\' in
   # the name are printed as they are instead of being interpreted.
   CCOLLECT_PREFIX="[$name] " awk '{ print ENVIRON["CCOLLECT_PREFIX"] $0 }'
}
#
# Tell how to use us
#
# Prints the help screen to stdout and ends the script with status 0.
# Reads: CSOURCES, VERSION, RELEASE
#
usage()
{
   cat << eof
$(basename $0): <interval name> [args] <sources to backup>

 ccollect creates (pseudo) incremental backups

 -h, --help: Show this help screen
 -p, --parallel: Parallelise backup processes
 -a, --all: Backup all sources specified in $CSOURCES
 -v, --verbose: Be very verbose (uses set -x).

 This is version $VERSION, released on ${RELEASE}
 (the first version was written on 2005-12-05 by Nico Schottelius).

 Retrieve latest ccollect at http://unix.schottelius.org/ccollect/
eof
   exit 0
}
#
# need at least interval and one source or --all
#
# (usage prints the help screen and ends the script)
#
[ $# -ge 2 ] || usage

#
# check for configuration directory
#
if ! [ -d "${CCOLLECT_CONF}" ]; then
   echo "No configuration found in \"$CCOLLECT_CONF\"" \
        " (is \$CCOLLECT_CONF properly set?)"
   exit 1
fi
#
# Filter arguments
#
# The first argument is the interval name; everything after it is either
# an option or the name of a source.
#
INTERVAL=$1; shift

#
# filter_args: sort the remaining arguments into option flags and sources
#
# Arguments: the command line without the interval name
# Sets:      ALL, VERBOSE, PARALLEL, NO_MORE_ARGS (to 1 when the matching
#            argument is seen),
#            source_0 .. source_<n-1> (the sources in command line order),
#            no_sources (n)
# -h / --help calls usage, which ends the script.
#
filter_args()
{
   no_sources=0

   # Walk over "$@" directly: building \$$i by hand yields $10 for the
   # tenth argument, which the shell reads as ${1} followed by a 0.
   for arg in "$@"; do
      if [ "$NO_MORE_ARGS" = 1 ]; then
         # everything after "--" is a source, even if it looks like an option
         #
         # eval only sees the text  source_N="$arg" : the value itself is
         # expanded afterwards, so quotes, $ and backquotes in it stay
         # literal
         eval "source_${no_sources}=\"\$arg\""
         no_sources=$((no_sources + 1))
      else
         case "$arg" in
            -a|--all)
               ALL=1
               ;;
            -v|--verbose)
               VERBOSE=1
               ;;
            -p|--parallel)
               PARALLEL=1
               ;;
            -h|--help)
               usage
               ;;
            --)
               NO_MORE_ARGS=1
               ;;
            *)
               eval "source_${no_sources}=\"\$arg\""
               no_sources=$((no_sources + 1))
               ;;
         esac
      fi
   done
}

filter_args "$@"
#
# be really, really, really verbose
#
# -v / --verbose switches on the command trace of the shell
#
case "$VERBOSE" in
   1) set -x ;;
esac
#
# Look for pre-exec command (general)
#
# The hook is only run if it is executable; if it returns non-zero the
# script ends with status 1.
#
if [ -x "${CPREEXEC}" ]; then
   echo "Executing $CPREEXEC ..."
   "${CPREEXEC}"
   ret=$?
   echo "Finished ${CPREEXEC}."

   [ $ret -eq 0 ] || {
      echo "$CPREEXEC failed, not starting backup."
      exit 1
   }
fi
#
# Look, if we should take ALL sources
#

#
# collect_all_sources: use every entry below $CSOURCES as a source
#
# Reads:  CSOURCES, TMP
# Sets:   source_0 .. source_<n-1>, no_sources (n); sources collected
#         before are dropped
# Ends the script with status 1 if the directory cannot be listed.
#
collect_all_sources()
{
   # reset everything specified before
   no_sources=0

   #
   # get entries from sources
   #
   ( cd "$CSOURCES" && ls > "$TMP" )
   if [ "$?" -ne 0 ]; then
      echo "Listing of sources failed. Aborting."
      exit 1
   fi

   # IFS= and -r keep blanks and backslashes of a name as they are.
   # eval only sees the text  source_N="$tmp" : the name itself is
   # expanded afterwards, so quotes, $ and backquotes in it stay literal.
   while IFS= read -r tmp; do
      eval "source_${no_sources}=\"\$tmp\""
      no_sources=$((no_sources + 1))
   done < "$TMP"
}

if [ "$ALL" = 1 ]; then
   collect_all_sources
fi
#
# Need at least ONE source to backup
#
# Without a source the help screen is shown (usage ends the script),
# otherwise the start of the run is announced.
#
if [ "$no_sources" -lt 1 ]; then
   usage
else
   echo "==> ${HALF_VERSION}: Beginning backup using interval ${INTERVAL} <=="
fi

#
# check default configuration
#
# D_INTERVAL stays empty if there is no default definition for this
# interval (the error message of cat is discarded).
#
D_FILE_INTERVAL="${CDEFAULTS}/intervals/${INTERVAL}"
D_INTERVAL=$(cat "${D_FILE_INTERVAL}" 2>/dev/null)
#
# Let's do the backup
#
# backup_source does the complete backup run for one source; the loop at
# the end of this section calls it once for every collected source.
#

#
# backup_source: create a new backup of the source "$name"
#
# Reads:   name, INTERVAL, D_INTERVAL, CSOURCES, CDATE, DDATE, TMP
# Output:  progress and error messages on stdout; stderr is merged into it
# Returns: the body is a subshell, so a failed check ends only this run
#          (exit 1) and the variables set here do not reach the caller
#
backup_source()
(
   #
   # Stderr to stdout, so we can produce nice logs
   #
   exec 2>&1

   #
   # Standard locations
   #
   backup="$CSOURCES/$name"
   c_source="$backup/source"
   c_dest="$backup/destination"
   c_exclude="$backup/exclude"
   c_verbose="$backup/verbose"
   c_vverbose="$backup/very_verbose"
   c_rsync_extra="$backup/rsync_options"
   c_summary="$backup/summary"
   c_incomplete="$backup/incomplete_remove"
   # CDATE holds the date command, not a date: run it to get the timestamp
   c_marker=".ccollect-$($CDATE).$$"
   c_pre_exec="$backup/pre_exec"
   c_post_exec="$backup/post_exec"

   begin=$($DDATE)
   begin_s=$(date +%s)

   #
   # unset possible options
   #
   EXCLUDE=""
   RSYNC_EXTRA=""
   SUMMARY=""
   VERBOSE=""
   VVERBOSE=""

   echo "$begin Beginning to backup"

   #
   # Standard configuration checks
   #
   if [ ! -e "$backup" ]; then
      echo "Source does not exist."
      exit 1
   fi

   #
   # configuration _must_ be a directory
   #
   if [ ! -d "$backup" ]; then
      echo "\"$name\" is not a cconfig-directory. Skipping."
      exit 1
   fi

   #
   # first execute pre_exec, which may generate destination or other
   # parameters
   #
   if [ -x "$c_pre_exec" ]; then
      echo "Executing ${c_pre_exec} ..."
      "$c_pre_exec"
      ret="$?"
      echo "Finished ${c_pre_exec}."

      if [ "$ret" -ne 0 ]; then
         echo "$c_pre_exec failed. Skipping."
         exit 1
      fi
   fi

   #
   # interval definition: First try source specific, fallback to default
   #
   c_interval="$(cat "$backup/intervals/$INTERVAL" 2>/dev/null)"

   if [ -z "$c_interval" ]; then
      c_interval="$D_INTERVAL"

      if [ -z "$c_interval" ]; then
         echo "No definition for interval \"$INTERVAL\" found. Skipping."
         exit 1
      fi
   fi

   #
   # Source checks
   #
   if [ ! -f "$c_source" ]; then
      echo "Source description $c_source is not a file. Skipping."
      exit 1
   else
      source=$(cat "$c_source")
      if [ $? -ne 0 ]; then
         echo "Source $c_source is not readable. Skipping."
         exit 1
      fi
   fi

   #
   # destination _must_ be a directory
   #
   if [ ! -d "$c_dest" ]; then
      echo "Destination $c_dest does not link to a directory. Skipping"
      exit 1
   fi

   #
   # exclude list
   #
   if [ -f "$c_exclude" ]; then
      EXCLUDE="--exclude-from=$c_exclude"
   fi

   #
   # extra options for rsync
   #
   if [ -f "$c_rsync_extra" ]; then
      RSYNC_EXTRA="$(cat "$c_rsync_extra")"
   fi

   #
   # Output a summary
   #
   if [ -f "$c_summary" ]; then
      SUMMARY="--stats"
   fi

   #
   # Verbosity for rsync
   #
   if [ -f "$c_verbose" ]; then
      VERBOSE="-v"
   fi

   #
   # MORE verbosity, includes standard verbosity
   #
   if [ -f "$c_vverbose" ]; then
      VERBOSE="-v"
      VVERBOSE="-v"
   fi

   #
   # show if we shall remove partial backup, and whether the last one
   # is incomplete or not
   #
   # A backup counts as incomplete if it still contains the mark
   # (.ccollect-<date>.<pid>) that is created before rsync starts and
   # removed again after rsync has returned.
   #
   # FIXME: test general for incomplete and decide only for warn|delete based on option?
   # FIXME: Define which is the last dir before? Or put this thing into
   # a while loop? Is it senseful to remove _ALL_ backups if non is complete?
   if [ -f "$c_incomplete" ]; then
      last_dir=$(ls -d "$c_dest/${INTERVAL}."?* 2>/dev/null | sort -n | tail -n 1)

      if [ "$last_dir" ]; then
         # no mark found is the normal case: do not log the error of ls
         incomplete=$(cd "$last_dir" && ls -d .ccollect-????-??-??-????.* 2>/dev/null)

         if [ "$incomplete" ]; then
            echo "Removing incomplete backup $last_dir ..."
            # NOTE(review): still a dry run - the command is only printed,
            # nothing is deleted. Enable the real rm -rf deliberately.
            echo rm -rf $VVERBOSE "$last_dir"
         fi
      fi
   fi

   #
   # check if maximum number of backups is reached, if so remove
   #
   # the created directories are named $INTERVAL.$DATE-$TIME.$PID
   count=$(cd "$c_dest" && ls -p1 | grep "^${INTERVAL}\..*/\$" | wc -l | sed 's/^ *//g')
   # one echo instead of "echo -n" + echo: -n is not understood by every sh
   echo "Currently $count backup(s) exist(s), total keeping $c_interval backup(s)."

   if [ "$count" -ge "$c_interval" ]; then
      # keep one less than allowed: the new backup is created below
      substract=$((${c_interval} - 1))
      remove=$(($count - $substract))
      echo "Removing $remove backup(s)..."

      # names sort by their timestamp, so the first entries are the oldest
      ls -d "$c_dest/${INTERVAL}."?* | sort -n | head -n "$remove" > "$TMP"

      while IFS= read -r to_remove; do
         dir="$to_remove"
         echo "Removing $dir ..."
         rm $VVERBOSE -rf "$dir"
      done < "$TMP"
   fi

   #
   # clone the old directory with hardlinks
   #
   destination_date=$($CDATE)
   destination_dir="$c_dest/${INTERVAL}.${destination_date}.$$"

   # must be looked up before the new directory exists
   last_dir=$(ls -d "$c_dest/${INTERVAL}."?* 2>/dev/null | sort -n | tail -n 1)

   # give some info
   echo "Beginning to backup, this may take some time..."

   echo "Creating $destination_dir ..."
   mkdir $VVERBOSE "$destination_dir" || exit 1

   #
   # make an absolute path, perhaps $CCOLLECT_CONF is relative!
   #
   abs_destination_dir="$(cd "$destination_dir" && pwd -P)"

   #
   # add mark
   #
   touch "${abs_destination_dir}/${c_marker}"

   #
   # the rsync part
   # options partly stolen from rsnapshot
   #
   echo "$($DDATE) Transferring files..."

   ouropts="-a --delete --numeric-ids --relative --delete-excluded"
   # deliberately used unquoted below: every option has to be its own word
   useropts="$VERBOSE $EXCLUDE $SUMMARY $RSYNC_EXTRA"

   # Clone from previous backup, if existing
   if [ "$last_dir" ]; then
      #
      # This directory MUST be absolute, because rsync does chdir()
      # before beginning backup!
      #
      abs_last_dir="$(cd "$last_dir" && pwd -P)"
      if [ -z "$abs_last_dir" ]; then
         echo "Changing to the last backup directory failed. I skip this backup."
         exit 1
      fi

      rsync_hardlink="--link-dest=$abs_last_dir"
      rsync $ouropts "$rsync_hardlink" $useropts "$source" "$abs_destination_dir"
   else
      rsync $ouropts $useropts "$source" "$abs_destination_dir"
   fi
   ret=$?

   if [ "$ret" -ne 0 ]; then
      echo "rsync reported error $ret. The backup may be broken (see rsync errors)."
   fi

   #
   # rsync has returned, the run was not interrupted: remove the mark
   #
   rm -f "${abs_destination_dir}/${c_marker}"

   echo "$($DDATE) Finished backup"

   #
   # post_exec
   #
   if [ -x "$c_post_exec" ]; then
      echo "$($DDATE) Executing $c_post_exec ..."
      "$c_post_exec"
      ret=$?
      echo "$($DDATE) Finished ${c_post_exec}."

      if [ $ret -ne 0 ]; then
         echo "$c_post_exec failed."
      fi
   fi

   #
   # split the elapsed seconds into hours, minutes and seconds
   #
   end_s=$(date +%s)

   full_seconds=$((${end_s} - ${begin_s}))
   hours=$(($full_seconds / 3600))
   seconds=$(($full_seconds - ($hours * 3600)))
   minutes=$(($seconds / 60))
   seconds=$((${seconds} - (${minutes} * 60)))

   echo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)"
)

#
# Run the backup for every collected source
#
i=0
while [ "$i" -lt "${no_sources:-0}" ]; do
   #
   # Get current source
   #
   eval name=\"\$source_${i}\"
   i=$(($i+1))

   export name

   #
   # start ourself, if we want parallel execution
   #
   if [ "$PARALLEL" ]; then
      "$0" "$INTERVAL" "$name" &
      continue
   fi

   #
   # every line of the log gets the name of the source as prefix
   #
   backup_source | add_name
done
#
# Be a good parent and wait for our children, if they are running wild parallel
#
# In parallel mode the backups were started as background jobs; wait
# returns when all of them have ended.
#
if [ -n "$PARALLEL" ]; then
   now=$($DDATE)
   echo "$now Waiting for child jobs to complete..."
   wait
fi
#
# Look for post-exec command (general)
#
# The hook is only run if it is executable; a failure is reported but
# does not end the script.
#
if [ -x "${CPOSTEXEC}" ]; then
   echo "$($DDATE) Executing $CPOSTEXEC ..."
   "${CPOSTEXEC}"
   ret=$?
   echo "$($DDATE) Finished ${CPOSTEXEC}."

   [ $ret -eq 0 ] || echo "$CPOSTEXEC failed."
fi
# the temporary file is not needed anymore
rm -f "$TMP"

# WE is not assigned anywhere in this script: fall back to the name the
# script was called with, so that the message does not end up without a name
echo "==> Finished ${WE:-$(basename "$0")} <=="