Merge branch 'performance/speed-up-removal' into 'master'

Improve performance

See merge request ungleich-public/ccollect!12
This commit is contained in:
poljakowski 2019-11-12 17:50:41 +01:00
commit dcc72aebf7
2 changed files with 52 additions and 16 deletions

View file

@ -160,16 +160,29 @@ delete_from_file()
file="$1"; shift file="$1"; shift
suffix="" # It will be set, if deleting incomplete backups. suffix="" # It will be set, if deleting incomplete backups.
[ $# -eq 1 ] && suffix="$1" && shift [ $# -eq 1 ] && suffix="$1" && shift
# Directories to delete are first moved into this trash dir inside the
# destination dir, so that the mv operation is fast.
trash="$(mktemp -d ".trash.XXXXXX")"
while read to_remove; do while read to_remove; do
mv "${to_remove}" "${trash}" ||
_exit_err "Moving ${to_remove} to ${trash} failed."
set -- "$@" "${to_remove}" set -- "$@" "${to_remove}"
if [ "${suffix}" ]; then if [ "${suffix}" ]; then
to_remove_no_suffix="$(echo ${to_remove} | sed "s/$suffix\$//")" to_remove_no_suffix="$(echo ${to_remove} | sed "s/$suffix\$//")"
mv "${to_remove_no_suffix}" "${trash}" ||
_exit_err "Moving ${to_remove_no_suffix} to ${trash} failed."
set -- "$@" "${to_remove_no_suffix}" set -- "$@" "${to_remove_no_suffix}"
fi fi
done < "${file}" done < "${file}"
_techo "Removing $@ ..." _techo "Removing $@ in ${trash}..."
[ "${VVERBOSE}" ] && echo rm "$@" empty_dir=".empty-dir"
rm -rf "$@" || _exit_err "Removing $@ failed." mkdir "${empty_dir}" || _exit_err "Empty directory ${empty_dir} cannot be created."
[ "${VVERBOSE}" ] && echo "Starting: rsync -a --delete ${empty_dir} ${trash}"
# rsync needs ending slash for directory content
rsync -a --delete "${empty_dir}/" "${trash}/" || _exit_err "Removing $@ failed."
rmdir "${trash}" || _exit_err "Removing ${trash} directory failed"
rmdir "${empty_dir}" || _exit_err "Removing ${empty_dir} directory failed"
_techo "Removing $@ in ${trash} finished."
} }
display_version() display_version()
@ -698,10 +711,16 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
if [ "$ret" -eq 0 ]; then if [ "$ret" -eq 0 ]; then
_techo "Incomplete backups: $(echo $(cat "${TMP}"))" _techo "Incomplete backups: $(echo $(cat "${TMP}"))"
if [ -f "${c_delete_incomplete}" ]; then if [ -f "${c_delete_incomplete}" ]; then
delete_from_file "${TMP}" "${CMARKER}" delete_from_file "${TMP}" "${CMARKER}" &
fi fi
fi fi
#
# Include current time in name, not the time when we began to remove above
#
export destination_name="${INTERVAL}.$(${CDATE}).$$-${source_no}"
export destination_dir="${ddir}/${destination_name}"
# #
# Check: maximum number of backups is reached? # Check: maximum number of backups is reached?
# #
@ -710,19 +729,38 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
_techo "Existing backups: ${count} Total keeping backups: ${c_interval}" _techo "Existing backups: ${count} Total keeping backups: ${c_interval}"
if [ "${count}" -ge "${c_interval}" ]; then if [ "${count}" -ge "${c_interval}" ]; then
remove="$((${count} - ${c_interval} + 1))" # Use oldest directory as new backup destination directory.
_techo "Removing ${remove} backup(s)..." # It does not need to be deleted; rsync will sync its content.
oldest_bak=$(ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n 1 || \
_exit_err "Listing oldest backup failed")
_techo "Using ${oldest_bak} for destination dir ${destination_dir}"
if mv "${oldest_bak}" "${destination_dir}"; then
# Touch dest dir so it is not sorted wrong in listings below.
touch "${destination_dir}"
ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n "${remove}" > "${TMP}" || \ # We have something to remove only if count > interval.
_exit_err "Listing old backups failed" remove="$((${count} - ${c_interval}))"
else
_techo_err "Renaming oldest backup ${oldest_bak} to ${destination_dir} failed, removing it."
remove="$((${count} - ${c_interval} + 1))"
fi
if [ "${remove}" -gt 0 ]; then
_techo "Removing ${remove} backup(s)..."
delete_from_file "${TMP}" ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n "${remove}" > "${TMP}" || \
_exit_err "Listing old backups failed"
delete_from_file "${TMP}" &
fi
fi fi
# #
# Check for backup directory to clone from: Always clone from the latest one! # Check for backup directory to clone from: Always clone from the latest one!
# Exclude destination_dir from the listing: it may be the oldest existing
# backup directory, renamed and touched for reuse above.
# #
last_dir="$(ls -${TSORT}p1 | grep '/$' | head -n 1)" || \ dest_dir_name=$(basename "${destination_dir}")
last_dir="$(ls -${TSORT}p1 | grep '/$' | grep -v "${dest_dir_name}" | head -n 1)" || \
_exit_err "Failed to list contents of ${ddir}." _exit_err "Failed to list contents of ${ddir}."
# #
@ -733,12 +771,6 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
_techo "Hard linking from ${last_dir}" _techo "Hard linking from ${last_dir}"
fi fi
#
# Include current time in name, not the time when we began to remove above
#
export destination_name="${INTERVAL}.$(${CDATE}).$$-${source_no}"
export destination_dir="${ddir}/${destination_name}"
# #
# Mark backup running and go back to original directory # Mark backup running and go back to original directory
# #
@ -807,6 +839,9 @@ while [ "${source_no}" -lt "${no_sources}" ]; do
_techo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)" _techo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)"
unlock "${name}" unlock "${name}"
# wait for children (doing delete_from_file) if any still running
wait
) || exit ) || exit
done done

1
doc/changes/next Normal file
View file

@ -0,0 +1 @@
* Improve performance, improve process of deletion of old backups