From 1628ce58c768211052b9d8d02cd0c4d3c7ea3a17 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Sat, 5 Oct 2019 10:48:19 +0200 Subject: [PATCH 01/12] Replace rm with faster rsync --delete with empty src dir --- ccollect | 18 +++++++++++++++--- doc/changes/next | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 doc/changes/next diff --git a/ccollect b/ccollect index bb44ed6..05b7014 100755 --- a/ccollect +++ b/ccollect @@ -160,16 +160,28 @@ delete_from_file() file="$1"; shift suffix="" # It will be set, if deleting incomplete backups. [ $# -eq 1 ] && suffix="$1" && shift + # dirs for deletion will be moved to this trash dir inside destination dir + # - for fast mv operation + trash="$(mktemp -d "trash.XXXXXX")" while read to_remove; do + mv "${to_remove}" "${trash}" || + _exit_err "Moving ${to_remove} to ${trash} failed." set -- "$@" "${to_remove}" if [ "${suffix}" ]; then to_remove_no_suffix="$(echo ${to_remove} | sed "s/$suffix\$//")" + mv "${to_remove_no_suffix}" "${trash}" || + _exit_err "Moving ${to_remove_no_suffix} to ${trash} failed." set -- "$@" "${to_remove_no_suffix}" fi done < "${file}" - _techo "Removing $@ ..." - [ "${VVERBOSE}" ] && echo rm "$@" - rm -rf "$@" || _exit_err "Removing $@ failed." + _techo "Removing $@ in ${trash}..." + empty_dir="empty-dir" + mkdir "${empty_dir}" || _exit_err "Empty directory ${empty_dir} cannot be created." + [ "${VVERBOSE}" ] && echo "rsync -a --delete ${empty_dir} ${trash}" + # rsync needs ending slash for directory content + rsync -a --delete "${empty_dir}/" "${trash}/" || _exit_err "Removing $@ failed." 
+ rmdir "${trash}" || _exit_err "Removing ${trash} directory failed" + rmdir "${empty_dir}" || _exit_err "Removing ${empty_dir} directory failed" } display_version() diff --git a/doc/changes/next b/doc/changes/next new file mode 100644 index 0000000..df06edc --- /dev/null +++ b/doc/changes/next @@ -0,0 +1 @@ +* Performance: replace rm -rf with faster rsync --delete empty src dir From ca6d06c2c32c0296dfc8dcb107d469f863fd67e6 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 10 Oct 2019 10:54:56 +0200 Subject: [PATCH 02/12] Add more verbose logging --- ccollect | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ccollect b/ccollect index 05b7014..8e02524 100755 --- a/ccollect +++ b/ccollect @@ -177,11 +177,12 @@ delete_from_file() _techo "Removing $@ in ${trash}..." empty_dir="empty-dir" mkdir "${empty_dir}" || _exit_err "Empty directory ${empty_dir} cannot be created." - [ "${VVERBOSE}" ] && echo "rsync -a --delete ${empty_dir} ${trash}" + [ "${VVERBOSE}" ] && echo "Starting: rsync -a --delete ${empty_dir} ${trash}" # rsync needs ending slash for directory content rsync -a --delete "${empty_dir}/" "${trash}/" || _exit_err "Removing $@ failed." 
rmdir "${trash}" || _exit_err "Removing ${trash} directory failed" rmdir "${empty_dir}" || _exit_err "Removing ${empty_dir} directory failed" + [ "${VVERBOSE}" ] && echo "Finished: rsync -a --delete ${empty_dir} ${trash}" } display_version() From 30abef474dea75705ee005a0453ea536e73d2fb8 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Wed, 16 Oct 2019 14:03:13 +0200 Subject: [PATCH 03/12] Delete in background and finally wait for children --- ccollect | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ccollect b/ccollect index 8e02524..7ede4dc 100755 --- a/ccollect +++ b/ccollect @@ -711,7 +711,7 @@ while [ "${source_no}" -lt "${no_sources}" ]; do if [ "$ret" -eq 0 ]; then _techo "Incomplete backups: $(echo $(cat "${TMP}"))" if [ -f "${c_delete_incomplete}" ]; then - delete_from_file "${TMP}" "${CMARKER}" + delete_from_file "${TMP}" "${CMARKER}" & fi fi @@ -729,7 +729,7 @@ while [ "${source_no}" -lt "${no_sources}" ]; do ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n "${remove}" > "${TMP}" || \ _exit_err "Listing old backups failed" - delete_from_file "${TMP}" + delete_from_file "${TMP}" & fi # @@ -820,6 +820,9 @@ while [ "${source_no}" -lt "${no_sources}" ]; do _techo "Backup lasted: ${hours}:${minutes}:${seconds} (h:m:s)" unlock "${name}" + + # wait for children (doing delete_from_file) if any still running + wait ) || exit done From bfb3c6338c3e7a5bb1f4dcf8c3e1aa9b1117fad0 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Wed, 16 Oct 2019 15:50:54 +0200 Subject: [PATCH 04/12] _techo instead of very verbose --- ccollect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccollect b/ccollect index 7ede4dc..f424f11 100755 --- a/ccollect +++ b/ccollect @@ -182,7 +182,7 @@ delete_from_file() rsync -a --delete "${empty_dir}/" "${trash}/" || _exit_err "Removing $@ failed." 
rmdir "${trash}" || _exit_err "Removing ${trash} directory failed" rmdir "${empty_dir}" || _exit_err "Removing ${empty_dir} directory failed" - [ "${VVERBOSE}" ] && echo "Finished: rsync -a --delete ${empty_dir} ${trash}" + _techo "Removing $@ in ${trash} finished." } display_version() From 702cdf931e6b76be8aaad5c8cbfaaf4f9e2dc168 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 17 Oct 2019 06:52:51 +0200 Subject: [PATCH 05/12] Use hidden directory for deletion --- ccollect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccollect b/ccollect index f424f11..41f23c5 100755 --- a/ccollect +++ b/ccollect @@ -162,7 +162,7 @@ delete_from_file() [ $# -eq 1 ] && suffix="$1" && shift # dirs for deletion will be moved to this trash dir inside destination dir # - for fast mv operation - trash="$(mktemp -d "trash.XXXXXX")" + trash="$(mktemp -d ".trash.XXXXXX")" while read to_remove; do mv "${to_remove}" "${trash}" || _exit_err "Moving ${to_remove} to ${trash} failed." From 51dcf4a02f3b57bc94c1bc15b955ef097bfbf3a6 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 17 Oct 2019 06:52:51 +0200 Subject: [PATCH 06/12] Use hidden empty directory for deletion --- ccollect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccollect b/ccollect index 41f23c5..c744e27 100755 --- a/ccollect +++ b/ccollect @@ -175,7 +175,7 @@ delete_from_file() fi done < "${file}" _techo "Removing $@ in ${trash}..." - empty_dir="empty-dir" + empty_dir=".empty-dir" mkdir "${empty_dir}" || _exit_err "Empty directory ${empty_dir} cannot be created." 
[ "${VVERBOSE}" ] && echo "Starting: rsync -a --delete ${empty_dir} ${trash}" # rsync needs ending slash for directory content From 1e18e71b9de892b255516cdd9fbfdfa4304f7c57 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 17 Oct 2019 07:42:31 +0200 Subject: [PATCH 07/12] Use oldest backup as destination dir without deletion --- ccollect | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/ccollect b/ccollect index c744e27..cf7abe9 100755 --- a/ccollect +++ b/ccollect @@ -715,6 +715,12 @@ while [ "${source_no}" -lt "${no_sources}" ]; do fi fi + # + # Include current time in name, not the time when we began to remove above + # + export destination_name="${INTERVAL}.$(${CDATE}).$$-${source_no}" + export destination_dir="${ddir}/${destination_name}" + # # Check: maximum number of backups is reached? # @@ -723,13 +729,26 @@ while [ "${source_no}" -lt "${no_sources}" ]; do _techo "Existing backups: ${count} Total keeping backups: ${c_interval}" if [ "${count}" -ge "${c_interval}" ]; then - remove="$((${count} - ${c_interval} + 1))" - _techo "Removing ${remove} backup(s)..." + # Use oldest directory as new backup destination directory. + # It need not to be deleted, rsync will sync its content. + oldest_bak=$(ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n 1 || \ + _exit_err "Listing oldest backup failed") + _techo "Using ${oldest_bak} for destination dir" + mv "${oldest_bak}" "${destination_dir}" || + _exit_err "Moving oldest backup ${oldest_bak} to ${destination_dir} failed." + # Touch dest dir so it is not sorted wrong in listings below. + touch "${destination_dir}" - ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n "${remove}" > "${TMP}" || \ - _exit_err "Listing old backups failed" + # We have something to remove only if count > interval. + if [ "${count}" -gt "${c_interval}" ]; then + remove="$((${count} - ${c_interval}))" + _techo "Removing ${remove} backup(s)..." 
- delete_from_file "${TMP}" & + ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n "${remove}" > "${TMP}" || \ + _exit_err "Listing old backups failed" + + delete_from_file "${TMP}" & + fi fi # @@ -746,12 +765,6 @@ while [ "${source_no}" -lt "${no_sources}" ]; do _techo "Hard linking from ${last_dir}" fi - # - # Include current time in name, not the time when we began to remove above - # - export destination_name="${INTERVAL}.$(${CDATE}).$$-${source_no}" - export destination_dir="${ddir}/${destination_name}" - # # Mark backup running and go back to original directory # From 2788de47b8a7f5a23192b48f421ed37a059f8bb0 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 17 Oct 2019 07:52:53 +0200 Subject: [PATCH 08/12] Improve log line --- ccollect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccollect b/ccollect index cf7abe9..af7a2c1 100755 --- a/ccollect +++ b/ccollect @@ -733,7 +733,7 @@ while [ "${source_no}" -lt "${no_sources}" ]; do # It need not to be deleted, rsync will sync its content. oldest_bak=$(ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n 1 || \ _exit_err "Listing oldest backup failed") - _techo "Using ${oldest_bak} for destination dir" + _techo "Using ${oldest_bak} for destination dir ${destination_dir}" mv "${oldest_bak}" "${destination_dir}" || _exit_err "Moving oldest backup ${oldest_bak} to ${destination_dir} failed." # Touch dest dir so it is not sorted wrong in listings below. From c39205d30898110b40040c6fad2d6a026013b977 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 17 Oct 2019 08:03:12 +0200 Subject: [PATCH 09/12] Exclude destination dir from listing for last dir --- ccollect | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ccollect b/ccollect index af7a2c1..35d2071 100755 --- a/ccollect +++ b/ccollect @@ -753,8 +753,10 @@ while [ "${source_no}" -lt "${no_sources}" ]; do # # Check for backup directory to clone from: Always clone from the latest one! 
+ # Exclude destination_dir from listing: it may be the oldest backup directory, + # renamed, touched and reused as the new destination. # - last_dir="$(ls -${TSORT}p1 | grep '/$' | head -n 1)" || \ + last_dir="$(ls -${TSORT}p1 | grep '/$' | grep -v "${destination_dir}" | head -n 1)" || \ _exit_err "Failed to list contents of ${ddir}." # From 7701bdb0a87413bc756ea408656e32d65d7dfb20 Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 17 Oct 2019 08:06:13 +0200 Subject: [PATCH 10/12] Use destination dir basename --- ccollect | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ccollect b/ccollect index 35d2071..f5aa145 100755 --- a/ccollect +++ b/ccollect @@ -756,7 +756,8 @@ while [ "${source_no}" -lt "${no_sources}" ]; do # Exclude destination_dir from listing: it may be the oldest backup directory, # renamed, touched and reused as the new destination. # - last_dir="$(ls -${TSORT}p1 | grep '/$' | grep -v "${destination_dir}" | head -n 1)" || \ + dest_dir_name=$(basename "${destination_dir}") + last_dir="$(ls -${TSORT}p1 | grep '/$' | grep -v "${dest_dir_name}" | head -n 1)" || \ _exit_err "Failed to list contents of ${ddir}." 
# From e44dede92f9114db1ffec24a46f215fb270ed08a Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 17 Oct 2019 09:05:55 +0200 Subject: [PATCH 11/12] ++changelog --- doc/changes/next | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changes/next b/doc/changes/next index df06edc..a68cbbc 100644 --- a/doc/changes/next +++ b/doc/changes/next @@ -1 +1 @@ -* Performance: replace rm -rf with faster rsync --delete empty src dir +* Improve performance, improve process of deletion of old backups From de720ecfe9a71bd2fad3e5ef429629007e8ea6ed Mon Sep 17 00:00:00 2001 From: Darko Poljak Date: Thu, 17 Oct 2019 11:53:46 +0200 Subject: [PATCH 12/12] If renaming oldest bak dir fails then fallback to removing it --- ccollect | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ccollect b/ccollect index f5aa145..b8011ba 100755 --- a/ccollect +++ b/ccollect @@ -734,14 +734,17 @@ while [ "${source_no}" -lt "${no_sources}" ]; do oldest_bak=$(ls -${TSORT}1r | grep "^${INTERVAL}\\." | head -n 1 || \ _exit_err "Listing oldest backup failed") _techo "Using ${oldest_bak} for destination dir ${destination_dir}" - mv "${oldest_bak}" "${destination_dir}" || - _exit_err "Moving oldest backup ${oldest_bak} to ${destination_dir} failed." - # Touch dest dir so it is not sorted wrong in listings below. - touch "${destination_dir}" + if mv "${oldest_bak}" "${destination_dir}"; then + # Touch dest dir so it is not sorted wrong in listings below. + touch "${destination_dir}" - # We have something to remove only if count > interval. - if [ "${count}" -gt "${c_interval}" ]; then + # We have something to remove only if count > interval. remove="$((${count} - ${c_interval}))" + else + _techo_err "Renaming oldest backup ${oldest_bak} to ${destination_dir} failed, removing it." + remove="$((${count} - ${c_interval} + 1))" + fi + if [ "${remove}" -gt 0 ]; then _techo "Removing ${remove} backup(s)..." ls -${TSORT}1r | grep "^${INTERVAL}\\." 
| head -n "${remove}" > "${TMP}" || \