#!/bin/bash
#
# Ken Zahorec 2015-02-16
#
# This script prepares hypervisor VMs for backup and then clones them to a backup area using libvirt.
# It emphasizes backuppc processing by suspending any running VMs before calling virt-clone to create VM dumps in a designated dump area.
#
# This script first looks for currently running VMs and suspends each of them.
# It calls virt-clone on each of the suspended VMs.
# It then enumerates the VMs in the shutoff state and runs virt-clone on each of them.
# Finally, it resumes the previously running VMs.
# NOTE: This script does NOT dump paused or transitioning VMs--these VMs are not dumped, so will not be backed up via backuppc.
# VMs need to be either shutdown (inactive) or running to be backed up.
# When the script changes, please keep the version number updated via "year.month.day.increment" as follows.
# Release identifier, written as "year.month.day.increment".
version="2015.02.09.00"
# Strict mode: stop on any command that returns an error and on any
# expansion of a variable that was never set.
set -o errexit -o nounset
# Names of the control files and of the dump subdirectory.
dump_subdir="dump"
lockfile="dump.lock"
logfile="dump.log"
# target_script_dir is the directory where you run this script and also the
# location of the VMs dump data.
target_script_dir="/var/lib/libvirt/images/virt-backup"
# Name of this script with any leading directory stripped.
script_name="${0##*/}"
# Label of the operation being performed; filled in later.
operation_type=""
# Date-time stamp used as a prefix for log lines.
date_time="$(date)"
###################################################
# Print how long the current operation has been running.
# Globals read:
#   script_start_sec - start time in seconds since the epoch
#   operation_type   - label of the operation, used in the summary line
# Outputs: two lines on stdout; the exact elapsed seconds, then a summary that
#   switches to coarser units for long runs (minutes from 10 minutes, hours
#   from 4 hours, days from 2 days).
function print_run_time () {
  # All working values are local so that nothing leaks into the rest of the script.
  local now_sec elapsed_sec elapsed_min elapsed_hour elapsed_day run_time

  # Get current time in seconds
  now_sec=$(date +%s)
  # Difference between now and start time is the run time
  elapsed_sec=$(( now_sec - script_start_sec ))
  elapsed_min=$(( elapsed_sec / 60 ))
  elapsed_hour=$(( elapsed_min / 60 ))
  elapsed_day=$(( elapsed_hour / 24 )) # hopefully we need go no further

  if (( elapsed_day >= 2 )); then
    run_time="approximately $elapsed_day days"
  elif (( elapsed_hour >= 4 )); then
    run_time="approximately $elapsed_hour hours"
  elif (( elapsed_min >= 10 )); then
    run_time="approximately $elapsed_min minutes"
  else
    run_time="$elapsed_sec seconds"
  fi

  echo " ==> elapsed $elapsed_sec seconds"
  echo " ==> $operation_type required $run_time"
}
###################################################
# Dump one VM by cloning it with virt-clone into the dump area and then
# undefining the temporary clone, which leaves the cloned disk images behind.
# Arguments:
#   $1 - name of the VM to dump
# Globals read:    DUMPDIR, dump_subdir
# Globals written: date_time (refreshed so that log lines carry a current stamp)
# Outputs: progress, SUCCESS and WARNING lines on stdout.
# Notes:
#   errexit is switched off while virt-clone, cp and virsh undefine run so that
#   a failure for this VM does not stop the whole script; afterwards the
#   caller's errexit setting is restored.
function create_and_dump_temporary_clone () {
  # grab the first argument which contains the VM name to use
  local vm_target="$1"
  local clone_stamp="c"
  local vm_dump_dir="$DUMPDIR/$dump_subdir/$vm_target"
  local clone_name alt_clone_stamp num_drives index
  local errexit_was_on=0

  date_time=$(date)
  echo "$date_time : create_and_dump_temporary_clone will attempt to dump VM $vm_target"

  # Create the specific vm dump subdirectory if it does not already exist.
  mkdir -p "$vm_dump_dir"

  # Overall name of the temporary clone can not exceed 50 characters or virt-clone will fail.
  # The normal clone name is "<vm name>-c", so a VM name longer than 48 characters needs the
  # alternate, shortened form. The length tested is that of the VM name passed to this function.
  if [ ${#vm_target} -gt 48 ]; then
    # Produce a 6 char unique postpend stamp for clone names xxxxxx (hour-minute-second)
    alt_clone_stamp=$(date +%H%M%S)
    # Combine the name truncated to 42 characters with the stamp (49 characters in total).
    clone_name="${vm_target:0:42}-$alt_clone_stamp"
    echo "VM name $vm_target-$clone_stamp is greater than 50 characters. Using $clone_name instead."
    # Sleep briefly to ensure we never get an identical alt_clone_stamp during the same dump operation.
    # We never want to produce a name collision when creating clones or they will fail.
    sleep 1
  else
    clone_name="$vm_target-$clone_stamp"
  fi

  # Multiple virtual disks:
  # Some vms are set up with multiple virtual hard disks. Each one needs a target in the
  # temporary clone, supplied by repeating the -f parameter of virt-clone.
  # "virsh domblklist" enumerates them: v?? is a VirtIO disk, h?? is an IDE disk, s?? is SCSI,
  # and a source of "-" implies a CDROM drive that is disconnected. For example:
  #
  # [root@et-virt105 virt-backup]# virsh domblklist puppetmaster-clone-initialstate
  # Target Source
  # ------------------------------------------------
  # vda /var/lib/libvirt/images/puppetmaster-clone-initialstate.img
  # vdb /var/lib/libvirt/images/puppetmaster-clone-initialstate-1.img
  # hda /var/lib/libvirt/images/puppetmaster-clone-initialstate-2.img
  # hdc -
  # sda /var/lib/libvirt/images/puppetmaster-clone-initialstate-3.img
  #
  # The number of lines containing a filespec (a "/") is taken as the number of
  # virtual storage devices.
  #
  # Use of virt-clone:
  # We can pass more "-f" option entries than virtual disks, it will populate only
  # as many as it needs. Read-only drives will remain rooted to the original in the
  # system in the new clone XML file. Read-only virtual storage drives are not dumped.
  #
  # grep -c exits non-zero when it counts 0 lines; "|| true" keeps that from being
  # treated as an error while still yielding the count "0".
  num_drives=$(virsh domblklist "$vm_target" | grep -c / || true)
  echo "VM $vm_target has $num_drives virtual disks"

  # Check number of virtual drives to make sure there are no more than 5, a reasonable limit.
  if [ "$num_drives" -le 5 ]; then
    echo "$date_time : calling virt-clone to clone vm $vm_target ---> $clone_name"
    # Remember whether errexit is active, then disable it: keep chugging even if the
    # virt-clone commands fail for this particular VM.
    case $- in
      *e*) errexit_was_on=1 ;;
    esac
    set +e
    # The command pipe to grep below reduces a rather lengthy output of continued progress
    # updates during the virt-clone process to the lines that mention the clone name.
    virt-clone --connect=qemu:///system -o "$vm_target" -n "$clone_name" \
      -f "$vm_dump_dir/$clone_name-1.img" \
      -f "$vm_dump_dir/$clone_name-2.img" \
      -f "$vm_dump_dir/$clone_name-3.img" \
      -f "$vm_dump_dir/$clone_name-4.img" \
      -f "$vm_dump_dir/$clone_name-5.img" | grep -F -- "$clone_name"
    # Copy both the original VM XML and the temporary cloned VM XML files to the dump area.
    cp -pv "/etc/libvirt/qemu/$vm_target.xml" "$vm_dump_dir"
    cp -pv "/etc/libvirt/qemu/$clone_name.xml" "$vm_dump_dir"
    # Undefine the clone. This does not delete the clone's disk image(s), which remain in the dump area.
    echo "$date_time : calling virsh undefine to remove temporary clone $clone_name"
    virsh undefine "$clone_name"
    # Return to the caller's errexit behavior.
    if [ "$errexit_was_on" -eq 1 ]; then
      set -e
    fi

    # Check to see if the images were created. If a virt disk is read-only in the VM, it will
    # not get dumped. Users need to see that in the log.
    date_time=$(date)
    for (( index = 1; index <= num_drives; index++ )); do
      if [ -e "$vm_dump_dir/${clone_name}-${index}.img" ]; then
        echo -e "$date_time : SUCCESS - dump virtual storage disk ${clone_name}-${index} succeeded \n"
      else
        # Copy of the vm virtual storage disk image failed
        echo "$date_time : WARNING - dump of virtual storage volume ${clone_name}-${index} ==> FAILED <=="
        echo -e "==> Review VM settings. Perhaps a mounted CDROM image or other type of read-only storage is mounted.\n"
      fi
    done
  else
    echo "====> ERROR : VM $vm_target has more than 5 virtual disks, it will not be dumped."
  fi
}
#########################################################
#########################################################
# Script processing basically starts here
################# script logging control ############
# FOR DEBUGGING USE ONLY: From this point onward, all console std out and std
# err output gets appended to the log file. Normally we do not use this as all
# stream outputs go to the backuppc logs for this host. If script is not run
# from backup area and we used this feature, we would lose log information.
# exec 3>&1 1>>${logfile} 2>&1
############### script dependencies check ###########
# Both tools are required. When one is missing the script reports it on
# stderr and stops with a non-zero exit status, so that the caller can tell
# that nothing was dumped.
### check for virsh
if ! [ -x "$(command -v virsh)" ]; then
  echo 'The virsh utility appears to be missing. This script is designed for KVM/Qemu/libvirt systems.' >&2
  exit 1
fi
### check for virt-clone
if ! [ -x "$(command -v virt-clone)" ]; then
  echo 'The virt-clone utility appears to be missing. Perhaps install package virt-install.' >&2
  exit 1
fi
# Parse the command line options.
# Arguments: the script's command line arguments ("$@").
# Globals written:
#   DUMPDIR        - parent dump directory (default below), without a trailing slash
#   CONCURRENT     - 1 when -o | --concurrent was given (also sets DUMP=1)
#   DUMP           - 1 when -d | --dump or -o | --concurrent was given
#   CLEAN          - 1 when -c | --clean was given
#   HELP           - 1 when -h | --help or an unknown parameter was given
#   operation_type - set to "dump cleanup" for -c | --clean
# Outputs: a warning on stdout for each unknown parameter.
function parse_args () {
  local arg
  # Set defaults
  DUMPDIR="/var/lib/libvirt/images/virt-backup"
  CONCURRENT=0
  DUMP=0
  CLEAN=0
  HELP=0
  for arg in "$@"; do
    case "$arg" in
      -d=*|--dumpdir=*)
        # Everything after the first "=" is the directory. Parameter expansion
        # keeps whitespace and glob characters in the value intact.
        DUMPDIR=${arg#*=}
        # If present, remove a trailing slash
        DUMPDIR=${DUMPDIR%/}
        ;;
      -o|--concurrent)
        CONCURRENT=1
        DUMP=1
        ;;
      -d|--dump)
        DUMP=1
        ;;
      -c|--clean)
        CLEAN=1
        operation_type="dump cleanup"
        ;;
      -h|--help)
        HELP=1
        ;;
      *)
        echo "==>WARNING: An unknown or unsupported parameter: $arg"
        HELP=1
        ;;
    esac
  done
}
parse_args "$@"
# Print the usage and help text on stdout.
# Globals read: script_name, version
function print_help () {
  printf '%s version %s help information:\nUsage:\n\n' "$script_name" "$version"
  printf ' %s' "$script_name"
  # Quoted delimiter: the text below is printed literally, without expansion.
  cat <<'EOF'
 [ -d | --dump ]|[ -o | --concurrent ] [ -c | --clean ][ -d=(dir) | --dumpdir=(dir) ] [-h | --help]
----------------------------------------------------------------------------
This script is designed to be used for pre-backup and post-backup dump and
cleanup operations in conjunction with BackupPC or any other enterprise grade
backup facility. This script provides a data dump of VM data at the
hypervisor. It is designed to work with KVM/Qemu/libvirtd systems. It has been
tested and used on CentOS v6.X and CentOS v7 systems.
Seems reasonable that this script would run on Debian based KVM/Qemu/Libvirt
systems as well.
This script should be used during non-essential or non-business user times.
It causes stress at the hypervisor due to the intense amount of IO required
to move vm data.
----------- -------------------
Options are described below:
-d | --dump
Perform a normal clone dump operation of all running and shutdown VMs at the
hypervisor. This pauses all running VMs before cloning and dumping them.
After dumping each vm, all of the previously running vms are then resumed.
This provides least load at the hypervisor, but will result in unavailable
vms during the time it takes the system to dump all of the vm data.
-o | --concurrent
Perform a high availability dump operation of all running and shutdown VMs at
the hypervisor. This will pause VMs individually and perform the clone dump,
while other VMs remain running.
On less capable servers, concurrent dumps can impact vm user experience
because of limited IO capacity. During the dump, other running VMs can
become generally unresponsive resulting in user complaints. On more capable
servers this option can limit vm downtime during dump operations.
-c | --clean
Perform a cleanup of the dumpdir location. This removes the dump lockfile and
deletes all files that were previously dumped. It clears out the dump area to
ready it for a future dump operation.
-d=(dir) | --dumpdir=(dir)
Specifying the dumpdir is not required, unless you want to use something other
than the default value. The default dumpdir is
/var/lib/libvirt/images/virt-backup
------------- Additional Info
The dumpdir specified is used as the *parent* dump control directory. The
dump subdirectory, "dump/", below the parent, is the actual target directory
for the vm dump data. The dumpdir contains the lockfile control file and
typically this script.
You can place this script in the dumpdir and run it from there. This way you
will get the very same script used for dumping, and cleanup, included in the
backup data and it will remain on the system for backup use. Be sure to
specify the dumpdir as the backup target in the BackupPC configuration for
the hypervisor host.
This script will not dump paused, or transitional vms. It will dump vms which
are active running and/or inactive shutdown at invocation time. It will not
dump images of vms which have their virtual disks set to read-only--it will
dump only the vm XML for these vms.
This script has not been tested with vms which are run without storage.
It dumps up to five virtual disks per vm. A vm with more than five virtual
disks is reported as an error and is not dumped.

EOF
}
# Show the help and stop when help was requested or an unknown parameter was
# seen. HELP falls back to 0 here so the check itself never fails when unset.
if [ "${HELP:-0}" -eq 1 ]; then
  print_help
  exit 0
fi
# Remove the dump lockfile and all previously dumped data below DUMPDIR, then
# recreate an empty dump subdirectory.
# Globals read:    DUMPDIR, lockfile, dump_subdir, script_name
# Globals written: date_time
# Outputs: progress lines and the run time summary on stdout.
# All paths are quoted so that a dumpdir containing whitespace is handled.
function clean_dump_area () {
  local lock_path="$DUMPDIR/$lockfile"
  local dump_path="$DUMPDIR/$dump_subdir"

  echo " ==> Dump cleanup is requested. Proceeding..."
  # Cleanup the lock file
  if [ -e "$lock_path" ]; then
    # Lockfile exists, delete it.
    echo "Deleting $DUMPDIR/$lockfile ..."
    rm -f -- "$lock_path"
  else
    # Lockfile does not exist. Warn and continue.
    echo "$DUMPDIR/$lockfile was not detected - continuing anyway ..."
  fi
  # Cleanup the dump area
  if [ -e "$dump_path" ]; then
    echo "Deleting data found at $DUMPDIR/$dump_subdir"
    rm -rf -- "$dump_path"
  else
    echo "No VM dump data detected at $DUMPDIR/$dump_subdir"
  fi
  # Keep the dump-subdir directory available for possible manual restores from the backup server.
  # This may not be needed for restoration from backup server. It is here as a safeguard.
  echo "create new empty dump area $DUMPDIR/$dump_subdir"
  mkdir -p -- "$dump_path"
  print_run_time
  date_time=$(date)
  echo "$date_time : **************** COMPLETED $script_name"
}
echo "$date_time : **************** BEGIN $script_name version $version"
# Get current time in seconds
script_start_sec=$(date +%s)
# CLEAN falls back to 0 here so the check itself never fails when unset.
if [ "${CLEAN:-0}" -eq 1 ]; then
  clean_dump_area
  # We are done, nothing else to do.
  exit 0
fi
# Announce the requested dump type. A dump is performed from here on even when
# no dump option was given, so the operation is always labelled; without the
# concurrent option it is a standard idle dump.
operation_type="STANDARD IDLE dump"
if [ "$DUMP" -eq 1 ]; then
  echo " ==> Dump operation is requested"
  echo " Parent dumpdir path is = $DUMPDIR"
  echo " VM dump data path is = $DUMPDIR/$dump_subdir"
fi
if [ "$CONCURRENT" -eq 1 ]; then
  operation_type="CONCURRENT dump"
  echo " ==> A dump of type CONCURRENT is requested"
fi
# At this point in the script, all we have to do is dump or concurrent dump.
# We are not checking the $DUMP variable because there is no other decision to make.
# Proceeding with dump operations...
# Make sure that dumpdir directory exists. Paths are quoted so that a dumpdir
# containing whitespace works. Aborting here is reported with a non-zero status.
if ! [ -d "$DUMPDIR/" ]; then
  echo "$date_time : ERROR ABORT $script_name - $DUMPDIR does not exist!!"
  exit 1
fi
# If the lockfile file exists, then we have not completed a previous backup operation.
# The backuppc server may still be collecting the dump from this host.
# The previous dump attempt may have prematurely failed.
# Do not start another dump if one is already in progress and/or the backuppc server has not yet invoked the dump cleanup script.
if [ -e "$DUMPDIR/$lockfile" ]; then
  echo " WARNING $script_name - WARNING backup and/or dump may already being runing."
  echo " =====> detected lockfile at $DUMPDIR/$lockfile <===== "
  echo " Dump may already be running. Use --clean option first to remove the lock and try again."
  # Nothing was dumped, so report failure to the caller.
  exit 1
else
  # Create the lockfile and begin...
  echo "$date_time dump processing started" > "$DUMPDIR/$lockfile"
  echo "$date_time : $DUMPDIR/$lockfile created, proceeding with dump operation..."
fi
# Create the vm dump subdirectory if it does not already exist.
mkdir -p "$DUMPDIR/$dump_subdir"
# Print the VM names reported by "virsh list <options> --name", one per line.
# Arguments: the state filter options to pass to "virsh list" (e.g. --inactive)
# Outputs:   one VM name per line on stdout; blank lines in the virsh output are skipped
# Returns:   the status of virsh when it fails, otherwise 0
function list_vm_names () {
  local listing name
  listing=$(virsh list "$@" --name) || return
  while IFS= read -r name; do
    if [ -n "$name" ]; then
      printf '%s\n' "$name"
    fi
  done <<< "$listing"
}
# Use virsh to get an array of running VMs on the hypervisor.
# We want only the names of the VMs that are in the running state.
# The names are read line by line, so a name is never split on whitespace or
# expanded as a glob pattern.
vms_running=()
running_listing=$(list_vm_names --state-running)
while IFS= read -r vm_name; do
  if [ -n "$vm_name" ]; then
    vms_running+=("$vm_name")
  fi
done <<< "$running_listing"
# Pause each of the running VMs to ready them for the dump operation as prescribed by the "concurrent" option.
# Dump each of the previously running VMs
if [ ${#vms_running[@]} -eq 0 ]; then
  echo "$date_time : no running VMs detected"
else
  # Suspend all running VMs only if we are not dumping concurrently.
  if [ "$CONCURRENT" -eq 0 ]; then
    for i in "${vms_running[@]}"; do
      date_time=$(date)
      echo "$date_time : suspending VM $i"
      virsh suspend "$i"
    done
  fi
  # Dump VMs by creation of a temporary clone.
  for i in "${vms_running[@]}"; do
    # If concurrent, then suspend the respective VM
    if [ "$CONCURRENT" -eq 1 ]; then
      date_time=$(date)
      echo "$date_time : concurrently suspending VM $i"
      virsh suspend "$i"
    fi
    # Perform the VM clone dump...
    create_and_dump_temporary_clone "$i"
    # If concurrent, then resume the respective VM
    if [ "$CONCURRENT" -eq 1 ]; then
      date_time=$(date)
      echo "$date_time : concurrently resuming VM $i"
      virsh resume "$i"
    fi
  done
fi
# Now we deal with the remaining VMs. The previously shutoff, or inactive ones.
vms_inactive=()
inactive_listing=$(list_vm_names --inactive)
while IFS= read -r vm_name; do
  if [ -n "$vm_name" ]; then
    vms_inactive+=("$vm_name")
  fi
done <<< "$inactive_listing"
if [ ${#vms_inactive[@]} -eq 0 ]; then
  echo "$date_time : no inactive VMs detected"
else
  # Dump VMs by creation of a temporary clone.
  for i in "${vms_inactive[@]}"; do
    create_and_dump_temporary_clone "$i"
  done
fi
# Number of VMs that could not be resumed; decides the final exit status.
resume_failures=0
if [ "$CONCURRENT" -eq 0 ]; then
  # Finally we resume all of the previously running VMs, to restore them to originally running state after all of the dumps have completed.
  if [ ${#vms_running[@]} -eq 0 ]; then
    echo "$date_time : There are no running VMs to resume"
  else
    for i in "${vms_running[@]}"; do
      date_time=$(date)
      echo "$date_time : resuming VM $i"
      # A failure to resume one VM must not stop the loop: the remaining VMs
      # would otherwise be left suspended. Warn and carry on instead.
      if ! virsh resume "$i"; then
        echo "$date_time : WARNING - could not resume VM $i, continuing with the remaining VMs"
        resume_failures=$((resume_failures + 1))
      fi
    done
  fi
fi
print_run_time
date_time=$(date)
# We are done for now. Exit
echo "$date_time : **************** COMPLETED $script_name"
# Report failure to the caller when any VM could not be resumed.
if [ "$resume_failures" -gt 0 ]; then
  exit 1
fi
exit 0