#!/bin/bash
# do a parallel backup and restore of specified files
#
# Copyright (C) 2008 Sun Microsystems, Inc.
#
# This file is part of Lustre, http://www.lustre.org.
#
# Lustre is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.
#
# Lustre is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Lustre; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Author: Andreas Dilger
#

VERSION=1.1.0

export LC_ALL=C

RSH=${RSH:-"ssh"}			# use "bash -c" for local testing
SPLITMB=${SPLITMB:-8192}		# target chunk size (uncompressed)
SPLITCOUNT=${SPLITCOUNT:-200}		# number of files to give each client
TAR=${TAR:-"tar"}			# also htar in theory works (untested)

#PROGPATH=$(which $0 2> /dev/null) || PROGPATH=$PWD/$0
# The remote clients are started with $PROGPATH, so a relative invocation
# (anything starting with '.') is anchored at the current directory.
case "$0" in
.*)	PROGPATH="$PWD/$0" ;;
*)	PROGPATH="$0" ;;
esac

PROGNAME="$(basename "$PROGPATH")"
LOGPREFIX="$PROGNAME"

# Print a message on stderr, prefixed with the program (or client) name.
log() {
	echo "$LOGPREFIX: $*" 1>&2
}

# Log an error message and terminate with exit status 1.
fatal() {
	log "ERROR: $*"
	exit 1
}

# Log the given message, print the option summary for the current operation
# ($OP / $FTYPE, as set by set_op_type) and terminate with exit status 1.
usage() {
	log "$*"
	echo "usage: $PROGNAME [-chjvxz] [-C directory] [-e rsh] [-i inputlist]"
	echo -e "\t\t[-l logdir] [-n nodes] [-s splitmb] [-T tar] -f ${FTYPE}base"
	echo -e "\t-c create archive"
	echo -e "\t-C directory: relative directory for filenames (default PWD)"
	echo -e "\t-e rsh: specify the passwordless remote shell (default $RSH)"
	if [ "$OP" = "backup" ]; then
		echo -e "\t-f outputfile: specify base output filename for backup"
	else
		echo -e "\t-f ${OP}filelist: specify list of files to $OP"
	fi
	echo -e "\t-h: print help message and exit (use -x -h for restore help)"
	if [ "$OP" = "backup" ]; then
		echo -e "\t-i inputfile: list of files to backup (default stdin)"
	fi
	echo -e "\t-j: use bzip2 compression on $FTYPE file(s)"
	echo -e "\t-l logdir: directory for output logs"
	echo -e "\t-n nodes: comma-separated list of client nodes to run ${OP}s"
	echo -e "\t-p: pass -p (preserve permissions) to the backup command"
	if [ "$OP" = "backup" ]; then
		echo -e "\t-s splitmb: target size for backup chunks " \
			"(default ${SPLITMB}MiB)"
		echo -e "\t-S splitcount: number of files sent to each client "\
"(default ${SPLITCOUNT})"
	fi
	echo -e "\t-t: list table of contents of tarfile"
	echo -e "\t-T tar: specify the backup command (default $TAR)"
	echo -e "\t-v: be verbose - list all files being processed"
	echo -e "\t-V: print version number and exit"
	echo -e "\t-x: extract files instead of backing them up"
	echo -e "\t-z: use gzip compression on $FTYPE file(s)"
	exit 1
}

# Complain about an option that has no effect in the current mode.
# usage: usage_inactive {option} {value} {mode}
usage_inactive() {
	usage "inactive argument '$1 $2' in '$3' mode"
}

# Derive the operation from a name (the program name or an explicit mode)
# and set OP, FTYPE and TAROP accordingly.  list and restore hand out one
# archive at a time, so they force SPLITCOUNT=1.
set_op_type() {
	case $1 in
	*backup*)	OP=backup; FTYPE=output; TAROP="-c" ;;
	*list*)		OP=list; FTYPE=input; TAROP="-t"; SPLITCOUNT=1 ;;
	*restore*)	OP=restore; FTYPE=input; TAROP="-x"; SPLITCOUNT=1 ;;
	*)		FTYPE="output"; usage "unknown archive operation '$1'";;
	esac
}

#echo ARGV: "$@"

# The default operation comes from the program name; it is set before getopt
# runs so that usage() can print the right help if the options are invalid.
set_op_type "$PROGNAME"

# --fileonly, --remote are internal-use-only options
# Long options followed by ':' take an argument; --permissions is a plain
# flag while --splitmb and --splitcount each need a value.
TEMPARGS=$(getopt -n "$LOGPREFIX" -o cC:e:f:hi:jl:n:ps:S:tT:vVxz --long create,extract,list,restore,directory:,rsh:,outputbase:,help,inputfile:,bzip2,logdir:,nodes:,permissions,splitmb:,splitcount:,tar:,verbose,version,gzip,fileonly,remote: -- "$@") ||
	usage "invalid command-line arguments"

eval set -- "$TEMPARGS"

# parse input arguments, and accumulate the client-specific args
while true; do
	case "$1" in
	-c|--create)
		[ "$OP" != "backup" ] &&
			usage "can't use $1 $TAROP at the same time"
		OP="backup"; ARGS="$ARGS $1"; shift ;;
	-C|--directory)
		GOTODIR="$2"; cd "$2" || usage "error cd to -C $2";
		ARGS="$ARGS $1 \"$2\""; shift 2 ;;
	-e|--rsh)
		RSH="$2"; shift 2;;
	-f|--outputbase)
		OUTPUTBASE="$2";ARGS="$ARGS $1 \"$2\""; shift 2 ;;
	-h|--help)
		# an empty ARGS triggers the help message below
		ARGS=""; break;;
	-i|--inputfile)
		INPUT="$2"; shift 2;;
	-j|--bzip2)
		TARCOMP="-j"; ARGS="$ARGS $1"; shift ;;
	-l|--logdir)
		LOGDIR="$2"; ARGS="$ARGS $1 \"$2\""; shift 2 ;;
	-n|--nodes)
		NODELIST="$NODELIST,$2";
		ARGS="$ARGS $1 \"$2\""; shift 2 ;;
	-p|--permissions)
		PERM="-p"; ARGS="$ARGS $1"; shift ;;
	-s|--splitmb)
		[ "$OP" != "backup" ] && usage_inactive "$1" "$2" "$OP"
		SPLITMB=$2; ARGS="$ARGS $1 \"$2\""; shift 2 ;;
	-S|--splitcount)
		[ "$OP" != "backup" ] && usage_inactive "$1" "$2" "$OP"
		SPLITCOUNT=$2; ARGS="$ARGS $1 \"$2\""; shift 2 ;;
	-t|--list)
		[ "$OP" != "backup" -a "$OP" != "list" ] &&
			usage "can't use $1 $TAROP at the same time"
		OP="list"; ARGS="$ARGS $1"; shift ;;
	-T|--tar)
		TAR="$2"; ARGS="$ARGS $1 \"$2\""; shift 2 ;;
	-v|--verbose)
		[ "$VERBOSE" = "-v" ] && set -vx # be extra verbose
		VERBOSE="-v"; ARGS="$ARGS $1"; shift ;;
	-V|--version)
		echo "$LOGPREFIX: version $VERSION"; exit 0;;
	-x|--extract|--restore)
		[ "$OP" != "backup" -a "$OP" != "restore" ] &&
			usage "can't use $1 $TAROP at the same time"
		OP="restore"; ARGS="$ARGS $1"; shift ;;
	-z|--gzip)
		TARCOMP="-z"; ARGS="$ARGS $1"; shift ;;
	# these commands are for internal use only
	--remote)
		NODENUM="$2"; LOGPREFIX="$(hostname).$2"; shift 2;;
	--fileonly)
		FILEONLY="yes"; shift;;
	--)
		shift; break;;
	*)
		usage "unknown argument '$1'" 1>&2 ;;
	esac
done

# refresh FTYPE/TAROP/SPLITCOUNT for the operation chosen by the options
set_op_type "$OP"

#log "ARGS: $ARGS"

[ -z "$ARGS" ] && usage "$OP a list of files, running on multiple nodes"

# we should be able to use any backup tool that can accept filenames
# from an input file instead of just pathnames on the command-line.
# Unset TARCOMP for htar, as it doesn't support on-the-fly compression.
TAREXT=
case "$(basename "$TAR")" in
htar*)			TARARG="-L"; TAROUT="-f"; TARCOMP=""; MINKB=0 ;;
tar*|gnutar*|gtar*)	TARARG="-T"; TAROUT="-b 2048 -f"; TAREXT=.tar ;;
*)			fatal "unknown archiver '$TAR'" ;;
esac

# Default to a single local client.  This must be set before the free-space
# margin below is scaled by the number of nodes, otherwise an empty node list
# would multiply the margin by zero.
[ -z "$NODELIST" ] && NODELIST="localhost"

if [ "$OP" = "backup" ]; then
	[ -z "$OUTPUTBASE" ] && usage "'-f ${FTYPE}base' must be given for $OP"
	# Make sure we leave some margin free in the output filesystem for the
	# chunks.  If we are dumping to a network filesystem (denoted by having
	# a ':' in the name, not sure how else to check) then we assume this
	# filesystem is shared among all clients and expect the other nodes
	# to also consume space there.
	OUTPUTFS=$(dirname "$OUTPUTBASE")
	NETFS=$(df -P "$OUTPUTFS" | awk '/^[[:alnum:]]*:/ { print $1 }')
	MINKB=${MINKB:-$((SPLITMB * 2 * 1024))}
	if [ "$NETFS" ]; then
		NODECOUNT=$(echo $NODELIST | tr ',' ' ' | wc -w)
		MINKB=$((NODECOUNT * MINKB))
	fi

	# Compress the output files as we go.
	case "$TARCOMP" in
	-z)	TAREXT="$TAREXT.gz";;
	-j)	TAREXT="$TAREXT.bz2";;
	esac
else
	[ -z "$OUTPUTBASE" ] &&
		usage "-f ${OP}filelist must be specified for $OP"
	# we want to be able to use this for a list of files to restore
	# but it is convenient to use $INPUT for reading the pathnames
	# of the tar files during restore/list operations to handle stdin
	[ "$INPUT" ] && usage "-i inputbase unsupported for $OP"
	INPUT=$OUTPUTBASE
	TARARG=""
fi

# If we are writing to a char or block device (e.g. tape) don't add any suffix
# We can't currently specify a different target device per client...
if [ -b "$OUTPUTBASE" -o -c "$OUTPUTBASE" ]; then
	MINKB=0
	[ -z "$LOGDIR" ] && LOGDIR="/var/log"
	LOGBASE="$LOGDIR/$PROGNAME"
elif [ -d "$OUTPUTBASE" ]; then
	usage "-f $OUTPUTBASE must be a pathname, not a directory"
elif [ -z "$LOGDIR" ]; then
	# no log directory given: keep the logs next to the archive chunks
	LOGBASE="$OUTPUTBASE"
else
	LOGBASE="$LOGDIR/$PROGNAME"
fi
LOGBASE="$LOGBASE.$(date +%Y%m%d%H%M)"

# tar up a single list of files into a chunk.  We don't exit if there is an
# error returned, since that might happen frequently with e.g. files moving
# and no longer being available for backup.
# usage: run_one_backup {file_list} {chunk_nr} {chunkbytes}
# Sentinel "filenames" written by the master to a client's stdin: DONE_MSG
# asks the client to finish its work and exit, KILL_MSG asks it to stop now.
DONE_MSG="FINISH_THIS_PROGRAM_NOW_I_TELL_YOU"
KILL_MSG="EXIT_THIS_PROGRAM_NOW_I_TELL_YOU"

# Archive the pathnames listed in {file_list} into one output chunk.
#   $1 - temporary file holding the list of pathnames (removed when done)
#   $2 - chunk number, used in the output, list and log file names
#   $3 - estimated size of the chunk in bytes (only used for logging)
# Waits until $OUTPUTFS has at least $MINKB kB free before starting.
# Writes "OK" or "ERROR=rc" to {logbase}.{node}.{chunk}.done and returns the
# exit status of the archiver.
run_one_backup() {
	#set -vx
	local TMPLIST="$1"
	local CHUNK="$2"
	local CHUNKMB="$(($3 / 1048576))"
	local OUTFILE CHUNKBASE LISTFILE LOG SLEPT FREEKB START ELAPSE RC BYTES

	if [ -b "$OUTPUTBASE" -o -c "$OUTPUTBASE" ]; then
		OUTFILE="$OUTPUTBASE"
	else
		OUTFILE="$OUTPUTBASE.$NODENUM.$CHUNK$TAREXT"
	fi
	CHUNKBASE="$LOGBASE.$NODENUM.$CHUNK"
	LISTFILE="$CHUNKBASE.list"
	LOG="$CHUNKBASE.log"

	# keep a permanent copy of the file list next to the log
	cp "$TMPLIST" "$LISTFILE"

	SLEPT=0
	FREEKB=$(df -P "$OUTPUTFS" 2> /dev/null | tail -n 1 | awk '{print $4}')
	# if df gives no answer, do not wait; the archiver reports any error
	while [ -n "$FREEKB" ] && [ "$FREEKB" -lt "${MINKB:-0}" ]; do
		sleep 5
		SLEPT=$((SLEPT + 5))
		if [ $((SLEPT % 60)) -eq 10 ]; then
			log "waiting ${SLEPT}s for ${MINKB}kB free in $OUTPUTFS"
		fi
		FREEKB=$(df -P "$OUTPUTFS" | tail -n 1 | awk '{print $4}')
	done
	[ $SLEPT -gt 0 ] && log "waited ${SLEPT}s for space in ${OUTPUTFS}"
	log "$LISTFILE started - est. ${CHUNKMB}MB"
	START=$(date +%s)
	# eval lets $TAR carry its own arguments.  The list and output paths
	# are passed as single-quoted "$VAR" so that they are expanded by eval
	# as one word each, even if they contain whitespace.
	# stdout (the file listing) goes to the log only, stderr goes to both
	# the log and the caller, minus tar's "Removing leading" notices.
	eval $TAR $TAROP $PERM $TARARG '"$TMPLIST"' -v $TARCOMP $TAROUT '"$OUTFILE"' \
		2>&1 >>"$LOG" | tee -a "$LOG" | grep -v "Removing leading"
	RC=${PIPESTATUS[0]}
	ELAPSE=$(($(date +%s) - START))
	if [ $RC -eq 0 ]; then
		if [ -f "$OUTFILE" ]; then
			BYTES=$(stat -c '%s' "$OUTFILE")
			CHUNKMB=$((BYTES / 1048576))
			log "$LISTFILE finished - act. ${CHUNKMB}MB/${ELAPSE}s"
		else
			log "$LISTFILE finished OK - ${ELAPSE}s"
		fi
		echo "OK" > "$CHUNKBASE.done"
	else
		echo "ERROR=$RC" > "$CHUNKBASE.done"
		log "ERROR: $LISTFILE exited with rc=$RC"
	fi
	rm -f -- "$TMPLIST"
	return $RC
}

# Restore or list (depending on $OP / $TAROP) a single archive file.
#   $1 - pathname of the archive
# Unless MINKB is 0 (or unset), waits for the archive to become readable.
# Writes "OK" or "ERROR=rc" to {archive}.restore.done and returns the exit
# status of the archiver.  For "list" the listing is copied to stdout.
run_one_restore_or_list() {
	#set -vx
	local INPUTFILE="$1"
	local LOG="$LOGBASE.$(basename "$INPUTFILE").restore.log"
	local SLEPT START ELAPSE RC

	SLEPT=0
	while [ "${MINKB:-0}" != 0 ] && [ ! -r "$INPUTFILE" ]; do
		SLEPT=$((SLEPT + 5))
		if [ $((SLEPT % 60)) -eq 10 ]; then
			log "waiting ${SLEPT}s for $INPUTFILE staging"
		fi
		sleep 5
	done
	[ $SLEPT -gt 0 ] && log "waited ${SLEPT}s for $INPUTFILE staging"
	log "$OP of $INPUTFILE started"
	START=$(date +%s)
	# $PERM (-p) is passed here because preserving permissions is an
	# extract-time option; "$INPUTFILE" is expanded by eval as one word.
	eval $TAR $TAROP $PERM -v $TARCOMP $TAROUT '"$INPUTFILE"' \
		2>&1 >>"$LOG" | tee -a "$LOG" | grep -v "Removing leading"
	RC=${PIPESTATUS[0]}
	ELAPSE=$(($(date +%s) - START))
	[ "$OP" = "list" ] && cat "$LOG"
	if [ $RC -eq 0 ]; then
		log "$INPUTFILE finished OK - ${ELAPSE}s"
		echo "OK" > "$INPUTFILE.restore.done"
	else
		echo "ERROR=$RC" > "$INPUTFILE.restore.done"
		log "ERROR: $OP of $INPUTFILE exited with rc=$RC"
	fi
	return $RC
}

# Run as a remote command and read input filenames from stdin and create tar
# output files of the requested size.  The input filenames can either be:
# "bytes filename" or "filename" depending on whether FILEONLY is set.
#
# Read filenames until we have either a large enough list of small files,
# or we get a very large single file that is backed up by itself.
#
# Never returns: exits with the largest archiver exit status seen, or with
# status 9 when KILL_MSG is received.
run_remotely() {
	#set -vx
	log "started thread"
	RCMAX=0

	[ "$FILEONLY" ] && PARAMS="FILENAME" || PARAMS="BYTES FILENAME"
	if [ "$OP" = "backup" ]; then
		TMPBASE=$PROGNAME.$LOGPREFIX.temp
		TMPFILE="$(mktemp -t "$TMPBASE.$(date +%s).XXXXXXX")" || {
			log "ERROR: unable to create temporary file list"
			exit 1
		}
		OUTPUTFILENUM=0
		SUMBYTES=0
	fi
	BYTES=""

	# $PARAMS is left unquoted on purpose: it holds one or two variable
	# names.  -r keeps backslashes in filenames intact.
	while read -r $PARAMS; do
		[ "$FILENAME" = "$DONE_MSG" -o "$BYTES" = "$DONE_MSG" ] && break
		if [ "$FILENAME" = "$KILL_MSG" -o "$BYTES" = "$KILL_MSG" ]; then
			log "exiting $OP on request"
			[ "$TARPID" ] && kill -9 $TARPID 2> /dev/null
			exit 9
		fi

		case "$OP" in
		list|restore)
			run_one_restore_or_list "$FILENAME"; RC=$? ;;
		backup)
			STAT=($(stat -c '%s %F' "$FILENAME"))
			[ "$FILEONLY" ] && BYTES=${STAT[0]}
			# if this is a directory that has files in it, it will
			# be backed up as part of this (or some other) backup.
			# Only include it in the backup if empty, otherwise
			# the files therein will be backed up multiple times
			if [ "${STAT[1]}" = "directory" ]; then
				NUM=$(find "$FILENAME" -maxdepth 1 | head -2 | wc -l)
				[ "$NUM" -gt 1 ] && continue
			fi
			[ "$VERBOSE" ] && log "$FILENAME"

			# A file larger than SPLITMB * 1398101 bytes (about 4/3
			# of the chunk size) is archived by itself.
			# NOTE(review): the original comment said "> 3/4 of
			# chunk size"; the divisor 1398101 = 1048576 / (3/4)
			# actually gives a 4/3 threshold - confirm the intent.
			if [ $((BYTES / 1398101)) -gt $SPLITMB ]; then
				# create a very temp list for just this file
				TARLIST=$(mktemp -t "$TMPBASE.$(date +%s).XXXXXX")
				echo "$FILENAME" > "$TARLIST"
				TARBYTES=$BYTES
			else
				SUMBYTES=$((SUMBYTES + BYTES))
				echo "$FILENAME" >> "$TMPFILE"

				# not large enough input list, keep collecting
				[ $((SUMBYTES >> 20)) -lt $SPLITMB ] && continue

				TARBYTES=$SUMBYTES
				SUMBYTES=0
				TARLIST="$TMPFILE"
				TMPFILE=$(mktemp -t "$TMPBASE.$(date +%s).XXXXXXX")
			fi

			# only one archiver runs at a time on each client:
			# collect the previous chunk before starting the next
			RC=0
			if [ "$TARPID" ]; then
				wait "$TARPID"
				RC=$?
			fi
			run_one_backup "$TARLIST" "$OUTPUTFILENUM" $TARBYTES &
			TARPID=$!
			OUTPUTFILENUM=$((OUTPUTFILENUM + 1))
			;;
		esac
		[ $RC -gt $RCMAX ] && RCMAX=$RC
	done

	if [ "$TARPID" ]; then
		wait "$TARPID"
		RC=$?
		[ $RC -gt $RCMAX ] && RCMAX=$RC
	fi

	if [ -s "$TMPFILE" ]; then
		# archive whatever is left in the partially filled list
		run_one_backup "$TMPFILE" "$OUTPUTFILENUM" $SUMBYTES
		RC=$?
		[ $RC -gt $RCMAX ] && RCMAX=$RC
	elif [ "$TMPFILE" ]; then
		# nothing left over: do not leave the empty list behind
		rm -f -- "$TMPFILE"
	fi
	exit $RCMAX
}

# If we are a client then just run that subroutine and exit
[ "$NODENUM" ] && run_remotely && exit 0

# Tell the clients to exit.  Their input pipes might be busy so it may
# take a while for them to consume the files and finish.
CLEANING=no
# Ask every client to finish (DONE_MSG on each client file descriptor) and
# wait until no "$PROGNAME ... remote" process is left in the process table.
# Sets CLEANING=yes so that a second signal escalates in do_cleanup, and
# clears the EXIT trap once the clients are gone.
cleanup() {
	log "cleaning up remote processes"
	for FD in $(seq $BASEFD $((BASEFD + NUMCLI - 1))); do
		echo "$DONE_MSG" >&$FD
	done
	CLEANING=yes

	SLEPT=0
	RUN=$(ps auxww | grep -E -v "grep|bash" | grep -c "$PROGNAME.*remote")
	while [ $RUN -gt 0 ]; do
		set +vx
		#ps auxww | grep "$PROGNAME.*remote" | egrep -v "grep|bash"
		sleep 1
		SLEPT=$((SLEPT + 1))
		[ $((SLEPT % 30)) -eq 0 ] &&
			log "wait for $RUN processes to finish"
		[ $((SLEPT % 300)) -eq 0 ] &&
			ps auxww | grep "$PROGNAME.*remote" |
				grep -E -v "grep|bash"
		RUN=$(ps auxww | grep -E -v "grep|bash" |
		      grep -c "$PROGNAME.*remote")
	done
	trap - EXIT
}

# Signal/exit handler.  The first invocation just runs cleanup; if it is
# entered again while cleanup is already in progress (CLEANING=yes) the
# clients are sent KILL_MSG and any remaining remote-shell processes are
# killed before cleanup is run once more.
do_cleanup() {
	if [ "$CLEANING" = "yes" ]; then
		log "killing all remote processes - may not stop immediately"
		for FD in $(seq $BASEFD $((BASEFD + NUMCLI - 1))); do
			echo "$KILL_MSG" >&$FD
		done
		sleep 1
		# The pattern is handed to awk with -v so that $PROGNAME is
		# really expanded; the awk/grep helpers and this shell itself
		# are excluded from the kill list.
		PROCS=$(ps auxww | awk -v pat="$PROGNAME.*remote" -v self="$$" \
			'$0 ~ pat && !/awk|grep/ && $2 != self { print $2 }')
		[ "$PROCS" ] && kill -9 $PROCS
		trap - EXIT
	fi
	cleanup
}

# values that only need to be determined on the master
# always read from stdin, even if it is a file, to be more consistent
case "$INPUT" in
-|"")	INPUT="standard input";;
*)	if [ ! -r "$INPUT" ]; then
		[ "$VERBOSE" ] && ls -l "$INPUT"
		usage "can't read input file '$INPUT'"
	fi
	exec <"$INPUT" ;;
esac

# if unspecified, run remote clients in the current PWD to get correct paths
[ -z "$GOTODIR" ] && ARGS="$ARGS -C \"$PWD\""

# main()
# Client N is fed through file descriptor BASEFD + N; NUMCLI counts the
# clients that were started.
BASEFD=100
NUMCLI=0

# Check if the input list has the file size specified or not.  Input
# lines should be of the form "{bytes} {filename}" or "{filename}".
# If no size is given then the file sizes are determined by the clients
# to do the chunking (useful for making a full backup, but not as good
# at evenly distributing the data among clients).  In rare cases the first
# file specified may have a blank line and no size - check that as well.
if [ "$OP" = "backup" ]; then
	# Peek at the first input line to find out which format is in use.
	# -r keeps backslashes in the pathname intact.
	read -r BYTES FILENAME
	if [ -z "$FILENAME" -a -e "$BYTES" ]; then
		# a single word that names an existing file: no size column
		FILENAME="$BYTES"
		BYTES=""
		FILEONLY="yes" && ARGS="$ARGS --fileonly"
	elif [ -e "$BYTES $FILENAME" ]; then
		# the whole line is an existing pathname containing a blank
		FILENAME="$BYTES $FILENAME"
		BYTES=""
		FILEONLY="yes" && ARGS="$ARGS --fileonly"
	elif [ ! -e "$FILENAME" ]; then
		log "input was '$BYTES $FILENAME'"
		fatal "first line of '$INPUT' is not a file"
	fi
else
	# list/restore input is always a plain list of archive pathnames
	FILEONLY="yes" && ARGS="$ARGS --fileonly"
fi

# kill the $RSH processes if we get a signal
trap do_cleanup INT EXIT

# start up the remote processes, each one with its stdin attached to a
# different output file descriptor, so that we can communicate with them
# individually when sending files to back up.  We generate a remote log
# file and also return output to this process.
for CLIENT in $(echo $NODELIST | tr ',' ' '); do
	FD=$((BASEFD+NUMCLI))
	LOG="$OUTPUTBASE.$CLIENT.$FD.log"
	# eval is needed because the descriptor number is held in a variable
	eval "exec $FD> >($RSH $CLIENT '$PROGPATH --remote=$NUMCLI $ARGS')"
	RC=$?
	if [ $RC -eq 0 ]; then
		log "starting $0.$NUMCLI on $CLIENT"
		NUMCLI=$((NUMCLI + 1))
	else
		# report the saved status; $? here would be that of the test
		log "ERROR: failed '$RSH $CLIENT $PROGPATH': RC=$RC"
	fi
done

if [ "$NUMCLI" -eq 0 ]; then
	fatal "unable to start any threads"
fi

# index of the client that receives the next file
CURRCLI=0

# We don't want to use "BYTES FILENAME" if the input doesn't include the
# size, as this might cause problems with files with whitespace in them.
# Instead we just have two different loops depending on whether the size
# is in the input file or not.  We dish out the files either by size
# (to fill a chunk), or just round-robin and hope for the best.
# Per-client progress: number of names (COUNT) or bytes (SUMBYTES) handed to
# the current client since the last switch.
COUNT=0
SUMBYTES=0

# Count one more pathname for the current client and move on to the next
# client once it has been given SPLITCOUNT names.
account_name() {
	COUNT=$((COUNT + 1))
	if [ $COUNT -ge $SPLITCOUNT ]; then
		CURRCLI=$(((CURRCLI + 1) % NUMCLI))
		COUNT=0
	fi
}

# Add {bytes} to the current client's share and move on to the next client
# once it has been given SPLITMB worth of data.  A missing or non-numeric
# size is counted as the minimum so that a malformed line cannot break the
# arithmetic.
# usage: account_bytes {bytes}
account_bytes() {
	local bytes="$1"

	case "$bytes" in
	''|*[!0-9]*)	bytes=10240 ;;
	*)		bytes=$((10#$bytes)) ;;	# avoid octal for leading 0
	esac
	# take tar blocking factor into account
	[ "$bytes" -lt 10240 ] && bytes=10240
	SUMBYTES=$((SUMBYTES + bytes))
	if [ $((SUMBYTES / 1048576)) -ge $SPLITMB ]; then
		CURRCLI=$(((CURRCLI + 1) % NUMCLI))
		SUMBYTES=0
	fi
}

if [ "$FILEONLY" ]; then
	if [ "$FILENAME" ]; then
		[ "$VERBOSE" ] && log "$FILENAME"
		printf '%s\n' "$FILENAME" 1>&$BASEFD	# rewrite initial line
		# the initial line counts toward the first client's share
		account_name
	fi
	# if we don't know the size, just round-robin among the clients
	while read -r FILENAME; do
		# a blank line names no file, do not forward it
		[ -z "$FILENAME" ] && continue
		FD=$((BASEFD+CURRCLI))
		[ -n "$VERBOSE" -a "$OP" != "backup" ] && log "$OP $FILENAME"
		printf '%s\n' "$FILENAME" 1>&$FD
		account_name
	done
else
	[ "$VERBOSE" ] && log "$FILENAME"
	printf '%s %s\n' "$BYTES" "$FILENAME" 1>&$BASEFD # rewrite initial line
	# the initial line counts toward the first client's share
	account_bytes "$BYTES"
	# if we know the size, then give each client enough to start a chunk
	while read -r BYTES FILENAME; do
		# a blank line names no file, do not forward it
		[ -z "$BYTES$FILENAME" ] && continue
		FD=$((BASEFD+CURRCLI))
		[ "$VERBOSE" ] && log "$FILENAME"
		printf '%s %s\n' "$BYTES" "$FILENAME" >&$FD
		account_bytes "$BYTES"
	done
fi

# Once all of the files have been given out, wait for the remote processes
# to complete.  That might take a while depending on the size of the backup.
cleanup