2 # do a parallel backup and restore of specified files
4 # Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
6 # This file is part of Lustre, http://www.lustre.org.
8 # Lustre is free software; you can redistribute it and/or
9 # modify it under the terms of version 2 of the GNU General Public
10 # License as published by the Free Software Foundation.
12 # Lustre is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Lustre; if not, write to the Free Software
19 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 # Author: Andreas Dilger <adilger@sun.com>
# Tunable defaults; each may be overridden from the caller's environment.
RSH=${RSH:-"ssh"} # use "bash -c" for local testing
SPLITMB=${SPLITMB:-8192} # target chunk size (uncompressed)
SPLITCOUNT=${SPLITCOUNT:-200} # number of files to give each client
TAR=${TAR:-"tar"} # also htar in theory works (untested)
#PROGPATH=$(which $0 2> /dev/null) || PROGPATH=$PWD/$0
# NOTE(review): arm of an elided case statement resolving $0 to an
# absolute path; relative invocations are anchored at $PWD.
.*) PROGPATH=$PWD/$0 ;;
PROGNAME="$(basename $PROGPATH)"
42 echo "$LOGPREFIX: $*" 1>&2
# Body of the (elided) usage() function: print the option summary.
# Some option help is mode-dependent; the else/fi lines of the small
# "backup"-only conditionals below are elided from this view.
echo "usage: $PROGNAME [-chjvxz] [-C directory] [-e rsh] [-i inputlist]"
echo -e "\t\t[-l logdir] [-n nodes] [-s splitmb] [-T tar] -f ${FTYPE}base"
echo -e "\t-c create archive"
echo -e "\t-C directory: relative directory for filenames (default PWD)"
echo -e "\t-e rsh: specify the passwordless remote shell (default $RSH)"
if [ "$OP" = "backup" ]; then
echo -e "\t-f outputfile: specify base output filename for backup"
echo -e "\t-f ${OP}filelist: specify list of files to $OP"
echo -e "\t-h: print help message and exit (use -x -h for restore help)"
if [ "$OP" = "backup" ]; then
echo -e "\t-i inputfile: list of files to backup (default stdin)"
echo -e "\t-j: use bzip2 compression on $FTYPE file(s)"
echo -e "\t-l logdir: directory for output logs"
echo -e "\t-n nodes: comma-separated list of client nodes to run ${OP}s"
if [ "$OP" = "backup" ]; then
echo -e "\t-s splitmb: target size for backup chunks " \
"(default ${SPLITMB}MiB)"
echo -e "\t-S splitcount: number of files sent to each client "\
"(default ${SPLITCOUNT})"
echo -e "\t-t: list table of contents of tarfile"
echo -e "\t-T tar: specify the backup command (default $TAR)"
echo -e "\t-v: be verbose - list all files being processed"
echo -e "\t-V: print version number and exit"
echo -e "\t-x: extract files instead of backing them up"
echo -e "\t-z: use gzip compression on $FTYPE file(s)"
# Body of the (elided) usage_inactive() helper: complain about an option
# that is valid only in a different operating mode ($1/$2 = option+arg,
# $3 = current mode).
usage "inactive argument '$1 $2' in '$3' mode"
# Arms of the (elided) set_op_type() case: derive the default operation
# (and matching tar flag / file-type word) from the program name.
*backup*) OP=backup; FTYPE=output; TAROP="-c" ;;
*list*) OP=list; FTYPE=input; TAROP="-t"; SPLITCOUNT=1 ;;
*restore*) OP=restore; FTYPE=input; TAROP="-x"; SPLITCOUNT=1 ;;
*) FTYPE="output"; usage "unknown archive operation '$1'";;
# --fileonly, --remote are internal-use-only options
# Long-option spec must mirror the case arms below: --permissions takes
# no argument, while --splitmb and --splitcount require one (they shift 2).
TEMPARGS=$(getopt -n "$LOGPREFIX" -o cC:e:f:hi:jl:n:ps:S:tT:vVxz --long create,extract,list,restore,directory:,rsh:,outputbase:,help,inputfile:,bzip2,logdir:,nodes:,permissions,splitmb:,splitcount:,tar:,verbose,version,gzip,fileonly,remote: -- "$@")
eval set -- "$TEMPARGS"
set_op_type $PROGNAME
# parse input arguments, and accumulate the client-specific args
# (arms of an elided "while true; do case $1 in" loop; $ARGS collects the
# options that must be forwarded verbatim to each remote client)
-c|--create) [ "$OP" != "backup" ] &&
usage "can't use $1 $TAROP at the same time"
OP="backup"; ARGS="$ARGS $1"; shift ;;
-C|--directory) GOTODIR="$2"; cd "$2" || usage "error cd to -C $2";
ARGS="$ARGS $1 \"$2\""; shift 2 ;;
-e|--rsh) RSH="$2"; shift 2;;
-f|--outputbase)OUTPUTBASE="$2";ARGS="$ARGS $1 \"$2\""; shift 2 ;;
-h|--help) ARGS=""; break;;
-i|--inputfile) INPUT="$2"; shift 2;;
-j|--bzip2) TARCOMP="-j"; ARGS="$ARGS $1"; shift ;;
-l|--logdir) LOGDIR="$2"; ARGS="$ARGS $1 \"$2\""; shift 2 ;;
# -n may be given repeatedly; node lists are accumulated comma-separated
-n|--nodes) NODELIST="$NODELIST,$2";
ARGS="$ARGS $1 \"$2\""; shift 2 ;;
-p|--permissions) PERM="-p"; ARGS="$ARGS $1"; shift ;;
# split options only make sense when creating an archive
-s|--splitmb) [ "$OP" != "backup" ] && usage_inactive $1 $2 $OP
SPLITMB=$2; ARGS="$ARGS $1 \"$2\""; shift 2 ;;
-S|--splitcount)[ "$OP" != "backup" ] && usage_inactive $1 $2 $OP
SPLITCOUNT=$2; ARGS="$ARGS $1 \"$2\""; shift 2 ;;
-t|--list) [ "$OP" != "backup" -a "$OP" != "list" ] &&
usage "can't use $1 $TAROP at the same time"
OP="list"; ARGS="$ARGS $1"; shift ;;
-T|--tar) TAR="$2"; ARGS="$ARGS $1 \"$2\""; shift 2 ;;
# Fix typo: the getopt spec declares "verbose", so the arm must match
# --verbose (with the old "--vebose" spelling, --verbose fell through to
# the unknown-argument arm).  A second -v enables full shell tracing.
-v|--verbose) [ "$VERBOSE" = "-v" ] && set -vx # be extra verbose
VERBOSE="-v"; ARGS="$ARGS $1"; shift ;;
-V|--version) echo "$LOGPREFIX: version $VERSION"; exit 0;;
-x|--extract|--restore)
[ "$OP" != "backup" -a "$OP" != "restore" ] &&
usage "can't use $1 $TAROP at the same time"
OP="restore"; ARGS="$ARGS $1"; shift ;;
-z|--gzip) TARCOMP="-z"; ARGS="$ARGS $1"; shift ;;
# these commands are for internal use only
# --remote marks this invocation as a client worker; NODENUM is the
# client's index and the log prefix gains the hostname for clarity
--remote) NODENUM="$2"; LOGPREFIX="$(hostname).$2"; shift 2;;
--fileonly) FILEONLY="yes"; shift;;
*) usage "unknown argument '$1'" 1>&2 ;;
# -h/--help clears $ARGS above, so an empty ARGS means "show help and exit"
[ -z "$ARGS" ] && usage "$OP a list of files, running on multiple nodes"
# we should be able to use any backup tool that can accept filenames
# from an input file instead of just pathnames on the command-line.
# Unset TARCOMP for htar, as it doesn't support on-the-fly compression.
case "$(basename $TAR)" in
htar*) TARARG="-L"; TAROUT="-f"; TARCOMP=""; MINKB=0 ;;
tar*|gnutar*|gtar*) TARARG="-T"; TAROUT="-b 2048 -f"; TAREXT=.tar ;;
*) fatal "unknown archiver '$TAR'" ;;
if [ "$OP" = "backup" ]; then
[ -z "$OUTPUTBASE" ] && usage "'-f ${FTYPE}base' must be given for $OP"
# Make sure we leave some margin free in the output filesystem for the
# chunks. If we are dumping to a network filesystem (denoted by having
# a ':' in the name, not sure how else to check) then we assume this
# filesystem is shared among all clients and expect the other nodes
# to also consume space there.
OUTPUTFS=$(dirname $OUTPUTBASE)
NETFS=$(df -P $OUTPUTFS | awk '/^[[:alnum:]]*:/ { print $1 }')
# default free-space floor: two uncompressed chunks, in kB
MINKB=${MINKB:-$((SPLITMB * 2 * 1024))}
# shared filesystem: scale the floor by the number of client nodes
[ "$NETFS" ] && MINKB=$(($(echo $NODELIST | tr ',' ' ' | wc -w) * $MINKB))
# Compress the output files as we go.
# (arms of an elided "case $TARCOMP in": pick the matching suffix)
-z) TAREXT="$TAREXT.gz";;
-j) TAREXT="$TAREXT.bz2";;
# (else branch of the backup test above: restore/list modes)
[ -z "$OUTPUTBASE" ] &&
usage "-f ${OP}filelist must be specified for $OP"
# we want to be able to use this for a list of files to restore
# but it is convenient to use $INPUT for reading the pathnames
# of the tar files during restore/list operations to handle stdin
[ "$INPUT" ] && usage "-i inputbase unsupported for $OP"
# with no -n given, run the single worker on this host
[ -z "$NODELIST" ] && NODELIST="localhost"
# If we are writing to a char or block device (e.g. tape) don't add any suffix
# We can't currently specify a different target device per client...
if [ -b "$OUTPUTBASE" -o -c "$OUTPUTBASE" ]; then
[ -z "$LOGDIR" ] && LOGDIR="/var/log"
LOGBASE="$LOGDIR/$PROGNAME"
elif [ -d "$OUTPUTBASE" ]; then
usage "-f $OUTPUTBASE must be a pathname, not a directory"
# regular-file target: default logs next to the output chunks
[ -z "$LOGDIR" ] && LOGBASE="$OUTPUTBASE" || LOGBASE="$LOGDIR/$PROGNAME"
# timestamp the log base so repeated runs do not clobber each other
LOGBASE="$LOGBASE.$(date +%Y%m%d%H%M)"
# tar up a single list of files into a chunk. We don't exit if there is an
# error returned, since that might happen frequently with e.g. files moving
# and no longer being available for backup.
# usage: run_one_tar {file_list} {chunk_nr} {chunkbytes}
# Sentinel strings sent over the client pipes instead of filenames.
DONE_MSG="FINISH_THIS_PROGRAM_NOW_I_TELL_YOU"
KILL_MSG="EXIT_THIS_PROGRAM_NOW_I_TELL_YOU"
# ($3 = estimated chunk size in bytes; function header elided from view)
CHUNKMB="$(($3 / 1048576))"
# tape/char device targets reuse the same output path for every chunk
if [ -b "$OUTPUTBASE" -o -c "$OUTPUTBASE" ]; then
OUTFILE="$OUTPUTBASE"
OUTFILE="$OUTPUTBASE.$NODENUM.$CHUNK$TAREXT"
CHUNKBASE="$LOGBASE.$NODENUM.$CHUNK"
LISTFILE="$CHUNKBASE.list"
# keep a permanent copy of the chunk's file list for post-mortem use
cp "$TMPLIST" "$LISTFILE"
# Throttle until the output filesystem has at least $MINKB kB free.
FREEKB=$(df -P $OUTPUTFS 2> /dev/null | tail -n 1 | awk '{print $4}')
while [ $FREEKB -lt $MINKB ]; do
# log the wait only once a minute (10s offset into each minute)
if [ $((SLEPT % 60)) -eq 10 ]; then
log "waiting ${SLEPT}s for ${MINKB}kB free in $OUTPUTFS"
FREEKB=$(df -P $OUTPUTFS | tail -n 1 | awk '{print $4}')
[ $SLEPT -gt 0 ] && log "waited ${SLEPT}s for space in ${OUTPUTFS}"
log "$LISTFILE started - est. ${CHUNKMB}MB"
# stdout of tar goes to $LOG; stderr is both appended to $LOG (via tee)
# and shown, minus tar's noisy "Removing leading ..." warnings
eval $TAR $TAROP $PERM $TARARG "$TMPLIST" -v $TARCOMP $TAROUT "$OUTFILE" \
2>&1 >>"$LOG" | tee -a $LOG | grep -v "Removing leading"
ELAPSE=$(($(date +%s) - START))
if [ $RC -eq 0 ]; then
if [ -f "$OUTFILE" ]; then
BYTES=$(stat -c '%s' "$OUTFILE")
CHUNKMB=$((BYTES / 1048576))
log "$LISTFILE finished - act. ${CHUNKMB}MB/${ELAPSE}s"
log "$LISTFILE finished OK - ${ELAPSE}s"
# .done marker files tell the master how each chunk ended
echo "OK" > $CHUNKBASE.done
echo "ERROR=$RC" > $CHUNKBASE.done
log "ERROR: $LISTFILE exited with rc=$RC"
# Restore or list one tar chunk ($INPUTFILE).  Like run_one_tar, errors
# are logged and recorded in a .done marker rather than aborting the run.
run_one_restore_or_list() {
LOG="$LOGBASE.$(basename $INPUTFILE).restore.log"
# htar sets MINKB=0 (no staging wait); otherwise wait for the chunk
# to be staged/readable before starting
while [ $MINKB != 0 -a ! -r "$INPUTFILE" ]; do
if [ $((SLEPT % 60)) -eq 10 ]; then
log "waiting ${SLEPT}s for $INPUTFILE staging"
[ $SLEPT -gt 0 ] && log "waited ${SLEPT}s for $INPUTFILE staging"
log "$OP of $INPUTFILE started"
# stdout to $LOG; stderr logged and echoed minus tar's leading-slash noise
eval $TAR $TAROP -v $TARCOMP $TAROUT "$INPUTFILE" 2>&1 >>"$LOG" |
tee -a "$LOG" | grep -v "Removing leading"
ELAPSE=$(($(date +%s) - START))
# for a table-of-contents run the log itself is the requested output
[ "$OP" = "list" ] && cat $LOG
if [ $RC -eq 0 ]; then
log "$INPUTFILE finished OK - ${ELAPSE}s"
echo "OK" > $INPUTFILE.restore.done
echo "ERROR=$RC" > $INPUTFILE.restore.done
log "ERROR: $OP of $INPUTFILE exited with rc=$RC"
# Run as a remote command and read input filenames from stdin and create tar
# output files of the requested size. The input filenames can either be:
# "bytes filename" or "filename" depending on whether FILEONLY is set.
# Read filenames until we have either a large enough list of small files,
# or we get a very large single file that is backed up by itself.
# (body of the elided run_remotely() function)
[ "$FILEONLY" ] && PARAMS="FILENAME" || PARAMS="BYTES FILENAME"
if [ "$OP" = "backup" ]; then
TMPBASE=$PROGNAME.$LOGPREFIX.temp
TMPFILE="$(mktemp -t $TMPBASE.$(date +%s).XXXXXXX)"
# read either "FILENAME" or "BYTES FILENAME" per line, per $PARAMS
while read $PARAMS; do
# sentinel handling: DONE drains cleanly, KILL aborts a running tar
[ "$FILENAME" = "$DONE_MSG" -o "$BYTES" = "$DONE_MSG" ] && break
if [ "$FILENAME" = "$KILL_MSG" -o "$BYTES" = "$KILL_MSG" ]; then
log "exiting $OP on request"
[ "$TARPID" ] && kill -9 $TARPID 2> /dev/null
run_one_restore_or_list $FILENAME; RC=$?
backup) STAT=($(stat -c '%s %F' "$FILENAME"))
[ "$FILEONLY" ] && BYTES=${STAT[0]}
# if this is a directory that has files in it, it will
# be backed up as part of this (or some other) backup.
# Only include it in the backup if empty, otherwise
# the files therein will be backed up multiple times
if [ "${STAT[1]}" = "directory" ]; then
NUM=`find "$FILENAME" -maxdepth 1|head -2|wc -l`
[ "$NUM" -gt 1 ] && continue
[ "$VERBOSE" ] && log "$FILENAME"
# if a file is > 3/4 of chunk size, archive by itself
# avoid shell math: 1024 * 1024 / (3/4) = 1398101
# NOTE(review): BYTES/1398101 > SPLITMB means BYTES > (4/3)*chunk,
# not > (3/4)*chunk as the comment says; 786432 would implement the
# stated 3/4 threshold - confirm intended behavior before changing.
if [ $((BYTES / 1398101)) -gt $SPLITMB ]; then
# create a very temp list for just this file
TARLIST=$(mktemp -t $TMPBASE.$(date +%s).XXXXXX)
echo "$FILENAME" > "$TARLIST"
SUMBYTES=$((SUMBYTES + BYTES))
echo "$FILENAME" >> $TMPFILE
# not large enough input list, keep collecting
[ $((SUMBYTES >> 20)) -lt $SPLITMB ] && continue
# start the chunk in the background and begin a fresh list
TMPFILE=$(mktemp -t $TMPBASE.$(date +%s).XXXXXXX)
run_one_backup "$TARLIST" "$OUTPUTFILENUM" $TARBYTES &
OUTPUTFILENUM=$((OUTPUTFILENUM + 1))
# track the worst exit code seen so far
[ $RC -gt $RCMAX ] && RCMAX=$RC
if [ "$TARPID" ]; then
[ $RC -gt $RCMAX ] && RCMAX=$RC
# flush any partial final chunk (runs in the foreground)
if [ -s "$TMPFILE" ]; then
run_one_backup "$TMPFILE" "$OUTPUTFILENUM" $SUMBYTES
[ $RC -gt $RCMAX ] && RCMAX=$RC
# If we are a client then just run that subroutine and exit
[ "$NODENUM" ] && run_remotely && exit 0
# Tell the clients to exit. Their input pipes might be busy so it may
# take a while for them to consume the files and finish.
# (body of the elided do_cleanup() function, run from the trap below)
log "cleaning up remote processes"
# one open fd per client, numbered BASEFD..BASEFD+NUMCLI-1
for FD in $(seq $BASEFD $((BASEFD + NUMCLI - 1))); do
echo "$DONE_MSG" >&$FD
# poll until no worker invocations of this script remain in ps output
RUN=$(ps auxww | egrep -v "grep|bash" | grep -c "$PROGNAME.*remote")
while [ $RUN -gt 0 ]; do
#ps auxww | grep "$PROGNAME.*remote" | egrep -v "grep|bash"
[ $((SLEPT % 30)) -eq 0 ] &&
log "wait for $RUN processes to finish"
[ $((SLEPT % 300)) -eq 0 ] &&
ps auxww |grep "$PROGNAME.*remote" |egrep -v "grep|bash"
RUN=$(ps auxww|egrep -v "grep|bash"|grep -c "$PROGNAME.*remote")
# second invocation (e.g. ^C during cleanup): force workers to stop
if [ "$CLEANING" = "yes" ]; then
log "killing all remote processes - may not stop immediately"
for FD in $(seq $BASEFD $((BASEFD + NUMCLI - 1))); do
echo "$KILL_MSG" >&$FD
# The pattern must be expanded by the shell: with the previous
# single-quoted awk program ('/$PROGNAME.*remote/'), awk searched for the
# literal text "$PROGNAME" and never matched the worker processes.
# Match the ps|grep style used by the wait loop above.
PROCS=$(ps auxww | grep "$PROGNAME.*remote" | egrep -v "grep|bash" |
awk '{ print $2 }')
[ "$PROCS" ] && kill -9 $PROCS
# values that only need to be determined on the master
# always read from stdin, even if it is a file, to be more consistent
# (arms of an elided "case $INPUT in")
-|"") INPUT="standard input";;
*) if [ ! -r "$INPUT" ]; then
[ "$VERBOSE" ] && ls -l "$INPUT"
usage "can't read input file '$INPUT'"
# if unspecified, run remote clients in the current PWD to get correct paths
[ -z "$GOTODIR" ] && ARGS="$ARGS -C \"$PWD\""
# Check if the input list has the file size specified or not. Input
# lines should be of the form "{bytes} {filename}" or "{filename}".
# If no size is given then the file sizes are determined by the clients
# to do the chunking (useful for making a full backup, but not as good
# at evenly distributing the data among clients). In rare cases the first
# file specified may have a blank line and no size - check that as well.
if [ "$OP" = "backup" ]; then
# (the "read BYTES FILENAME" of the first input line is elided above)
# lone word that exists as a path: size-less list
if [ -z "$FILENAME" -a -e "$BYTES" ]; then
FILEONLY="yes" && ARGS="$ARGS --fileonly"
# whole line is a path containing a space: also size-less
elif [ -e "$BYTES $FILENAME" ]; then
FILENAME="$BYTES $FILENAME"
FILEONLY="yes" && ARGS="$ARGS --fileonly"
elif [ ! -e "$FILENAME" ]; then
log "input was '$BYTES $FILENAME'"
fatal "first line of '$INPUT' is not a file"
FILEONLY="yes" && ARGS="$ARGS --fileonly"
# kill the $RSH processes if we get a signal
trap do_cleanup INT EXIT
# start up the remote processes, each one with its stdin attached to a
# different output file descriptor, so that we can communicate with them
# individually when sending files to back up. We generate a remote log
# file and also return output to this process.
for CLIENT in $(echo $NODELIST | tr ',' ' '); do
FD=$((BASEFD+NUMCLI))
LOG=$OUTPUTBASE.$CLIENT.$FD.log
# process substitution: fd $FD in this shell feeds the client's stdin
eval "exec $FD> >($RSH $CLIENT '$PROGPATH --remote=$NUMCLI $ARGS')"
if [ $RC -eq 0 ]; then
log "starting $0.$NUMCLI on $CLIENT"
NUMCLI=$((NUMCLI + 1))
485 log "ERROR: failed '$RSH $CLIENT $PROGPATH': RC=$?"
# cannot proceed if no client could be started
if [ $NUMCLI -eq 0 ]; then
fatal "unable to start any threads"
# We don't want to use "BYTES FILENAME" if the input doesn't include the
# size, as this might cause problems with files with whitespace in them.
# Instead we just have two different loops depending on whether the size
# is in the input file or not. We dish out the files either by size
# (to fill a chunk), or just round-robin and hope for the best.
if [ "$FILEONLY" ]; then
# the first line was already consumed during detection; replay it
if [ "$FILENAME" ]; then
[ "$VERBOSE" ] && log "$FILENAME"
echo "$FILENAME" 1>&$BASEFD # rewrite initial line
# if we don't know the size, just round-robin among the clients
while read FILENAME; do
FD=$((BASEFD+CURRCLI))
[ -n "$VERBOSE" -a "$OP" != "backup" ] && log "$OP $FILENAME"
echo "$FILENAME" 1>&$FD
# advance to the next client after $SPLITCOUNT files
if [ $COUNT -ge $SPLITCOUNT ]; then
CURRCLI=$(((CURRCLI + 1) % NUMCLI))
[ "$VERBOSE" ] && log "$FILENAME"
echo $BYTES "$FILENAME" 1>&$BASEFD # rewrite initial line
# if we know the size, then give each client enough to start a chunk
while read BYTES FILENAME; do
FD=$((BASEFD+CURRCLI))
[ "$VERBOSE" ] && log "$FILENAME"
echo $BYTES "$FILENAME" >&$FD
# take tar blocking factor into account
[ $BYTES -lt 10240 ] && BYTES=10240
SUMBYTES=$((SUMBYTES + BYTES))
# advance to the next client once a chunk's worth has been handed out
if [ $((SUMBYTES / 1048576)) -ge $SPLITMB ]; then
CURRCLI=$(((CURRCLI + 1) % NUMCLI))
# Once all of the files have been given out, wait for the remote processes
# to complete. That might take a while depending on the size of the backup.