#!/bin/bash
-# set -x
-set -e
# lfs_migrate: a simple tool to copy and check files.
#
# to be 100% safe the administrator needs to ensure this is safe.
RSYNC=${RSYNC:-rsync}
+OPT_RSYNC=${LFS_MIGRATE_RSYNC_MODE:-false}
ECHO=echo
LFS=${LFS:-lfs}
+RSYNC_WITH_HLINKS=false
+LFS_MIGRATE_TMP=${TMPDIR:-/tmp}
+MIGRATED_SET="$(mktemp ${LFS_MIGRATE_TMP}/lfs_migrate.links.XXXXXX)"
+NEWNAME=""
+REMOVE_FID='s/^\[[0-9a-fx:]*\] //'
+PROG=$(basename $0)
+
+add_to_set() {
+ local old_fid="$1"
+ local path="$2"
+
+ echo "$old_fid $path" >> "$MIGRATED_SET"
+}
+
+path_in_set() {
+ local path="$1"
+
+ sed -e "$REMOVE_FID" $MIGRATED_SET | grep -q "^$path$"
+}
+
+old_fid_in_set() {
+ local old_fid="$1"
+
+ grep "^\\$old_fid" "$MIGRATED_SET" | head -n 1 |
+ sed -e "$REMOVE_FID"
+}
usage() {
cat -- <<USAGE 1>&2
-usage: lfs_migrate [-c|-s] [-h] [-l] [-n] [-y] [file|dir ...]
- -c compare file data after migrate (default)
- -s skip file data comparison after migrate
- -h show this usage message
- -l migrate files with hard links (skip by default)
- -n only print the names of files to be migrated
- -q run quietly (don't print filenames or status)
- -y answer 'y' to usage question
+usage: lfs_migrate [--dry-run|-n] [--help|-h] [--no-rsync|--rsync] [--quiet|-q]
+ [--auto-stripe|-A [-C <cap>]
+ [--min-free|-M <min_free>] [--max-free|-X <max_free>]]
+ [--pool|-p <pool>] [--stripe-count|-c <stripe_count>]
+ [--stripe-size|-S <stripe_size>]
+ [-D] [-h] [-n] [-S]
+ [--restripe|-R] [--skip|-s] [--verbose|-v] [--yes|-y] [-0]
+ [FILE|DIR...]
+ -A restripe file using an automatically selected stripe count,
+ uses stripe_count = sqrt(size_in_GB) + 1
+ -c <stripe_count>
+ restripe file using the specified <stripe_count>
+ -C <cap> when -A is set, limit the migrated file to use on each OST
+ at most 1/<cap> of the available space of the smallest OST
+ -D do not use direct I/O to copy file contents
+ -h show this usage message
+ -M <min_free>
+ when -A is set, an OST must contain more available space than
+ <min_free> KB in order for it to be considered available for
+ use in the migration
+ --no-rsync do not fall back to rsync mode even if lfs migrate fails
+ -n only print the names of files to be migrated
+ -p <pool> use the specified OST pool for the destination file
+ -q run quietly (don't print filenames or status)
+ --rsync force rsync mode instead of using lfs migrate
+ -R restripe file using default directory striping
+ -s skip file data comparison after migrate
+ -S <stripe_size>
+ restripe file using the specified stripe size
+ -v show verbose debug messages
+ -X <max_free>
+ when -A is set, limit the amount of space on each OST that
+ can be considered available for the migration to
+ <max_free> KB
+ -y answer 'y' to usage question
+ -0 input file names on stdin are separated by a null character
+
+Options '-A', '-c', and '-R' are mutually exclusive.
+Options '-C', '-M', and '-X' are ignored if '-A' is not set.
+
+The --rsync and --no-rsync options may not be specified at the same time.
If a directory is an argument, all files in the directory are migrated.
If no file/directory is given, the file list is read from standard input.
-e.g.: lfs_migrate /mnt/lustre/file
+Any arguments that are not explicitly recognized by the script are passed
+through to the 'lfs migrate' utility.
+
+Examples:
+ lfs_migrate /mnt/lustre/dir
+ lfs_migrate -p newpool /mnt/lustre/dir
lfs find /test -O test-OST0004 -size +4G | lfs_migrate -y
USAGE
exit 1
}
-OPT_CHECK=y
-
-while getopts "chlnqsy" opt $*; do
- case $opt in
- c) OPT_CHECK=y;;
- l) OPT_NLINK=y;;
- n) OPT_DRYRUN=n; OPT_YES=y;;
- q) ECHO=:;;
- s) OPT_CHECK="";;
- y) OPT_YES=y;;
- h|\?) usage;;
- esac
+cleanup() {
+ rm -f "$MIGRATED_SET"
+ [ -n "$NEWNAME" ] && rm -f "$NEWNAME"
+}
+
+trap cleanup EXIT
+
+OPT_CHECK=true
+OPT_DEBUG=false
+OPT_DRYRUN=false
+OPT_FILE=()
+OPT_LAYOUT=()
+OPT_COMP=false
+OPT_NO_RSYNC=false
+OPT_NO_DIRECT=false
+OPT_NULL=false
+OPT_PASSTHROUGH=()
+OPT_POOL=""
+OPT_RESTRIPE=false
+OPT_YES=false
+LFS_OPT_DIRECTIO=""
+OPT_AUTOSTRIPE=false
+OPT_STRIPE_COUNT=""
+OPT_STRIPE_SIZE=""
+OPT_MINFREE=262144
+OPT_MAXFREE=""
+OPT_CAP=100
+
+# Examine any long options and arguments. getopts does not support long
+# options, so they must be stripped out and classified as either options
+# for the script, or passed through to "lfs migrate".
+while [ -n "$*" ]; do
+ arg="$1"
+ case "$arg" in
+ -h|--help) usage;;
+ -l|--link) ;; # maintained backward compatibility for now
+ -n) OPT_DRYRUN=true; OPT_YES=true
+ echo "$PROG: -n deprecated, use --dry-run or --non-block" 1>&2;;
+ --dry-run) OPT_DRYRUN=true; OPT_YES=true;;
+ -p|--pool) OPT_POOL="$arg $2"; OPT_LAYOUT+="$OPT_POOL "; shift;;
+ -q|--quiet) ECHO=:;;
+ -R|--restripe) OPT_RESTRIPE=true;;
+ -s|--skip) OPT_CHECK=false;;
+ -v|--verbose) OPT_DEBUG=true; ECHO=echo;;
+ -y|--yes) OPT_YES=true;;
+ -0) OPT_NULL=true;;
+ -b|--block|--non-block|--non-direct|--no-verify)
+ # Always pass non-layout options to 'lfs migrate'
+ OPT_PASSTHROUGH+=("$arg");;
+ --rsync) OPT_RSYNC=true;;
+ --no-rsync) OPT_NO_RSYNC=true;;
+ --copy|--yaml|--file) OPT_COMP=true;
+ # these options have files as arguments, pass both through
+ OPT_LAYOUT+="$arg $2 "; shift;;
+ --auto-stripe|-A) OPT_AUTOSTRIPE=true;;
+ -C) OPT_CAP="$2"; shift;;
+ -D) LFS_OPT_DIRECTIO="-D";;
+ -M|--min-free) OPT_MINFREE="$2"; shift;;
+ -X|--max-free) OPT_MAXFREE="$2"; shift;;
+ -c|--stripe-count) OPT_STRIPE_COUNT="$2"; shift;;
+ -S|--stripe-size) OPT_STRIPE_SIZE="$2"; shift;;
+ *) # Pass other non-file layout options to 'lfs migrate'
+ [ -e "$arg" ] && OPT_FILE+="$arg " && break || OPT_LAYOUT+="$arg "
+ esac
+ shift
done
-shift $((OPTIND - 1))
-if [ -z "$OPT_YES" ]; then
+if $OPT_RESTRIPE || $OPT_AUTOSTRIPE && [ -n "$OPT_LAYOUT" ]; then
+ echo "$PROG error: Options '$OPT_LAYOUT' can't be used with -R or -A" \
+ 1>&2
+ exit 1
+elif $OPT_RESTRIPE && [[ "$OPT_STRIPE_COUNT" || "$OPT_STRIPE_SIZE" ]]; then
+ echo "$PROG error: Option -R can't be used with -c or -S" 1>&2
+ exit 1
+elif $OPT_AUTOSTRIPE && [ -n "$OPT_STRIPE_COUNT" ]; then
+ echo "$PROG error: Option -A can't be used with -c" 1>&2
+ exit 1
+elif $OPT_AUTOSTRIPE && $OPT_RESTRIPE; then
+ echo "$PROG error: Option -A can't be used with -R" 1>&2
+ exit 1
+fi
+
+if $OPT_RSYNC && $OPT_NO_RSYNC; then
+ echo "$PROG: Options --rsync and --no-rsync may not be" \
+ "specified at the same time." 1>&2
+ exit 1
+fi
+
+if ! $OPT_YES; then
echo ""
echo "lfs_migrate is currently NOT SAFE for moving in-use files." 1>&2
echo "Use it only when you are sure migrated files are unused." 1>&2
echo "" 1>&2
- echo "If emptying OST(s) that are not disabled on the MDS, new" 1>&2
- echo "files may use them. To prevent MDS allocating any files on" 1>&2
- echo "OSTNNNN run 'lctl --device %{fsname}-OSTNNNN-osc deactivate'" 1>&2
- echo "on the MDS." 1>&2
+ echo "If emptying an OST that is active on the MDS, new files may" 1>&2
+ echo "use it. To stop allocating any new objects on OSTNNNN run:" 1>&2
+ echo " lctl set_param osp.<fsname>-OSTNNNN*.max_create_count=0'" 1>&2
+ echo "on each MDS using the OST(s) being emptied." 1>&2
echo -n "Continue? (y/n) "
read CHECK
[ "$CHECK" != "y" -a "$CHECK" != "yes" ] && exit 1
# If rsync copies lustre xattrs in the future, then we can skip lfs (bug 22189)
strings $(which $RSYNC) 2>&1 | grep -q lustre && LFS=:
+# rsync creates its temporary files with lenient permissions, even if
+# permissions on the original files are more strict. Tighten umask here
+# to avoid the brief window where unprivileged users might be able to
+# access the temporary file.
+umask 0077
+
+# Use stripe count = sqrt(size_in_GB) + 1, but cap object size per OST.
+function calc_stripe()
+{
+ local filename=$1
+ local filekb=$2
+ local obj_max_kb=$3
+ local filegb=$((filekb / 1048576))
+ local stripe_count=1
+ local ost_max_count=0
+
+ # Files up to 1GB will have 1 stripe if they fit within the object max
+ if [[ $filegb -lt 1 && "$obj_max_kb" && $filekb -le $obj_max_kb ]]; then
+ echo 1 "$obj_max_kb" && return
+ fi
+
+ stripe_count=$(bc <<< "scale=0; 1 + sqrt($filegb)" 2> /dev/null) ||
+ { echo "cannot auto calculate stripe count" >&2; return; }
+
+ if [ -z "$obj_max_kb" ]; then
+ local ost_min_kb=$((1 << 62))
+
+ # Calculate cap on object size at 1% of smallest OST
+ # but only include OSTs that have 256MB+ available space
+ while IFS='' read avail; do
+ [[ "$OPT_MAXFREE" && $avail -gt $OPT_MAXFREE ]] &&
+ avail=$OPT_MAXFREE
+ if [ $avail -ge $OPT_MINFREE ]; then
+ ost_max_count=$((ost_max_count + 1))
+ if [ $avail -lt $ost_min_kb ]; then
+ ost_min_kb=$avail
+ fi
+ fi
+ done < <($LFS df $OPT_POOL $OLDNAME | awk '/OST/ { print $4 }')
+
+ if [ $ost_max_count -eq 0 ]; then
+ # no OSTs with enough space, stripe over all of them
+ echo "-1" "0"
+ return
+ fi
+
+ if (( ost_min_kb == (1 << 62) )); then
+ echo "warning: unable to determine minimum OST size, " \
+ "object size not capped" >&2
+ echo "$stripe_count" "0"
+ return
+ fi
+
+ obj_max_kb=$((ost_min_kb / $OPT_CAP))
+ elif [ $obj_max_kb -eq 0 ]; then
+ echo "warning: unable to determine minimum OST size " \
+ "from previous migrate, object size not capped" >&2
+ echo "$stripe_count" "$obj_max_kb"
+ return
+ fi
+
+ # If disk usage would exceed the cap, increase the number of stripes.
+ # Round up to the nearest MB to ensure file will fit.
+ (( filekb > stripe_count * obj_max_kb )) &&
+ stripe_count=$(((filekb + obj_max_kb - 1024) / obj_max_kb))
+
+ # Limit the count to the number of eligible OSTs
+ if [ "$stripe_count" -gt $ost_max_count ]; then
+ echo "$ost_max_count" "$obj_max_kb"
+ else
+ echo "$stripe_count" "$obj_max_kb"
+ fi
+}
+
lfs_migrate() {
- while read OLDNAME; do
+ while IFS='' read -d '' OLDNAME; do
+ local hlinks=()
+ local layout
+
$ECHO -n "$OLDNAME: "
# avoid duplicate stat if possible
- TYPE_LINK=($(stat -c "%h %F" "$OLDNAME" || true))
+ local nlink_type=($(LANG=C stat -c "%h %F %s" "$OLDNAME" \
+ 2> /dev/null))
# skip non-regular files, since they don't have any objects
# and there is no point in trying to migrate them.
- if [ "${TYPE_LINK[1]}" != "regular" ]; then
- echo -e "not a regular file, skipped"
+ if [ "${nlink_type[1]}" != "regular" ]; then
+ echo -e "\r$OLDNAME: not a regular file, skipped" 1>&2
continue
fi
- if [ -z "$OPT_NLINK" -a ${TYPE_LINK[0]} -gt 1 ]; then
- echo -e "multiple hard links, skipped"
+ # working out write perms is hard, let the shell do it
+ if [ ! -w "$OLDNAME" ]; then
+ echo -e "\r$OLDNAME: no write permission, skipped" 1>&2
continue
fi
- # working out write perms is hard, let the shell do it
- if [ ! -w "$OLDNAME" ]; then
- echo -e "no write permission, skipped"
+ if $OPT_DRYRUN && ! $OPT_DEBUG; then
+ $ECHO "dry run, skipped"
continue
fi
- if [ "$OPT_DRYRUN" ]; then
- echo -e "dry run, skipped"
+ # xattrs use absolute file paths, so ensure provided path is
+ # also absolute so that the names can be compared
+ local oldname_absolute=$(readlink -f "$OLDNAME")
+ if [ -z "$oldname_absolute" ]; then
+ echo -e "\r$OLDNAME: cannot resolve full path, skipped" 1>&2
continue
fi
+ OLDNAME=$oldname_absolute
+
+ # In the future, the path2fid and fid2path calls below
+ # should be replaced with a single call to
+ # "lfs path2links" once that command is available. The logic
+ # for detecting unlisted hard links could then be removed.
+ local fid=$($LFS path2fid "$OLDNAME" 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ echo -e "\r$OLDNAME: cannot determine FID; skipping; " \
+ "is this a Lustre file system?" 1>&2
+ continue
+ fi
+
+ if [[ ${nlink_type[0]} -gt 1 ]] || $RSYNC_WITH_HLINKS; then
+ # don't migrate a hard link if it was already migrated
+ if path_in_set "$OLDNAME"; then
+ $ECHO "already migrated via another hard link"
+ continue
+ fi
+
+ # There is limited space available in the xattrs
+ # to store all of the hard links for a file, so it's
+ # possible that $OLDNAME is part of a link set but is
+ # not listed in xattrs and therefore not listed as
+ # being migrated.
+ local migrated=$(old_fid_in_set "$fid")
+ if [ -n "$migrated" ]; then
+ $ECHO "already migrated via another hard link"
+ if $OPT_RSYNC; then
+ # Only the rsync case has to relink.
+ # The lfs migrate case preserves the
+ # inode so the links are already
+ # correct.
+ [ "$migrated" != "$OLDNAME" ] &&
+ ln -f "$migrated" "$OLDNAME"
+ fi
+ add_to_set "$fid" "$OLDNAME"
+ continue;
+ fi
+ fi
+
+ local stripe_size="$OPT_STRIPE_SIZE"
+ local stripe_count="$OPT_STRIPE_COUNT"
+ local stripe_opts="-N --comp-count -c -S -p -y"
+ local parent_count=""
+ local parent_size=""
+ local stripe_pool="${OPT_POOL#-p }"
+ local mirror_count=1
+ local comp_count=0
+ # avoid multiple getstripe calls
+ # lcm_mirror_count: 1
+ # lcm_entry_count: 0
+ # lmm_stripe_count: 1
+ # lmm_stripe_size: 1048576
+ # lmm_pool: pool_abc
+ local l_mirror_count=0
+ local l_comp_count=1
+ local l_stripe_count=2
+ local l_stripe_size=3
+ local l_stripe_pool=4
+ local layout_info=($($LFS getstripe $stripe_opts $OLDNAME \
+ 2>/dev/null | awk '{ print $2 }'))
+ layout="${OPT_PASSTHROUGH[@]} "
+
+ if $OPT_RESTRIPE; then
+ UNLINK=""
+ layout+="--copy $(dirname $OLDNAME)"
+ OPT_COMP=true
+ else
+ # if rsync copies Lustre xattrs properly in the future
+ # (i.e. before the file data, so that it preserves
+ # striping) then we don't need to do this getstripe
+ # stuff.
+ UNLINK="-u"
+
+ [ -n "$OPT_POOL" ] ||
+ stripe_pool=${layout_info[$l_stripe_pool]}
+ mirror_count=${layout_info[$l_mirror_count]}
+
+ if $OPT_AUTOSTRIPE; then
+ local filekb=$((${nlink_type[3]} / 1024))
+
+ read stripe_count OBJ_MAX_KB < <(calc_stripe \
+ "$OLDNAME" "$filekb" "$OBJ_MAX_KB")
+ [ -z "$stripe_count" ] && exit 1
+ [ $stripe_count -lt 1 ] && stripe_count=1
+ else
+ [ -n "$stripe_count" ] ||
+ stripe_count=${layout_info[$l_stripe_count]}
+ fi
+ [ -n "$stripe_size" ] ||
+ stripe_size=${layout_info[$l_stripe_size]}
+
+ [ -z "$stripe_count" -o -z "$stripe_size" ] && UNLINK=""
+ fi
+
+ if $OPT_DEBUG; then
+ local parent_count
+ local parent_size
+ local parent_layout
+
+ if $OPT_RESTRIPE; then
+ parent_layout=($($LFS getstripe $stripe_opts \
+ -d $(dirname $OLDNAME) 2>/dev/null |
+ awk '{print $2 }'))
+ parent_count=${parent_layout[$l_stripe_count]}
+ parent_size=${parent_layout[$l_stripe_size]}
+ stripe_pool=${parent_layout[$l_stripe_pool]}
+ mirror_count=${parent_layout[$l_mirror_count]}
+ fi
+
+ $ECHO -n "stripe_count=${stripe_count:-$parent_count},stripe_size=${stripe_size:-$parent_size}"
+ [ -n "$stripe_pool" ] &&
+ $ECHO -n ",pool=${stripe_pool}"
+ [[ $mirror_count -gt 1 ]] &&
+ $ECHO -n ",mirror_count=${mirror_count}"
+ $ECHO -n " "
+ fi
- # if rsync copies Lustre xattrs properly in the future
- # (i.e. before the file data, so that it preserves striping)
- # then we don't need to do this getstripe/mktemp stuff.
- UNLINK="-u"
- COUNT=$($LFS getstripe -c "$OLDNAME" 2> /dev/null)
- SIZE=$($LFS getstripe -s "$OLDNAME" 2> /dev/null)
- [ -z "$COUNT" -o -z "$SIZE" ] && UNLINK=""
- NEWNAME=$(mktemp $UNLINK "$OLDNAME.tmp.XXXXXX")
+ if $OPT_DRYRUN; then
+ $ECHO " dry run, skipped"
+ continue
+ fi
+
+ if ! $OPT_COMP && [ ${layout_info[$l_comp_count]} -gt 0 ]; then
+ layout+="--copy $OLDNAME"
+ OPT_COMP=true
+ fi
+ if ! $OPT_COMP; then
+ [ -n "$stripe_count" ] && layout+="-c $stripe_count "
+ [ -n "$stripe_size" ] && layout+="-S $stripe_size "
+ [ -n "$OPT_POOL" -a -n "$stripe_pool" ] &&
+ layout+="-p $stripe_pool "
+ [[ $mirror_count -gt 1 ]] && layout+="-N $mirror_count "
+ fi
+ layout+="$OPT_LAYOUT"
+
+ # detect other hard links and store them on a global
+ # list so we don't re-migrate them
+ local mntpoint=$(df -P "$OLDNAME" |
+ awk 'NR==2 { print $NF; exit }')
+ if [ -z "$mntpoint" ]; then
+ echo -e "\r$OLDNAME: cannot determine mount point; skipped" 1>&2
+ continue
+ fi
+ hlinks=$($LFS fid2path "$mntpoint" "$fid" 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ echo -e "\r$OLDNAME: cannot determine hard link paths, skipped" 1>&2
+ continue
+ fi
+ hlinks+=("$OLDNAME")
+
+ # first try to migrate via Lustre tools, then fall back to rsync
+ if ! $OPT_RSYNC; then
+ if $LFS migrate $layout "$OLDNAME"; then
+ $ECHO "done"
+ for link in ${hlinks[*]}; do
+ add_to_set "$fid" "$link"
+ done
+ continue
+ elif $OPT_NO_RSYNC; then
+ echo -e "\r$OLDNAME: refusing to fall back to rsync, skipped" 1>&2
+ continue
+ else
+ $ECHO -n "falling back to rsync: "
+ OPT_RSYNC=true
+ fi
+ fi
+
+ local olddir=$(dirname $OLDNAME)
+ local oldfile=$(basename $OLDNAME)
+ NEWNAME=$(mktemp $UNLINK "$olddir/.$oldfile.XXXXXX")
if [ $? -ne 0 -o -z "$NEWNAME" ]; then
- echo -e "\r$OLDNAME: can't make temp file, skipped" 1>&2
+ echo -e "\r$OLDNAME: cannot make temp file, skipped" 1>&2
continue
fi
- [ "$UNLINK" ] && $LFS setstripe -c${COUNT} -s${SIZE} "$NEWNAME"
+ if [ "$UNLINK" ]; then
+ if ! $LFS setstripe $layout "$NEWNAME"; then
+ echo -e "\r$NEWNAME: setstripe failed, exiting" 1>&2
+ exit 2
+ fi
+ fi
# we use --inplace, since we created our own temp file already
if ! $RSYNC -a --inplace $RSYNC_OPTS "$OLDNAME" "$NEWNAME";then
echo -e "\r$OLDNAME: copy error, exiting" 1>&2
- rm -f "$NEWNAME"
exit 4
fi
- if [ "$OPT_CHECK" ] && ! cmp "$OLDNAME" "$NEWNAME"; then
+ if $OPT_CHECK && ! cmp -s "$OLDNAME" "$NEWNAME"; then
echo -e "\r$NEWNAME: compare failed, exiting" 1>&2
exit 8
fi
echo -e "\r$OLDNAME: rename error, exiting" 1>&2
exit 12
fi
- $ECHO "done"
+
+ $ECHO "done rsync"
+ for link in ${hlinks[*]}; do
+ if [ "$link" != "$OLDNAME" ]; then
+ ln -f "$OLDNAME" "$link"
+ fi
+ add_to_set "$fid" "$link"
+ done
+
+ # If the number of hlinks exceeds the space in the xattrs,
+ # when the final path is statted it will have a link count
+ # of 1 (all other links will point to the new inode).
+ # This flag indicates that even paths with a link count of
+ # 1 are potentially part of a link set.
+ (( ${#hlinks[*]} == 1 )) || RSYNC_WITH_HLINKS=true
done
}
if [ "$#" -eq 0 ]; then
- lfs_migrate
+ if $OPT_NULL; then
+ lfs_migrate
+ else
+ tr '\n' '\0' | lfs_migrate
+ fi
else
while [ "$1" ]; do
if [ -d "$1" ]; then
- lfs find "$1" -type f | lfs_migrate
+ $LFS find "$1" -type f -print0
else
- echo $1 | lfs_migrate
+ echo -en "$1\0"
fi
shift
- done
+ done | lfs_migrate
fi
+