X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fscripts%2Flfs_migrate;h=f4108407721ad4e32b96290738c10634b27325d6;hb=449c648793d2fc4e8eee3a2dd918379b75cc81e2;hp=7ec10510193dfff1e61dad468b7930a45a685fba;hpb=acc0a56bde36937beef89e6e36be03c98c681485;p=fs%2Flustre-release.git diff --git a/lustre/scripts/lfs_migrate b/lustre/scripts/lfs_migrate old mode 100644 new mode 100755 index 7ec1051..f410840 --- a/lustre/scripts/lfs_migrate +++ b/lustre/scripts/lfs_migrate @@ -1,6 +1,4 @@ #!/bin/bash -# set -x -set -e # lfs_migrate: a simple tool to copy and check files. # @@ -15,53 +13,189 @@ set -e # to be 100% safe the administrator needs to ensure this is safe. RSYNC=${RSYNC:-rsync} +OPT_RSYNC=${LFS_MIGRATE_RSYNC_MODE:-false} ECHO=echo LFS=${LFS:-lfs} +RSYNC_WITH_HLINKS=false +LFS_MIGRATE_TMP=${TMPDIR:-/tmp} +MIGRATED_SET="$(mktemp ${LFS_MIGRATE_TMP}/lfs_migrate.links.XXXXXX)" +NEWNAME="" +REMOVE_FID='s/^\[[0-9a-fx:]*\] //' +PROG=$(basename $0) + +add_to_set() { + local old_fid="$1" + local path="$2" + + echo "$old_fid $path" >> "$MIGRATED_SET" +} + +path_in_set() { + local path="$1" + + sed -e "$REMOVE_FID" $MIGRATED_SET | grep -q "^$path$" +} + +old_fid_in_set() { + local old_fid="$1" + + grep "^\\$old_fid" "$MIGRATED_SET" | head -n 1 | + sed -e "$REMOVE_FID" +} usage() { cat -- <&2 -usage: lfs_migrate [-c|-s] [-h] [-l] [-n] [-y] [file|dir ...] - -c compare file data after migrate (default) - -s skip file data comparison after migrate - -h show this usage message - -l migrate files with hard links (skip by default) - -n only print the names of files to be migrated - -q run quietly (don't print filenames or status) - -y answer 'y' to usage question +usage: lfs_migrate [--dry-run|-n] [--help|-h] [--no-rsync|--rsync] [--quiet|-q] + [--auto-stripe|-A [-C ] + [--min-free|-M ] [--max-free|-X ]] + [--pool|-p ] [--stripe-count|-c ] + [--stripe-size|-S ] + [-D] [-h] [-n] [-S] + [--restripe|-R] [--skip|-s] [--verbose|-v] [--yes|-y] [-0] + [FILE|DIR...] + -A restripe file using an automatically selected stripe count, + uses stripe_count = sqrt(size_in_GB) + 1 + -c + restripe file using the specified + -C when -A is set, limit the migrated file to use on each OST + at most 1/ of the available space of the smallest OST + -D do not use direct I/O to copy file contents + -h show this usage message + -M + when -A is set, an OST must contain more available space than + KB in order for it to be considered available for + use in the migration + --no-rsync do not fall back to rsync mode even if lfs migrate fails + -n only print the names of files to be migrated + -p use the specified OST pool for the destination file + -q run quietly (don't print filenames or status) + --rsync force rsync mode instead of using lfs migrate + -R restripe file using default directory striping + -s skip file data comparison after migrate + -S + restripe file using the specified stripe size + -v show verbose debug messages + -X + when -A is set, limit the amount of space on each OST that + can be considered available for the migration to + KB + -y answer 'y' to usage question + -0 input file names on stdin are separated by a null character + +Options '-A', '-c', and '-R' are mutually exclusive. +Options '-C', '-M', and '-X' are ignored if '-A' is not set. + +The --rsync and --no-rsync options may not be specified at the same time. If a directory is an argument, all files in the directory are migrated. If no file/directory is given, the file list is read from standard input. -e.g.: lfs_migrate /mnt/lustre/file +Any arguments that are not explicitly recognized by the script are passed +through to the 'lfs migrate' utility. + +Examples: + lfs_migrate /mnt/lustre/dir + lfs_migrate -p newpool /mnt/lustre/dir lfs find /test -O test-OST0004 -size +4G | lfs_migrate -y USAGE exit 1 } -OPT_CHECK=y - -while getopts "chlnqsy" opt $*; do - case $opt in - c) OPT_CHECK=y;; - l) OPT_NLINK=y;; - n) OPT_DRYRUN=n; OPT_YES=y;; - q) ECHO=:;; - s) OPT_CHECK="";; - y) OPT_YES=y;; - h|\?) usage;; - esac +cleanup() { + rm -f "$MIGRATED_SET" + [ -n "$NEWNAME" ] && rm -f "$NEWNAME" +} + +trap cleanup EXIT + +OPT_CHECK=true +OPT_DEBUG=false +OPT_DRYRUN=false +OPT_FILE=() +OPT_LAYOUT=() +OPT_COMP=false +OPT_NO_RSYNC=false +OPT_NO_DIRECT=false +OPT_NULL=false +OPT_PASSTHROUGH=() +OPT_POOL="" +OPT_RESTRIPE=false +OPT_YES=false +OPT_AUTOSTRIPE=false +OPT_STRIPE_COUNT="" +OPT_STRIPE_SIZE="" +OPT_MINFREE=262144 +OPT_MAXFREE="" +OPT_CAP=100 + +# Examine any long options and arguments. getopts does not support long +# options, so they must be stripped out and classified as either options +# for the script, or passed through to "lfs migrate". +while [ -n "$*" ]; do + arg="$1" + case "$arg" in + -h|--help) usage;; + -l|--link) ;; # maintained backward compatibility for now + -n) OPT_DRYRUN=true; OPT_YES=true + echo "$PROG: -n deprecated, use --dry-run or --non-block" 1>&2;; + --dry-run) OPT_DRYRUN=true; OPT_YES=true;; + -p|--pool) OPT_POOL="$arg $2"; OPT_LAYOUT+="$OPT_POOL "; shift;; + -q|--quiet) ECHO=:;; + -R|--restripe) OPT_RESTRIPE=true;; + -s|--skip) OPT_CHECK=false;; + -v|--verbose) OPT_DEBUG=true; ECHO=echo;; + -y|--yes) OPT_YES=true;; + -0) OPT_NULL=true;; + -b|--block|--non-block|--non-direct|-D|--no-verify) + # Always pass non-layout options to 'lfs migrate' + OPT_PASSTHROUGH+=("$arg");; + --rsync) OPT_RSYNC=true;; + --no-rsync) OPT_NO_RSYNC=true;; + --copy|--yaml|--file) OPT_COMP=true; + # these options have files as arguments, pass both through + OPT_LAYOUT+="$arg $2 "; shift;; + --auto-stripe|-A) OPT_AUTOSTRIPE=true;; + -C) OPT_CAP="$2"; shift;; + -M|--min-free) OPT_MINFREE="$2"; shift;; + -X|--max-free) OPT_MAXFREE="$2"; shift;; + -c|--stripe-count) OPT_STRIPE_COUNT="$2"; shift;; + -S|--stripe-size) OPT_STRIPE_SIZE="$2"; shift;; + *) # Pass other non-file layout options to 'lfs migrate' + [ -e "$arg" ] && OPT_FILE+="$arg " && break || OPT_LAYOUT+="$arg " + esac + shift done -shift $((OPTIND - 1)) -if [ -z "$OPT_YES" ]; then +if $OPT_RESTRIPE || $OPT_AUTOSTRIPE && [ -n "$OPT_LAYOUT" ]; then + echo "$PROG error: Options '$OPT_LAYOUT' can't be used with -R or -A" \ + 1>&2 + exit 1 +elif $OPT_RESTRIPE && [[ "$OPT_STRIPE_COUNT" || "$OPT_STRIPE_SIZE" ]]; then + echo "$PROG error: Option -R can't be used with -c or -S" 1>&2 + exit 1 +elif $OPT_AUTOSTRIPE && [ -n "$OPT_STRIPE_COUNT" ]; then + echo "$PROG error: Option -A can't be used with -c" 1>&2 + exit 1 +elif $OPT_AUTOSTRIPE && $OPT_RESTRIPE; then + echo "$PROG error: Option -A can't be used with -R" 1>&2 + exit 1 +fi + +if $OPT_RSYNC && $OPT_NO_RSYNC; then + echo "$PROG: Options --rsync and --no-rsync may not be" \ + "specified at the same time." 1>&2 + exit 1 +fi + +if ! $OPT_YES; then echo "" echo "lfs_migrate is currently NOT SAFE for moving in-use files." 1>&2 echo "Use it only when you are sure migrated files are unused." 1>&2 echo "" 1>&2 - echo "If emptying OST(s) that are not disabled on the MDS, new" 1>&2 - echo "files may use them. To prevent MDS allocating any files on" 1>&2 - echo "OSTNNNN run 'lctl --device %{fsname}-OSTNNNN-osc deactivate'" 1>&2 - echo "on the MDS." 1>&2 + echo "If emptying an OST that is active on the MDS, new files may" 1>&2 + echo "use it. To stop allocating any new objects on OSTNNNN run:" 1>&2 + echo " lctl set_param osp.-OSTNNNN*.max_create_count=0'" 1>&2 + echo "on each MDS using the OST(s) being emptied." 1>&2 echo -n "Continue? (y/n) " read CHECK [ "$CHECK" != "y" -a "$CHECK" != "yes" ] && exit 1 @@ -73,60 +207,319 @@ $RSYNC --help 2>&1 | grep -q acls && RSYNC_OPTS="$RSYNC_OPTS -A" # If rsync copies lustre xattrs in the future, then we can skip lfs (bug 22189) strings $(which $RSYNC) 2>&1 | grep -q lustre && LFS=: +# rsync creates its temporary files with lenient permissions, even if +# permissions on the original files are more strict. Tighten umask here +# to avoid the brief window where unprivileged users might be able to +# access the temporary file. +umask 0077 + +# Use stripe count = sqrt(size_in_GB) + 1, but cap object size per OST. +function calc_stripe() +{ + local filename=$1 + local filekb=$2 + local obj_max_kb=$3 + local filegb=$((filekb / 1048576)) + local stripe_count=1 + local ost_max_count=0 + + # Files up to 1GB will have 1 stripe if they fit within the object max + if [[ $filegb -lt 1 && "$obj_max_kb" && $filekb -le $obj_max_kb ]]; then + echo 1 "$obj_max_kb" && return + fi + + stripe_count=$(bc <<< "scale=0; 1 + sqrt($filegb)" 2> /dev/null) || + { echo "cannot auto calculate stripe count" >&2; return; } + + if [ -z "$obj_max_kb" ]; then + local ost_min_kb=$((1 << 62)) + + # Calculate cap on object size at 1% of smallest OST + # but only include OSTs that have 256MB+ available space + while IFS='' read avail; do + [[ "$OPT_MAXFREE" && $avail -gt $OPT_MAXFREE ]] && + avail=$OPT_MAXFREE + if [ $avail -ge $OPT_MINFREE ]; then + ost_max_count=$((ost_max_count + 1)) + if [ $avail -lt $ost_min_kb ]; then + ost_min_kb=$avail + fi + fi + done < <($LFS df $OPT_POOL $OLDNAME | awk '/OST/ { print $4 }') + + if [ $ost_max_count -eq 0 ]; then + # no OSTs with enough space, stripe over all of them + echo "-1" "0" + return + fi + + if (( ost_min_kb == (1 << 62) )); then + echo "warning: unable to determine minimum OST size, " \ + "object size not capped" >&2 + echo "$stripe_count" "0" + return + fi + + obj_max_kb=$((ost_min_kb / $OPT_CAP)) + elif [ $obj_max_kb -eq 0 ]; then + echo "warning: unable to determine minimum OST size " \ + "from previous migrate, object size not capped" >&2 + echo "$stripe_count" "$obj_max_kb" + return + fi + + # If disk usage would exceed the cap, increase the number of stripes. + # Round up to the nearest MB to ensure file will fit. + (( filekb > stripe_count * obj_max_kb )) && + stripe_count=$(((filekb + obj_max_kb - 1024) / obj_max_kb)) + + # Limit the count to the number of eligible OSTs + if [ "$stripe_count" -gt $ost_max_count ]; then + echo "$ost_max_count" "$obj_max_kb" + else + echo "$stripe_count" "$obj_max_kb" + fi +} + lfs_migrate() { - while read OLDNAME; do + local last_dev + local mntpoint + + while IFS='' read -d '' OLDNAME; do + local hlinks=() + local layout + local fid + $ECHO -n "$OLDNAME: " - # avoid duplicate stat if possible - TYPE_LINK=($(stat -c "%h %F" "$OLDNAME" || true)) + # avoid duplicate stat call by fetching all attrs at once + local nlink_idx_link=0 # %h is the hard link count + local nlink_idx_type=1 # %F is "regular file", ignore others + local nlink_idx_file=2 # "file" is here + local nlink_idx_size=3 # %s is file size in bytes + local nlink_idx_dev=4 # %D is the underlying device number + # nlink_type=(1 regular file 1234 0x810) + local nlink_type=($(LANG=C stat -c "%h %F %s %D" "$OLDNAME" \ + 2> /dev/null)) # skip non-regular files, since they don't have any objects # and there is no point in trying to migrate them. - if [ "${TYPE_LINK[1]}" != "regular" ]; then - echo -e "not a regular file, skipped" + if [ "${nlink_type[$nlink_idx_type]}" != "regular" ]; then + echo -e "\r$OLDNAME: not a regular file, skipped" 1>&2 continue fi - if [ -z "$OPT_NLINK" -a ${TYPE_LINK[0]} -gt 1 ]; then - echo -e "multiple hard links, skipped" + # working out write perms is hard, let the shell do it + if [ ! -w "$OLDNAME" ]; then + echo -e "\r$OLDNAME: no write permission, skipped" 1>&2 continue fi - # working out write perms is hard, let the shell do it - if [ ! -w "$OLDNAME" ]; then - echo -e "no write permission, skipped" + if $OPT_DRYRUN && ! $OPT_DEBUG; then + $ECHO "dry run, skipped" continue fi - if [ "$OPT_DRYRUN" ]; then - echo -e "dry run, skipped" + # xattrs use absolute file paths, so ensure provided path is + # also absolute so that the names can be compared + local oldname_absolute=$(readlink -f "$OLDNAME") + if [ -z "$oldname_absolute" ]; then + echo -e "\r$OLDNAME: cannot resolve full path, skipped" 1>&2 + continue + fi + OLDNAME=$oldname_absolute + + if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]] || + $RSYNC_WITH_HLINKS; then + fid=$($LFS path2fid "$OLDNAME" 2> /dev/null) + if [ $? -ne 0 ]; then + echo -e "\r$OLDNAME: cannot get FID, skipping; is this a Lustre file system?" 1>&2 + continue + fi + + # don't migrate a hard link if it was already migrated + if path_in_set "$OLDNAME"; then + $ECHO "already migrated via another hard link" + continue + fi + + # There is limited space available in the xattrs + # to store all of the hard links for a file, so it's + # possible that $OLDNAME is part of a link set but is + # not listed in xattrs and therefore not listed as + # being migrated. + local migrated=$(old_fid_in_set "$fid") + if [ -n "$migrated" ]; then + $ECHO "already migrated via another hard link" + # Only the rsync case has to relink. The + # "lfs migrate" case keeps the same inode so + # all of the links are already correct. + $OPT_RSYNC && [ "$migrated" != "$OLDNAME" ] && + ln -f "$migrated" "$OLDNAME" + + add_to_set "$fid" "$OLDNAME" + continue; + fi + fi + + local stripe_size="$OPT_STRIPE_SIZE" + local stripe_count="$OPT_STRIPE_COUNT" + local stripe_opts="-N --comp-count -c -S -p -y" + local parent_count="" + local parent_size="" + local stripe_pool="${OPT_POOL#-p }" + local mirror_count=1 + local comp_count=0 + # avoid multiple getstripe calls + # lcm_mirror_count: 1 + # lcm_entry_count: 0 + # lmm_stripe_count: 1 + # lmm_stripe_size: 1048576 + # lmm_pool: pool_abc + local l_mirror_count=0 + local l_comp_count=1 + local l_stripe_count=2 + local l_stripe_size=3 + local l_stripe_pool=4 + local layout_info=($($LFS getstripe $stripe_opts $OLDNAME \ + 2>/dev/null | awk '{ print $2 }')) + + layout="${OPT_PASSTHROUGH[@]} " + + if $OPT_RESTRIPE; then + UNLINK="" + layout+="--copy $(dirname $OLDNAME)" + OPT_COMP=true + else + # If rsync copies Lustre xattrs properly in the future + # (i.e. before the file data, so that it preserves + # striping) then we don't need this getstripe stuff. + UNLINK="-u" + + [ -n "$OPT_POOL" ] || + stripe_pool=${layout_info[$l_stripe_pool]} + mirror_count=${layout_info[$l_mirror_count]} + + if $OPT_AUTOSTRIPE; then + local filekb=$((${nlink_type[$nlink_idx_size]} / + 1024)) + + read stripe_count OBJ_MAX_KB < <(calc_stripe \ + "$OLDNAME" "$filekb" "$OBJ_MAX_KB") + [ -z "$stripe_count" ] && exit 1 + [ $stripe_count -lt 1 ] && stripe_count=1 + else + [ -n "$stripe_count" ] || + stripe_count=${layout_info[$l_stripe_count]} + fi + [ -n "$stripe_size" ] || + stripe_size=${layout_info[$l_stripe_size]} + + [ -z "$stripe_count" -o -z "$stripe_size" ] && UNLINK="" + fi + + if $OPT_DEBUG; then + local parent_count + local parent_size + local parent_layout + + if $OPT_RESTRIPE; then + parent_layout=($($LFS getstripe $stripe_opts \ + -d $(dirname $OLDNAME) 2>/dev/null | + awk '{print $2 }')) + parent_count=${parent_layout[$l_stripe_count]} + parent_size=${parent_layout[$l_stripe_size]} + stripe_pool=${parent_layout[$l_stripe_pool]} + mirror_count=${parent_layout[$l_mirror_count]} + fi + + $ECHO -n "stripe_count=${stripe_count:-$parent_count},stripe_size=${stripe_size:-$parent_size}" + [ -n "$stripe_pool" ] && + $ECHO -n ",pool=${stripe_pool}" + [[ $mirror_count -gt 1 ]] && + $ECHO -n ",mirror_count=${mirror_count}" + $ECHO -n " " + fi + + if $OPT_DRYRUN; then + $ECHO " dry run, skipped" continue fi + if ! $OPT_COMP && [ ${layout_info[$l_comp_count]} -gt 0 ]; then + layout+="--copy $OLDNAME" + OPT_COMP=true + fi + if ! $OPT_COMP; then + [ -n "$stripe_count" ] && layout+="-c $stripe_count " + [ -n "$stripe_size" ] && layout+="-S $stripe_size " + [ -n "$OPT_POOL" -a -n "$stripe_pool" ] && + layout+="-p $stripe_pool " + [[ $mirror_count -gt 1 ]] && layout+="-N $mirror_count " + fi + layout+="$OPT_LAYOUT" + + # detect other hard links and store them on a global + # list so we don't re-migrate them + if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]]; then + [ "${nlink_type[$nlink_idx_dev]}" == "$last_dev" ] || + mntpoint=$(df -P "$OLDNAME" | + awk 'NR==2 { print $NF }') + if [ -z "$mntpoint" ]; then + echo -e "\r$OLDNAME: cannot determine mount point; skipped" 1>&2 + continue + fi + hlinks=$($LFS fid2path "$mntpoint" "$fid" 2> /dev/null) + if $OPT_RSYNC && [ $? -ne 0 ]; then + echo -e "\r$OLDNAME: cannot determine hard link paths, skipped" 1>&2 + continue + fi + hlinks+=("$OLDNAME") + else + hlinks= + fi - # if rsync copies Lustre xattrs properly in the future - # (i.e. before the file data, so that it preserves striping) - # then we don't need to do this getstripe/mktemp stuff. - UNLINK="-u" - COUNT=$($LFS getstripe -c "$OLDNAME" 2> /dev/null) - SIZE=$($LFS getstripe -s "$OLDNAME" 2> /dev/null) - [ -z "$COUNT" -o -z "$SIZE" ] && UNLINK="" - NEWNAME=$(mktemp $UNLINK "$OLDNAME.tmp.XXXXXX") + # first try to migrate via Lustre tools, then fall back to rsync + if ! $OPT_RSYNC; then + $OPT_DEBUG && echo -e "\n$LFS migrate $layout $OLDNAME" + if $LFS migrate $layout "$OLDNAME"; then + $ECHO "done" + # no-op if hlinks empty for 1-link files + for link in ${hlinks[*]}; do + add_to_set "$fid" "$link" + done + continue + elif $OPT_NO_RSYNC; then + echo -e "\r$OLDNAME: refusing to fall back to rsync, skipped" 1>&2 + continue + else + $ECHO -n "falling back to rsync: " + OPT_RSYNC=true + fi + fi + + local olddir=$(dirname $OLDNAME) + local oldfile=$(basename $OLDNAME) + NEWNAME=$(mktemp $UNLINK "$olddir/.$oldfile.XXXXXX") if [ $? -ne 0 -o -z "$NEWNAME" ]; then - echo -e "\r$OLDNAME: can't make temp file, skipped" 1>&2 + echo -e "\r$OLDNAME: cannot make temp file, skipped" 1>&2 continue fi - [ "$UNLINK" ] && $LFS setstripe -c${COUNT} -s${SIZE} "$NEWNAME" + if [ "$UNLINK" ]; then + if ! $LFS setstripe $layout "$NEWNAME"; then + echo -e "\r$NEWNAME: setstripe failed, exiting" 1>&2 + exit 2 + fi + fi # we use --inplace, since we created our own temp file already if ! $RSYNC -a --inplace $RSYNC_OPTS "$OLDNAME" "$NEWNAME";then echo -e "\r$OLDNAME: copy error, exiting" 1>&2 - rm -f "$NEWNAME" exit 4 fi - if [ "$OPT_CHECK" ] && ! cmp "$OLDNAME" "$NEWNAME"; then + if $OPT_CHECK && ! cmp -s "$OLDNAME" "$NEWNAME"; then echo -e "\r$NEWNAME: compare failed, exiting" 1>&2 exit 8 fi @@ -135,19 +528,39 @@ lfs_migrate() { echo -e "\r$OLDNAME: rename error, exiting" 1>&2 exit 12 fi - $ECHO "done" + + $ECHO "done rsync" + # no-op if hlinks empty for 1-link files + for link in ${hlinks[*]}; do + if [ "$link" != "$OLDNAME" ]; then + ln -f "$OLDNAME" "$link" + fi + add_to_set "$fid" "$link" + done + + # If the number of hlinks exceeds the space in the xattrs, + # when the final path is statted it will have a link count + # of 1 (all other links will point to the new inode). + # This flag indicates that even paths with a link count of + # 1 are potentially part of a link set. + (( ${#hlinks[*]} == 1 )) || RSYNC_WITH_HLINKS=true done } if [ "$#" -eq 0 ]; then - lfs_migrate + if $OPT_NULL; then + lfs_migrate + else + tr '\n' '\0' | lfs_migrate + fi else while [ "$1" ]; do if [ -d "$1" ]; then - lfs find "$1" -type f | lfs_migrate + $LFS find "$1" -type f -print0 else - echo $1 | lfs_migrate + echo -en "$1\0" fi shift - done + done | lfs_migrate fi +