#!/bin/bash
# lfs_migrate: a simple tool to copy and check files.
#
# To avoid allocating objects on one or more OSTs, they should be
# deactivated on the MDS via "lctl --device {device_number} deactivate",
# where {device_number} is from the output of "lctl dl" on the MDS.
#
# To guard against corruption, the file is compared after migration
# to verify the copy is correct and the file has not been modified.
# This does not protect against the file being open by another process;
# it only catches the worst cases of in-use files.  To be 100% safe the
# administrator needs to ensure the files are not in use.

RSYNC=${RSYNC:-rsync}
# NOTE: OPT_RSYNC is later executed as a command, so the environment
# variable must be set to "true" or "false".
OPT_RSYNC=${LFS_MIGRATE_RSYNC_MODE:-false}
ECHO=echo
LFS=${LFS:-lfs}
RSYNC_WITH_HLINKS=false
LFS_MIGRATE_TMP=${TMPDIR:-/tmp}
PROG=$(basename "$0")
# File holding one "<fid> <path>" record per migrated hard link.
MIGRATED_SET="$(mktemp "${LFS_MIGRATE_TMP}/lfs_migrate.links.XXXXXX")" || {
	echo "$PROG: cannot create temporary file in $LFS_MIGRATE_TMP" 1>&2
	exit 1
}
NEWNAME=""
# sed expression stripping the leading "[fid] " from a MIGRATED_SET record
REMOVE_FID='s/^\[[0-9a-fx:]*\] //'

# Record that the hard link $2 of the file with FID $1 has been migrated.
add_to_set() {
	local old_fid="$1"
	local path="$2"

	printf '%s %s\n' "$old_fid" "$path" >> "$MIGRATED_SET"
}

# Return 0 if path $1 was already recorded by add_to_set.
# The path is matched as a fixed whole-line string, so file names that
# contain regular expression metacharacters are compared literally.
path_in_set() {
	local path="$1"

	sed -e "$REMOVE_FID" "$MIGRATED_SET" | grep -qxF -- "$path"
}

# Print the first recorded path for FID $1 (nothing if there is none).
# The FID starts with '[', which is escaped so that grep does not treat
# it as the start of a bracket expression.
old_fid_in_set() {
	local old_fid="$1"

	grep "^\\$old_fid" "$MIGRATED_SET" | head -n 1 | sed -e "$REMOVE_FID"
}

# Print the usage message on stderr and exit with status 1.
usage() {
	cat -- <<USAGE 1>&2
usage: lfs_migrate [--dry-run|-n] [--help|-h] [--no-rsync|--rsync] [--quiet|-q]
                   [--auto-stripe|-A [-C <cap>]
                   [--min-free|-M <min_free>] [--max-free|-X <max_free>]]
                   [--stripe-count|-c <stripe_count>]
                   [--stripe-size|-S <stripe_size>]
                   [-D] [-h] [-n] [-S]
                   [--restripe|-R] [--skip|-s] [--verbose|-v] [--yes|-y] [-0]
                   [FILE|DIR...]
        -A         restripe file using an automatically selected stripe count,
                   uses stripe_count = sqrt(size_in_GB) + 1
        -c <stripe_count>
                   restripe file using the specified <stripe_count>
        -C <cap>   when -A is set, limit the migrated file to use on each OST at
                   most 1/<cap> of the available space of the smallest OST
        -D         do not use direct I/O to copy file contents
        -h         show this usage message
        -M <min_free>
                   when -A is set, an OST must contain more available space than
                   <min_free> KB in order for it to be considered available for
                   use in the migration
        --no-rsync do not fall back to rsync mode even if lfs migrate fails
        -n         only print the names of files to be migrated
        -q         run quietly (don't print filenames or status)
        --rsync    force rsync mode instead of using lfs migrate
        -R         restripe file using default directory striping
        -s         skip file data comparison after migrate
        -S <stripe_size>
                   restripe file using the specified stripe size
        -v         show verbose debug messages
        -X <max_free>
                   when -A is set, limit the amount of space on each OST that
                   can be considered available for the migration to
                   <max_free> KB
        -y         answer 'y' to usage question
        -0         input file names on stdin are separated by a null character

Options '-A', '-c', and '-R' are mutually exclusive.
Options '-C', '-M', and '-X' are ignored if '-A' is not set.

The --rsync and --no-rsync options may not be specified at the same time.

If a directory is an argument, all files in the directory are migrated.
If no file/directory is given, the file list is read from standard input.

Any arguments that are not explicitly recognized by the script are passed
through to the 'lfs migrate' utility.

Examples:
      lfs_migrate /mnt/lustre/dir
      lfs_migrate -p newpool /mnt/lustre/dir
      lfs find /test -O test-OST0004 -size +4G | lfs_migrate -y
USAGE
	exit 1
}

# EXIT handler: remove the hard link record file and any leftover
# temporary copy named by NEWNAME.
cleanup() {
	rm -f "$MIGRATED_SET"
	if [ -n "$NEWNAME" ]; then
		rm -f "$NEWNAME"
	fi
}

trap cleanup EXIT

OPT_CHECK=true
OPT_DEBUG=false
OPT_DRYRUN=false
OPT_FILE=()
# OPT_LAYOUT is used as a single space-separated string (element 0).
OPT_LAYOUT=()
OPT_NO_RSYNC=false
OPT_NO_DIRECT=false
OPT_NULL=false
OPT_PASSTHROUGH=()
OPT_RESTRIPE=false
OPT_YES=false
LFS_OPT_DIRECTIO=""
OPT_AUTOSTRIPE=false
OPT_STRIPE_COUNT=""
OPT_STRIPE_SIZE=""
OPT_MINFREE=262144
OPT_MAXFREE=""
OPT_CAP=100

# Examine any long options and arguments.  getopts does not support long
# options, so they must be stripped out and classified as either options
# for the script, or passed through to "lfs migrate".
while [ -n "$*" ]; do
	arg="$1"
	case "$arg" in
	-h|--help)
		usage;;
	-l|--link)
		;; # maintained backward compatibility for now
	-n|--dry-run)
		OPT_DRYRUN=true
		OPT_YES=true
		# only the short form is deprecated
		if [ "$arg" = "-n" ]; then
			echo "$PROG: -n deprecated, use --dry-run or --non-block" 1>&2
		fi;;
	-q|--quiet)
		ECHO=:;;
	-R|--restripe)
		OPT_RESTRIPE=true;;
	-s|--skip)
		OPT_CHECK=false;;
	-v|--verbose)
		OPT_DEBUG=true
		ECHO=echo;;
	-y|--yes)
		OPT_YES=true;;
	-0)
		OPT_NULL=true;;
	-b|--block|--non-block|--non-direct|--no-verify)
		# Always pass non-layout options to 'lfs migrate'
		OPT_PASSTHROUGH+=("$arg");;
	--rsync)
		OPT_RSYNC=true;;
	--no-rsync)
		OPT_NO_RSYNC=true;;
	--copy|--yaml|--file)
		# these options have files as arguments, pass both through;
		# the trailing space keeps the next option separate
		OPT_LAYOUT+="$arg $2 "
		shift;;
	--auto-stripe|-A)
		OPT_AUTOSTRIPE=true;;
	-C)
		OPT_CAP="$2"
		shift;;
	-D)
		LFS_OPT_DIRECTIO="-D";;
	-M|--min-free)
		OPT_MINFREE="$2"
		shift;;
	-X|--max-free)
		OPT_MAXFREE="$2"
		shift;;
	-c|--stripe-count)
		OPT_STRIPE_COUNT="$2"
		shift;;
	-S|--stripe-size)
		OPT_STRIPE_SIZE="$2"
		shift;;
	*)
		# The first argument naming an existing path ends option
		# parsing; it is left in "$@" for the main loop.  Anything
		# else is passed to 'lfs migrate' as a layout option.
		if [ -e "$arg" ]; then
			OPT_FILE+="$arg "
			break
		else
			OPT_LAYOUT+="$arg "
		fi;;
	esac
	shift
done

if $OPT_RESTRIPE && [ -n "$OPT_LAYOUT" ]; then
	echo "$PROG: Options $OPT_LAYOUT cannot be used with the -R option" 1>&2
	exit 1
elif $OPT_RESTRIPE && [[ "$OPT_STRIPE_COUNT" || "$OPT_STRIPE_SIZE" ]]; then
	echo "$PROG: Options -c and -S may not be specified at the same time as the -R option." 1>&2
	exit 1
elif $OPT_AUTOSTRIPE && [ -n "$OPT_STRIPE_COUNT" ]; then
	echo "" 1>&2
	echo "$PROG error: The -c option may not" 1>&2
	echo "be specified at the same time as the -A option." 1>&2
	exit 1
elif $OPT_AUTOSTRIPE && $OPT_RESTRIPE; then
	echo "" 1>&2
	echo "$PROG error: The -A option may not be specified at" 1>&2
	echo "the same time as the -R option." 1>&2
	exit 1
fi

if $OPT_RSYNC && $OPT_NO_RSYNC; then
	echo "$PROG: Options --rsync and --no-rsync may not be" \
		"specified at the same time." 1>&2
	exit 1
fi

# Ask for confirmation unless -y (or a dry run) was requested.  The answer
# is read from stdin, so a file list piped on stdin requires -y.
if ! $OPT_YES; then
	echo "" 1>&2
	echo "lfs_migrate is currently NOT SAFE for moving in-use files." 1>&2
	echo "Use it only when you are sure migrated files are unused." 1>&2
	echo "" 1>&2
	echo "If emptying an OST that is active on the MDS, new files may" 1>&2
	echo "use it.  To stop allocating any new objects on OSTNNNN run:" 1>&2
	echo "  lctl set_param osp.<fsname>-OSTNNNN*.max_create_count=0" 1>&2
	echo "on each MDS using the OST(s) being emptied." 1>&2
	echo -n "Continue? (y/n) "
	read -r CHECK
	if [ "$CHECK" != "y" ] && [ "$CHECK" != "yes" ]; then
		exit 1
	fi
fi

# if rsync has --xattr support, then try to copy the xattrs.
$RSYNC --help 2>&1 | grep -q xattr && RSYNC_OPTS="$RSYNC_OPTS -X"
$RSYNC --help 2>&1 | grep -q acls && RSYNC_OPTS="$RSYNC_OPTS -A"
# If rsync copies lustre xattrs in the future, then we can skip lfs (bug 22189)
# Only inspect the binary when it can be located: "strings" without a file
# operand would read (and consume) standard input instead.
RSYNC_PATH=$(command -v "$RSYNC" 2> /dev/null)
if [ -n "$RSYNC_PATH" ] && strings "$RSYNC_PATH" 2>&1 | grep -q lustre; then
	LFS=:
fi

# rsync creates its temporary files with lenient permissions, even if
# permissions on the original files are more strict. Tighten umask here
# to avoid the brief window where unprivileged users might be able to
# access the temporary file.
umask 0077

# Use stripe count = sqrt(size_in_GB) + 1, but cap object size per OST.
# Compute the stripe count for a file being migrated with -A.
# Arguments:
#   $1 - path of the file (used to select the filesystem for "lfs df")
#   $2 - file size in KB
#   $3 - per-OST object size cap in KB from a previous call, or empty
# Globals read: LFS, OPT_MAXFREE, OPT_MINFREE, OPT_CAP
# Output: "<stripe_count> <obj_max_kb>" on stdout; nothing on stdout when
#         the count cannot be determined (a message is printed on stderr).
calc_stripe() {
	local filename=$1
	local filekb=$2
	local obj_max_kb=$3
	local filegb=$((filekb / 1048576))
	local stripe_count=1
	local ost_max_count=0
	local avail

	# Files up to 1GB will have 1 stripe if they fit within the object max
	if [[ $filegb -lt 1 && "$obj_max_kb" && $filekb -le $obj_max_kb ]]; then
		echo 1 "$obj_max_kb" && return
	fi

	stripe_count=$(bc <<< "scale=0; 1 + sqrt($filegb)" 2> /dev/null) ||
		{ echo "cannot auto calculate stripe count" >&2; return; }

	if [ -z "$obj_max_kb" ]; then
		local ost_min_kb=$((1 << 62))

		# Calculate cap on object size at 1/OPT_CAP of the smallest OST
		# but only include OSTs that have at least OPT_MINFREE KB
		# available.  Non-numeric fields are dropped so that they are
		# not handed to the numeric tests below.
		while IFS='' read -r avail; do
			[[ "$OPT_MAXFREE" && $avail -gt $OPT_MAXFREE ]] &&
				avail=$OPT_MAXFREE
			if [ "$avail" -ge "$OPT_MINFREE" ]; then
				ost_max_count=$((ost_max_count + 1))
				if [ "$avail" -lt "$ost_min_kb" ]; then
					ost_min_kb=$avail
				fi
			fi
		done < <($LFS df "$filename" |
			 awk '/OST/ && $4 ~ /^[0-9]+$/ { print $4 }')

		# Once this script supports pools, the lfs df command above
		# should also include the -p option to restrict the
		# listed OSTs to the correct pool.

		if [ "$ost_max_count" -eq 0 ]; then
			echo "no OSTs with sufficient available space" >&2
			return
		fi

		if (( ost_min_kb == (1 << 62) )); then
			echo "warning: unable to determine minimum OST size, " \
			     "object size not capped" >&2
			obj_max_kb=0
			echo "$stripe_count" "$obj_max_kb"
			return
		fi

		obj_max_kb=$((ost_min_kb / OPT_CAP))
	elif [ "$obj_max_kb" -eq 0 ]; then
		echo "warning: unable to determine minimum OST size " \
		     "from previous migrate, object size not capped" >&2
		echo "$stripe_count" "$obj_max_kb"
		return
	fi

	# If disk usage would exceed the cap, increase the number of stripes.
	# Round up to the nearest MB to ensure file will fit.
	(( filekb > stripe_count * obj_max_kb )) &&
		stripe_count=$(((filekb + obj_max_kb - 1024) / obj_max_kb))

	# Limit the count to the number of eligible OSTs.  The OSTs are only
	# counted when the cap is computed in this call; with a cached cap
	# ost_max_count is still 0 and must not be used as a limit, otherwise
	# every file after the first would be reduced to a single stripe.
	if [ "$ost_max_count" -gt 0 ] &&
	   [ "$stripe_count" -gt "$ost_max_count" ]; then
		echo "$ost_max_count" "$obj_max_kb"
	else
		echo "$stripe_count" "$obj_max_kb"
	fi
}

# Migrate every file named on standard input (NUL-separated paths).
# Each file is first migrated with "lfs migrate"; if that fails and rsync
# fallback is allowed, it is copied to a temporary file with rsync,
# optionally compared, and renamed over the original.
# Globals read:    ECHO, LFS, RSYNC, RSYNC_OPTS, OPT_* settings
# Globals written: OLDNAME, NEWNAME, UNLINK, OBJ_MAX_KB, OPT_RSYNC,
#                  RSYNC_WITH_HLINKS
# Exits the (sub)shell with 1, 2, 4, 8 or 12 on fatal errors.
lfs_migrate() {
	local last_dev=""
	local mntpoint=""
	local link

	# -r keeps backslashes in file names intact
	while IFS='' read -r -d '' OLDNAME; do
		local hlinks=()
		local stripe_size="$OPT_STRIPE_SIZE"
		local stripe_count="$OPT_STRIPE_COUNT"
		local parent_count=""
		local parent_size=""
		local stripe_pool=""
		local mirror_count=""
		local layout=""
		local fid=""

		$ECHO -n "$OLDNAME: "

		# avoid duplicate stat call by fetching all attrs at once.
		# %F is printed last because it expands to a variable number
		# of words ("regular file", "regular empty file", ...), which
		# would otherwise shift the fields that follow it.
		local nlink_idx_link=0	# %h is the hard link count
		local nlink_idx_size=1	# %s is file size in bytes
		local nlink_idx_dev=2	# %D is the underlying device number
		local nlink_idx_type=3	# first word of %F, "regular" for files
		# nlink_type=(1 1234 810 regular file)
		local nlink_type=($(LANG=C stat -c "%h %s %D %F" "$OLDNAME" \
				 2> /dev/null))
		local nlink="${nlink_type[$nlink_idx_link]}"
		local dev="${nlink_type[$nlink_idx_dev]}"

		# skip non-regular files, since they don't have any objects
		# and there is no point in trying to migrate them.
		if [ "${nlink_type[$nlink_idx_type]}" != "regular" ]; then
			printf '\r%s: not a regular file, skipped\n' \
				"$OLDNAME" 1>&2
			continue
		fi

		# working out write perms is hard, let the shell do it
		if [ ! -w "$OLDNAME" ]; then
			printf '\r%s: no write permission, skipped\n' \
				"$OLDNAME" 1>&2
			continue
		fi

		if $OPT_DRYRUN && ! $OPT_DEBUG; then
			$ECHO "dry run, skipped"
			continue
		fi

		# xattrs use absolute file paths, so ensure provided path is
		# also absolute so that the names can be compared
		local oldname_absolute
		oldname_absolute=$(readlink -f "$OLDNAME")
		if [ -z "$oldname_absolute" ]; then
			printf '\r%s: cannot resolve full path, skipped\n' \
				"$OLDNAME" 1>&2
			continue
		fi
		OLDNAME=$oldname_absolute

		if [[ $nlink -gt 1 ]] || $RSYNC_WITH_HLINKS; then
			if ! fid=$($LFS path2fid "$OLDNAME" 2> /dev/null); then
				printf '\r%s: cannot get FID, skipping; is this a Lustre file system?\n' \
					"$OLDNAME" 1>&2
				continue
			fi

			# don't migrate a hard link if it was already migrated
			if path_in_set "$OLDNAME"; then
				$ECHO "already migrated via another hard link"
				continue
			fi

			# There is limited space available in the xattrs
			# to store all of the hard links for a file, so it's
			# possible that $OLDNAME is part of a link set but is
			# not listed in xattrs and therefore not listed as
			# being migrated.
			local migrated
			migrated=$(old_fid_in_set "$fid")
			if [ -n "$migrated" ]; then
				$ECHO "already migrated via another hard link"
				# Only the rsync case has to relink.  The
				# "lfs migrate" case keeps the same inode so
				# all of the links are already correct.
				if $OPT_RSYNC && [ "$migrated" != "$OLDNAME" ]; then
					ln -f "$migrated" "$OLDNAME"
				fi

				add_to_set "$fid" "$OLDNAME"
				continue
			fi
		fi

		if $OPT_RESTRIPE; then
			UNLINK=""
		else
			# If rsync copies Lustre xattrs properly in the future
			# (i.e. before the file data, so that it preserves
			# striping) then we don't need this getstripe stuff.
			UNLINK="-u"

			stripe_pool=$($LFS getstripe -p "$OLDNAME" 2> /dev/null)
			# NOTE(review): OLDFILE is not set anywhere in this
			# script, so this lookup presumably never yields a
			# count and no -N option reaches lfs.  OLDNAME looks
			# like the intended name, but switching to it would
			# start passing "-N <count>" for every file; confirm
			# against the lfs option parsing before changing it.
			mirror_count=$($LFS getstripe -N "$OLDFILE" 2> /dev/null)

			if $OPT_AUTOSTRIPE; then
				local filekb=$((${nlink_type[$nlink_idx_size]} /
						1024))

				# OBJ_MAX_KB is kept between files so that the
				# OSTs are only queried once
				read -r stripe_count OBJ_MAX_KB < <(calc_stripe \
					"$OLDNAME" "$filekb" "$OBJ_MAX_KB")
				[ -z "$stripe_count" ] && exit 1
				[ "$stripe_count" -lt 1 ] && stripe_count=1
			elif [ -n "$OPT_STRIPE_COUNT" ]; then
				stripe_count=$OPT_STRIPE_COUNT
			else
				stripe_count=$($LFS getstripe -c "$OLDNAME" \
					2> /dev/null)
			fi
			[ -z "$stripe_size" ] &&
				stripe_size=$($LFS getstripe -S "$OLDNAME" \
					2> /dev/null)

			# without a known layout the temp file is created by
			# mktemp itself instead of by "lfs setstripe"
			[[ -z "$stripe_count" || -z "$stripe_size" ]] &&
				UNLINK=""
		fi

		if $OPT_DEBUG; then
			# Values looked up only for display are kept in their
			# own variables so that verbose mode does not change
			# the layout options used for the migration.
			local show_pool="$stripe_pool"
			local show_mirror="$mirror_count"

			if $OPT_RESTRIPE; then
				local parent_dir
				parent_dir=$(dirname "$OLDNAME")
				parent_count=$($LFS getstripe -c \
					"$parent_dir" 2> /dev/null)
				parent_size=$($LFS getstripe -S \
					"$parent_dir" 2> /dev/null)
				show_pool=$($LFS getstripe --pool \
					"$parent_dir" 2> /dev/null)
				show_mirror=$($LFS getstripe -N \
					"$parent_dir" 2> /dev/null)
			fi

			$ECHO -n "stripe_count=${stripe_count:-$parent_count},stripe_size=${stripe_size:-$parent_size}"
			[ -n "$show_pool" ] &&
				$ECHO -n ",pool=${show_pool}"
			[ -n "$show_mirror" ] &&
				$ECHO -n ",mirror_count=${show_mirror}"
			$ECHO -n " "
		fi

		if $OPT_DRYRUN; then
			$ECHO " dry run, skipped"
			continue
		fi

		[ -n "$stripe_count" ] && stripe_count="-c $stripe_count"
		[ -n "$stripe_size" ] && stripe_size="-S $stripe_size"
		[ -n "$stripe_pool" ] && stripe_pool="-p $stripe_pool"
		[ -n "$mirror_count" ] && mirror_count="-N $mirror_count"
		# $layout is expanded unquoted on purpose: it holds several
		# space-separated options
		layout="$stripe_count $stripe_size $stripe_pool $mirror_count $OPT_LAYOUT"

		# detect other hard links and store them on a global
		# list so we don't re-migrate them
		if [[ $nlink -gt 1 ]]; then
			# the mount point is only looked up again when the
			# device changes (or the last lookup failed)
			if [ "$dev" != "$last_dev" ] || [ -z "$mntpoint" ]; then
				mntpoint=$(df -P "$OLDNAME" |
					   awk 'NR==2 { print $NF }')
				last_dev=$dev
			fi
			if [ -z "$mntpoint" ]; then
				printf '\r%s: cannot determine mount point; skipped\n' \
					"$OLDNAME" 1>&2
				continue
			fi

			# keep the exit status of fid2path itself
			local hlinks_out
			local hlinks_rc
			hlinks_out=$($LFS fid2path "$mntpoint" "$fid" \
				2> /dev/null)
			hlinks_rc=$?
			if $OPT_RSYNC && [ "$hlinks_rc" -ne 0 ]; then
				printf '\r%s: cannot determine hard link paths, skipped\n' \
					"$OLDNAME" 1>&2
				continue
			fi

			# one path per output line; paths may contain spaces
			local have_oldname=false
			while IFS='' read -r link; do
				[ -n "$link" ] || continue
				[ "$link" == "$OLDNAME" ] && have_oldname=true
				hlinks+=("$link")
			done <<< "$hlinks_out"
			$have_oldname || hlinks+=("$OLDNAME")
		else
			hlinks=()
		fi

		# first try to migrate via Lustre tools, then fall back to rsync
		if ! $OPT_RSYNC; then
			if $LFS migrate "${OPT_PASSTHROUGH[@]}" $layout \
			   "$OLDNAME"; then
				$ECHO "done"
				# no-op if hlinks empty for 1-link files
				for link in "${hlinks[@]}"; do
					add_to_set "$fid" "$link"
				done
				continue
			elif $OPT_NO_RSYNC; then
				printf '\r%s: refusing to fall back to rsync, skipped\n' \
					"$OLDNAME" 1>&2
				continue
			else
				# rsync mode stays enabled for the files that
				# follow as well
				$ECHO -n "falling back to rsync: "
				OPT_RSYNC=true
			fi
		fi

		# with UNLINK="-u" mktemp only generates the name and the
		# file itself is created by "lfs setstripe" below
		NEWNAME=$(mktemp $UNLINK "$OLDNAME-lfs_migrate.tmp.XXXXXX")
		if [ $? -ne 0 ] || [ -z "$NEWNAME" ]; then
			printf '\r%s: cannot make temp file, skipped\n' \
				"$OLDNAME" 1>&2
			NEWNAME=""
			continue
		fi

		# This function may run in a pipeline subshell, where the
		# script's EXIT trap is not run, so the temporary file is
		# removed explicitly before every fatal exit below.
		if [ "$UNLINK" ]; then
			if ! $LFS setstripe "${OPT_PASSTHROUGH[@]}" $layout \
			     "$NEWNAME"; then
				printf '\r%s: setstripe failed, exiting\n' \
					"$NEWNAME" 1>&2
				rm -f -- "$NEWNAME"
				exit 2
			fi
		fi

		# we use --inplace, since we created our own temp file already
		if ! $RSYNC -a --inplace $RSYNC_OPTS "$OLDNAME" "$NEWNAME"; then
			printf '\r%s: copy error, exiting\n' "$OLDNAME" 1>&2
			rm -f -- "$NEWNAME"
			exit 4
		fi

		if $OPT_CHECK && ! cmp -s "$OLDNAME" "$NEWNAME"; then
			printf '\r%s: compare failed, exiting\n' "$NEWNAME" 1>&2
			rm -f -- "$NEWNAME"
			exit 8
		fi

		if ! mv "$NEWNAME" "$OLDNAME"; then
			printf '\r%s: rename error, exiting\n' "$OLDNAME" 1>&2
			rm -f -- "$NEWNAME"
			exit 12
		fi
		# the temporary name no longer exists after the rename
		NEWNAME=""

		$ECHO "done rsync"
		# no-op if hlinks empty for 1-link files
		for link in "${hlinks[@]}"; do
			if [ "$link" != "$OLDNAME" ]; then
				ln -f "$OLDNAME" "$link"
			fi
			add_to_set "$fid" "$link"
		done

		# If the number of hlinks exceeds the space in the xattrs,
		# when the final path is statted it will have a link count
		# of 1 (all other links will point to the new inode).
		# This flag indicates that even paths with a link count of
		# 1 are potentially part of a link set.
		[[ $nlink -gt 1 ]] && RSYNC_WITH_HLINKS=true
	done
}

# Feed the file list to lfs_migrate as NUL-separated paths: from stdin
# when no FILE/DIR arguments remain, otherwise from the arguments, with
# directories expanded to the regular files below them.
if [ "$#" -eq 0 ]; then
	if $OPT_NULL; then
		lfs_migrate
	else
		tr '\n' '\0' | lfs_migrate
	fi
else
	while [ "$1" ]; do
		if [ -d "$1" ]; then
			$LFS find "$1" -type f -print0
		else
			printf '%s\0' "$1"
		fi
		shift
	done | lfs_migrate
fi