#!/bin/bash
# lfs_migrate: a simple tool to copy and check files.
#
# To avoid allocating objects on one or more OSTs, they should be
# deactivated on the MDS via "lctl --device {device_number} deactivate",
# where {device_number} is from the output of "lctl dl" on the MDS.
#
# To guard against corruption, the file is compared after migration
# to verify the copy is correct and the file has not been modified.
# This is not a protection against the file being open by another
# process, but it would catch the worst cases of in-use files, but
# to be 100% safe the administrator needs to ensure this is safe.

RSYNC=${RSYNC:-rsync}
ECHO=echo
LFS=${LFS:-lfs}
RSYNC_WITH_HLINKS=false
LFS_MIGRATE_TMP=${TMPDIR:-/tmp}
NEWNAME=""
# sed expression that strips the leading "[fid] " from a MIGRATED_SET line
REMOVE_FID='s/^\[[0-9a-fx:]*\] //'
PROG=${0##*/}

# MIGRATED_SET is a scratch file with one "<fid> <path>" line per hard link
# that has already been migrated.
MIGRATED_SET=$(mktemp "${LFS_MIGRATE_TMP}/lfs_migrate.links.XXXXXX") || {
  echo "$PROG error: cannot create temporary file in $LFS_MIGRATE_TMP" 1>&2
  exit 1
}

# Record that the link $2 of the file with FID $1 has been migrated.
add_to_set() {
  local old_fid="$1"
  local path="$2"

  printf '%s %s\n' "$old_fid" "$path" >> "$MIGRATED_SET"
}

# Succeed if path $1 was already recorded by add_to_set.
# The path is compared as a fixed string against the whole line, so
# characters such as '.', '*' or '[' in a file name are not treated as
# regular expression syntax.
path_in_set() {
  local path="$1"

  sed -e "$REMOVE_FID" "$MIGRATED_SET" | grep -qxF -- "$path"
}

# Print the first recorded path whose FID is $1; print nothing if there is
# none (or if the FID is empty).
old_fid_in_set() {
  local old_fid="$1"

  [[ -n "$old_fid" ]] || return 0
  awk -v fid="$old_fid" '
    index($0, fid " ") == 1 { print substr($0, length(fid) + 2); exit }
  ' "$MIGRATED_SET"
}

# Print the help text on stderr and exit with status 1.
usage() {
  cat -- <<'USAGE' 1>&2
usage: lfs_migrate [--dry-run|-n] [--help|-h] [--no-rsync|--rsync] [--quiet|-q]
                   [--auto-stripe|-A [-C <cap>]
                   [--min-free|-M <min_free>] [--max-free|-X <max_free>]]
                   [--pool|-p <pool>] [--stripe-count|-c <stripe_count>]
                   [--stripe-size|-S <stripe_size>]
                   [-D] [-h] [-n] [-S]
                   [--restripe|-R] [--skip|-s] [--verbose|-v] [--yes|-y] [-0]
                   [FILE|DIR...]
        -A         restripe file using an automatically selected stripe count,
                   uses stripe_count = sqrt(size_in_GB) + 1
        -c <stripe_count>
                   restripe file using the specified <stripe_count>
        -C <cap>   when -A is set, limit the migrated file to use on each OST
                   at most 1/<cap> of the available space of the smallest OST
        -D         do not use direct I/O to copy file contents
        -h         show this usage message
        -M <min_free>
                   when -A is set, an OST must contain more available space than
                   <min_free> KB in order for it to be considered available for
                   use in the migration
        --no-rsync do not fall back to rsync even if lfs migrate fails (default)
        -n         only print the names of files to be migrated
        -p <pool>  use the specified OST pool for the destination file
        -q         run quietly (don't print filenames or status)
        --rsync    force rsync mode instead of using lfs migrate
        -R         restripe file using default directory striping
        -s         skip file data comparison after migrate
        -S <stripe_size>
                   restripe file using the specified stripe size
        -v         show verbose debug messages
        -X <max_free>
                   when -A is set, limit the amount of space on each OST that
                   can be considered available for the migration to
                   <max_free> KB
        -y         answer 'y' to usage question (only when --rsync used)
        -0         input file names on stdin are separated by a null character

Options '-A', and '-R' are mutually exclusive with each other, and any
specific layout (e.g. any specific parameters like '-c', '-S', '-E', '-p').
Options '-C', '-M', and '-X' are ignored if '-A' is not set.

If a directory is an argument, all files in the directory are migrated.
If no file/directory is given, the file list is read from standard input.

Any arguments that are not explicitly recognized by the script are passed
through to the 'lfs migrate' utility.

If emptying an active OST, new files may continue to be allocated there.
To prevent this, on each MDS run the following for each OST being emptied:

    lctl set_param osp.<fsname>-OSTxxxx*.max_create_count=0

Examples:
      lfs_migrate /mnt/lustre/dir
      lfs_migrate -p newpool /mnt/lustre/dir
      lfs find /test -O test-OST0004 -size +4G | lfs_migrate
USAGE
  exit 1
}

# EXIT handler: remove the link-set scratch file and any temp copy that is
# still named by NEWNAME.
cleanup() {
  rm -f -- "$MIGRATED_SET"
  if [[ -n "$NEWNAME" ]]; then
    rm -f -- "$NEWNAME"
  fi
}

trap cleanup EXIT

# Abort unless option $1 is followed by a value; $2 is the number of
# positional parameters left, counting the option itself.
require_optarg() {
  (( $2 >= 2 )) && return 0
  echo "$PROG error: option $1 requires an argument" 1>&2
  exit 1
}

# Abort unless $2, the value given to option $1, is a decimal integer that
# is at least $3. These values feed integer arithmetic later on.
require_uint() {
  if [[ "$2" =~ ^[0-9]+$ ]] && (( 10#$2 >= $3 )); then
    return 0
  fi
  echo "$PROG error: option $1 requires an integer >= $3, got '$2'" 1>&2
  exit 1
}

OPT_AUTOSTRIPE=false
OPT_CHECK=true
OPT_DEBUG=false
OPT_DRYRUN=false
OPT_FILE=""
# OPT_LAYOUT is a space-separated string of layout options that are handed
# to 'lfs migrate' / 'lfs setstripe' unchanged.
OPT_LAYOUT=""
OPT_COMP=false
OPT_COPY=false
OPT_NO_DIRECT=false
OPT_NO_RSYNC=true
OPT_NULL=false
OPT_PASSTHROUGH=()
OPT_POOL=""
OPT_RESTRIPE=false
OPT_RSYNC=false
OPT_STRIPE_COUNT=""
OPT_STRIPE_SIZE=""
OPT_YES=false
OPT_MINFREE=262144
OPT_MAXFREE=""
OPT_CAP=100

# Examine any long options and arguments.  getopts does not support long
# options, so they must be stripped out and classified as either options
# for the script, or passed through to "lfs migrate".
# The loop stops (without shifting) at the first argument that names an
# existing path, so "$@" then holds the files/directories to migrate.
while [ -n "$*" ]; do
  arg="$1"
  case "$arg" in
  -h|--help)
    usage
    ;;
  -l|--link)
    # maintained backward compatibility for now
    ;;
  -n)
    OPT_DRYRUN=true
    OPT_YES=true
    echo "$PROG: -n deprecated, use --dry-run or --non-block" 1>&2
    ;;
  --dry-run)
    OPT_DRYRUN=true
    OPT_YES=true
    ;;
  -p|--pool)
    require_optarg "$arg" $#
    # if within a component, pass through pool
    if $OPT_COMP; then
      OPT_LAYOUT+="$arg $2 "
    else
      OPT_POOL="-p $2"
    fi
    shift
    ;;
  -q|--quiet)
    ECHO=:
    ;;
  -R|--restripe)
    OPT_RESTRIPE=true
    ;;
  -s|--skip)
    OPT_CHECK=false
    ;;
  -v|--verbose)
    OPT_DEBUG=true
    ECHO=echo
    ;;
  -y|--yes)
    OPT_YES=true
    ;;
  -0)
    OPT_NULL=true
    ;;
  -b|--block|--non-block|--non-direct|-D|--no-verify)
    # Always pass non-layout options to 'lfs migrate'.
    # (-D is handled here; a later dedicated -D arm could never match.)
    OPT_PASSTHROUGH+=("$arg")
    ;;
  --rsync)
    OPT_RSYNC=true
    OPT_NO_RSYNC=false
    ;;
  --no-rsync)
    OPT_NO_RSYNC=true
    OPT_RSYNC=false
    ;;
  --copy|--yaml|--file)
    require_optarg "$arg" $#
    OPT_COPY=true
    # these options have files as arguments, pass both through
    OPT_LAYOUT+="$arg $2 "
    shift
    ;;
  --auto-stripe|-A)
    OPT_AUTOSTRIPE=true
    ;;
  -C)
    require_optarg "$arg" $#
    # used as a divisor, so zero must be rejected
    require_uint "$arg" "$2" 1
    OPT_CAP=$((10#$2))
    shift
    ;;
  -E|--comp-end|--component-end)
    OPT_COMP=true
    OPT_LAYOUT+="$arg "
    ;;
  -M|--min-free)
    require_optarg "$arg" $#
    require_uint "$arg" "$2" 0
    OPT_MINFREE=$((10#$2))
    shift
    ;;
  -X|--max-free)
    require_optarg "$arg" $#
    require_uint "$arg" "$2" 0
    OPT_MAXFREE=$((10#$2))
    shift
    ;;
  -c|--stripe-count)
    require_optarg "$arg" $#
    # if within a component, pass through stripe_count
    if $OPT_COMP; then
      OPT_LAYOUT+="$arg $2 "
    else
      OPT_STRIPE_COUNT="$2"
    fi
    shift
    ;;
  -S|--stripe-size)
    require_optarg "$arg" $#
    # if within a component, pass through stripe_size
    if $OPT_COMP; then
      OPT_LAYOUT+="$arg $2 "
    else
      OPT_STRIPE_SIZE="$2"
    fi
    shift
    ;;
  *)
    # The first existing path ends option parsing; anything else is
    # passed to 'lfs migrate' as a layout option.
    if [[ -e "$arg" ]]; then
      OPT_FILE+="$arg "
      break
    fi
    OPT_LAYOUT+="$arg "
    ;;
  esac
  shift
done

if $OPT_RESTRIPE || $OPT_AUTOSTRIPE; then
  if [[ -n "$OPT_STRIPE_COUNT" || -n "$OPT_STRIPE_SIZE" ||
        -n "$OPT_POOL" ]]; then
    echo "$PROG error: option -R or -A cannot be used with -c, -S, or -p" 1>&2
    exit 1
  elif [[ -n "$OPT_LAYOUT" ]]; then
    echo "$PROG error: option -R or -A cannot be used with $OPT_LAYOUT" 1>&2
    exit 1
  elif $OPT_RESTRIPE && $OPT_AUTOSTRIPE; then
    echo "$PROG error: option -R cannot be used with -A" 1>&2
    exit 1
  fi
fi

if $OPT_RSYNC && ! $OPT_YES; then
  echo ""
  echo "'lfs_migrate --rsync' is NOT SAFE for moving in-use files." 1>&2
  echo "Use it only when you are sure migrated files are unused." 1>&2
  echo "" 1>&2
  echo -n "Continue? (y/n) "
  read -r CHECK
  if [[ "$CHECK" != "y" && "$CHECK" != "yes" ]]; then
    exit 1
  fi
fi

# Probe rsync once. stdin is redirected from /dev/null so that none of the
# probes can swallow a file list that is being piped into this script.
rsync_help=$($RSYNC --help 2>&1 < /dev/null)
# if rsync has --xattr support, then try to copy the xattrs.
if grep -q xattr <<< "$rsync_help"; then
  RSYNC_OPTS="$RSYNC_OPTS -X"
fi
if grep -q acls <<< "$rsync_help"; then
  RSYNC_OPTS="$RSYNC_OPTS -A"
fi
# If rsync copies lustre xattrs in the future, then we can skip lfs (bug 22189)
# Only inspect the binary when it can actually be located: "strings" with
# no file operand would read standard input instead.
read -r rsync_cmd _ <<< "$RSYNC"
rsync_path=$(command -v "$rsync_cmd" 2> /dev/null)
if [[ -n "$rsync_path" && -f "$rsync_path" ]] &&
   strings "$rsync_path" 2>&1 < /dev/null | grep -q lustre; then
  LFS=:
fi

# rsync creates its temporary files with lenient permissions, even if
# permissions on the original files are more strict. Tighten umask here
# to avoid the brief window where unprivileged users might be able to
# access the temporary file.
umask 0077

# Use stripe count = sqrt(size_in_GB) + 1, but cap object size per OST.
#######################################
# Pick the stripe count for an auto-striped (-A) migration.
# Globals:   LFS, OPT_POOL, OPT_MINFREE, OPT_MAXFREE, OPT_CAP (read)
# Arguments: $1 - file being migrated (used for "lfs df")
#            $2 - file size in KB
#            $3 - per-OST object size cap in KB from an earlier call, or
#                 empty to have it computed from "lfs df"
#            $4 - (optional) number of eligible OSTs from an earlier call;
#                 0 or empty means "unknown", in which case the stripe
#                 count is not limited by the OST count
# Outputs:   "<stripe_count> <obj_max_kb> <ost_count>" on stdout; the last
#            two values are meant to be passed back in as $3 and $4 for the
#            next file. An obj_max_kb of 0 means "no cap known".
#######################################
calc_stripe() {
  local filename=$1
  local filekb=$2
  local obj_max_kb=$3
  local ost_max_count=${4:-0}
  local filegb=$((filekb / 1048576))
  local min_free=${OPT_MINFREE:-262144}
  local cap=${OPT_CAP:-100}
  local stripe_count=1
  local root=0

  # Files up to 1GB will have 1 stripe if they fit within the object max
  if [[ $filegb -lt 1 && "$obj_max_kb" && $filekb -le $obj_max_kb ]]; then
    echo 1 "$obj_max_kb" "$ost_max_count"
    return
  fi

  # stripe_count = 1 + floor(sqrt(size_in_GB)), in pure shell arithmetic
  while (( (root + 1) * (root + 1) <= filegb )); do
    root=$((root + 1))
  done
  stripe_count=$((root + 1))

  if [[ -z "$obj_max_kb" ]]; then
    local ost_min_kb=$((1 << 62))
    local avail

    if ! [[ "$cap" =~ ^[0-9]+$ ]] || (( cap <= 0 )); then
      echo "warning: invalid -C value '$cap', using 100" >&2
      cap=100
    fi

    # Calculate cap on object size at 1/cap (default 1%) of smallest OST
    # but only include OSTs that have at least OPT_MINFREE KB available
    ost_max_count=0
    while IFS= read -r avail; do
      # skip rows whose 4th column is not a number
      [[ "$avail" =~ ^[0-9]+$ ]] || continue
      if [[ "$OPT_MAXFREE" && $avail -gt $OPT_MAXFREE ]]; then
        avail=$OPT_MAXFREE
      fi
      if [[ $avail -ge $min_free ]]; then
        ost_max_count=$((ost_max_count + 1))
        if [[ $avail -lt $ost_min_kb ]]; then
          ost_min_kb=$avail
        fi
      fi
    done < <($LFS df $OPT_POOL "$filename" | awk '/OST/ { print $4 }')

    if (( ost_max_count == 0 )); then
      # no OSTs with enough space, stripe over all of them
      echo "-1" "0" "0"
      return
    fi

    if (( ost_min_kb == (1 << 62) )); then
      echo "warning: unable to determine minimum OST size, " \
        "object size not capped" >&2
      echo "$stripe_count" "0" "$ost_max_count"
      return
    fi

    obj_max_kb=$((ost_min_kb / cap))
    if (( obj_max_kb <= 0 )); then
      # cap larger than the smallest OST: nothing sensible to divide by
      echo "$stripe_count" "0" "$ost_max_count"
      return
    fi
  elif [[ $obj_max_kb -eq 0 ]]; then
    echo "warning: unable to determine minimum OST size " \
      "from previous migrate, object size not capped" >&2
    echo "$stripe_count" "$obj_max_kb" "$ost_max_count"
    return
  fi

  # If disk usage would exceed the cap, increase the number of stripes.
  # Round up to the nearest MB to ensure file will fit.
  if (( filekb > stripe_count * obj_max_kb )); then
    stripe_count=$(((filekb + obj_max_kb - 1024) / obj_max_kb))
  fi

  # Limit the count to the number of eligible OSTs, when that is known
  if (( ost_max_count > 0 && stripe_count > ost_max_count )); then
    stripe_count=$ost_max_count
  fi
  echo "$stripe_count" "$obj_max_kb" "$ost_max_count"
}

#######################################
# Migrate every file named on stdin (NUL-separated paths).
# Each file is migrated with "lfs migrate"; rsync into a temp file plus
# rename is used with --rsync, or as a fallback when --no-rsync is off.
# Globals:   OPT_* option variables, ECHO, LFS, RSYNC, RSYNC_OPTS (read);
#            OLDNAME, NEWNAME, OBJ_MAX_KB, OST_MAX_COUNT,
#            RSYNC_WITH_HLINKS, OPT_RSYNC (written)
# Outputs:   progress via $ECHO on stdout, diagnostics on stderr
# Exits:     1 auto-stripe failure, 2 setstripe failure, 4 copy error,
#            8 compare failure, 12 rename error
#######################################
lfs_migrate() {
  local last_dev=""
  local mntpoint=""
  local link
  local getstripe_opts="-N --comp-count -c -S -p -y"
  # Position of each value in the "lfs getstripe $getstripe_opts" output:
  #   lcm_mirror_count:  1
  #   lcm_entry_count:   0
  #   lmm_stripe_count:  1
  #   lmm_stripe_size:   1048576
  #   lmm_pool:          pool_abc
  # Declared up front so the -R debug path can use them as well.
  local l_mirror_count=0
  local l_comp_count=1
  local l_stripe_count=2
  local l_stripe_size=3
  local l_stripe_pool=4

  while IFS= read -r -d '' OLDNAME; do
    local hlinks=()
    local layout_args=()
    local layout_info=()
    local fid=""
    local unlink_opt=""
    # per-file copy of OPT_COPY so one composite file does not change how
    # the following files are handled
    local use_copy=$OPT_COPY
    local nlink="" fsize="" fdev="" ftype=""

    $ECHO -n "$OLDNAME: "

    # avoid duplicate stat call by fetching all attrs at once; %F is last
    # because it may be several words ("regular empty file")
    read -r nlink fsize fdev ftype < <(LANG=C stat -c '%h %s %D %F' \
      "$OLDNAME" 2> /dev/null)

    # skip non-regular files, since they don't have any objects
    # and there is no point in trying to migrate them.
    if [[ "$ftype" != regular* ]]; then
      printf '\r%s: not a regular file, skipped\n' "$OLDNAME" 1>&2
      continue
    fi

    # working out write perms is hard, let the shell do it
    if [[ ! -w "$OLDNAME" ]]; then
      printf '\r%s: no write permission, skipped\n' "$OLDNAME" 1>&2
      continue
    fi

    if $OPT_DRYRUN && ! $OPT_DEBUG; then
      $ECHO "dry run, skipped"
      continue
    fi

    # xattrs use absolute file paths, so ensure provided path is
    # also absolute so that the names can be compared
    local oldname_absolute
    oldname_absolute=$(readlink -f "$OLDNAME")
    if [[ -z "$oldname_absolute" ]]; then
      printf '\r%s: cannot resolve full path, skipped\n' "$OLDNAME" 1>&2
      continue
    fi
    OLDNAME=$oldname_absolute

    if [[ $nlink -gt 1 ]] || $RSYNC_WITH_HLINKS; then
      if ! fid=$($LFS path2fid "$OLDNAME" 2> /dev/null); then
        printf '\r%s: cannot get FID, skipping; is this a Lustre file system?\n' \
          "$OLDNAME" 1>&2
        continue
      fi

      # don't migrate a hard link if it was already migrated
      if path_in_set "$OLDNAME"; then
        $ECHO "already migrated via another hard link"
        continue
      fi

      # There is limited space available in the xattrs
      # to store all of the hard links for a file, so it's
      # possible that $OLDNAME is part of a link set but is
      # not listed in xattrs and therefore not listed as
      # being migrated.
      local migrated
      migrated=$(old_fid_in_set "$fid")
      if [[ -n "$migrated" ]]; then
        $ECHO "already migrated via another hard link"
        # Only the rsync case has to relink.  The
        # "lfs migrate" case keeps the same inode so
        # all of the links are already correct.
        if $OPT_RSYNC && [[ "$migrated" != "$OLDNAME" ]]; then
          ln -f "$migrated" "$OLDNAME"
        fi
        add_to_set "$fid" "$OLDNAME"
        continue
      fi
    fi

    local olddir
    olddir=$(dirname "$OLDNAME")
    local stripe_size="$OPT_STRIPE_SIZE"
    local stripe_count="$OPT_STRIPE_COUNT"
    local parent_count=""
    local parent_size=""
    local stripe_pool="${OPT_POOL#-p }"
    local mirror_count=1

    # The layout is kept as an array so paths containing whitespace stay
    # single arguments.
    layout_args=("${OPT_PASSTHROUGH[@]}")

    if $OPT_RESTRIPE; then
      layout_args+=(--copy "$olddir")
      use_copy=true
    elif ! $OPT_COMP; then
      # avoid multiple getstripe calls
      layout_info=($($LFS getstripe $getstripe_opts "$OLDNAME" \
        2> /dev/null | awk '{ print $2 }'))

      # If rsync copies Lustre xattrs properly in the future
      # (i.e. before the file data, so that it preserves
      # striping) then we don't need this getstripe stuff.
      unlink_opt="-u"

      [[ -n "$OPT_POOL" ]] || stripe_pool=${layout_info[$l_stripe_pool]}
      mirror_count=${layout_info[$l_mirror_count]}

      if $OPT_AUTOSTRIPE; then
        local filekb=$((fsize / 1024))

        # OBJ_MAX_KB and OST_MAX_COUNT are globals that carry the OST
        # survey from the first file over to the following ones.
        stripe_count=""
        read -r stripe_count OBJ_MAX_KB OST_MAX_COUNT < <(calc_stripe \
          "$OLDNAME" "$filekb" "$OBJ_MAX_KB" "$OST_MAX_COUNT")
        [[ -z "$stripe_count" ]] && exit 1
        # NOTE(review): this also turns the "-1" (all OSTs) answer of
        # calc_stripe into 1 - confirm which of the two is intended.
        [[ $stripe_count -lt 1 ]] && stripe_count=1
      else
        [[ -n "$stripe_count" ]] ||
          stripe_count=${layout_info[$l_stripe_count]}
      fi
      [[ -n "$stripe_size" ]] || stripe_size=${layout_info[$l_stripe_size]}

      if [[ -z "$stripe_count" || -z "$stripe_size" ]]; then
        unlink_opt=""
      fi
    fi

    if $OPT_DEBUG; then
      if $OPT_RESTRIPE; then
        local parent_layout
        parent_layout=($($LFS getstripe $getstripe_opts \
          -d "$olddir" 2> /dev/null | awk '{ print $2 }'))
        parent_count=${parent_layout[$l_stripe_count]}
        parent_size=${parent_layout[$l_stripe_size]}
        stripe_pool=${parent_layout[$l_stripe_pool]}
        mirror_count=${parent_layout[$l_mirror_count]}
      fi

      $ECHO -n "stripe_count=${stripe_count:-$parent_count},stripe_size=${stripe_size:-$parent_size}"
      [[ -n "$stripe_pool" ]] && $ECHO -n ",pool=${stripe_pool}"
      [[ $mirror_count -gt 1 ]] && $ECHO -n ",mirror_count=${mirror_count}"
      $ECHO -n " "
    fi

    if $OPT_DRYRUN; then
      $ECHO " dry run, skipped"
      continue
    fi

    # a composite (multi-component) file is re-created from its own layout
    if ! $use_copy && ! $OPT_COMP &&
       [[ ${layout_info[$l_comp_count]:-0} -gt 0 ]]; then
      layout_args+=(--copy "$OLDNAME")
      use_copy=true
    fi
    if ! $use_copy && ! $OPT_COMP; then
      [[ -n "$stripe_count" ]] && layout_args+=(-c "$stripe_count")
      [[ -n "$stripe_size" ]] && layout_args+=(-S "$stripe_size")
      [[ -n "$OPT_POOL" && -n "$stripe_pool" ]] &&
        layout_args+=(-p "$stripe_pool")
      [[ $mirror_count -gt 1 ]] && layout_args+=(-N "$mirror_count")
    fi
    # OPT_LAYOUT is a space-separated option string; split it into words
    local extra_layout=()
    read -r -a extra_layout <<< "$OPT_LAYOUT"
    layout_args+=("${extra_layout[@]}")

    # detect other hard links and store them on a global
    # list so we don't re-migrate them
    if [[ $nlink -gt 1 ]]; then
      if [[ "$fdev" != "$last_dev" || -z "$mntpoint" ]]; then
        mntpoint=$(df -P "$OLDNAME" | awk 'NR==2 { print $NF }')
        last_dev=$fdev
      fi
      if [[ -z "$mntpoint" ]]; then
        last_dev=""
        printf '\r%s: cannot determine mount point; skipped\n' \
          "$OLDNAME" 1>&2
        continue
      fi

      local link_list link_rc
      link_list=$($LFS fid2path "$mntpoint" "$fid" 2> /dev/null)
      link_rc=$?
      if $OPT_RSYNC && [[ $link_rc -ne 0 ]]; then
        printf '\r%s: cannot determine hard link paths, skipped\n' \
          "$OLDNAME" 1>&2
        continue
      fi
      # one path per output line
      while IFS= read -r link; do
        if [[ -n "$link" ]]; then
          hlinks+=("$link")
        fi
      done <<< "$link_list"
      hlinks+=("$OLDNAME")
    fi

    # first try to migrate via Lustre tools, then fall back to rsync
    if ! $OPT_RSYNC; then
      if $OPT_DEBUG; then
        printf '\n%s migrate %s "%s"\n' "$LFS" "${layout_args[*]}" "$OLDNAME"
      fi
      if $LFS migrate "${layout_args[@]}" "$OLDNAME"; then
        $ECHO "done"
        # no-op if hlinks empty for 1-link files
        for link in "${hlinks[@]}"; do
          add_to_set "$fid" "$link"
        done
        continue
      elif $OPT_NO_RSYNC; then
        printf '\r%s: refusing to fall back to rsync, skipped\n' \
          "$OLDNAME" 1>&2
        continue
      else
        $ECHO -n "falling back to rsync: "
        # stays set: later files go straight to rsync
        OPT_RSYNC=true
      fi
    fi

    local oldfile
    oldfile=$(basename "$OLDNAME")
    if ! NEWNAME=$(mktemp $unlink_opt "$olddir/.$oldfile.XXXXXX") ||
       [[ -z "$NEWNAME" ]]; then
      NEWNAME=""
      printf '\r%s: cannot make temp file, skipped\n' "$OLDNAME" 1>&2
      continue
    fi

    # This loop usually runs in a pipeline subshell, where the script's
    # EXIT trap is not in effect, so the temp file is removed explicitly
    # on every error exit below.
    if [[ -n "$unlink_opt" ]]; then
      if ! $LFS setstripe "${layout_args[@]}" "$NEWNAME"; then
        printf '\r%s: setstripe failed, exiting\n' "$NEWNAME" 1>&2
        rm -f -- "$NEWNAME"
        exit 2
      fi
    fi

    # we use --inplace, since we created our own temp file already
    if ! $RSYNC -a --inplace $RSYNC_OPTS "$OLDNAME" "$NEWNAME"; then
      printf '\r%s: copy error, exiting\n' "$OLDNAME" 1>&2
      rm -f -- "$NEWNAME"
      exit 4
    fi

    if $OPT_CHECK && ! cmp -s "$OLDNAME" "$NEWNAME"; then
      printf '\r%s: compare failed, exiting\n' "$NEWNAME" 1>&2
      rm -f -- "$NEWNAME"
      exit 8
    fi

    if ! mv "$NEWNAME" "$OLDNAME"; then
      printf '\r%s: rename error, exiting\n' "$OLDNAME" 1>&2
      rm -f -- "$NEWNAME"
      exit 12
    fi
    # the temp name no longer exists; make sure nothing removes it later
    NEWNAME=""

    $ECHO "done rsync"
    # no-op if hlinks empty for 1-link files
    for link in "${hlinks[@]}"; do
      if [[ "$link" != "$OLDNAME" ]]; then
        ln -f "$OLDNAME" "$link"
      fi
      add_to_set "$fid" "$link"
    done

    # If the number of hlinks exceeds the space in the xattrs,
    # when the final path is statted it will have a link count
    # of 1 (all other links will point to the new inode).
    # This flag indicates that even paths with a link count of
    # 1 are potentially part of a link set.
    if [[ $nlink -gt 1 ]]; then
      RSYNC_WITH_HLINKS=true
    fi
  done
}

# Feed the file list to lfs_migrate as NUL-separated paths: from stdin when
# no file arguments remain, otherwise from the arguments (directories are
# expanded with "lfs find").
if [ "$#" -eq 0 ]; then
  if $OPT_NULL; then
    lfs_migrate
  else
    tr '\n' '\0' | lfs_migrate
  fi
else
  while [ "$#" -gt 0 ]; do
    if [ -d "$1" ]; then
      $LFS find "$1" -type f -print0
    else
      # printf keeps backslashes in file names intact
      printf '%s\0' "$1"
    fi
    shift
  done | lfs_migrate
fi