3 # lfs_migrate: a simple tool to copy and check files.
5 # To avoid allocating objects on one or more OSTs, they should be
6 # deactivated on the MDS via "lctl --device {device_number} deactivate",
7 # where {device_number} is from the output of "lctl dl" on the MDS.
9 # To guard against corruption, the file is compared after migration
10 # to verify the copy is correct and the file has not been modified.
11 # This does not protect against the file being open by another
12 # process; it only catches the worst cases of in-use files. To be
13 # 100% safe, the administrator must ensure the files are not in use.
# Default for rsync mode; can be preset through LFS_MIGRATE_RSYNC_MODE and
# is switched on by --rsync.
16 OPT_RSYNC=${LFS_MIGRATE_RSYNC_MODE:-false}
# When true, the per-file loop performs the FID / link-set checks even for
# paths whose link count is 1. It is turned on by the rsync path.
19 RSYNC_WITH_HLINKS=false
# Directory for scratch files: $TMPDIR if set and non-empty, else /tmp.
20 LFS_MIGRATE_TMP=${TMPDIR:-/tmp}
# Scratch file holding one "<old_fid> <path>" line per path already handled.
# NOTE(review): the mktemp template is unquoted, so a TMPDIR containing
# whitespace would be split into several arguments.
21 MIGRATED_SET="$(mktemp ${LFS_MIGRATE_TMP}/lfs_migrate.links.XXXXXX)"
# sed expression that removes a leading "[...] " field (hex digits, 'x' and
# ':' inside the brackets) from a line, leaving the rest.
23 REMOVE_FID='s/^\[[0-9a-fx:]*\] //'
# Record a path as handled: append "<old_fid> <path>" to the set file.
# (Presumably the body of add_to_set; the function header is not shown.)
30 echo "$old_fid $path" >> "$MIGRATED_SET"
# Succeeds when some line of the set, with its leading FID field removed,
# matches $path from start to end.
# NOTE(review): $path is interpolated into a regular expression, so names
# containing regex metacharacters can match incorrectly; $MIGRATED_SET is
# also unquoted on this line.
36 sed -e "$REMOVE_FID" $MIGRATED_SET | grep -q "^$path$"
# Select the first set entry whose line starts with $old_fid. The backslash
# escapes the first character of the FID for grep (presumably the opening
# '[' of the bracketed form that REMOVE_FID strips; confirm). The pipeline
# continues on a line not shown here.
42 grep "^\\$old_fid" "$MIGRATED_SET" | head -n 1 |
48 usage: lfs_migrate [--dry-run|-n] [--help|-h] [--no-rsync|--rsync] [--quiet|-q]
49 [--auto-stripe|-A [-C <cap>]
50 [--min-free|-M <min_free>] [--max-free|-X <max_free>]]
51 [--pool|-p <pool>] [--stripe-count|-c <stripe_count>]
52 [--stripe-size|-S <stripe_size>]
54 [--restripe|-R] [--skip|-s] [--verbose|-v] [--yes|-y] [-0]
56 -A restripe file using an automatically selected stripe count,
57 uses stripe_count = sqrt(size_in_GB) + 1
59 restripe file using the specified <stripe_count>
60 -C <cap> when -A is set, limit the migrated file to use on each OST
61 at most 1/<cap> of the available space of the smallest OST
62 -D do not use direct I/O to copy file contents
63 -h show this usage message
65 when -A is set, an OST must contain more available space than
66 <min_free> KB in order for it to be considered available for
68 --no-rsync do not fall back to rsync mode even if lfs migrate fails
69 -n only print the names of files to be migrated
70 -p <pool> use the specified OST pool for the destination file
71 -q run quietly (don't print filenames or status)
72 --rsync force rsync mode instead of using lfs migrate
73 -R restripe file using default directory striping
74 -s skip file data comparison after migrate
76 restripe file using the specified stripe size
77 -v show verbose debug messages
79 when -A is set, limit the amount of space on each OST that
80 can be considered available for the migration to
82 -y answer 'y' to usage question
83 -0 input file names on stdin are separated by a null character
85 Options '-A', '-c', and '-R' are mutually exclusive.
86 Options '-C', '-M', and '-X' are ignored if '-A' is not set.
88 The --rsync and --no-rsync options may not be specified at the same time.
90 If a directory is an argument, all files in the directory are migrated.
91 If no file/directory is given, the file list is read from standard input.
93 Any arguments that are not explicitly recognized by the script are passed
94 through to the 'lfs migrate' utility.
97 lfs_migrate /mnt/lustre/dir
98 lfs_migrate -p newpool /mnt/lustre/dir
99 lfs find /test -O test-OST0004 -size +4G | lfs_migrate -y
# Remove the scratch set file created with mktemp at startup.
105 rm -f "$MIGRATED_SET"
# NEWNAME holds the temporary copy made for the rsync path; remove it only
# when the variable is non-empty.
106 [ -n "$NEWNAME" ] && rm -f "$NEWNAME"
132 # Examine any long options and arguments. getopts does not support long
133 # options, so they must be stripped out and classified as either options
134 # for the script, or passed through to "lfs migrate".
# The loop runs while the joined argument list is non-empty; it also ends
# early through the 'break' in the default branch.
135 while [ -n "$*" ]; do
139 -l|--link) ;; # maintained backward compatibility for now
# -n still selects a dry run (and implies --yes) but prints a deprecation
# notice on stderr.
140 -n) OPT_DRYRUN=true; OPT_YES=true
141 echo "$PROG: -n deprecated, use --dry-run or --non-block" 1>&2;;
142 --dry-run) OPT_DRYRUN=true; OPT_YES=true;;
# The pool is kept twice: as "<flag> <name>" in OPT_POOL and appended to
# the layout options. Options that take a value consume $2 via 'shift'.
# NOTE(review): OPT_POOL keeps whichever spelling was given, but the
# per-file code strips only a leading "-p " from it, so "--pool <name>"
# would leave the flag inside the derived pool name.
143 -p|--pool) OPT_POOL="$arg $2"; OPT_LAYOUT+="$OPT_POOL "; shift;;
145 -R|--restripe) OPT_RESTRIPE=true;;
# -s disables the post-copy comparison.
146 -s|--skip) OPT_CHECK=false;;
# -v also turns the progress printer ECHO into a real 'echo'.
147 -v|--verbose) OPT_DEBUG=true; ECHO=echo;;
148 -y|--yes) OPT_YES=true;;
150 -b|--block|--non-block|--non-direct|--no-verify)
151 # Always pass non-layout options to 'lfs migrate'
152 OPT_PASSTHROUGH+=("$arg");;
153 --rsync) OPT_RSYNC=true;;
154 --no-rsync) OPT_NO_RSYNC=true;;
155 --copy|--yaml|--file) OPT_COMP=true;
156 # these options have files as arguments, pass both through
157 OPT_LAYOUT+="$arg $2 "; shift;;
158 --auto-stripe|-A) OPT_AUTOSTRIPE=true;;
159 -C) OPT_CAP="$2"; shift;;
160 -D) LFS_OPT_DIRECTIO="-D";;
161 -M|--min-free) OPT_MINFREE="$2"; shift;;
162 -X|--max-free) OPT_MAXFREE="$2"; shift;;
163 -c|--stripe-count) OPT_STRIPE_COUNT="$2"; shift;;
164 -S|--stripe-size) OPT_STRIPE_SIZE="$2"; shift;;
165 *) # Pass other non-file layout options to 'lfs migrate'
# The first argument that names an existing path is stored in OPT_FILE and
# ends option parsing; anything else is collected in OPT_LAYOUT.
166 [ -e "$arg" ] && OPT_FILE+="$arg " && break || OPT_LAYOUT+="$arg "
# Reject conflicting option combinations. In a shell list '||' and '&&'
# have equal precedence and group left to right, so the first test reads
# "(-R or -A) and OPT_LAYOUT is non-empty".
# NOTE(review): -p/--pool also appends to OPT_LAYOUT during parsing, so a
# pool option combined with -R or -A trips this first check as well;
# confirm that is intended.
171 if $OPT_RESTRIPE || $OPT_AUTOSTRIPE && [ -n "$OPT_LAYOUT" ]; then
172 echo "$PROG error: Options '$OPT_LAYOUT' can't be used with -R or -A" \
175 elif $OPT_RESTRIPE && [[ "$OPT_STRIPE_COUNT" || "$OPT_STRIPE_SIZE" ]]; then
176 echo "$PROG error: Option -R can't be used with -c or -S" 1>&2
178 elif $OPT_AUTOSTRIPE && [ -n "$OPT_STRIPE_COUNT" ]; then
179 echo "$PROG error: Option -A can't be used with -c" 1>&2
181 elif $OPT_AUTOSTRIPE && $OPT_RESTRIPE; then
182 echo "$PROG error: Option -A can't be used with -R" 1>&2
# --rsync (or LFS_MIGRATE_RSYNC_MODE) and --no-rsync contradict each other.
186 if $OPT_RSYNC && $OPT_NO_RSYNC; then
187 echo "$PROG: Options --rsync and --no-rsync may not be" \
188 "specified at the same time." 1>&2
# Safety warning shown before any file is touched. The warning text goes to
# stderr; the "Continue?" prompt itself goes to stdout.
194 echo "lfs_migrate is currently NOT SAFE for moving in-use files." 1>&2
195 echo "Use it only when you are sure migrated files are unused." 1>&2
197 echo "If emptying an OST that is active on the MDS, new files may" 1>&2
198 echo "use it. To stop allocating any new objects on OSTNNNN run:" 1>&2
# NOTE(review): the message below ends with an unmatched single quote after
# "max_create_count=0".
199 echo " lctl set_param osp.<fsname>-OSTNNNN*.max_create_count=0'" 1>&2
200 echo "on each MDS using the OST(s) being emptied." 1>&2
201 echo -n "Continue? (y/n) "
# Any answer other than exactly "y" or "yes" aborts with status 1. CHECK is
# filled in on a line not shown here.
203 [ "$CHECK" != "y" -a "$CHECK" != "yes" ] && exit 1
# Probe the installed rsync by grepping its --help text and add the
# matching preservation flags to RSYNC_OPTS.
206 # if rsync has --xattr support, then try to copy the xattrs.
207 $RSYNC --help 2>&1 | grep -q xattr && RSYNC_OPTS="$RSYNC_OPTS -X"
# Likewise for ACLs: add -A when the help text mentions "acls".
208 $RSYNC --help 2>&1 | grep -q acls && RSYNC_OPTS="$RSYNC_OPTS -A"
209 # If rsync copies lustre xattrs in the future, then we can skip lfs (bug 22189)
# When the rsync binary contains the string "lustre", LFS is replaced by
# ':' so that later "$LFS ..." invocations become the shell no-op.
210 strings $(which $RSYNC) 2>&1 | grep -q lustre && LFS=:
212 # rsync creates its temporary files with lenient permissions, even if
213 # permissions on the original files are more strict. Tighten umask here
214 # to avoid the brief window where unprivileged users might be able to
215 # access the temporary file.
218 # Use stripe count = sqrt(size_in_GB) + 1, but cap object size per OST.
#
# The caller in this file invokes it as
#   calc_stripe "$OLDNAME" "$filekb" "$OBJ_MAX_KB"
# and reads two words back from stdout: the stripe count and the per-object
# size cap in KB (a cap of 0 is reported when the size is not capped).
# Warnings go to stderr. Nothing is printed on stdout when bc fails, which
# the caller treats as fatal.
# The assignments of filekb and obj_max_kb from the arguments are not shown
# here (presumably $2 and $3; confirm).
219 function calc_stripe()
# 1048576 KB = 1 GB; the integer division truncates.
224 local filegb=$((filekb / 1048576))
# Number of OSTs that passed the free-space filter below.
226 local ost_max_count=0
228 # Files up to 1GB will have 1 stripe if they fit within the object max
229 if [[ $filegb -lt 1 && "$obj_max_kb" && $filekb -le $obj_max_kb ]]; then
230 echo 1 "$obj_max_kb" && return
# bc with scale=0 yields an integer result; on failure report it and
# return without printing a count.
233 stripe_count=$(bc <<< "scale=0; 1 + sqrt($filegb)" 2> /dev/null) ||
234 { echo "cannot auto calculate stripe count" >&2; return; }
# No cap was handed in: derive one from the current per-OST free space.
236 if [ -z "$obj_max_kb" ]; then
# 1 << 62 is a sentinel meaning "no eligible OST seen yet".
237 local ost_min_kb=$((1 << 62))
239 # Calculate cap on object size at 1/OPT_CAP of the smallest OST,
240 # but only include OSTs that have at least OPT_MINFREE KB available
# Each value read is column 4 of an "lfs df" line containing "OST".
241 while IFS='' read avail; do
242 [[ "$OPT_MAXFREE" && $avail -gt $OPT_MAXFREE ]] &&
244 if [ $avail -ge $OPT_MINFREE ]; then
245 ost_max_count=$((ost_max_count + 1))
246 if [ $avail -lt $ost_min_kb ]; then
250 done < <($LFS df $OPT_POOL $OLDNAME | awk '/OST/ { print $4 }')
252 if [ $ost_max_count -eq 0 ]; then
253 # no OSTs with enough space, stripe over all of them
# Sentinel unchanged: no usable free-space figure was found.
258 if (( ost_min_kb == (1 << 62) )); then
259 echo "warning: unable to determine minimum OST size, " \
260 "object size not capped" >&2
261 echo "$stripe_count" "0"
# Cap each object at 1/OPT_CAP of the smallest eligible OST.
265 obj_max_kb=$((ost_min_kb / $OPT_CAP))
# A cap of 0 handed in means an earlier call could not determine one.
266 elif [ $obj_max_kb -eq 0 ]; then
267 echo "warning: unable to determine minimum OST size " \
268 "from previous migrate, object size not capped" >&2
269 echo "$stripe_count" "$obj_max_kb"
273 # If disk usage would exceed the cap, increase the number of stripes.
274 # Round up to the nearest MB to ensure file will fit.
275 (( filekb > stripe_count * obj_max_kb )) &&
276 stripe_count=$(((filekb + obj_max_kb - 1024) / obj_max_kb))
278 # Limit the count to the number of eligible OSTs
# NOTE(review): ost_max_count is only incremented inside the branch taken
# when no cap was passed in. When a non-zero cap from an earlier call is
# supplied it appears to still be 0 here, which would make this test true
# for any positive stripe_count; confirm against the lines not shown.
279 if [ "$stripe_count" -gt $ost_max_count ]; then
280 echo "$ost_max_count" "$obj_max_kb"
282 echo "$stripe_count" "$obj_max_kb"
# Per-file loop: each NUL-terminated name read from stdin is validated,
# checked against the set of already-handled hard links, given a target
# layout, and then migrated with "lfs migrate" or copied with rsync.
290 while IFS='' read -d '' OLDNAME; do
295 $ECHO -n "$OLDNAME: "
297 # avoid duplicate stat call by fetching all attrs at once
298 local nlink_idx_link=0 # %h is the hard link count
299 local nlink_idx_type=1 # %F is "regular file", ignore others
300 local nlink_idx_file=2 # "file" is here
301 local nlink_idx_size=3 # %s is file size in bytes
302 local nlink_idx_dev=4 # %D is the underlying device number
303 # nlink_type=(1 regular file 1234 0x810)
# NOTE(review): GNU stat prints "regular empty file" for zero-length files,
# which would shift the size and device fields by one word unless the
# continuation of this command (not shown) compensates; confirm.
304 local nlink_type=($(LANG=C stat -c "%h %F %s %D" "$OLDNAME" \
307 # skip non-regular files, since they don't have any objects
308 # and there is no point in trying to migrate them.
309 if [ "${nlink_type[$nlink_idx_type]}" != "regular" ]; then
310 echo -e "\r$OLDNAME: not a regular file, skipped" 1>&2
314 # working out write perms is hard, let the shell do it
315 if [ ! -w "$OLDNAME" ]; then
316 echo -e "\r$OLDNAME: no write permission, skipped" 1>&2
# A dry run stops here unless verbose output was requested; with -v the
# loop continues so the chosen layout can be printed.
320 if $OPT_DRYRUN && ! $OPT_DEBUG; then
321 $ECHO "dry run, skipped"
325 # xattrs use absolute file paths, so ensure provided path is
326 # also absolute so that the names can be compared
327 local oldname_absolute=$(readlink -f "$OLDNAME")
328 if [ -z "$oldname_absolute" ]; then
329 echo -e "\r$OLDNAME: cannot resolve full path, skipped" 1>&2
332 OLDNAME=$oldname_absolute
# The FID is only needed for hard-link bookkeeping: look it up for
# multi-link files, or for every file once RSYNC_WITH_HLINKS is set.
334 if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]] ||
335 $RSYNC_WITH_HLINKS; then
336 fid=$($LFS path2fid "$OLDNAME" 2> /dev/null)
337 if [ $? -ne 0 ]; then
338 echo -e "\r$OLDNAME: cannot get FID, skipping; is this a Lustre file system?" 1>&2
342 # don't migrate a hard link if it was already migrated
343 if path_in_set "$OLDNAME"; then
344 $ECHO "already migrated via another hard link"
348 # There is limited space available in the xattrs
349 # to store all of the hard links for a file, so it's
350 # possible that $OLDNAME is part of a link set but is
351 # not listed in xattrs and therefore not listed as
# Second lookup, by FID: yields the recorded path of a link that was
# already handled, if any.
353 local migrated=$(old_fid_in_set "$fid")
354 if [ -n "$migrated" ]; then
355 $ECHO "already migrated via another hard link"
356 # Only the rsync case has to relink. The
357 # "lfs migrate" case keeps the same inode so
358 # all of the links are already correct.
359 $OPT_RSYNC && [ "$migrated" != "$OLDNAME" ] &&
360 ln -f "$migrated" "$OLDNAME"
362 add_to_set "$fid" "$OLDNAME"
# Start from the command-line values; empty ones are filled in from the
# file's current layout further down.
367 local stripe_size="$OPT_STRIPE_SIZE"
368 local stripe_count="$OPT_STRIPE_COUNT"
369 local stripe_opts="-N --comp-count -c -S -p -y"
370 local parent_count=""
# NOTE(review): only a leading "-p " is stripped; OPT_POOL is stored with
# whichever spelling was given, so "--pool <name>" is left unchanged here.
372 local stripe_pool="${OPT_POOL#-p }"
375 # avoid multiple getstripe calls
376 # lcm_mirror_count: 1
378 # lmm_stripe_count: 1
379 # lmm_stripe_size: 1048576
# Indexes into layout_info, which holds the second field of each getstripe
# output line.
381 local l_mirror_count=0
383 local l_stripe_count=2
384 local l_stripe_size=3
385 local l_stripe_pool=4
# NOTE(review): $OLDNAME is unquoted here and in the dirname/basename calls
# below, so paths containing whitespace are split into several words.
386 local layout_info=($($LFS getstripe $stripe_opts $OLDNAME \
387 2>/dev/null | awk '{ print $2 }'))
# The layout argument string starts with the pass-through options.
389 layout="${OPT_PASSTHROUGH[@]} "
# -R: copy the layout of the containing directory.
391 if $OPT_RESTRIPE; then
393 layout+="--copy $(dirname $OLDNAME)"
396 # If rsync copies Lustre xattrs properly in the future
397 # (i.e. before the file data, so that it preserves
398 # striping) then we don't need this getstripe stuff.
# Without a pool option, keep the pool the file currently uses.
401 [ -n "$OPT_POOL" ] ||
402 stripe_pool=${layout_info[$l_stripe_pool]}
403 mirror_count=${layout_info[$l_mirror_count]}
# -A: let calc_stripe choose the count. OBJ_MAX_KB is read back and passed
# in again on the next call, so the cap carries over between files.
405 if $OPT_AUTOSTRIPE; then
406 local filekb=$((${nlink_type[$nlink_idx_size]} /
409 read stripe_count OBJ_MAX_KB < <(calc_stripe \
410 "$OLDNAME" "$filekb" "$OBJ_MAX_KB")
# An empty count means calc_stripe printed nothing; abort the whole run.
411 [ -z "$stripe_count" ] && exit 1
412 [ $stripe_count -lt 1 ] && stripe_count=1
# Fall back to the file's current count and size when none was chosen.
414 [ -n "$stripe_count" ] ||
415 stripe_count=${layout_info[$l_stripe_count]}
417 [ -n "$stripe_size" ] ||
418 stripe_size=${layout_info[$l_stripe_size]}
# Without both a count and a size, clear UNLINK: mktemp then gets no extra
# option and the setstripe step on the temporary file is skipped.
420 [ -z "$stripe_count" -o -z "$stripe_size" ] && UNLINK=""
# For -R, the values to display come from the parent directory's default
# layout (getstripe -d).
428 if $OPT_RESTRIPE; then
429 parent_layout=($($LFS getstripe $stripe_opts \
430 -d $(dirname $OLDNAME) 2>/dev/null |
432 parent_count=${parent_layout[$l_stripe_count]}
433 parent_size=${parent_layout[$l_stripe_size]}
434 stripe_pool=${parent_layout[$l_stripe_pool]}
435 mirror_count=${parent_layout[$l_mirror_count]}
438 $ECHO -n "stripe_count=${stripe_count:-$parent_count},stripe_size=${stripe_size:-$parent_size}"
439 [ -n "$stripe_pool" ] &&
440 $ECHO -n ",pool=${stripe_pool}"
441 [[ $mirror_count -gt 1 ]] &&
442 $ECHO -n ",mirror_count=${mirror_count}"
447 $ECHO " dry run, skipped"
# NOTE(review): l_comp_count is not defined in the lines shown here
# (presumably alongside the other l_* indexes; confirm). The array
# expansion is unquoted, so an empty value makes the numeric test fail
# with an error.
451 if ! $OPT_COMP && [ ${layout_info[$l_comp_count]} -gt 0 ]; then
452 layout+="--copy $OLDNAME"
# Append only the layout settings that have a value.
456 [ -n "$stripe_count" ] && layout+="-c $stripe_count "
457 [ -n "$stripe_size" ] && layout+="-S $stripe_size "
458 [ -n "$OPT_POOL" -a -n "$stripe_pool" ] &&
459 layout+="-p $stripe_pool "
460 [[ $mirror_count -gt 1 ]] && layout+="-N $mirror_count "
462 layout+="$OPT_LAYOUT"
464 # detect other hard links and store them on a global
465 # list so we don't re-migrate them
466 if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]]; then
# Look up the mount point with df unless the device matches last_dev.
467 [ "${nlink_type[$nlink_idx_dev]}" == "$last_dev" ] ||
468 mntpoint=$(df -P "$OLDNAME" |
469 awk 'NR==2 { print $NF }')
470 if [ -z "$mntpoint" ]; then
471 echo -e "\r$OLDNAME: cannot determine mount point; skipped" 1>&2
# Ask Lustre for the paths that share this FID.
474 hlinks=$($LFS fid2path "$mntpoint" "$fid" 2> /dev/null)
# NOTE(review): by the time "[ $? -ne 0 ]" runs, $? holds the status of the
# "$OPT_RSYNC" command that precedes it (0 whenever the test is reached),
# not the status of fid2path, so this failure branch never runs.
475 if $OPT_RSYNC && [ $? -ne 0 ]; then
476 echo -e "\r$OLDNAME: cannot determine hard link paths, skipped" 1>&2
484 # first try to migrate via Lustre tools, then fall back to rsync
485 if ! $OPT_RSYNC; then
486 if $LFS migrate $layout "$OLDNAME"; then
488 # no-op if hlinks empty for 1-link files
# NOTE(review): ${hlinks[*]} is unquoted, so link paths containing
# whitespace are split into several words.
489 for link in ${hlinks[*]}; do
490 add_to_set "$fid" "$link"
493 elif $OPT_NO_RSYNC; then
494 echo -e "\r$OLDNAME: refusing to fall back to rsync, skipped" 1>&2
497 $ECHO -n "falling back to rsync: "
# rsync path: create a hidden temporary file next to the original, copy
# into it, optionally compare, then rename it over the original.
502 local olddir=$(dirname $OLDNAME)
503 local oldfile=$(basename $OLDNAME)
504 NEWNAME=$(mktemp $UNLINK "$olddir/.$oldfile.XXXXXX")
505 if [ $? -ne 0 -o -z "$NEWNAME" ]; then
506 echo -e "\r$OLDNAME: cannot make temp file, skipped" 1>&2
# Apply the target layout to the temporary file only when UNLINK is set.
510 if [ "$UNLINK" ]; then
511 if ! $LFS setstripe $layout "$NEWNAME"; then
512 echo -e "\r$NEWNAME: setstripe failed, exiting" 1>&2
517 # we use --inplace, since we created our own temp file already
518 if ! $RSYNC -a --inplace $RSYNC_OPTS "$OLDNAME" "$NEWNAME";then
519 echo -e "\r$OLDNAME: copy error, exiting" 1>&2
# Byte-for-byte comparison of the copy, unless disabled with -s.
523 if $OPT_CHECK && ! cmp -s "$OLDNAME" "$NEWNAME"; then
524 echo -e "\r$NEWNAME: compare failed, exiting" 1>&2
528 if ! mv "$NEWNAME" "$OLDNAME"; then
529 echo -e "\r$OLDNAME: rename error, exiting" 1>&2
534 # no-op if hlinks empty for 1-link files
# The rename produced a new inode, so every other known link is re-created
# to point at it and recorded as handled.
535 for link in ${hlinks[*]}; do
536 if [ "$link" != "$OLDNAME" ]; then
537 ln -f "$OLDNAME" "$link"
539 add_to_set "$fid" "$link"
542 # If the number of hlinks exceeds the space in the xattrs,
543 # when the final path is statted it will have a link count
544 # of 1 (all other links will point to the new inode).
545 # This flag indicates that even paths with a link count of
546 # 1 are potentially part of a link set.
# NOTE(review): in the lines shown here hlinks is only ever assigned as one
# string (from fid2path), which makes its element count 0 or 1; confirm
# this test can tell multi-link sets apart as the comment above intends.
547 (( ${#hlinks[*]} == 1 )) || RSYNC_WITH_HLINKS=true
# No positional arguments are left: the file list comes from standard input.
551 if [ "$#" -eq 0 ]; then
# Newline-separated names are converted to NUL-separated ones before being
# piped to lfs_migrate (presumably the function containing the
# "read -d ''" loop; its header is not shown).
555 tr '\n' '\0' | lfs_migrate
# List the regular files under the argument, NUL-separated. The consumer of
# this output is on a line not shown here.
560 $LFS find "$1" -type f -print0