3 # lfs_migrate: a simple tool to copy and check files.
5 # To avoid allocating objects on one or more OSTs, they should be
6 # deactivated on the MDS via "lctl --device {device_number} deactivate",
7 # where {device_number} is from the output of "lctl dl" on the MDS.
9 # To guard against corruption, the file is compared after migration
10 # to verify the copy is correct and the file has not been modified.
# This does not protect against the file being open in another
# process, although it would catch the worst cases of in-use files.
# To be 100% safe the administrator must ensure the files are not in use.
# "true" forces rsync mode; otherwise "lfs migrate" is tried first.
OPT_RSYNC=${LFS_MIGRATE_RSYNC_MODE:-false}
# Set once an rsync migration has relinked hard links, after which even
# paths with a link count of 1 may belong to an already-migrated link set.
RSYNC_WITH_HLINKS=false
LFS_MIGRATE_TMP=${TMPDIR:-/tmp}
# Scratch file recording "<old_fid> <path>" for every migrated hard link.
# The template is quoted so a TMPDIR containing whitespace stays one argument.
MIGRATED_SET="$(mktemp "${LFS_MIGRATE_TMP}/lfs_migrate.links.XXXXXX")"
# sed expression that strips the leading "[fid] " from a MIGRATED_SET line.
REMOVE_FID='s/^\[[0-9a-fx:]*\] //'
30 echo "$old_fid $path" >> "$MIGRATED_SET"
# Succeeds when $path is already recorded in $MIGRATED_SET.
# Match literally (-F) and against the whole line (-x): $path is a file
# name, not a regular expression, so characters such as '.', '[' or '*'
# must not be interpreted by grep.
sed -e "$REMOVE_FID" "$MIGRATED_SET" | grep -Fxq -- "$path"
42 grep "^\\$old_fid" "$MIGRATED_SET" | head -n 1 |
48 usage: lfs_migrate [--dry-run|-n] [--help|-h] [--no-rsync|--rsync] [--quiet|-q]
49 [--auto-stripe|-A [-C <cap>]
50 [--min-free|-M <min_free>] [--max-free|-X <max_free>]]
51 [--pool|-p <pool>] [--stripe-count|-c <stripe_count>]
52 [--stripe-size|-S <stripe_size>]
54 [--restripe|-R] [--skip|-s] [--verbose|-v] [--yes|-y] [-0]
56 -A restripe file using an automatically selected stripe count,
57 uses stripe_count = sqrt(size_in_GB) + 1
59 restripe file using the specified <stripe_count>
60 -C <cap> when -A is set, limit the migrated file to use on each OST
61 at most 1/<cap> of the available space of the smallest OST
62 -D do not use direct I/O to copy file contents
63 -h show this usage message
65 when -A is set, an OST must contain more available space than
66 <min_free> KB in order for it to be considered available for
68 --no-rsync do not fall back to rsync mode even if lfs migrate fails
69 -n only print the names of files to be migrated
70 -p <pool> use the specified OST pool for the destination file
71 -q run quietly (don't print filenames or status)
72 --rsync force rsync mode instead of using lfs migrate
73 -R restripe file using default directory striping
74 -s skip file data comparison after migrate
76 restripe file using the specified stripe size
77 -v show verbose debug messages
79 when -A is set, limit the amount of space on each OST that
80 can be considered available for the migration to
82 -y answer 'y' to usage question
83 -0 input file names on stdin are separated by a null character
85 Options '-A', '-c', and '-R' are mutually exclusive.
86 Options '-C', '-M', and '-X' are ignored if '-A' is not set.
88 The --rsync and --no-rsync options may not be specified at the same time.
90 If a directory is an argument, all files in the directory are migrated.
91 If no file/directory is given, the file list is read from standard input.
93 Any arguments that are not explicitly recognized by the script are passed
94 through to the 'lfs migrate' utility.
97 lfs_migrate /mnt/lustre/dir
98 lfs_migrate -p newpool /mnt/lustre/dir
99 lfs find /test -O test-OST0004 -size +4G | lfs_migrate -y
105 rm -f "$MIGRATED_SET"
106 [ -n "$NEWNAME" ] && rm -f "$NEWNAME"
131 # Examine any long options and arguments. getopts does not support long
132 # options, so they must be stripped out and classified as either options
133 # for the script, or passed through to "lfs migrate".
134 while [ -n "$*" ]; do
138 -l|--link) ;; # maintained backward compatibility for now
139 -n) OPT_DRYRUN=true; OPT_YES=true
140 echo "$PROG: -n deprecated, use --dry-run or --non-block" 1>&2;;
141 --dry-run) OPT_DRYRUN=true; OPT_YES=true;;
142 -p|--pool) OPT_POOL="$arg $2"; OPT_LAYOUT+="$OPT_POOL "; shift;;
144 -R|--restripe) OPT_RESTRIPE=true;;
145 -s|--skip) OPT_CHECK=false;;
146 -v|--verbose) OPT_DEBUG=true; ECHO=echo;;
147 -y|--yes) OPT_YES=true;;
149 -b|--block|--non-block|--non-direct|--no-verify)
150 # Always pass non-layout options to 'lfs migrate'
151 OPT_PASSTHROUGH+=("$arg");;
152 --rsync) OPT_RSYNC=true;;
153 --no-rsync) OPT_NO_RSYNC=true;;
154 --copy|--yaml|--file)
155 # these options have files as arguments, pass both through
156 OPT_LAYOUT+="$arg $2 "; shift;;
157 --auto-stripe|-A) OPT_AUTOSTRIPE=true;;
158 -C) OPT_CAP="$2"; shift;;
159 -D) LFS_OPT_DIRECTIO="-D";;
160 -M|--min-free) OPT_MINFREE="$2"; shift;;
161 -X|--max-free) OPT_MAXFREE="$2"; shift;;
162 -c|--stripe-count) OPT_STRIPE_COUNT="$2"; shift;;
163 -S|--stripe-size) OPT_STRIPE_SIZE="$2"; shift;;
164 *) # Pass other non-file layout options to 'lfs migrate'
165 [ -e "$arg" ] && OPT_FILE+="$arg " && break || OPT_LAYOUT+="$arg "
170 if $OPT_RESTRIPE && [ -n "$OPT_LAYOUT" ]; then
171 echo "$PROG: Options $OPT_LAYOUT cannot be used with the -R option" 1>&2
173 elif $OPT_RESTRIPE && [[ "$OPT_STRIPE_COUNT" || "$OPT_STRIPE_SIZE" ]]; then
174 echo "$(basename $0): Options -c <stripe_count> and -S <stripe_size> "\
175 "may not be specified at the same time as the -R option." 1>&2
177 elif $OPT_AUTOSTRIPE && [ -n "$OPT_STRIPE_COUNT" ]; then
179 echo "$(basename $0) error: The -c <stripe_count> option may not" 1>&2
180 echo "be specified at the same time as the -A option." 1>&2
182 elif $OPT_AUTOSTRIPE && $OPT_RESTRIPE; then
184 echo "$(basename $0) error: The -A option may not be specified at" 1>&2
185 echo "the same time as the -R option." 1>&2
189 if $OPT_RSYNC && $OPT_NO_RSYNC; then
190 echo "$PROG: Options --rsync and --no-rsync may not be" \
191 "specified at the same time." 1>&2
197 echo "lfs_migrate is currently NOT SAFE for moving in-use files." 1>&2
198 echo "Use it only when you are sure migrated files are unused." 1>&2
200 echo "If emptying an OST that is active on the MDS, new files may" 1>&2
201 echo "use it. To stop allocating any new objects on OSTNNNN run:" 1>&2
# Printed for the operator to copy and run; no trailing quote after the value.
echo " lctl set_param osp.<fsname>-OSTNNNN*.max_create_count=0" 1>&2
203 echo "on each MDS using the OST(s) being emptied." 1>&2
204 echo -n "Continue? (y/n) "
206 [ "$CHECK" != "y" -a "$CHECK" != "yes" ] && exit 1
209 # if rsync has --xattr support, then try to copy the xattrs.
210 $RSYNC --help 2>&1 | grep -q xattr && RSYNC_OPTS="$RSYNC_OPTS -X"
211 $RSYNC --help 2>&1 | grep -q acls && RSYNC_OPTS="$RSYNC_OPTS -A"
# If rsync copies lustre xattrs in the future, then we can skip lfs (bug 22189)
# The lookup is quoted on purpose: when rsync is not found the substitution
# is empty, and "strings" without a file argument would read stdin and
# consume the file list that may be piped into this script.
strings "$(command -v $RSYNC)" 2>&1 | grep -q lustre && LFS=:
215 # rsync creates its temporary files with lenient permissions, even if
216 # permissions on the original files are more strict. Tighten umask here
217 # to avoid the brief window where unprivileged users might be able to
218 # access the temporary file.
221 # Use stripe count = sqrt(size_in_GB) + 1, but cap object size per OST.
222 function calc_stripe()
227 local filegb=$((filekb / 1048576))
229 local ost_max_count=0
231 # Files up to 1GB will have 1 stripe if they fit within the object max
232 if [[ $filegb -lt 1 && "$obj_max_kb" && $filekb -le $obj_max_kb ]]; then
233 echo 1 "$obj_max_kb" && return
236 stripe_count=$(bc <<< "scale=0; 1 + sqrt($filegb)" 2> /dev/null) ||
237 { echo "cannot auto calculate stripe count" >&2; return; }
239 if [ -z "$obj_max_kb" ]; then
240 local ost_min_kb=$((1 << 62))
242 # Calculate cap on object size at 1% of smallest OST
243 # but only include OSTs that have 256MB+ available space
244 while IFS='' read avail; do
245 [[ "$OPT_MAXFREE" && $avail -gt $OPT_MAXFREE ]] &&
247 if [ $avail -ge $OPT_MINFREE ]; then
248 ost_max_count=$((ost_max_count + 1))
249 if [ $avail -lt $ost_min_kb ]; then
253 done < <($LFS df $OPT_POOL $OLDNAME | awk '/OST/ { print $4 }')
255 if [ $ost_max_count -eq 0 ]; then
256 # no OSTs with enough space, stripe over all of them
261 if (( ost_min_kb == (1 << 62) )); then
262 echo "warning: unable to determine minimum OST size, " \
263 "object size not capped" >&2
264 echo "$stripe_count" "0"
268 obj_max_kb=$((ost_min_kb / $OPT_CAP))
269 elif [ $obj_max_kb -eq 0 ]; then
270 echo "warning: unable to determine minimum OST size " \
271 "from previous migrate, object size not capped" >&2
272 echo "$stripe_count" "$obj_max_kb"
276 # If disk usage would exceed the cap, increase the number of stripes.
277 # Round up to the nearest MB to ensure file will fit.
278 (( filekb > stripe_count * obj_max_kb )) &&
279 stripe_count=$(((filekb + obj_max_kb - 1024) / obj_max_kb))
281 # Limit the count to the number of eligible OSTs
282 if [ "$stripe_count" -gt $ost_max_count ]; then
283 echo "$ost_max_count" "$obj_max_kb"
285 echo "$stripe_count" "$obj_max_kb"
293 while IFS='' read -d '' OLDNAME; do
295 local stripe_size="$OPT_STRIPE_SIZE"
296 local stripe_count="$OPT_STRIPE_COUNT"
297 local parent_count=""
# OPT_POOL is stored as "<flag> <pool>", where <flag> is either -p or
# --pool; drop everything up to the first space to keep only the pool name.
local stripe_pool="${OPT_POOL#* }"
304 $ECHO -n "$OLDNAME: "
306 # avoid duplicate stat call by fetching all attrs at once
307 local nlink_idx_link=0 # %h is the hard link count
308 local nlink_idx_type=1 # %F is "regular file", ignore others
309 local nlink_idx_file=2 # "file" is here
310 local nlink_idx_size=3 # %s is file size in bytes
311 local nlink_idx_dev=4 # %D is the underlying device number
312 # nlink_type=(1 regular file 1234 0x810)
313 local nlink_type=($(LANG=C stat -c "%h %F %s %D" "$OLDNAME" \
316 # skip non-regular files, since they don't have any objects
317 # and there is no point in trying to migrate them.
318 if [ "${nlink_type[$nlink_idx_type]}" != "regular" ]; then
319 echo -e "\r$OLDNAME: not a regular file, skipped" 1>&2
323 # working out write perms is hard, let the shell do it
324 if [ ! -w "$OLDNAME" ]; then
325 echo -e "\r$OLDNAME: no write permission, skipped" 1>&2
329 if $OPT_DRYRUN && ! $OPT_DEBUG; then
330 $ECHO "dry run, skipped"
334 # xattrs use absolute file paths, so ensure provided path is
335 # also absolute so that the names can be compared
336 local oldname_absolute=$(readlink -f "$OLDNAME")
337 if [ -z "$oldname_absolute" ]; then
338 echo -e "\r$OLDNAME: cannot resolve full path, skipped" 1>&2
341 OLDNAME=$oldname_absolute
343 if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]] ||
344 $RSYNC_WITH_HLINKS; then
345 fid=$($LFS path2fid "$OLDNAME" 2> /dev/null)
346 if [ $? -ne 0 ]; then
347 echo -e "\r$OLDNAME: cannot get FID, skipping; is this a Lustre file system?" 1>&2
351 # don't migrate a hard link if it was already migrated
352 if path_in_set "$OLDNAME"; then
353 $ECHO "already migrated via another hard link"
357 # There is limited space available in the xattrs
358 # to store all of the hard links for a file, so it's
359 # possible that $OLDNAME is part of a link set but is
360 # not listed in xattrs and therefore not listed as
362 local migrated=$(old_fid_in_set "$fid")
363 if [ -n "$migrated" ]; then
364 $ECHO "already migrated via another hard link"
365 # Only the rsync case has to relink. The
366 # "lfs migrate" case keeps the same inode so
367 # all of the links are already correct.
368 $OPT_RSYNC && [ "$migrated" != "$OLDNAME" ] &&
369 ln -f "$migrated" "$OLDNAME"
371 add_to_set "$fid" "$OLDNAME"
376 if $OPT_RESTRIPE; then
379 # If rsync copies Lustre xattrs properly in the future
380 # (i.e. before the file data, so that it preserves
381 # striping) then we don't need this getstripe stuff.
384 [ -n "$OPT_POOL" ] ||
385 stripe_pool=$($LFS getstripe -p "$OLDNAME" 2>/dev/null)
386 mirror_count=$($LFS getstripe -N "$OLDFILE" 2>/dev/null)
388 if $OPT_AUTOSTRIPE; then
389 local filekb=$((${nlink_type[$nlink_idx_size]} /
392 read stripe_count OBJ_MAX_KB < <(calc_stripe \
393 "$OLDNAME" "$filekb" "$OBJ_MAX_KB")
394 [ -z "$stripe_count" ] && exit 1
395 [ $stripe_count -lt 1 ] && stripe_count=1
397 [ -n "$stripe_count" ] ||
398 stripe_count=$($LFS getstripe -c "$OLDNAME" \
401 [ -n "$stripe_size" ] ||
402 stripe_size=$($LFS getstripe -S "$OLDNAME" 2> /dev/null)
404 [ -z "$stripe_count" -o -z "$stripe_size" ] && UNLINK=""
411 if $OPT_RESTRIPE; then
412 parent_count=$($LFS getstripe -c \
413 $(dirname "$OLDNAME") 2> \
415 parent_size=$($LFS getstripe -S \
416 $(dirname "$OLDNAME") 2> \
418 stripe_pool=$($LFS getstripe --pool \
419 $(dirname "$OLDNAME") 2> \
421 mirror_count=$($LFS getstripe -N \
422 $(dirname "$OLDFILE") 2> \
426 $ECHO -n "stripe_count=${stripe_count:-$parent_count},stripe_size=${stripe_size:-$parent_size}"
427 [ -n "$stripe_pool" ] &&
428 $ECHO -n ",pool=${stripe_pool}"
429 [ -n "$mirror_count" ] &&
430 $ECHO -n ",mirror_count=${mirror_count}"
435 $ECHO " dry run, skipped"
439 layout="${OPT_PASSTHROUGH[@]} "
440 [ -n "$stripe_count" ] && layout+="-c $stripe_count "
441 [ -n "$stripe_size" ] && layout+="-S $stripe_size "
442 [ -z "$OPT_POOL" -a -n "$stripe_pool" ] &&
443 layout+="-p $stripe_pool "
444 [ -n "$mirror_count" ] && layout+="-N $mirror_count "
445 layout+="$OPT_LAYOUT"
447 # detect other hard links and store them on a global
448 # list so we don't re-migrate them
449 if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]]; then
450 [ "${nlink_type[$nlink_idx_dev]}" == "$last_dev" ] ||
451 mntpoint=$(df -P "$OLDNAME" |
452 awk 'NR==2 { print $NF }')
453 if [ -z "$mntpoint" ]; then
454 echo -e "\r$OLDNAME: cannot determine mount point; skipped" 1>&2
457 hlinks=$($LFS fid2path "$mntpoint" "$fid" 2> /dev/null)
458 if $OPT_RSYNC && [ $? -ne 0 ]; then
459 echo -e "\r$OLDNAME: cannot determine hard link paths, skipped" 1>&2
467 # first try to migrate via Lustre tools, then fall back to rsync
468 if ! $OPT_RSYNC; then
469 if $LFS migrate $layout "$OLDNAME"; then
471 # no-op if hlinks empty for 1-link files
472 for link in ${hlinks[*]}; do
473 add_to_set "$fid" "$link"
476 elif $OPT_NO_RSYNC; then
477 echo -e "\r$OLDNAME: refusing to fall back to rsync, skipped" 1>&2
480 $ECHO -n "falling back to rsync: "
# Split the path into directory and file name for the temp-file template.
# $OLDNAME is quoted so names containing whitespace or glob characters
# reach dirname/basename as a single argument.
local olddir=$(dirname "$OLDNAME")
local oldfile=$(basename "$OLDNAME")
487 NEWNAME=$(mktemp $UNLINK "$olddir/.$oldfile.XXXXXX")
488 if [ $? -ne 0 -o -z "$NEWNAME" ]; then
489 echo -e "\r$OLDNAME: cannot make temp file, skipped" 1>&2
493 if [ "$UNLINK" ]; then
494 if ! $LFS setstripe $layout "$NEWNAME"; then
495 echo -e "\r$NEWNAME: setstripe failed, exiting" 1>&2
500 # we use --inplace, since we created our own temp file already
501 if ! $RSYNC -a --inplace $RSYNC_OPTS "$OLDNAME" "$NEWNAME";then
502 echo -e "\r$OLDNAME: copy error, exiting" 1>&2
506 if $OPT_CHECK && ! cmp -s "$OLDNAME" "$NEWNAME"; then
507 echo -e "\r$NEWNAME: compare failed, exiting" 1>&2
511 if ! mv "$NEWNAME" "$OLDNAME"; then
512 echo -e "\r$OLDNAME: rename error, exiting" 1>&2
517 # no-op if hlinks empty for 1-link files
518 for link in ${hlinks[*]}; do
519 if [ "$link" != "$OLDNAME" ]; then
520 ln -f "$OLDNAME" "$link"
522 add_to_set "$fid" "$link"
525 # If the number of hlinks exceeds the space in the xattrs,
526 # when the final path is statted it will have a link count
527 # of 1 (all other links will point to the new inode).
528 # This flag indicates that even paths with a link count of
529 # 1 are potentially part of a link set.
530 (( ${#hlinks[*]} == 1 )) || RSYNC_WITH_HLINKS=true
534 if [ "$#" -eq 0 ]; then
538 tr '\n' '\0' | lfs_migrate
543 $LFS find "$1" -type f -print0