3 # lfs_migrate: a simple tool to copy and check files.
5 # To avoid allocating objects on one or more OSTs, they should be
6 # deactivated on the MDS via "lctl --device {device_number} deactivate",
7 # where {device_number} is from the output of "lctl dl" on the MDS.
9 # To guard against corruption, the file is compared after migration
10 # to verify the copy is correct and the file has not been modified.
11 # This does not protect against the file being open by another
12 # process. It will catch the worst cases of in-use files, but to be
13 # 100% safe the administrator must ensure the files are not in use.
# Flag tested in the per-file loop: when true, files with a link count of 1
# are also looked up in MIGRATED_SET. It starts out false and is switched on
# at the end of the rsync path.
18 RSYNC_WITH_HLINKS=false
# Directory for scratch files: $TMPDIR if set and non-empty, otherwise /tmp.
19 LFS_MIGRATE_TMP=${TMPDIR:-/tmp}
# Temporary file holding one "<fid> <path>" line per path already handled.
20 MIGRATED_SET="$(mktemp ${LFS_MIGRATE_TMP}/lfs_migrate.links.XXXXXX)"
# sed expression that removes a leading bracketed FID plus the following
# space, leaving only the path part of a MIGRATED_SET line.
22 REMOVE_FID='s/^\[[0-9a-fx:]*\] //'
# Append one "<fid> <path>" record to the migrated set.
29 echo "$old_fid $path" >> "$MIGRATED_SET"
# Succeeds when $path is present in the set (FID column stripped first).
# NOTE(review): $path is interpolated into a grep regular expression, so a
# path containing regex metacharacters may match incorrectly; grep -Fx on the
# stripped output would be a literal comparison. $MIGRATED_SET is also
# unquoted on this line, unlike its other uses.
35 sed -e "$REMOVE_FID" $MIGRATED_SET | grep -q "^$path$"
# Find the first record whose FID column equals $old_fid. The doubled
# backslash yields a single backslash in the pattern, escaping the first
# character of the FID (presumably the opening bracket, as in REMOVE_FID).
41 grep "^\\$old_fid" "$MIGRATED_SET" | head -n 1 |
47 usage: lfs_migrate [--dry-run|-n] [--help|-h] [--no-rsync|--rsync] [--quiet|-q]
48 [--auto-stripe|-A [-C <cap>]
49 [--min-free|-M <min_free>] [--max-free|-X <max_free>]]
50 [--pool|-p <pool>] [--stripe-count|-c <stripe_count>]
51 [--stripe-size|-S <stripe_size>]
53 [--restripe|-R] [--skip|-s] [--verbose|-v] [--yes|-y] [-0]
55 -A restripe file using an automatically selected stripe count,
56 uses stripe_count = sqrt(size_in_GB) + 1
58 restripe file using the specified <stripe_count>
59 -C <cap> when -A is set, limit the migrated file to use on each OST
60 at most 1/<cap> of the available space of the smallest OST
61 -D do not use direct I/O to copy file contents
62 -h show this usage message
64 when -A is set, an OST must contain more available space than
65 <min_free> KB in order for it to be considered available for
67 --no-rsync do not fall back to rsync even if lfs migrate fails (default)
68 -n only print the names of files to be migrated
69 -p <pool> use the specified OST pool for the destination file
70 -q run quietly (don't print filenames or status)
71 --rsync force rsync mode instead of using lfs migrate
72 -R restripe file using default directory striping
73 -s skip file data comparison after migrate
75 restripe file using the specified stripe size
76 -v show verbose debug messages
78 when -A is set, limit the amount of space on each OST that
79 can be considered available for the migration to
81 -y answer 'y' to usage question (only when --rsync used)
82 -0 input file names on stdin are separated by a null character
84 Options '-A', and '-R' are mutually exclusive with each other, and any
85 specific layout (e.g. any specific parameters like '-c', '-S', '-E', '-p').
86 Options '-C', '-M', and '-X' are ignored if '-A' is not set.
88 If a directory is an argument, all files in the directory are migrated.
89 If no file/directory is given, the file list is read from standard input.
91 Any arguments that are not explicitly recognized by the script are passed
92 through to the 'lfs migrate' utility.
94 If emptying an active OST, new files may continue to be allocated there.
95 To prevent this, on each MDS run the following for each OST being emptied:
97 lctl set_param osp.<fsname>-OSTxxxx*.max_create_count=0
100 lfs_migrate /mnt/lustre/dir
101 lfs_migrate -p newpool /mnt/lustre/dir
102 lfs find /test -O test-OST0004 -size +4G | lfs_migrate
# Remove the migrated-set scratch file, and the rsync temporary copy if one
# is currently recorded in NEWNAME.
108 rm -f "$MIGRATED_SET"
109 [ -n "$NEWNAME" ] && rm -f "$NEWNAME"
136 # Examine any long options and arguments. getopts does not support long
137 # options, so they must be stripped out and classified as either options
138 # for the script, or passed through to "lfs migrate".
# The loop condition tests the space-joined argument list, so parsing goes on
# while any non-empty argument remains.
139 while [ -n "$*" ]; do
143 -l|--link) ;; # maintained backward compatibility for now
# -n still enables dry-run mode (and implies --yes) but prints a deprecation
# warning on stderr.
144 -n) OPT_DRYRUN=true; OPT_YES=true
145 echo "$PROG: -n deprecated, use --dry-run or --non-block" 1>&2;;
146 --dry-run) OPT_DRYRUN=true; OPT_YES=true;;
147 -p|--pool) # if within a component, pass through pool
148 $OPT_COMP && OPT_LAYOUT+="$arg $2 " || OPT_POOL="-p $2";
151 -R|--restripe) OPT_RESTRIPE=true;;
152 -s|--skip) OPT_CHECK=false;;
153 -v|--verbose) OPT_DEBUG=true; ECHO=echo;;
154 -y|--yes) OPT_YES=true;;
156 -b|--block|--non-block|--non-direct|-D|--no-verify)
157 # Always pass non-layout options to 'lfs migrate'
158 OPT_PASSTHROUGH+=("$arg");;
159 --rsync) OPT_RSYNC=true; OPT_NO_RSYNC=false;;
160 --no-rsync) OPT_NO_RSYNC=true; OPT_RSYNC=false;;
161 --copy|--yaml|--file) OPT_COPY=true;
162 # these options have files as arguments, pass both through
163 OPT_LAYOUT+="$arg $2 "; shift;;
164 --auto-stripe|-A) OPT_AUTOSTRIPE=true;;
165 -C) OPT_CAP="$2"; shift;;
# NOTE(review): -D is already listed in the pass-through arm above. If both
# arms belong to the same case statement, the first match wins and this arm
# is never reached, so LFS_OPT_DIRECTIO would never be set here - confirm.
166 -D) LFS_OPT_DIRECTIO="-D";;
167 -E|--comp-end|--component-end) OPT_COMP=true; OPT_LAYOUT+="$arg ";;
168 -M|--min-free) OPT_MINFREE="$2"; shift;;
169 -X|--max-free) OPT_MAXFREE="$2"; shift;;
170 -c|--stripe-count) # if within a component, pass through stripe_count
171 $OPT_COMP && OPT_LAYOUT+="$arg $2 " || OPT_STRIPE_COUNT="$2"
173 -S|--stripe-size) # if within a component, pass through stripe_size
174 $OPT_COMP && OPT_LAYOUT+="$arg $2 " || OPT_STRIPE_SIZE="$2"
# The first argument that names an existing path is stored in OPT_FILE and
# ends option parsing; any other unrecognised word is appended to OPT_LAYOUT.
176 *) # Pass other non-file layout options to 'lfs migrate'
177 [[ -e "$arg" ]] && OPT_FILE+="$arg " && break || OPT_LAYOUT+="$arg "
# Reject conflicting layout requests: -R and -A cannot be combined with an
# explicit layout, nor with each other. The OPT_* flags hold the words
# true/false and are executed as commands to test them; the parentheses run
# them in a subshell purely for grouping.
182 if ( $OPT_RESTRIPE || $OPT_AUTOSTRIPE ) &&
183 [[ -n "$OPT_STRIPE_COUNT" || -n "$OPT_STRIPE_SIZE" || -n "$OPT_POOL" ]]; then
184 echo "$PROG error: option -R or -A cannot be used with -c, -S, or -p" 1>&2
186 elif ( $OPT_RESTRIPE || $OPT_AUTOSTRIPE ) && [[ -n "$OPT_LAYOUT" ]]; then
187 echo "$PROG error: option -R or -A cannot be used with $OPT_LAYOUT" 1>&2
189 elif $OPT_RESTRIPE && $OPT_AUTOSTRIPE; then
190 echo "$PROG error: option -R cannot be used with -A" 1>&2
# In rsync mode, warn and ask for confirmation unless OPT_YES is set
# (--yes, or implied by the dry-run options during option parsing).
194 if $OPT_RSYNC && ! $OPT_YES; then
196 echo "'lfs_migrate --rsync' is NOT SAFE for moving in-use files." 1>&2
197 echo "Use it only when you are sure migrated files are unused." 1>&2
199 echo -n "Continue? (y/n) "
# Anything other than "y" or "yes" aborts the run.
201 [ "$CHECK" != "y" -a "$CHECK" != "yes" ] && exit 1
204 # if rsync has --xattr support, then try to copy the xattrs.
205 $RSYNC --help 2>&1 | grep -q xattr && RSYNC_OPTS="$RSYNC_OPTS -X"
# Likewise add -A when the rsync help text mentions acls.
206 $RSYNC --help 2>&1 | grep -q acls && RSYNC_OPTS="$RSYNC_OPTS -A"
207 # If rsync copies lustre xattrs in the future, then we can skip lfs (bug 22189)
# Setting LFS to ":" turns every later "$LFS ..." invocation into a no-op.
208 strings $(which $RSYNC) 2>&1 | grep -q lustre && LFS=:
210 # rsync creates its temporary files with lenient permissions, even if
211 # permissions on the original files are more strict. Tighten umask here
212 # to avoid the brief window where unprivileged users might be able to
213 # access the temporary file.
216 # Use stripe count = sqrt(size_in_GB) + 1, but cap object size per OST.
#
# The caller passes: file name, file size in KB, and the per-OST object size
# cap in KB returned by a previous call (empty on the first call). These are
# presumably bound to filekb / obj_max_kb on lines not visible here - confirm.
# Output: "<stripe_count> <obj_max_kb>" on stdout. If bc fails, an error is
# printed on stderr and nothing is written to stdout; the caller treats an
# empty stripe count as fatal.
217 function calc_stripe()
222 local filegb=$((filekb / 1048576))
224 local ost_max_count=0
226 # Files up to 1GB will have 1 stripe if they fit within the object max
227 if [[ $filegb -lt 1 && "$obj_max_kb" && $filekb -le $obj_max_kb ]]; then
228 echo 1 "$obj_max_kb" && return
# scale=0 makes bc truncate the square root to an integer.
231 stripe_count=$(bc <<< "scale=0; 1 + sqrt($filegb)" 2> /dev/null) ||
232 { echo "cannot auto calculate stripe count" >&2; return; }
# No cap supplied: scan the OSTs to work one out.
234 if [ -z "$obj_max_kb" ]; then
# Sentinel meaning "no eligible OST seen yet"; tested again after the scan.
235 local ost_min_kb=$((1 << 62))
# NOTE(review): the code below uses $OPT_CAP as the divisor and $OPT_MINFREE
# as the threshold rather than a fixed 1% / 256MB; those figures presumably
# describe the default option values - confirm.
237 # Calculate cap on object size at 1% of smallest OST
238 # but only include OSTs that have 256MB+ available space
239 while IFS='' read avail; do
240 [[ "$OPT_MAXFREE" && $avail -gt $OPT_MAXFREE ]] &&
242 if [ $avail -ge $OPT_MINFREE ]; then
243 ost_max_count=$((ost_max_count + 1))
244 if [ $avail -lt $ost_min_kb ]; then
# Input is the 4th column of every "lfs df" line containing "OST".
# NOTE(review): this reads $OLDNAME, which is not assigned in the visible part
# of this function, so it appears to rely on the caller's variable.
248 done < <($LFS df $OPT_POOL "$OLDNAME" | awk '/OST/ { print $4 }')
250 if [ $ost_max_count -eq 0 ]; then
251 # no OSTs with enough space, stripe over all of them
256 if (( ost_min_kb == (1 << 62) )); then
257 echo "warning: unable to determine minimum OST size, " \
258 "object size not capped" >&2
259 echo "$stripe_count" "0"
263 obj_max_kb=$((ost_min_kb / $OPT_CAP))
# A cap of 0 handed back from an earlier call means that call could not
# determine the OST size either.
264 elif [ $obj_max_kb -eq 0 ]; then
265 echo "warning: unable to determine minimum OST size " \
266 "from previous migrate, object size not capped" >&2
267 echo "$stripe_count" "$obj_max_kb"
271 # If disk usage would exceed the cap, increase the number of stripes.
272 # Round up to the nearest MB to ensure file will fit.
273 (( filekb > stripe_count * obj_max_kb )) &&
274 stripe_count=$(((filekb + obj_max_kb - 1024) / obj_max_kb))
# NOTE(review): ost_max_count is only incremented inside the branch taken
# when no cap was passed in. With a non-zero cached cap it appears to still
# be 0 here, which would make this print a stripe count of 0 - confirm
# against the complete function.
276 # Limit the count to the number of eligible OSTs
277 if [ "$stripe_count" -gt $ost_max_count ]; then
278 echo "$ost_max_count" "$obj_max_kb"
280 echo "$stripe_count" "$obj_max_kb"
# Main loop: read NUL-terminated path names from stdin, one file per pass.
288 while IFS='' read -d '' OLDNAME; do
# $ECHO is set to echo by --verbose during option parsing.
293 $ECHO -n "$OLDNAME: "
295 # avoid duplicate stat call by fetching all attrs at once
296 local nlink_idx_link=0 # %h is the hard link count
297 local nlink_idx_type=1 # %F is "regular file", ignore others
298 local nlink_idx_file=2 # "file" is here
299 local nlink_idx_size=3 # %s is file size in bytes
300 local nlink_idx_dev=4 # %D is the underlying device number
301 # nlink_type=(1 regular file 1234 0x810)
302 local nlink_type=($(LANG=C stat -c "%h %F %s %D" "$OLDNAME" \
305 # skip non-regular files, since they don't have any objects
306 # and there is no point in trying to migrate them.
307 if [ "${nlink_type[$nlink_idx_type]}" != "regular" ]; then
308 echo -e "\r$OLDNAME: not a regular file, skipped" 1>&2
312 # working out write perms is hard, let the shell do it
313 if [ ! -w "$OLDNAME" ]; then
314 echo -e "\r$OLDNAME: no write permission, skipped" 1>&2
# A plain dry run stops here. With --verbose as well, processing continues so
# that the layout which would be used can be reported further down.
318 if $OPT_DRYRUN && ! $OPT_DEBUG; then
319 $ECHO "dry run, skipped"
323 # xattrs use absolute file paths, so ensure provided path is
324 # also absolute so that the names can be compared
325 local oldname_absolute=$(readlink -f "$OLDNAME")
326 if [ -z "$oldname_absolute" ]; then
327 echo -e "\r$OLDNAME: cannot resolve full path, skipped" 1>&2
330 OLDNAME=$oldname_absolute
# The FID is only needed for files with several links, or once
# RSYNC_WITH_HLINKS has been switched on by an earlier file.
332 if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]] ||
333 $RSYNC_WITH_HLINKS; then
334 fid=$($LFS path2fid "$OLDNAME" 2> /dev/null)
335 if [ $? -ne 0 ]; then
336 echo -e "\r$OLDNAME: cannot get FID, skipping; is this a Lustre file system?" 1>&2
340 # don't migrate a hard link if it was already migrated
341 if path_in_set "$OLDNAME"; then
342 $ECHO "already migrated via another hard link"
346 # There is limited space available in the xattrs
347 # to store all of the hard links for a file, so it's
348 # possible that $OLDNAME is part of a link set but is
349 # not listed in xattrs and therefore not listed as
# Second check, by FID: old_fid_in_set is expected to print a path recorded
# for the same FID, or nothing.
351 local migrated=$(old_fid_in_set "$fid")
352 if [ -n "$migrated" ]; then
353 $ECHO "already migrated via another hard link"
354 # Only the rsync case has to relink. The
355 # "lfs migrate" case keeps the same inode so
356 # all of the links are already correct.
357 $OPT_RSYNC && [ "$migrated" != "$OLDNAME" ] &&
358 ln -f "$migrated" "$OLDNAME"
# Record this path under its FID.
360 add_to_set "$fid" "$OLDNAME"
# Work out the layout options to hand to "lfs migrate" / "lfs setstripe".
365 local olddir=$(dirname "$OLDNAME")
366 local stripe_size="$OPT_STRIPE_SIZE"
367 local stripe_count="$OPT_STRIPE_COUNT"
368 local getstripe_opts="-N --comp-count -c -S -p -y"
369 local parent_count=""
# OPT_POOL is stored as "-p <pool>" by option parsing; strip the "-p " prefix
# to get the bare pool name.
371 local stripe_pool="${OPT_POOL#-p }"
# layout is built up as a single string and later expanded unquoted so that
# it splits into separate arguments.
375 layout="${OPT_PASSTHROUGH[@]} "
# -R: copy the layout of the parent directory.
377 if $OPT_RESTRIPE; then
379 layout+="--copy $olddir"
381 elif ! $OPT_COMP; then
382 # avoid multiple getstripe calls
383 # lcm_mirror_count: 1
385 # lmm_stripe_count: 1
386 # lmm_stripe_size: 1048576
# Positions of the values in layout_info, which holds the second field of
# each getstripe output line.
388 local l_mirror_count=0
390 local l_stripe_count=2
391 local l_stripe_size=3
392 local l_stripe_pool=4
# NOTE(review): $OLDNAME is unquoted on the next line, unlike elsewhere in
# this loop, so a path containing whitespace would be split into several
# arguments.
395 layout_info=($($LFS getstripe $getstripe_opts $OLDNAME \
396 2>/dev/null | awk '{ print $2 }'))
397 # If rsync copies Lustre xattrs properly in the future
398 # (i.e. before the file data, so that it preserves
399 # striping) then we don't need this getstripe stuff.
402 [ -n "$OPT_POOL" ] ||
403 stripe_pool=${layout_info[$l_stripe_pool]}
404 mirror_count=${layout_info[$l_mirror_count]}
# -A: let calc_stripe choose the stripe count. OBJ_MAX_KB is passed in and
# read back, so the cap computed for one file is reused for the following
# files. An empty result aborts the whole script.
406 if $OPT_AUTOSTRIPE; then
407 local filekb=$((${nlink_type[$nlink_idx_size]} /
410 read stripe_count OBJ_MAX_KB < <(calc_stripe \
411 "$OLDNAME" "$filekb" "$OBJ_MAX_KB")
412 [ -z "$stripe_count" ] && exit 1
413 [ $stripe_count -lt 1 ] && stripe_count=1
# Values not given on the command line fall back to the current layout of
# the file.
415 [ -n "$stripe_count" ] ||
416 stripe_count=${layout_info[$l_stripe_count]}
418 [ -n "$stripe_size" ] ||
419 stripe_size=${layout_info[$l_stripe_size]}
# Clearing UNLINK makes the rsync path below skip its setstripe step.
421 [ -z "$stripe_count" -o -z "$stripe_size" ] && UNLINK=""
# For -R, query the default layout of the parent directory so that it can be
# shown in the verbose report.
429 if $OPT_RESTRIPE; then
430 parent_layout=($($LFS getstripe $getstripe_opts\
431 -d "$olddir" 2>/dev/null |
433 parent_count=${parent_layout[$l_stripe_count]}
434 parent_size=${parent_layout[$l_stripe_size]}
435 stripe_pool=${parent_layout[$l_stripe_pool]}
436 mirror_count=${parent_layout[$l_mirror_count]}
# Verbose report of the layout that will be applied.
439 $ECHO -n "stripe_count=${stripe_count:-$parent_count},stripe_size=${stripe_size:-$parent_size}"
440 [ -n "$stripe_pool" ] &&
441 $ECHO -n ",pool=${stripe_pool}"
442 [[ $mirror_count -gt 1 ]] &&
443 $ECHO -n ",mirror_count=${mirror_count}"
448 $ECHO " dry run, skipped"
# NOTE(review): inside [[ ]] the ">" operator compares strings
# lexicographically, not numerically; -gt is the numeric test. l_comp_count
# is also not defined on any line visible here (presumably index 1, between
# l_mirror_count and l_stripe_count) - confirm.
452 if ! $OPT_COPY && ! $OPT_COMP &&
453 [[ ${layout_info[$l_comp_count]} > 0 ]]; then
454 layout+="--copy $OLDNAME"
# Without --copy style options or a component layout, spell out count, size,
# pool and mirror count. The pool is only added when -p was given explicitly.
457 if ! $OPT_COPY && ! $OPT_COMP; then
458 [ -n "$stripe_count" ] && layout+="-c $stripe_count "
459 [ -n "$stripe_size" ] && layout+="-S $stripe_size "
460 [ -n "$OPT_POOL" -a -n "$stripe_pool" ] &&
461 layout+="-p $stripe_pool "
462 [[ $mirror_count -gt 1 ]] && layout+="-N $mirror_count "
464 layout+="$OPT_LAYOUT"
466 # detect other hard links and store them on a global
467 # list so we don't re-migrate them
468 if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]]; then
# Only look up the mount point again when the device number differs from
# last_dev; the mount point is the last column of the second df -P line.
469 [ "${nlink_type[$nlink_idx_dev]}" == "$last_dev" ] ||
470 mntpoint=$(df -P "$OLDNAME" |
471 awk 'NR==2 { print $NF }')
472 if [ -z "$mntpoint" ]; then
473 echo -e "\r$OLDNAME: cannot determine mount point; skipped" 1>&2
# Ask Lustre for every path that refers to this FID.
476 hlinks=$($LFS fid2path "$mntpoint" "$fid" 2> /dev/null)
# NOTE(review): by the time [ $? -ne 0 ] is evaluated, $? holds the exit
# status of the preceding $OPT_RSYNC command (0 whenever the test is reached),
# not the status of fid2path, so as written this error branch cannot trigger.
# Saving the fid2path status in a variable right after the assignment would
# preserve it.
477 if $OPT_RSYNC && [ $? -ne 0 ]; then
478 echo -e "\r$OLDNAME: cannot determine hard link paths, skipped" 1>&2
486 # first try to migrate via Lustre tools, then fall back to rsync
487 if ! $OPT_RSYNC; then
489 echo -e "\n$LFS migrate $layout \"$OLDNAME\""
# $layout is deliberately unquoted so that it splits into separate options.
# NOTE(review): this also splits any path embedded in it (for example the
# "--copy <path>" forms) if that path contains whitespace.
491 if $LFS migrate $layout "$OLDNAME"; then
493 # no-op if hlinks empty for 1-link files
# NOTE(review): the unquoted expansion word-splits, so a link path containing
# whitespace would be recorded as several entries.
494 for link in ${hlinks[*]}; do
495 add_to_set "$fid" "$link"
498 elif $OPT_NO_RSYNC; then
499 echo -e "\r$OLDNAME: refusing to fall back to rsync, skipped" 1>&2
502 $ECHO -n "falling back to rsync: "
# rsync path: create a hidden temporary file beside the original, copy into
# it, optionally compare, then rename it over the original.
507 local oldfile=$(basename "$OLDNAME")
# $UNLINK is passed to mktemp as an option when non-empty (its value is set
# outside the lines visible here).
508 NEWNAME=$(mktemp $UNLINK "$olddir/.$oldfile.XXXXXX")
509 if [ $? -ne 0 -o -z "$NEWNAME" ]; then
510 echo -e "\r$OLDNAME: cannot make temp file, skipped" 1>&2
# Only when UNLINK is non-empty is the temporary file set up with setstripe
# using the chosen layout.
514 if [ "$UNLINK" ]; then
515 if ! $LFS setstripe $layout "$NEWNAME"; then
516 echo -e "\r$NEWNAME: setstripe failed, exiting" 1>&2
521 # we use --inplace, since we created our own temp file already
522 if ! $RSYNC -a --inplace $RSYNC_OPTS "$OLDNAME" "$NEWNAME";then
523 echo -e "\r$OLDNAME: copy error, exiting" 1>&2
# The byte-for-byte comparison is skipped when -s / --skip cleared OPT_CHECK.
527 if $OPT_CHECK && ! cmp -s "$OLDNAME" "$NEWNAME"; then
528 echo -e "\r$NEWNAME: compare failed, exiting" 1>&2
532 if ! mv "$NEWNAME" "$OLDNAME"; then
533 echo -e "\r$OLDNAME: rename error, exiting" 1>&2
# Re-point every other known link at the new file and record each path.
538 # no-op if hlinks empty for 1-link files
539 for link in ${hlinks[*]}; do
540 if [ "$link" != "$OLDNAME" ]; then
541 ln -f "$OLDNAME" "$link"
543 add_to_set "$fid" "$link"
546 # If the number of hlinks exceeds the space in the xattrs,
547 # when the final path is statted it will have a link count
548 # of 1 (all other links will point to the new inode).
549 # This flag indicates that even paths with a link count of
550 # 1 are potentially part of a link set.
551 (( ${#hlinks[*]} == 1 )) || RSYNC_WITH_HLINKS=true
# With no remaining arguments the file list comes from standard input.
555 if [ "$#" -eq 0 ]; then
# Convert a newline-separated list into the NUL-terminated form that the
# read loop in lfs_migrate expects.
559 tr '\n' '\0' | lfs_migrate
# For a path argument, list the regular files below it, NUL-terminated.
564 $LFS find "$1" -type f -print0