3 # lfs_migrate: a simple tool to copy and check files.
5 # To avoid allocating objects on one or more OSTs, they should be
6 # deactivated on the MDS via "lctl --device {device_number} deactivate",
7 # where {device_number} is from the output of "lctl dl" on the MDS.
9 # To guard against corruption, the file is compared after migration
10 # to verify the copy is correct and the file has not been modified.
# This is not a protection against the file being open by another
# process, but it would catch the worst cases of in-use files. To be
# 100% safe, the administrator must ensure the files are not in use.
# Whether to copy with rsync instead of "lfs migrate"; may be preset
# through the LFS_MIGRATE_RSYNC_MODE environment variable.
OPT_RSYNC=${LFS_MIGRATE_RSYNC_MODE:-false}
# Becomes true once an rsync migration has handled a multi-link file.
RSYNC_WITH_HLINKS=false
# Directory for scratch files, honoring TMPDIR when it is set.
LFS_MIGRATE_TMP=${TMPDIR:-/tmp}
# File holding one "<old FID> <path>" record per migrated hard link.
# The template is quoted so a TMPDIR containing whitespace still works,
# and a mktemp failure stops the script instead of leaving the variable
# empty for later redirections.
MIGRATED_SET=$(mktemp "${LFS_MIGRATE_TMP}/lfs_migrate.links.XXXXXX") || {
	echo "${0##*/}: cannot create temporary file in ${LFS_MIGRATE_TMP}" 1>&2
	exit 1
}
# sed expression that strips the leading "[FID] " column from a record.
REMOVE_FID='s/^\[[0-9a-fx:]*\] //'
30 echo "$old_fid $path" >> "$MIGRATED_SET"
# Strip the leading "[FID] " column from every record, then look for an
# exact whole-line match of the path. -F and -x keep characters such as
# '.', '[' or '*' in a file name from being interpreted as regex syntax.
sed -e "$REMOVE_FID" "$MIGRATED_SET" | grep -qxF -- "$path"
42 grep "^\\$old_fid" "$MIGRATED_SET" | head -n 1 |
48 usage: lfs_migrate [--dry-run|-n] [--help|-h] [--no-rsync|--rsync] [--quiet|-q]
49 [--auto-stripe|-A [-C <cap>]
50 [--min-free|-M <min_free>] [--max-free|-X <max_free>]]
51 [--stripe-count|-c <stripe_count>]
52 [--stripe-size|-S <stripe_size>]
54 [--restripe|-R] [--skip|-s] [--verbose|-v] [--yes|-y] [-0]
56 -A restripe file using an automatically selected stripe count,
57 uses stripe_count = sqrt(size_in_GB) + 1
59 restripe file using the specified <stripe_count>
60 -C <cap> when -A is set, limit the migrated file to use on each OST
61 at most 1/<cap> of the available space of the smallest OST
62 -D do not use direct I/O to copy file contents
63 -h show this usage message
65 when -A is set, an OST must contain more available space than
66 <min_free> KB in order for it to be considered available for
68 --no-rsync do not fall back to rsync mode even if lfs migrate fails
69 -n only print the names of files to be migrated
70 -q run quietly (don't print filenames or status)
71 --rsync force rsync mode instead of using lfs migrate
72 -R restripe file using default directory striping
73 -s skip file data comparison after migrate
75 restripe file using the specified stripe size
76 -v show verbose debug messages
78 when -A is set, limit the amount of space on each OST that
79 can be considered available for the migration to
81 -y answer 'y' to usage question
82 -0 input file names on stdin are separated by a null character
84 Options '-A', '-c', and '-R' are mutually exclusive.
85 Options '-C', '-M', and '-X' are ignored if '-A' is not set.
87 The --rsync and --no-rsync options may not be specified at the same time.
89 If a directory is an argument, all files in the directory are migrated.
90 If no file/directory is given, the file list is read from standard input.
92 Any arguments that are not explicitly recognized by the script are passed
93 through to the 'lfs migrate' utility.
96 lfs_migrate /mnt/lustre/dir
97 lfs_migrate -p newpool /mnt/lustre/dir
98 lfs find /test -O test-OST0004 -size +4G | lfs_migrate -y
104 rm -f "$MIGRATED_SET"
105 [ -n "$NEWNAME" ] && rm -f "$NEWNAME"
129 # Examine any long options and arguments. getopts does not support long
130 # options, so they must be stripped out and classified as either options
131 # for the script, or passed through to "lfs migrate".
132 while [ -n "$*" ]; do
136 -l|--link) ;; # maintained backward compatibility for now
-n|--dry-run) OPT_DRYRUN=true; OPT_YES=true
	# Warn only for the deprecated short form; the long --dry-run
	# spelling is the supported one and must not trigger the warning.
	[ "$arg" = "-n" ] &&
		echo "$PROG: -n deprecated, use --dry-run or --non-block" 1>&2;;
140 -R|--restripe) OPT_RESTRIPE=true;;
141 -s|--skip) OPT_CHECK=false;;
142 -v|--verbose) OPT_DEBUG=true; ECHO=echo;;
143 -y|--yes) OPT_YES=true;;
145 -b|--block|--non-block|--non-direct|--no-verify)
146 # Always pass non-layout options to 'lfs migrate'
147 OPT_PASSTHROUGH+=("$arg");;
148 --rsync) OPT_RSYNC=true;;
149 --no-rsync) OPT_NO_RSYNC=true;;
--copy|--yaml|--file)
	# These options take a file as their argument, so pass both
	# through. The trailing space keeps the next option appended to
	# OPT_LAYOUT from being glued onto the file name.
	OPT_LAYOUT+="$arg $2 "; shift;;
153 --auto-stripe|-A) OPT_AUTOSTRIPE=true;;
154 -C) OPT_CAP="$2"; shift;;
155 -D) LFS_OPT_DIRECTIO="-D";;
156 -M|--min-free) OPT_MINFREE="$2"; shift;;
157 -X|--max-free) OPT_MAXFREE="$2"; shift;;
158 -c|--stripe-count) OPT_STRIPE_COUNT="$2"; shift;;
159 -S|--stripe-size) OPT_STRIPE_SIZE="$2"; shift;;
160 *) # Pass other non-file layout options to 'lfs migrate'
# An existing path marks the start of the file list: record it and
# leave the option loop. Anything else is kept as a layout option.
if [ -e "$arg" ]; then
	OPT_FILE+="$arg "
	break
else
	OPT_LAYOUT+="$arg "
fi
166 if $OPT_RESTRIPE && [ -n "$OPT_LAYOUT" ]; then
167 echo "$PROG: Options $OPT_LAYOUT cannot be used with the -R option" 1>&2
169 elif $OPT_RESTRIPE && [[ "$OPT_STRIPE_COUNT" || "$OPT_STRIPE_SIZE" ]]; then
170 echo "$(basename $0): Options -c <stripe_count> and -S <stripe_size> "\
171 "may not be specified at the same time as the -R option." 1>&2
173 elif $OPT_AUTOSTRIPE && [ -n "$OPT_STRIPE_COUNT" ]; then
175 echo "$(basename $0) error: The -c <stripe_count> option may not" 1>&2
176 echo "be specified at the same time as the -A option." 1>&2
178 elif $OPT_AUTOSTRIPE && $OPT_RESTRIPE; then
180 echo "$(basename $0) error: The -A option may not be specified at" 1>&2
181 echo "the same time as the -R option." 1>&2
185 if $OPT_RSYNC && $OPT_NO_RSYNC; then
186 echo "$PROG: Options --rsync and --no-rsync may not be" \
187 "specified at the same time." 1>&2
193 echo "lfs_migrate is currently NOT SAFE for moving in-use files." 1>&2
194 echo "Use it only when you are sure migrated files are unused." 1>&2
196 echo "If emptying an OST that is active on the MDS, new files may" 1>&2
197 echo "use it. To stop allocating any new objects on OSTNNNN run:" 1>&2
echo " lctl set_param osp.<fsname>-OSTNNNN*.max_create_count=0" 1>&2
199 echo "on each MDS using the OST(s) being emptied." 1>&2
200 echo -n "Continue? (y/n) "
202 [ "$CHECK" != "y" -a "$CHECK" != "yes" ] && exit 1
205 # if rsync has --xattr support, then try to copy the xattrs.
206 $RSYNC --help 2>&1 | grep -q xattr && RSYNC_OPTS="$RSYNC_OPTS -X"
207 $RSYNC --help 2>&1 | grep -q acls && RSYNC_OPTS="$RSYNC_OPTS -A"
208 # If rsync copies lustre xattrs in the future, then we can skip lfs (bug 22189)
209 strings $(which $RSYNC) 2>&1 | grep -q lustre && LFS=:
211 # rsync creates its temporary files with lenient permissions, even if
212 # permissions on the original files are more strict. Tighten umask here
213 # to avoid the brief window where unprivileged users might be able to
214 # access the temporary file.
217 # Use stripe count = sqrt(size_in_GB) + 1, but cap object size per OST.
218 function calc_stripe()
223 local filegb=$((filekb / 1048576))
225 local ost_max_count=0
227 # Files up to 1GB will have 1 stripe if they fit within the object max
228 if [[ $filegb -lt 1 && "$obj_max_kb" && $filekb -le $obj_max_kb ]]; then
229 echo 1 "$obj_max_kb" && return
232 stripe_count=$(bc <<< "scale=0; 1 + sqrt($filegb)" 2> /dev/null) ||
233 { echo "cannot auto calculate stripe count" >&2; return; }
235 if [ -z "$obj_max_kb" ]; then
236 local ost_min_kb=$((1 << 62))
# Calculate cap on object size at 1/$OPT_CAP of the smallest OST,
# but only include OSTs that have at least $OPT_MINFREE KB available
240 while IFS='' read avail; do
241 [[ "$OPT_MAXFREE" && $avail -gt $OPT_MAXFREE ]] &&
243 if [ $avail -ge $OPT_MINFREE ]; then
244 ost_max_count=$((ost_max_count + 1))
245 if [ $avail -lt $ost_min_kb ]; then
249 done < <($LFS df $OLDNAME | awk '/OST/ { print $4 }')
250 # Once this script supports pools, the lfs df command above
251 # should also include the -p <pool> option to restrict the
252 # listed OSTs to the correct pool.
254 if [ $ost_max_count -eq 0 ]; then
255 echo "no OSTs with sufficient available space" >&2
259 if (( ost_min_kb == (1 << 62) )); then
260 echo "warning: unable to determine minimum OST size, " \
261 "object size not capped" >&2
263 echo "$stripe_count" "$obj_max_kb"
267 obj_max_kb=$((ost_min_kb / $OPT_CAP))
268 elif [ $obj_max_kb -eq 0 ]; then
269 echo "warning: unable to determine minimum OST size " \
270 "from previous migrate, object size not capped" >&2
271 echo "$stripe_count" "$obj_max_kb"
275 # If disk usage would exceed the cap, increase the number of stripes.
276 # Round up to the nearest MB to ensure file will fit.
277 (( filekb > stripe_count * obj_max_kb )) &&
278 stripe_count=$(((filekb + obj_max_kb - 1024) / obj_max_kb))
280 # Limit the count to the number of eligible OSTs
281 if [ "$stripe_count" -gt $ost_max_count ]; then
282 echo "$ost_max_count" "$obj_max_kb"
284 echo "$stripe_count" "$obj_max_kb"
292 while IFS='' read -d '' OLDNAME; do
294 local stripe_size="$OPT_STRIPE_SIZE"
295 local stripe_count="$OPT_STRIPE_COUNT"
296 local parent_count=""
303 $ECHO -n "$OLDNAME: "
305 # avoid duplicate stat call by fetching all attrs at once
306 local nlink_idx_link=0 # %h is the hard link count
307 local nlink_idx_type=1 # %F is "regular file", ignore others
308 local nlink_idx_file=2 # "file" is here
309 local nlink_idx_size=3 # %s is file size in bytes
310 local nlink_idx_dev=4 # %D is the underlying device number
311 # nlink_type=(1 regular file 1234 0x810)
312 local nlink_type=($(LANG=C stat -c "%h %F %s %D" "$OLDNAME" \
315 # skip non-regular files, since they don't have any objects
316 # and there is no point in trying to migrate them.
317 if [ "${nlink_type[$nlink_idx_type]}" != "regular" ]; then
318 echo -e "\r$OLDNAME: not a regular file, skipped" 1>&2
322 # working out write perms is hard, let the shell do it
323 if [ ! -w "$OLDNAME" ]; then
324 echo -e "\r$OLDNAME: no write permission, skipped" 1>&2
328 if $OPT_DRYRUN && ! $OPT_DEBUG; then
329 $ECHO "dry run, skipped"
333 # xattrs use absolute file paths, so ensure provided path is
334 # also absolute so that the names can be compared
335 local oldname_absolute=$(readlink -f "$OLDNAME")
336 if [ -z "$oldname_absolute" ]; then
337 echo -e "\r$OLDNAME: cannot resolve full path, skipped" 1>&2
340 OLDNAME=$oldname_absolute
342 if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]] ||
343 $RSYNC_WITH_HLINKS; then
344 fid=$($LFS path2fid "$OLDNAME" 2> /dev/null)
345 if [ $? -ne 0 ]; then
346 echo -e "\r$OLDNAME: cannot get FID, skipping; is this a Lustre file system?" 1>&2
350 # don't migrate a hard link if it was already migrated
351 if path_in_set "$OLDNAME"; then
352 $ECHO "already migrated via another hard link"
356 # There is limited space available in the xattrs
357 # to store all of the hard links for a file, so it's
358 # possible that $OLDNAME is part of a link set but is
359 # not listed in xattrs and therefore not listed as
361 local migrated=$(old_fid_in_set "$fid")
362 if [ -n "$migrated" ]; then
363 $ECHO "already migrated via another hard link"
364 # Only the rsync case has to relink. The
365 # "lfs migrate" case keeps the same inode so
366 # all of the links are already correct.
367 $OPT_RSYNC && [ "$migrated" != "$OLDNAME" ] &&
368 ln -f "$migrated" "$OLDNAME"
370 add_to_set "$fid" "$OLDNAME"
375 if $OPT_RESTRIPE; then
378 # If rsync copies Lustre xattrs properly in the future
379 # (i.e. before the file data, so that it preserves
380 # striping) then we don't need this getstripe stuff.
# Look up the pool and mirror count of the file being migrated.
# Both queries must use $OLDNAME; no OLDFILE variable is set in this loop.
stripe_pool=$($LFS getstripe -p "$OLDNAME" 2> /dev/null)
mirror_count=$($LFS getstripe -N "$OLDNAME" 2> /dev/null)
386 if $OPT_AUTOSTRIPE; then
387 local filekb=$((${nlink_type[$nlink_idx_size]} /
390 read stripe_count OBJ_MAX_KB < <(calc_stripe \
391 "$OLDNAME" "$filekb" "$OBJ_MAX_KB")
392 [ -z "$stripe_count" ] && exit 1
393 [ $stripe_count -lt 1 ] && stripe_count=1
395 [ "$OPT_STRIPE_COUNT" ] && stripe_count=$OPT_STRIPE_COUNT ||
396 stripe_count=$($LFS getstripe -c "$OLDNAME" \
399 [ -z "$stripe_size" ] &&
400 stripe_size=$($LFS getstripe -S "$OLDNAME" 2> /dev/null)
402 [ -z "$stripe_count" -o -z "$stripe_size" ] && UNLINK=""
409 if $OPT_RESTRIPE; then
410 parent_count=$($LFS getstripe -c \
411 $(dirname "$OLDNAME") 2> \
413 parent_size=$($LFS getstripe -S \
414 $(dirname "$OLDNAME") 2> \
416 stripe_pool=$($LFS getstripe --pool \
417 $(dirname "$OLDNAME") 2> \
mirror_count=$($LFS getstripe -N \
	"$(dirname "$OLDNAME")" 2> \
424 $ECHO -n "stripe_count=${stripe_count:-$parent_count},stripe_size=${stripe_size:-$parent_size}"
425 [ -n "$stripe_pool" ] &&
426 $ECHO -n ",pool=${stripe_pool}"
427 [ -n "$mirror_count" ] &&
428 $ECHO -n ",mirror_count=${mirror_count}"
433 $ECHO " dry run, skipped"
437 [ -n "$stripe_count" ] && stripe_count="-c $stripe_count"
438 [ -n "$stripe_size" ] && stripe_size="-S $stripe_size"
439 [ -n "$stripe_pool" ] && stripe_pool="-p $stripe_pool"
440 [ -n "$mirror_count" ] && mirror_count="-N $mirror_count"
441 layout="$stripe_count $stripe_size $stripe_pool $mirror_count \
444 # detect other hard links and store them on a global
445 # list so we don't re-migrate them
446 if [[ ${nlink_type[$nlink_idx_link]} -gt 1 ]]; then
447 [ "${nlink_type[$nlink_idx_dev]}" == "$last_dev" ] ||
448 mntpoint=$(df -P "$OLDNAME" |
449 awk 'NR==2 { print $NF }')
450 if [ -z "$mntpoint" ]; then
451 echo -e "\r$OLDNAME: cannot determine mount point; skipped" 1>&2
454 hlinks=$($LFS fid2path "$mntpoint" "$fid" 2> /dev/null)
455 if $OPT_RSYNC && [ $? -ne 0 ]; then
456 echo -e "\r$OLDNAME: cannot determine hard link paths, skipped" 1>&2
464 # first try to migrate via Lustre tools, then fall back to rsync
465 if ! $OPT_RSYNC; then
466 if $LFS migrate "${OPT_PASSTHROUGH[@]}" $layout \
469 # no-op if hlinks empty for 1-link files
470 for link in ${hlinks[*]}; do
471 add_to_set "$fid" "$link"
474 elif $OPT_NO_RSYNC; then
475 echo -e "\r$OLDNAME: refusing to fall back to rsync, skipped" 1>&2
478 $ECHO -n "falling back to rsync: "
483 NEWNAME=$(mktemp $UNLINK "$OLDNAME-lfs_migrate.tmp.XXXXXX")
484 if [ $? -ne 0 -o -z "$NEWNAME" ]; then
485 echo -e "\r$OLDNAME: cannot make temp file, skipped" 1>&2
489 if [ "$UNLINK" ]; then
490 if ! $LFS setstripe "${OPT_PASSTHROUGH[@]}" $layout \
492 echo -e "\r$NEWNAME: setstripe failed, exiting" 1>&2
497 # we use --inplace, since we created our own temp file already
498 if ! $RSYNC -a --inplace $RSYNC_OPTS "$OLDNAME" "$NEWNAME";then
499 echo -e "\r$OLDNAME: copy error, exiting" 1>&2
503 if $OPT_CHECK && ! cmp -s "$OLDNAME" "$NEWNAME"; then
504 echo -e "\r$NEWNAME: compare failed, exiting" 1>&2
508 if ! mv "$NEWNAME" "$OLDNAME"; then
509 echo -e "\r$OLDNAME: rename error, exiting" 1>&2
514 # no-op if hlinks empty for 1-link files
515 for link in ${hlinks[*]}; do
516 if [ "$link" != "$OLDNAME" ]; then
517 ln -f "$OLDNAME" "$link"
519 add_to_set "$fid" "$link"
522 # If the number of hlinks exceeds the space in the xattrs,
523 # when the final path is statted it will have a link count
524 # of 1 (all other links will point to the new inode).
525 # This flag indicates that even paths with a link count of
526 # 1 are potentially part of a link set.
# hlinks is assigned the fid2path output as a single string, so its
# element count never exceeds 1. Check instead whether it contains at
# least two whitespace-separated entries, matching how the loops over
# ${hlinks[*]} split it.
[[ "${hlinks[*]}" =~ [^[:space:]][[:space:]]+[^[:space:]] ]] &&
	RSYNC_WITH_HLINKS=true
531 if [ "$#" -eq 0 ]; then
535 tr '\n' '\0' | lfs_migrate
540 $LFS find "$1" -type f -print0