3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
# Locate the Lustre tree (default: the parent of this script's directory) and
# source the shared test framework from it.
11 LUSTRE=${LUSTRE:-$(dirname $0)/..}
12 . $LUSTRE/tests/test-framework.sh
16 # Bug number for skipped test:
17 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT "
18 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# When SLOW is "no", EXCEPT_SLOW is set to the empty string.
20 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# Exit with status 0 when require_dsh_mds fails.
23 require_dsh_mds || exit 0
# Skip when check_versions fails (reason given in the skip message).
27 if ! check_versions; then
28 skip "It is NOT necessary to test lfsck under interoperation mode"
# Skip unless MDS1_VERSION is at least version_code 2.3.60.
32 (( $MDS1_VERSION >= $(version_code 2.3.60) )) ||
33 skip "Need MDS version at least 2.3.60"
# Remember the incoming sizes and OST count before they are overridden below.
37 SAVED_MDSSIZE=${MDSSIZE}
38 SAVED_OSTSIZE=${OSTSIZE}
39 SAVED_OSTCOUNT=${OSTCOUNT}
40 # Use small MDS and OST sizes to speed up formatting.
41 # Do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size.
43 [ "$mds1_FSTYPE" == zfs ] && MDSSIZE=300000
45 [ "$ost1_FSTYPE" == zfs ] && OSTSIZE=300000
47 # There is no need for many OSTs; cap the count to reduce the format/start/stop overhead.
49 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
51 # Build a clean test environment.
52 REFORMAT="yes" check_and_setup_lustre
# Names of the MDT0000 and OST0000 targets of filesystem $FSNAME.
54 MDT_DEV="${FSNAME}-MDT0000"
55 OST_DEV="${FSNAME}-OST0000"
# Result of mdsdevname for $SINGLEMDS; every "mds" substring is removed from
# the facet name to form the argument.
56 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# The command lines below are kept as plain strings. The tests run them via
# unquoted expansion, optionally appending options (e.g. "$START_NAMESPACE -r").
# Start namespace / layout LFSCK on ${MDT_DEV}, or layout LFSCK on ${OST_DEV}.
57 START_NAMESPACE="do_facet $SINGLEMDS \
58 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
59 START_LAYOUT="do_facet $SINGLEMDS \
60 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
61 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
# Stop LFSCK on ${MDT_DEV}.
62 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the lfsck_namespace / lfsck_layout parameters; the tests parse fields
# such as "status" and "flags" out of this output with awk.
63 SHOW_NAMESPACE="do_facet $SINGLEMDS \
64 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
65 SHOW_LAYOUT="do_facet $SINGLEMDS \
66 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
67 SHOW_LAYOUT_ON_OST="do_facet ost1 \
68 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Mount option sets passed to "start" when the MDT is (re)started by a test.
# Note that MOUNT_OPTS_SKIP_LFSCK does not include $MDS_MOUNT_OPTS.
69 MOUNT_OPTS_SCRUB="$MDS_MOUNT_OPTS -o user_xattr"
70 MOUNT_OPTS_NOSCRUB="$MDS_MOUNT_OPTS -o user_xattr,noscrub"
71 MOUNT_OPTS_SKIP_LFSCK="-o user_xattr,skip_lfsck"
# Populate $DIR/$tdir for the tests. Reads $nfiles, $ndirs and $igif.
80 echo "preparing... $nfiles * $ndirs files will be created $(date)."
# With a non-empty $igif, fail_loc 0x1504 is armed before anything is created.
81 if [ ! -z $igif ]; then
82 #define OBD_FAIL_FID_IGIF 0x1504
83 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
# Copy the test scripts in, then create $ndirs "d" and "f" entries.
86 cp $LUSTRE/tests/*.sh $DIR/$tdir/
87 if [ $ndirs -gt 0 ]; then
88 createmany -d $DIR/$tdir/d $ndirs
89 createmany -m $DIR/$tdir/f $ndirs
90 if [ $nfiles -gt 0 ]; then
# One createmany call per d${i}; its stdout is discarded.
91 for ((i = 0; i < $ndirs; i++)); do
92 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
93 /dev/null || error "createmany $nfiles"
96 createmany -d $DIR/$tdir/e $ndirs
# With a non-empty $igif, also create "dummy" and then clear fail_loc.
99 if [ ! -z $igif ]; then
100 touch $DIR/$tdir/dummy
101 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
104 echo "prepared $(date)."
# Stop $SINGLEMDS, run run_e2fsck with "-n" against the device reported by
# "mdsdevname 1", report error (2) from the inconsistency branch, and start
# the MDT again with MOUNT_OPTS_NOSCRUB.
# Returns 0 without doing anything unless mds1_FSTYPE is ldiskfs.
107 run_e2fsck_on_mdt0() {
108 [ $mds1_FSTYPE == ldiskfs ] || return 0
# "stop" and "start" output is discarded; only their exit status is checked.
110 stop $SINGLEMDS > /dev/null || error "(0) Fail to the stop MDT0"
111 run_e2fsck $(facet_active_host $SINGLEMDS) $(mdsdevname 1) "-n" |
113 run_e2fsck $(facet_active_host $SINGLEMDS) $(mdsdevname 1) "-n"
114 error "(2) Detected inconsistency on MDT0"
116 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
117 error "(3) Fail to start MDT0"
# Query LFSCK component $com once with "lfsck_query ... -w" on mds1 and require
# that the "${com}_mdts_${status}" counter equals $MDSCOUNT.
# Called elsewhere in this file as "wait_all_targets_blocked namespace completed 4";
# $com, $status and $err presumably come from those three positional
# arguments -- TODO confirm.
120 wait_all_targets_blocked() {
# The awk program runs on the remote side, hence the escaped "$2".
125 local count=$(do_facet mds1 \
126 "$LCTL lfsck_query -t $com -M ${FSNAME}-MDT0000 -w |
127 awk '/^${com}_mdts_${status}/ { print \\\$2 }'")
# On mismatch, dump the full query output before failing with error ($err).
128 [[ $count -eq $MDSCOUNT ]] || {
129 do_facet mds1 "$LCTL lfsck_query -t $com -M ${FSNAME}-MDT0000"
130 error "($err) only $count of $MDSCOUNT MDTs are in ${status}"
# Polling variant: wait_update_facet repeats the lfsck_query (without -w) on
# mds1 until the "${com}_mdts_${status}" counter equals $MDSCOUNT, bounded by
# $LTIME. On failure, dump the full query output and fail with error ($err).
139 wait_update_facet mds1 "$LCTL lfsck_query -t $com -M ${FSNAME}-MDT0000 |
140 awk '/^${com}_mdts_${status}/ { print \\\$2 }'" \
141 "$MDSCOUNT" $LTIME || {
142 do_facet mds1 "$LCTL lfsck_query -t $com -M ${FSNAME}-MDT0000"
143 error "($err) some MDTs are not in ${status}"
# Test 0 ("Control LFSCK manually"): start, stop and restart namespace LFSCK
# by hand and check the reported status after each step.
150 #define OBD_FAIL_LFSCK_DELAY1 0x1600
151 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
152 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
154 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
# With fail_loc 0x1600 armed the scan is expected to still be in phase 1.
156 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
157 [ "$STATUS" == "scanning-phase1" ] ||
158 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
# Stop, verify "stopped", then start again without -r.
160 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
162 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
163 [ "$STATUS" == "stopped" ] ||
164 error "(6) Expect 'stopped', but got '$STATUS'"
166 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
168 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
169 [ "$STATUS" == "scanning-phase1" ] ||
170 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fail_loc/fail_val settings and wait for status "completed".
172 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
173 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
174 mdd.${MDT_DEV}.lfsck_namespace |
175 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
177 error "(9) unexpected status"
# Nothing was corrupted in this test, so updated_phase1 must be 0.
180 local repaired=$($SHOW_NAMESPACE |
181 awk '/^updated_phase1/ { print $2 }')
182 [ $repaired -eq 0 ] ||
183 error "(10) Expect nothing to be repaired, but got: $repaired"
# A second complete run (started with -r) must raise success_count by one.
185 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
186 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
187 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
188 mdd.${MDT_DEV}.lfsck_namespace |
189 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
191 error "(12) unexpected status"
194 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
195 [ $((scanned1 + 1)) -eq $scanned2 ] ||
196 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
198 echo "stopall, should NOT crash LU-3649"
199 stopall || error "(14) Fail to stopall"
201 run_test 0 "Control LFSCK manually"
# Test 1a ("LFSCK can find out and repair crashed FID-in-dirent"): create
# "dummy" while fail_loc 0x1501 is armed, run namespace LFSCK, and expect
# exactly one repair.
206 #define OBD_FAIL_FID_INDIR 0x1501
207 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
208 touch $DIR/$tdir/dummy
210 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh scan (-r) and wait for status "completed".
212 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
213 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
214 mdd.${MDT_DEV}.lfsck_namespace |
215 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
217 error "(4) unexpected status"
220 local repaired=$($SHOW_NAMESPACE |
221 awk '/^dirent_repaired/ { print $2 }')
222 # For interop with an old server: fall back to updated_phase1 when dirent_repaired is absent.
223 [ -z "$repaired" ] &&
224 repaired=$($SHOW_NAMESPACE |
225 awk '/^updated_phase1/ { print $2 }')
227 [ $repaired -eq 1 ] ||
228 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
232 mount_client $MOUNT || error "(6) Fail to start client!"
# Listing the directory must succeed while fail_loc 0x1505 is armed.
234 #define OBD_FAIL_FID_LOOKUP 0x1505
235 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
236 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
238 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
240 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
# Test 1b ("LFSCK can find out and repair the missing FID-in-LMA"): ldiskfs
# only. Create "dummy" while fail_loc 0x1502 is armed, run namespace LFSCK
# with fail_loc 0x1506 armed, and expect exactly one repair.
244 [ "$mds1_FSTYPE" != ldiskfs ] &&
245 skip "OI Scrub not implemented for ZFS"
249 #define OBD_FAIL_FID_INLMA 0x1502
250 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
251 touch $DIR/$tdir/dummy
253 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# fail_loc 0x1506 stays armed for the whole scan and is cleared after the check.
255 #define OBD_FAIL_FID_NOLMA 0x1506
256 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
257 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
258 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
259 mdd.${MDT_DEV}.lfsck_namespace |
260 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
262 error "(4) unexpected status"
265 local repaired=$($SHOW_NAMESPACE |
266 awk '/^dirent_repaired/ { print $2 }')
267 # For interop with an old server: fall back to updated_phase1 when dirent_repaired is absent.
268 [ -z "$repaired" ] &&
269 repaired=$($SHOW_NAMESPACE |
270 awk '/^updated_phase1/ { print $2 }')
272 [ $repaired -eq 1 ] ||
273 error "(5) Fail to repair the missing FID-in-LMA: $repaired"
275 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
278 mount_client $MOUNT || error "(6) Fail to start client!"
# stat of "dummy" must succeed while fail_loc 0x1505 is armed.
280 #define OBD_FAIL_FID_LOOKUP 0x1505
281 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
282 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
284 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
286 run_test 1b "LFSCK can find out and repair the missing FID-in-LMA"
# Test 1c ("LFSCK can find out and repair lost FID-in-dirent"): create "dummy"
# while fail_loc 0x1504 is armed, run namespace LFSCK, and expect exactly one
# repair.
291 #define OBD_FAIL_FID_IGIF 0x1504
292 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
293 touch $DIR/$tdir/dummy
295 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh scan (-r) and wait for status "completed".
297 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
298 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
299 mdd.${MDT_DEV}.lfsck_namespace |
300 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
302 error "(4) unexpected status"
305 local repaired=$($SHOW_NAMESPACE |
306 awk '/^dirent_repaired/ { print $2 }')
307 # For interop with an old server: fall back to updated_phase1 when dirent_repaired is absent.
308 [ -z "$repaired" ] &&
309 repaired=$($SHOW_NAMESPACE |
310 awk '/^updated_phase1/ { print $2 }')
312 [ $repaired -eq 1 ] ||
313 error "(5) Fail to repair lost FID-in-dirent: $repaired"
317 mount_client $MOUNT || error "(6) Fail to start client!"
# Listing the directory must succeed while fail_loc 0x1505 is armed.
319 #define OBD_FAIL_FID_LOOKUP 0x1505
320 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
321 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
323 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
325 run_test 1c "LFSCK can find out and repair lost FID-in-dirent"
# Test 2a ("LFSCK can find out and repair crashed linkEA entry"): create
# "dummy" while fail_loc 0x1603 is armed, run namespace LFSCK, and expect
# exactly one linkEA repair.
330 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
331 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
332 touch $DIR/$tdir/dummy
334 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh scan (-r) and wait for status "completed".
336 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
337 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
338 mdd.${MDT_DEV}.lfsck_namespace |
339 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
341 error "(4) unexpected status"
344 local repaired=$($SHOW_NAMESPACE |
345 awk '/^linkea_repaired/ { print $2 }')
346 # For interop with an old server: fall back to updated_phase2 when linkea_repaired is absent.
347 [ -z "$repaired" ] &&
348 repaired=$($SHOW_NAMESPACE |
349 awk '/^updated_phase2/ { print $2 }')
351 [ $repaired -eq 1 ] ||
352 error "(5) Fail to repair crashed linkEA: $repaired"
356 mount_client $MOUNT || error "(6) Fail to start client!"
# "dummy" must report a single link.
358 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
359 error "(7) Fail to stat $DIR/$tdir/dummy"
# Round trip: path -> FID -> path must give back the original path.
361 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
362 local dummyname=$($LFS fid2path $DIR $dummyfid)
363 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
364 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
366 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
# Test 2b ("LFSCK can find out and remove invalid linkEA entry"): create
# "dummy" while fail_loc 0x1604 is armed, run namespace LFSCK, and expect
# updated_phase2 to be 1.
372 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
373 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
374 touch $DIR/$tdir/dummy
376 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh scan (-r) and wait for status "completed".
378 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
379 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
380 mdd.${MDT_DEV}.lfsck_namespace |
381 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
383 error "(4) unexpected status"
386 local repaired=$($SHOW_NAMESPACE |
387 awk '/^updated_phase2/ { print $2 }')
388 [ $repaired -eq 1 ] ||
389 error "(5) Fail to repair crashed linkEA: $repaired"
393 mount_client $MOUNT || error "(6) Fail to start client!"
# "dummy" must report a single link.
395 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
396 error "(7) Fail to stat $DIR/$tdir/dummy"
# Round trip: path -> FID -> path must give back the original path.
398 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
399 local dummyname=$($LFS fid2path $DIR $dummyfid)
400 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
401 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
403 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
# Test 2c ("LFSCK can find out and remove repeated linkEA entry"): needs an
# MDS newer than 2.4.90. Create "dummy" while fail_loc 0x1605 is armed, run
# namespace LFSCK, and expect updated_phase2 to be 1.
407 (( $MDS1_VERSION > $(version_code 2.4.90) )) ||
408 skip "MDS older than 2.4.90"
412 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
413 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
414 touch $DIR/$tdir/dummy
416 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh scan (-r) and wait for status "completed".
418 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
419 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
420 mdd.${MDT_DEV}.lfsck_namespace |
421 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
423 error "(4) unexpected status"
426 local repaired=$($SHOW_NAMESPACE |
427 awk '/^updated_phase2/ { print $2 }')
428 [ $repaired -eq 1 ] ||
429 error "(5) Fail to repair crashed linkEA: $repaired"
433 mount_client $MOUNT || error "(6) Fail to start client!"
# "dummy" must report a single link.
435 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
436 error "(7) Fail to stat $DIR/$tdir/dummy"
# Round trip: path -> FID -> path must give back the original path.
438 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
439 local dummyname=$($LFS fid2path $DIR $dummyfid)
440 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
441 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
443 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
# Test 2d ("LFSCK can recover the missing linkEA entry"): needs an MDS newer
# than 2.6.50. Create "dummy" while fail_loc 0x161d is armed, run namespace
# LFSCK, and expect linkea_repaired to be 1.
447 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
448 skip "MDS older than 2.6.50, LU-4788"
452 #define OBD_FAIL_LFSCK_NO_LINKEA 0x161d
453 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
454 touch $DIR/$tdir/dummy
456 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh scan (-r) and wait for status "completed".
458 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
459 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
460 mdd.${MDT_DEV}.lfsck_namespace |
461 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
463 error "(4) unexpected status"
466 local repaired=$($SHOW_NAMESPACE |
467 awk '/^linkea_repaired/ { print $2 }')
468 [ $repaired -eq 1 ] ||
469 error "(5) Fail to repair crashed linkEA: $repaired"
473 mount_client $MOUNT || error "(6) Fail to start client!"
# "dummy" must report a single link.
475 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
476 error "(7) Fail to stat $DIR/$tdir/dummy"
# Round trip: path -> FID -> path must give back the original path.
478 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
479 local dummyname=$($LFS fid2path $DIR $dummyfid)
480 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
481 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
483 run_test 2d "LFSCK can recover the missing linkEA entry"
# Test 2e ("namespace LFSCK can verify remote object linkEA"): needs at least
# two MDTs and an MDS newer than 2.6.50.
487 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
488 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
489 skip "MDS older than 2.6.50, LU-5511"
# Parent d0 is created with "-i 1" and child d1 with "-i 0"; d1 is created
# while fail_loc 0x1603 is armed.
493 $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT1"
495 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
496 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
497 $LFS mkdir -i 0 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT0"
498 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Start with -r -A and require every MDT to report "completed".
500 $START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace!"
502 wait_all_targets_blocked namespace completed 4
504 local repaired=$($SHOW_NAMESPACE |
505 awk '/^linkea_repaired/ { print $2 }')
506 [ $repaired -eq 1 ] ||
507 error "(5) Fail to repair crashed linkEA: $repaired"
# Round trip: path -> FID -> path must give back the original path.
509 local fid=$($LFS path2fid $DIR/$tdir/d0/d1)
510 local name=$($LFS fid2path $DIR $fid)
511 [ "$name" == "$DIR/$tdir/d0/d1" ] ||
512 error "(6) Fail to repair linkEA: $fid $name"
514 run_test 2e "namespace LFSCK can verify remote object linkEA"
# Test 3 ("LFSCK can verify multiple-linked objects"): needs an MDS newer
# than 2.6.50. Builds hard links, two of them while a fail_loc is armed, then
# checks the phase-2 counters of a namespace LFSCK run.
518 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
519 skip "MDS older than 2.6.50, LU-4788"
# Hard links created with no fail_loc armed.
523 mkdir $DIR/$tdir/dummy || error "(1) Fail to mkdir"
524 ln $DIR/$tdir/d0/f0 $DIR/$tdir/dummy/f0 || error "(2) Fail to hardlink"
525 ln $DIR/$tdir/d0/f1 $DIR/$tdir/dummy/f1 || error "(3) Fail to hardlink"
527 $LFS mkdir -i 0 $DIR/$tdir/edir || error "(4) Fail to mkdir"
528 touch $DIR/$tdir/edir/f0 || error "(5) Fail to touch"
529 touch $DIR/$tdir/edir/f1 || error "(6) Fail to touch"
# w0 is linked with fail_loc 0x1603 armed, w1 with fail_loc 0x1604 armed.
531 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
532 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
533 ln $DIR/$tdir/edir/f0 $DIR/$tdir/edir/w0 || error "(7) Fail to hardlink"
535 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
536 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
537 ln $DIR/$tdir/edir/f1 $DIR/$tdir/edir/w1 || error "(8) Fail to hardlink"
539 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh scan (-r) and wait for status "completed".
541 $START_NAMESPACE -r || error "(9) Fail to start LFSCK for namespace!"
542 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
543 mdd.${MDT_DEV}.lfsck_namespace |
544 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
546 error "(10) unexpected status"
# Expect at least 4 objects checked in phase 2 and at least 2 repaired.
549 local checked=$($SHOW_NAMESPACE |
550 awk '/^checked_phase2/ { print $2 }')
551 [ $checked -ge 4 ] ||
552 error "(11) Fail to check multiple-linked object: $checked"
554 local repaired=$($SHOW_NAMESPACE |
555 awk '/^multiple_linked_repaired/ { print $2 }')
556 [ $repaired -ge 2 ] ||
557 error "(12) Fail to repair multiple-linked object: $repaired"
559 run_test 3 "LFSCK can verify multiple-linked objects"
# Test 4 ("FID-in-dirent can be rebuilt after MDT file-level backup/restore"):
# ldiskfs only. Back up and restore the MDT, start it with MOUNT_OPTS_NOSCRUB,
# and let namespace LFSCK rebuild the entries.
563 [ "$mds1_FSTYPE" != ldiskfs ] &&
564 skip "OI Scrub not implemented for ZFS"
567 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
568 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
570 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
571 echo "start $SINGLEMDS with disabling OI scrub"
572 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
573 error "(2) Fail to start MDS!"
# With fail_loc 0x1601 armed, wait until the flags field reads "inconsistent"
# while the scan is still in phase 1.
575 #define OBD_FAIL_LFSCK_DELAY2 0x1601
576 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
577 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
578 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
579 mdd.${MDT_DEV}.lfsck_namespace |
580 awk '/^flags/ { print \\\$2 }'" "inconsistent" 32 || {
582 error "(5) unexpected status"
585 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
586 [ "$STATUS" == "scanning-phase1" ] ||
587 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fail_loc/fail_val settings and wait for status "completed".
589 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
590 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
591 mdd.${MDT_DEV}.lfsck_namespace |
592 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
594 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): FLAGS is assigned without "local" here -- confirm intended.
597 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
598 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
600 local repaired=$($SHOW_NAMESPACE |
601 awk '/^dirent_repaired/ { print $2 }')
602 # For interop with an old server: fall back to updated_phase1 when dirent_repaired is absent.
603 [ -z "$repaired" ] &&
604 repaired=$($SHOW_NAMESPACE |
605 awk '/^updated_phase1/ { print $2 }')
607 [ $repaired -ge 9 ] ||
608 error "(9) Fail to re-generate FID-in-dirent: $repaired"
612 mount_client $MOUNT || error "(10) Fail to start client!"
# Listing the directory must succeed while fail_loc 0x1505 is armed.
614 #define OBD_FAIL_FID_LOOKUP 0x1505
615 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
616 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
617 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
619 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
# Test 5 ("LFSCK can handle IGIF object upgrading"): ldiskfs only. Same flow
# as the backup/restore test, but mds_backup_restore gets an extra argument
# "1" and the expected flags are "inconsistent,upgrade".
623 [ "$mds1_FSTYPE" != ldiskfs ] &&
624 skip "OI Scrub not implemented for ZFS"
627 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
628 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
630 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
631 echo "start $SINGLEMDS with disabling OI scrub"
632 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
633 error "(2) Fail to start MDS!"
# With fail_loc 0x1601 armed, wait for the expected flags while still in phase 1.
635 #define OBD_FAIL_LFSCK_DELAY2 0x1601
636 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
637 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
638 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
639 mdd.${MDT_DEV}.lfsck_namespace |
640 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 32 || {
642 error "(5) unexpected status"
645 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
646 [ "$STATUS" == "scanning-phase1" ] ||
647 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fail_loc/fail_val settings and wait for status "completed".
649 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
650 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
651 mdd.${MDT_DEV}.lfsck_namespace |
652 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
654 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): FLAGS is assigned without "local" here -- confirm intended.
657 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
658 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
660 local repaired=$($SHOW_NAMESPACE |
661 awk '/^dirent_repaired/ { print $2 }')
662 # For interop with an old server: fall back to updated_phase1 when dirent_repaired is absent.
663 [ -z "$repaired" ] &&
664 repaired=$($SHOW_NAMESPACE |
665 awk '/^updated_phase1/ { print $2 }')
667 [ $repaired -ge 2 ] ||
668 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
672 mount_client $MOUNT || error "(10) Fail to start client!"
# Both stat and ls must succeed while fail_loc 0x1505 is armed.
674 #define OBD_FAIL_FID_LOOKUP 0x1505
675 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
676 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
678 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
680 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Round trip: path -> FID -> path must give back the original path.
681 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
682 local dummyname=$($LFS fid2path $DIR $dummyfid)
683 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
684 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
686 run_test 5 "LFSCK can handle IGIF object upgrading"
# Test 6a ("LFSCK resumes from last checkpoint (1)"): make a scan fail, then
# restart it without -r and compare latest_start_position against the
# last_checkpoint_position recorded by the failed run.
691 #define OBD_FAIL_LFSCK_DELAY1 0x1600
692 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
693 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
695 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
696 [ "$STATUS" == "scanning-phase1" ] ||
697 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
699 # Sleep 3 sec to guarantee at least one object processed by LFSCK
701 # Fail the LFSCK to guarantee there is at least one checkpoint
702 #define OBD_FAIL_LFSCK_FATAL1 0x1608
703 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
704 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
705 mdd.${MDT_DEV}.lfsck_namespace |
706 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
708 error "(4) unexpected status"
# POS0: position field of the checkpoint left by the failed run.
711 local POS0=$($SHOW_NAMESPACE |
712 awk '/^last_checkpoint_position/ { print $2 }' |
715 #define OBD_FAIL_LFSCK_DELAY1 0x1600
716 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
717 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
719 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
720 [ "$STATUS" == "scanning-phase1" ] ||
721 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
723 local POS1=$($SHOW_NAMESPACE |
724 awk '/^latest_start_position/ { print $2 }' |
726 [[ $POS0 -lt $POS1 ]] ||
727 error "(7) Expect larger than: $POS0, but got $POS1"
# Clear the fail_loc/fail_val settings and wait for status "completed".
729 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
730 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
731 mdd.${MDT_DEV}.lfsck_namespace |
732 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
734 error "(8) unexpected status"
737 run_test 6a "LFSCK resumes from last checkpoint (1)"
# Test 6b ("LFSCK resumes from last checkpoint (2)"): same idea as the first
# checkpoint test, but uses fail_loc 0x1601/0x1609 and also compares the
# fourth field of the position lines (D_POS*).
742 #define OBD_FAIL_LFSCK_DELAY2 0x1601
743 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
744 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
746 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
747 [ "$STATUS" == "scanning-phase1" ] ||
748 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
750 # Sleep 5 sec to guarantee that we are in the directory scanning
752 # Fail the LFSCK to guarantee there is at least one checkpoint
753 #define OBD_FAIL_LFSCK_FATAL2 0x1609
754 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
755 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
756 mdd.${MDT_DEV}.lfsck_namespace |
757 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
759 error "(4) unexpected status"
# O_POS0 / D_POS0: second and fourth fields of last_checkpoint_position.
762 local O_POS0=$($SHOW_NAMESPACE |
763 awk '/^last_checkpoint_position/ { print $2 }' |
766 local D_POS0=$($SHOW_NAMESPACE |
767 awk '/^last_checkpoint_position/ { print $4 }')
769 #define OBD_FAIL_LFSCK_DELAY2 0x1601
770 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
771 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
773 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
774 [ "$STATUS" == "scanning-phase1" ] ||
775 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
777 local O_POS1=$($SHOW_NAMESPACE |
778 awk '/^latest_start_position/ { print $2 }' |
780 local D_POS1=$($SHOW_NAMESPACE |
781 awk '/^latest_start_position/ { print $4 }')
783 echo "Additional debug for 6b"
# When either D_POS value is "N/A" or "0x0" the O_POS values are compared
# (7.1); check (7.2) compares the D_POS values.
785 if [ "$D_POS0" == "N/A" -o "$D_POS0" == "0x0" \
786 -o "$D_POS1" == "0x0" -o "$D_POS1" == "N/A" ]; then
787 [[ $O_POS0 -lt $O_POS1 ]] ||
788 error "(7.1) $O_POS1 is not larger than $O_POS0"
790 [[ $D_POS0 -lt $D_POS1 ]] ||
791 error "(7.2) $D_POS1 is not larger than $D_POS0"
# Clear the fail_loc/fail_val settings and wait for status "completed".
794 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
795 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
796 mdd.${MDT_DEV}.lfsck_namespace |
797 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
799 error "(8) unexpected status"
802 run_test 6b "LFSCK resumes from last checkpoint (2)"
# Test 7a ("non-stopped LFSCK should auto restarts after MDS remount (1)"):
# stop the MDS while the scan is in phase 1, start it again with
# MOUNT_OPTS_SCRUB, and expect the scan to reach "completed" without an
# explicit lfsck_start.
809 #define OBD_FAIL_LFSCK_DELAY2 0x1601
810 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
811 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
813 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
814 [ "$STATUS" == "scanning-phase1" ] ||
815 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
817 # Sleep 3 sec to guarantee at least one object processed by LFSCK
819 echo "stop $SINGLEMDS"
820 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
# fail_loc/fail_val are cleared before the MDT is started again.
822 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
823 echo "start $SINGLEMDS"
824 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
825 error "(5) Fail to start MDS!"
# No lfsck_start is issued here; only the status is polled.
827 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
828 mdd.${MDT_DEV}.lfsck_namespace |
829 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
831 error "(6) unexpected status"
834 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
# Test 7b ("non-stopped LFSCK should auto restarts after MDS remount (2)"):
# like the first remount test, but the MDS is stopped once the scan has
# reached "scanning-phase2".
# Twenty "dummy${i}" files are created while fail_loc 0x1604 is armed.
840 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
841 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
842 for ((i = 0; i < 20; i++)); do
843 touch $DIR/$tdir/dummy${i}
# With fail_loc 0x1602 armed, wait for the scan to enter phase 2.
846 #define OBD_FAIL_LFSCK_DELAY3 0x1602
847 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
848 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
849 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
850 mdd.${MDT_DEV}.lfsck_namespace |
851 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
853 error "(4) unexpected status"
857 echo "stop $SINGLEMDS"
858 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
# fail_loc/fail_val are cleared before the MDT is started again.
860 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
861 echo "start $SINGLEMDS"
862 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
863 error "(6) Fail to start MDS!"
# No lfsck_start is issued here; only the status is polled.
865 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
866 mdd.${MDT_DEV}.lfsck_namespace |
867 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
869 error "(7) unexpected status"
872 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
# Test 8 ("LFSCK state machine"): walk namespace LFSCK through the statuses
# init, scanning-phase1, stopped, failed, crashed, paused, scanning-phase2 and
# completed, checking the reported status after every transition.
877 formatall > /dev/null
# Status "init" is expected before any scan has been started.
883 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
884 [ "$STATUS" == "init" ] ||
885 error "(2) Expect 'init', but got '$STATUS'"
# Create "crashed" with fail_loc 0x1603 armed and five "dummy${i}" files with
# fail_loc 0x1604 armed.
887 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
888 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
889 mkdir $DIR/$tdir/crashed
891 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
892 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
893 for ((i = 0; i < 5; i++)); do
894 touch $DIR/$tdir/dummy${i}
897 umount_client $MOUNT || error "(3) Fail to stop client!"
# scanning-phase1 -> stopped -> scanning-phase1.
899 #define OBD_FAIL_LFSCK_DELAY2 0x1601
900 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
901 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
903 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
904 [ "$STATUS" == "scanning-phase1" ] ||
905 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
907 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
909 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
910 [ "$STATUS" == "stopped" ] ||
911 error "(7) Expect 'stopped', but got '$STATUS'"
913 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
915 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
916 [ "$STATUS" == "scanning-phase1" ] ||
917 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
# scanning-phase1 -> failed, via fail_loc 0x80001609.
919 #define OBD_FAIL_LFSCK_FATAL2 0x1609
920 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
921 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
922 mdd.${MDT_DEV}.lfsck_namespace |
923 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
925 error "(10) unexpected status"
# failed -> scanning-phase1 again.
928 #define OBD_FAIL_LFSCK_DELAY1 0x1600
929 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
930 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
932 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
933 [ "$STATUS" == "scanning-phase1" ] ||
934 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the MDS with fail_loc 0x160a armed and restart it with fail_loc 0x160b
# armed; the status is then expected to be "crashed".
936 #define OBD_FAIL_LFSCK_CRASH 0x160a
937 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
940 echo "stop $SINGLEMDS"
941 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
943 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
944 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
946 echo "start $SINGLEMDS"
947 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
948 error "(14) Fail to start MDS!"
# Poll recovery_status until it is no longer "RECOVERING", bounded by
# max_recovery_time.
950 local timeout=$(max_recovery_time)
953 while [ $timer -lt $timeout ]; do
954 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
955 mdt.${MDT_DEV}.recovery_status |
956 awk '/^status/ { print \\\$2 }'")
957 [ "$STATUS" != "RECOVERING" ] && break;
962 [ $timer != $timeout ] ||
963 error "(14.1) recovery timeout"
965 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
966 [ "$STATUS" == "crashed" ] ||
967 error "(15) Expect 'crashed', but got '$STATUS'"
# crashed -> scanning-phase1, then another stop/start with fail_loc 0x160b
# armed; this time the status is expected to be "paused".
969 #define OBD_FAIL_LFSCK_DELAY2 0x1601
970 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
971 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
973 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
974 [ "$STATUS" == "scanning-phase1" ] ||
975 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
977 echo "stop $SINGLEMDS"
978 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
980 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
981 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
983 echo "start $SINGLEMDS"
984 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
985 error "(19) Fail to start MDS!"
988 while [ $timer -lt $timeout ]; do
989 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
990 mdt.${MDT_DEV}.recovery_status |
991 awk '/^status/ { print \\\$2 }'")
992 [ "$STATUS" != "RECOVERING" ] && break;
997 [ $timer != $timeout ] ||
998 error "(19.1) recovery timeout"
1000 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1001 [ "$STATUS" == "paused" ] ||
1002 error "(20) Expect 'paused', but got '$STATUS'"
# Restart once more with MOUNT_OPTS_SKIP_LFSCK; the status must stay "paused".
1004 echo "stop $SINGLEMDS"
1005 stop $SINGLEMDS > /dev/null || error "(20.1) Fail to stop MDS!"
1007 echo "start $SINGLEMDS without resume LFSCK"
1008 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SKIP_LFSCK > /dev/null ||
1009 error "(20.2) Fail to start MDS!"
1012 while [ $timer -lt $timeout ]; do
1013 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
1014 mdt.${MDT_DEV}.recovery_status |
1015 awk '/^status/ { print \\\$2 }'")
1016 [ "$STATUS" != "RECOVERING" ] && break;
1018 timer=$((timer + 1))
1021 [ $timer != $timeout ] ||
1022 error "(20.3) recovery timeout"
1024 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1025 [ "$STATUS" == "paused" ] ||
1026 error "(20.4) Expect 'paused', but got '$STATUS'"
# paused -> scanning-phase2 with fail_loc 0x1602 armed; the flags must then
# read "scanned-once,inconsistent".
1028 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1029 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
1031 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
1032 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1033 mdd.${MDT_DEV}.lfsck_namespace |
1034 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
1036 error "(22) unexpected status"
1039 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
1040 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
1041 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
# Clear the fail_loc/fail_val settings, wait for "completed", and require
# empty flags.
1043 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1044 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1045 mdd.${MDT_DEV}.lfsck_namespace |
1046 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1048 error "(24) unexpected status"
1051 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
1052 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
1054 run_test 8 "LFSCK state machine"
# Test 9a (registered below as LFSCK speed control (1)).
# Runs layout LFSCK under a speed limit and checks that the reported
# average_speed_phase1 stays within bounds derived from BASE_SPEED1, and
# afterwards from BASE_SPEED1 and BASE_SPEED2 combined.
# NOTE(review): RUN_TIME1, RUN_TIME2 and TIME_DIFF are used below but are
# not assigned in the lines shown here -- confirm their values before
# relying on the margin arithmetic.
1057 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
# No line matching processor.*: 1 was found in /proc/cpuinfo: skip.
1058 skip "Testing on UP system, the speed may be inaccurate."
# Prepare the lfsck directory and fill it with 5000 files.
1062 check_mount_and_prep
1063 $LFS mkdir -i 0 $DIR/$tdir/lfsck || error "(1) Fail to mkdir lfsck"
1064 $LFS setstripe -c 1 -i -1 $DIR/$tdir/lfsck
1065 createmany -o $DIR/$tdir/lfsck/f 5000
1067 local BASE_SPEED1=100
# Start layout LFSCK with the -s value set to BASE_SPEED1.
1069 $START_LAYOUT -r -s $BASE_SPEED1 || error "(2) Fail to start LFSCK!"
1072 STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
1073 [ "$STATUS" == "scanning-phase1" ] ||
1074 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
# Sample the reported phase-1 average speed for the upper-bound check.
1076 local SPEED=$($SHOW_LAYOUT |
1077 awk '/^average_speed_phase1/ { print $2 }')
1079 # There may be some timing error; normally it is less than 2 seconds.
1080 # We allow another 20% schedule error.
1082 # MAX_MARGIN = 1.3 = 13 / 10
1083 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
1084 RUN_TIME1 * 13 / 10))
1085 [ $SPEED -lt $MAX_SPEED ] || {
1087 log "speed1: $BASE_SPEED1 time1: $RUN_TIME1"
1088 error "(4) Speed $SPEED, expected < $MAX_SPEED"
1091 # Adjust the speed limit to BASE_SPEED2 through lfsck_speed_limit.
1092 local BASE_SPEED2=300
1094 do_facet $SINGLEMDS \
1095 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
1098 SPEED=$($SHOW_LAYOUT | awk '/^average_speed_phase1/ { print $2 }')
1099 # MIN_MARGIN = 0.7 = 7 / 10
1100 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
1101 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
1102 (RUN_TIME1 + RUN_TIME2) * 7 / 10))
1103 [ $SPEED -gt $MIN_SPEED ] || {
1104 if [ $mds1_FSTYPE != ldiskfs ]; then
1105 error_ignore LU-5624 \
1106 "(5.1) Got speed $SPEED, expected more than $MIN_SPEED"
1109 "(5.2) Got speed $SPEED, expected more than $MIN_SPEED"
1113 # MAX_MARGIN = 1.3 = 13 / 10
1114 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
1115 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
1116 (RUN_TIME1 + RUN_TIME2) * 13 / 10))
1117 [ $SPEED -lt $MAX_SPEED ] || {
1119 log "speed1: $BASE_SPEED1 time1: $RUN_TIME1"
1120 log "speed2: $BASE_SPEED2 time2: $RUN_TIME2"
1121 error "(6) Speed $SPEED, expected < $MAX_SPEED"
# Set lfsck_speed_limit to 0 on all MDT and OST nodes, then wait up to
# 30 for the layout LFSCK status to read completed.
1124 do_nodes $(comma_list $(mdts_nodes)) \
1125 $LCTL set_param -n mdd.*.lfsck_speed_limit 0
1126 do_nodes $(comma_list $(osts_nodes)) \
1127 $LCTL set_param -n obdfilter.*.lfsck_speed_limit 0
1129 wait_update_facet $SINGLEMDS \
1130 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout |
1131 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
1132 error "(7) Failed to get expected 'completed'"
1134 run_test 9a "LFSCK speed control (1)"
# Test 9b (registered below as LFSCK speed control (2)).
# Runs namespace LFSCK under a speed limit and checks that the reported
# average_speed_phase2 stays within bounds derived from BASE_SPEED1, and
# afterwards from BASE_SPEED1 and BASE_SPEED2 combined.
# NOTE(review): RUN_TIME1, RUN_TIME2 and TIME_DIFF are used below but are
# not assigned in the lines shown here -- confirm their values before
# relying on the margin arithmetic.
1137 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
# No line matching processor.*: 1 was found in /proc/cpuinfo: skip.
1138 skip "Testing on UP system, the speed may be inaccurate."
# Create 50 directories and 50 files, plus 50 files in each directory,
# while fail_loc 0x1604 is set on the MDS.
1144 echo "Preparing another 50 * 50 files (with error) at $(date)."
1145 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1146 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
1147 createmany -d $DIR/$tdir/d 50
1148 createmany -m $DIR/$tdir/f 50
1149 for ((i = 0; i < 50; i++)); do
1150 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
# With fail_loc 0x160c set, run namespace LFSCK and wait for the status
# to read stopped before clearing fail_loc again.
1153 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
1154 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
1155 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
1156 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1157 mdd.${MDT_DEV}.lfsck_namespace |
1158 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
1160 error "(5) unexpected status"
1163 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1164 echo "Prepared at $(date)."
1166 local BASE_SPEED1=50
# Start namespace LFSCK again with the -s value set to BASE_SPEED1; it is
# expected to report scanning-phase2.
1168 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
1171 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1172 [ "$STATUS" == "scanning-phase2" ] ||
1173 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
1175 local SPEED=$($SHOW_NAMESPACE |
1176 awk '/^average_speed_phase2/ { print $2 }')
1177 # There may be some timing error; normally it is less than 2 seconds.
1178 # We allow another 20% schedule error.
1180 # MAX_MARGIN = 1.3 = 13 / 10
1181 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
1182 RUN_TIME1 * 13 / 10))
1183 [ $SPEED -lt $MAX_SPEED ] || {
1185 log "speed1: $BASE_SPEED1 time1: $RUN_TIME1"
1186 error "(8) Speed $SPEED, expected < $MAX_SPEED"
1189 # Adjust the speed limit to BASE_SPEED2 through lfsck_speed_limit.
1190 local BASE_SPEED2=150
1192 do_facet $SINGLEMDS \
1193 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
1196 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
1197 # MIN_MARGIN = 0.7 = 7 / 10
1198 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
1199 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
1200 (RUN_TIME1 + RUN_TIME2) * 7 / 10))
1201 [ $SPEED -gt $MIN_SPEED ] || {
1202 if [ $mds1_FSTYPE != ldiskfs ]; then
1203 error_ignore LU-5624 \
1204 "(9.1) Got speed $SPEED, expected more than $MIN_SPEED"
1207 "(9.2) Got speed $SPEED, expected more than $MIN_SPEED"
1211 # MAX_MARGIN = 1.3 = 13 / 10
1212 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
1213 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
1214 (RUN_TIME1 + RUN_TIME2) * 13 / 10))
1215 [ $SPEED -lt $MAX_SPEED ] || {
1217 log "speed1: $BASE_SPEED1 time1: $RUN_TIME1"
1218 log "speed2: $BASE_SPEED2 time2: $RUN_TIME2"
1219 error "(10) Speed $SPEED, expected < $MAX_SPEED"
# Set lfsck_speed_limit to 0 on all MDT and OST nodes, then wait for the
# namespace LFSCK status to read completed.
1222 do_nodes $(comma_list $(mdts_nodes)) \
1223 $LCTL set_param -n mdd.*.lfsck_speed_limit 0
1224 do_nodes $(comma_list $(osts_nodes)) \
1225 $LCTL set_param -n obdfilter.*.lfsck_speed_limit 0
1226 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1227 mdd.${MDT_DEV}.lfsck_namespace |
1228 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1230 error "(11) unexpected status"
1233 run_test 9b "LFSCK speed control (2)"
# Test 10 (registered below as System is available during LFSCK scanning).
# Skipped unless the MDS backing filesystem is ldiskfs. Creates directories
# and files under two different fail_loc settings, starts namespace LFSCK
# with -s 100, and then performs ordinary client operations (ls, touch,
# mkdir, unlink, rm, mv, hard link, symlink) while expecting LFSCK to still
# report scanning-phase1 afterwards.
1237 [[ $mds1_FSTYPE == ldiskfs ]] || skip "lookup(..)/linkea on ZFS issue"
1241 echo "Preparing more files with error at $(date)."
1242 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
1243 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
# Even indexes (0, 2, ...) are created while fail_loc is 0x1603.
1245 for ((i = 0; i < 1000; i = $((i+2)))); do
1246 mkdir -p $DIR/$tdir/d${i}
1247 touch $DIR/$tdir/f${i}
1248 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
1251 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1252 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
# Odd indexes (1, 3, ...) are created while fail_loc is 0x1604.
1254 for ((i = 1; i < 1000; i = $((i+2)))); do
1255 mkdir -p $DIR/$tdir/d${i}
1256 touch $DIR/$tdir/f${i}
1257 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
1260 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1261 echo "Prepared at $(date)."
1263 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
# Remount the client before starting LFSCK.
1265 umount_client $MOUNT
1266 mount_client $MOUNT || error "(3) Fail to start client!"
1268 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
1271 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1272 [ "$STATUS" == "scanning-phase1" ] ||
1273 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Client operations issued after LFSCK reported scanning-phase1; each one
# must succeed.
1275 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
1277 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
1279 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
1281 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
1283 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
1285 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
1287 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
1289 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
1290 error "(14) Fail to softlink!"
1292 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1293 [ "$STATUS" == "scanning-phase1" ] ||
1294 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# Set lfsck_speed_limit to 0 on all MDT and OST nodes, then wait for the
# namespace LFSCK status to read completed.
1296 do_nodes $(comma_list $(mdts_nodes)) \
1297 $LCTL set_param -n mdd.*.lfsck_speed_limit 0
1298 do_nodes $(comma_list $(osts_nodes)) \
1299 $LCTL set_param -n obdfilter.*.lfsck_speed_limit 0
1300 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1301 mdd.${MDT_DEV}.lfsck_namespace |
1302 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1304 error "(16) unexpected status"
1307 run_test 10 "System is available during LFSCK scanning"
# Remove LAST_ID and d0/0 under O/<idx> on the mount point of ost<ost>.
# The facet is mounted through mount_fstype, the two paths are removed
# with rm -fv run via do_facet, and the facet is unmounted again through
# unmount_fstype.
# NOTE(review): ost and idx are not assigned in the lines shown here;
# presumably they come from the two positional arguments -- confirm.
# Returns: 1 if mount_fstype fails, 2 if unmount_fstype fails.
1310 ost_remove_lastid() {
1313 local rcmd="do_facet ost${ost}"
1315 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1317 # step 1: local mount
1318 mount_fstype ost${ost} || return 1
1319 # step 2: remove the specified LAST_ID
1320 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
1322 unmount_fstype ost${ost} || return 2
# Test 11a (registered below as LFSCK can rebuild lost last_id).
# Skipped unless MDS1_VERSION is newer than 2.5.55. Creates 64 files on
# OST index 0, removes LAST_ID through ost_remove_lastid 1 0, starts ost1
# with MOUNT_OPTS_NOSCRUB and runs layout LFSCK on the OST. The flags are
# expected to show crashed_lastid first and to be empty once the status
# reads completed.
1326 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1327 skip "MDS older than 2.5.55, LU-1267"
1329 check_mount_and_prep
1330 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1331 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
1336 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1338 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1339 error "(2) Fail to start ost1"
# fail_loc 0x160e with fail_val 3 stays set on ost1 until crashed_lastid
# has been observed in the flags.
1341 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1342 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1344 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1345 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
1347 wait_update_facet ost1 "$LCTL get_param -n \
1348 obdfilter.${OST_DEV}.lfsck_layout |
1349 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1351 error "(5) unexpected status"
1354 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1356 wait_update_facet ost1 "$LCTL get_param -n \
1357 obdfilter.${OST_DEV}.lfsck_layout |
1358 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1360 error "(6) unexpected status"
1363 echo "the LAST_ID(s) should have been rebuilt"
1364 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1365 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1367 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b (registered below as LFSCK can rebuild crashed last_id).
# Skipped unless MDS1_VERSION is newer than 2.5.55. With fail_loc 0x160d
# set on ost1, creates more files than the precreated object count,
# records the MDS-side prealloc_last_seq and prealloc_last_id, restarts
# ost1, and checks that the OST-side last_id is smaller than the recorded
# id. Layout LFSCK is then run on the OST and, after another restart of
# ost1, last_id is expected to be at least the recorded id.
1370 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1371 skip "MDS older than 2.5.55, LU-1267"
1373 check_mount_and_prep
1374 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1376 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1377 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1378 do_facet ost1 $LCTL set_param fail_loc=0x160d
1380 local count=$(precreated_ost_obj_count 0 0)
1382 createmany -o $DIR/$tdir/f $((count + 32))
# Record the sequence and last id as seen from mds1 for later comparison.
1384 local proc_path="${FSNAME}-OST0000-osc-MDT0000"
1385 local seq=$(do_facet mds1 $LCTL get_param -n \
1386 osp.${proc_path}.prealloc_last_seq)
1387 local id_used=$(do_facet mds1 $LCTL get_param -n \
1388 osp.${proc_path}.prealloc_last_id)
1390 umount_client $MOUNT
1391 stop ost1 || error "(1) Fail to stop ost1"
1393 #define OBD_FAIL_OST_ENOSPC 0x215
1394 do_facet ost1 $LCTL set_param fail_loc=0x215
1396 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1397 error "(2) Fail to start ost1"
# Poll up to 60 times until a last_id entry matching seq is reported.
1399 for ((i = 0; i < 60; i++)); do
1400 id_ost1=$(do_facet ost1 \
1401 "$LCTL get_param -n obdfilter.$ost1_svc.last_id" |
1402 awk -F: "/$seq/ { print \$2 }")
1403 [ -n "$id_ost1" ] && break
1407 echo "the on-disk LAST_ID should be smaller than the expected one"
1408 [ $id_used -gt $id_ost1 ] ||
1409 error "(4) expect id_used '$id_used' > id_ost1 '$id_ost1'"
1411 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1412 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1414 wait_update_facet ost1 \
1415 "$LCTL get_param -n obdfilter.$ost1_svc.lfsck_layout |
1416 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1418 error "(6) unexpected status"
# Restart ost1 before reading last_id again.
1421 stop ost1 || error "(7) Fail to stop ost1"
1423 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1424 error "(8) Fail to start ost1"
1426 echo "the on-disk LAST_ID should have been rebuilt"
1427 # last_id may be larger than $id_used if objects were created/skipped
1428 wait_update_facet_cond ost1 \
1429 "$LCTL get_param -n obdfilter.$ost1_svc.last_id |
1430 awk -F: '/$seq/ { print \\\$2 }'" "-ge" "$id_used" 60 || {
1431 do_facet ost1 $LCTL get_param obdfilter.$ost1_svc.last_id
1432 error "(9) expect last_id >= id_used $seq:$id_used"
1435 do_facet ost1 $LCTL set_param fail_loc=0
1436 stopall || error "(10) Fail to stopall"
1438 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12a (registered below as single command to trigger LFSCK on all
# devices). Skipped with fewer than 2 MDTs or when MDS1_VERSION is not
# newer than 2.5.55. For namespace LFSCK and then for layout LFSCK it
# starts the scan with -A -s 1 -r, expects scanning-phase1 on all targets,
# stops it with lfsck_stop -A, expects stopped, restarts with -s 0 and
# expects completed.
1441 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
1442 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1443 skip "MDS older than 2.5.55, LU-3950"
1445 check_mount_and_prep
# One directory per MDT (index k - 1), each filled with 100 files.
1446 for k in $(seq $MDSCOUNT); do
1447 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1448 createmany -o $DIR/$tdir/${k}/f 100 ||
1449 error "(0) Fail to create 100 files."
1452 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1453 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1454 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1456 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1457 wait_all_targets namespace scanning-phase1 3
1459 echo "Stop namespace LFSCK on all targets by single lctl command."
1460 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1461 error "(4) Fail to stop LFSCK on all devices!"
1463 echo "All the LFSCK targets should be in 'stopped' status."
1464 wait_all_targets_blocked namespace stopped 5
1466 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1467 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1468 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1470 echo "All the LFSCK targets should be in 'completed' status."
1471 wait_all_targets_blocked namespace completed 7
# The layout LFSCK part runs between start_full_debug_logging and
# stop_full_debug_logging.
1473 start_full_debug_logging
1475 echo "Start layout LFSCK on all targets by single command (-s 1)."
1476 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1477 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1479 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1480 wait_all_targets layout scanning-phase1 9
1482 echo "Stop layout LFSCK on all targets by single lctl command."
1483 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1484 error "(10) Fail to stop LFSCK on all devices!"
1486 echo "All the LFSCK targets should be in 'stopped' status."
1487 wait_all_targets_blocked layout stopped 11
# Additionally check the layout LFSCK status reported by every OST.
1489 for k in $(seq $OSTCOUNT); do
1490 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1491 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1492 awk '/^status/ { print $2 }')
1493 [ "$STATUS" == "stopped" ] ||
1494 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1497 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1498 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1499 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1501 echo "All the LFSCK targets should be in 'completed' status."
1502 wait_all_targets_blocked layout completed 14
1504 stop_full_debug_logging
1506 run_test 12a "single command to trigger LFSCK on all devices"
# Test 12b (registered below as auto detect Lustre device).
# Skipped unless MDS1_VERSION is newer than 2.5.55. Starts LFSCK with
# -A -r but without -M and expects both namespace and layout LFSCK to
# reach completed on all targets. If the device list on mds1 shows more
# than one mdt entry, starting layout LFSCK without -M or -A must fail.
1509 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1510 skip "MDS older than 2.5.55, LU-3950"
1512 check_mount_and_prep
1514 echo "Start LFSCK without '-M' specified."
1515 do_facet mds1 $LCTL lfsck_start -A -r ||
1516 error "(0) Fail to start LFSCK without '-M'"
1518 wait_all_targets_blocked namespace completed 1
1519 wait_all_targets_blocked layout completed 2
# Count the entries whose third column of the dl output contains mdt.
1521 local count=$(do_facet mds1 $LCTL dl |
1522 awk '{ print $3 }' | grep mdt | wc -l)
1523 if [ $count -gt 1 ]; then
1525 echo "Start layout LFSCK on the node with multipe targets,"
1526 echo "but not specify '-M'/'-A' option. Should get failure."
# A successful start is reported as an error; the trailing true keeps the
# exit status of this command list zero when the start fails as expected.
1528 do_facet mds1 $LCTL lfsck_start -t layout -r &&
1529 error "(3) Start layout LFSCK should fail" || true
1532 run_test 12b "auto detect Lustre device"
# Test 13 (registered below as LFSCK can repair crashed lmm_oi).
# Skipped unless MDS1_VERSION is newer than 2.5.55. Creates one plain file
# and one PFL file while fail_loc 0x160f is set on the MDS, runs layout
# LFSCK, and expects repaired_others to equal 2 once the status reads
# completed.
1535 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1536 skip "MDS older than 2.5.55, LU-3593"
1539 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1540 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1541 echo "MDT-object FID."
1544 check_mount_and_prep
1546 echo "Inject failure stub to simulate bad lmm_oi"
1547 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1548 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1549 createmany -o $DIR/$tdir/f 1
1550 $LFS setstripe -E 1M -S 1M -E -1 $DIR/$tdir/f1 ||
1551 error "(0) Fail to create PFL $DIR/$tdir/f1"
1552 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1554 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1555 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1557 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1558 mdd.${MDT_DEV}.lfsck_layout |
1559 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1561 error "(2) unexpected status"
# Two files were created under the fail_loc above, hence the expected 2.
1564 local repaired=$($SHOW_LAYOUT |
1565 awk '/^repaired_others/ { print $2 }')
1566 [ $repaired -eq 2 ] ||
1567 error "(3) Fail to repair crashed lmm_oi: $repaired"
1569 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14a (registered below as LFSCK can repair MDT-object with dangling
# LOV EA reference (1)). Skipped unless MDS1_VERSION is newer than 2.5.55.
# Creates files while fail_loc 0x1610 is set on ost1, runs layout LFSCK
# with -r and expects repaired_dangling >= 32 while stat on the guard
# files still fails. It then runs layout LFSCK with -r -c, after which
# stat on the guard files is expected to report size 0 and
# repaired_dangling is again expected to be >= 32.
1572 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1573 skip "MDS older than 2.5.55, LU-3590"
1576 echo "The OST-object referenced by the MDT-object should be there;"
1577 echo "otherwise, the LFSCK should re-create the missing OST-object."
1578 echo "without '--delay-create-ostobj' option."
1581 check_mount_and_prep
1582 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1584 echo "Inject failure stub to simulate dangling referenced MDT-object"
1585 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1586 do_facet ost1 $LCTL set_param fail_loc=0x1610
1587 local count=$(precreated_ost_obj_count 0 0)
# Create count + 16 plain files, a guard file, 16 composite files and a
# second guard file while the fail_loc is set.
1589 createmany -o $DIR/$tdir/f $((count + 16)) ||
1590 error "(0.1) Fail to create $DIR/$tdir/fx"
1591 touch $DIR/$tdir/guard0
1593 for ((i = 0; i < 16; i++)); do
1594 $LFS setstripe -E 512K -S 256K -o 0 -E 2M \
1595 $DIR/$tdir/f_comp${i} ||
1596 error "(0.2) Fail to create $DIR/$tdir/f_comp${i}"
1598 touch $DIR/$tdir/guard1
1600 do_facet ost1 $LCTL set_param fail_loc=0
1602 start_full_debug_logging
1604 # exhaust other pre-created dangling cases
1605 count=$(precreated_ost_obj_count 0 0)
1606 createmany -o $DIR/$tdir/a $count ||
1607 error "(0.5) Fail to create $count files."
1609 echo "'ls' should fail because of dangling referenced MDT-object"
1610 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
1612 echo "Trigger layout LFSCK to find out dangling reference"
1613 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1615 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1616 mdd.${MDT_DEV}.lfsck_layout |
1617 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1619 error "(3) unexpected status"
1622 local repaired=$($SHOW_LAYOUT |
1623 awk '/^repaired_dangling/ { print $2 }')
1624 [ $repaired -ge 32 ] ||
1625 error "(4) Fail to repair dangling reference: $repaired"
1627 echo "'stat' should fail because of not repair dangling by default"
1628 stat $DIR/$tdir/guard0 > /dev/null 2>&1 &&
1629 error "(5.1) stat should fail"
1630 stat $DIR/$tdir/guard1 > /dev/null 2>&1 &&
1631 error "(5.2) stat should fail"
1633 echo "Trigger layout LFSCK to repair dangling reference"
1634 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1636 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1637 mdd.${MDT_DEV}.lfsck_layout |
1638 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1640 error "(7) unexpected status"
1643 # There may be some async LFSCK updates still in progress; wait for
1644 # a while until the target repair has been done. LU-4970.
1646 echo "'stat' should success after layout LFSCK repairing"
1647 wait_update_facet client "stat $DIR/$tdir/guard0 |
1648 awk '/Size/ { print \\\$2 }'" "0" 32 || {
1649 stat $DIR/$tdir/guard0
1651 error "(8.1) unexpected size"
1654 wait_update_facet client "stat $DIR/$tdir/guard1 |
1655 awk '/Size/ { print \\\$2 }'" "0" 32 || {
1656 stat $DIR/$tdir/guard1
1658 error "(8.2) unexpected size"
1661 repaired=$($SHOW_LAYOUT |
1662 awk '/^repaired_dangling/ { print $2 }')
1663 [ $repaired -ge 32 ] ||
1664 error "(9) Fail to repair dangling reference: $repaired"
1666 stop_full_debug_logging
1668 echo "stopall to cleanup object cache"
1671 setupall > /dev/null
1673 run_test 14a "LFSCK can repair MDT-object with dangling LOV EA reference (1)"
# Test 14b (registered below as LFSCK can repair MDT-object with dangling
# LOV EA reference (2)). Skipped unless MDS1_VERSION is newer than 2.5.55.
# Creates files while fail_loc 0x1610 is set on ost1, runs layout LFSCK
# with -r -o -d and expects repaired_dangling >= 32 while stat on the
# guard file still fails. It then runs layout LFSCK with -r -o -c -d,
# after which stat on the guard file is expected to report size 0 and
# repaired_dangling is again expected to be >= 32.
1676 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1677 skip "MDS older than 2.5.55, LU-3590"
1680 echo "The OST-object referenced by the MDT-object should be there;"
1681 echo "otherwise, the LFSCK should re-create the missing OST-object."
1682 echo "with '--delay-create-ostobj' option."
1685 check_mount_and_prep
1686 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1688 echo "Inject failure stub to simulate dangling referenced MDT-object"
1689 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1690 do_facet ost1 $LCTL set_param fail_loc=0x1610
1691 local count=$(precreated_ost_obj_count 0 0)
# Create count + 31 files plus the guard file while the fail_loc is set.
1693 createmany -o $DIR/$tdir/f $((count + 31))
1694 touch $DIR/$tdir/guard
1695 do_facet ost1 $LCTL set_param fail_loc=0
1697 start_full_debug_logging
1699 # exhaust other pre-created dangling cases
1700 count=$(precreated_ost_obj_count 0 0)
1701 createmany -o $DIR/$tdir/a $count ||
1702 error "(0) Fail to create $count files."
1704 echo "'ls' should fail because of dangling referenced MDT-object"
1705 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
1707 echo "Trigger layout LFSCK to find out dangling reference"
1708 $START_LAYOUT -r -o -d || error "(2) Fail to start LFSCK for layout!"
1710 wait_all_targets_blocked layout completed 3
1712 local repaired=$($SHOW_LAYOUT |
1713 awk '/^repaired_dangling/ { print $2 }')
1714 [ $repaired -ge 32 ] ||
1715 error "(4) Fail to repair dangling reference: $repaired"
1717 echo "'stat' should fail because of not repair dangling by default"
1718 stat $DIR/$tdir/guard > /dev/null 2>&1 && error "(5) stat should fail"
1720 echo "Trigger layout LFSCK to repair dangling reference"
1721 $START_LAYOUT -r -o -c -d || error "(6) Fail to start LFSCK for layout!"
1723 wait_all_targets_blocked layout completed 7
1725 # There may be some async LFSCK updates still in progress; wait for
1726 # a while until the target repair has been done. LU-4970.
1728 echo "'stat' should success after layout LFSCK repairing"
1729 wait_update_facet client "stat $DIR/$tdir/guard |
1730 awk '/Size/ { print \\\$2 }'" "0" 32 || {
1731 stat $DIR/$tdir/guard
1733 error "(8) unexpected size"
1736 repaired=$($SHOW_LAYOUT |
1737 awk '/^repaired_dangling/ { print $2 }')
1738 [ $repaired -ge 32 ] ||
1739 error "(9) Fail to repair dangling reference: $repaired"
1741 stop_full_debug_logging
1743 echo "stopall to cleanup object cache"
1746 setupall > /dev/null
1748 run_test 14b "LFSCK can repair MDT-object with dangling LOV EA reference (2)"
# Test 15a (registered below as LFSCK can repair unmatched
# MDT-object/OST-object pairs (1)). Skipped unless MDS1_VERSION is newer
# than 2.5.55. Writes a plain file f0 and a PFL file f1 while fail_loc
# 0x1611 is set on all OST nodes, runs layout LFSCK and expects
# repaired_unmatched_pair >= 3 once the status reads completed.
1751 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1752 skip "MDS older than 2.5.55, LU-3591"
1755 echo "If the OST-object referenced by the MDT-object back points"
1756 echo "to some non-exist MDT-object, then the LFSCK should repair"
1757 echo "the OST-object to back point to the right MDT-object."
1760 check_mount_and_prep
1761 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1763 echo "Inject failure stub to make the OST-object to back point to"
1764 echo "non-exist MDT-object."
1765 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
1767 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x1611
1768 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1769 $LFS setstripe -E 1M -S 256K -c 1 -E -1 -S 512K -c $OSTCOUNT \
1771 error "(0) Fail to create PFL $DIR/$tdir/f1"
1772 # dd will first trigger a punch RPC on every OST-object.
1773 # So even if some OST-object is not written by dd,
1774 # as long as it is allocated (may be NOT allocated in pfl_3b)
1775 # its layout information will also be set.
1776 dd if=/dev/zero of=$DIR/$tdir/f1 bs=4K count=257
1777 cancel_lru_locks osc
1778 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
1780 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1781 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1783 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1784 mdd.${MDT_DEV}.lfsck_layout |
1785 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1787 error "(2) unexpected status"
1790 local repaired=$($SHOW_LAYOUT |
1791 awk '/^repaired_unmatched_pair/ { print $2 }')
1792 [ $repaired -ge 3 ] ||
1793 error "(3) Fail to repair unmatched pair: $repaired"
1795 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b (registered below as LFSCK can repair unmatched
# MDT-object/OST-object pairs (2)). Skipped unless MDS1_VERSION is newer
# than 2.5.55. Writes a guard file first, then writes f0 and a PFL file f1
# while fail_loc 0x1612 is set on all OST nodes, runs layout LFSCK and
# expects repaired_unmatched_pair to equal 4 once the status reads
# completed.
# NOTE(review): the initial assignment of stripes is not in the lines
# shown here; only the override to 2 for OSTCOUNT >= 2 is visible.
1798 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1799 skip "MDS older than 2.5.55, LU-3591"
1802 echo "If the OST-object referenced by the MDT-object back points"
1803 echo "to other MDT-object that doesn't recognize the OST-object,"
1804 echo "then the LFSCK should repair it to back point to the right"
1805 echo "MDT-object (the first one)."
1808 check_mount_and_prep
1809 mkdir -p $DIR/$tdir/0
1810 $LFS setstripe -c 1 -i 0 $DIR/$tdir/0
1811 dd if=/dev/zero of=$DIR/$tdir/0/guard bs=1M count=1
1812 cancel_lru_locks osc
1814 echo "Inject failure stub to make the OST-object to back point to"
1815 echo "other MDT-object"
1818 [ $OSTCOUNT -ge 2 ] && stripes=2
1820 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1821 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x1612
1822 dd if=/dev/zero of=$DIR/$tdir/0/f0 bs=1M count=1
1823 $LFS setstripe -E 1M -S 256K -c $stripes -E 2M -S 512K -c 1 \
1825 error "(0) Fail to create PFL $DIR/$tdir/f1"
1826 dd if=/dev/zero of=$DIR/$tdir/f1 bs=1M count=2
1827 cancel_lru_locks osc
1828 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
1830 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1831 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1833 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1834 mdd.${MDT_DEV}.lfsck_layout |
1835 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1837 error "(2) unexpected status"
1840 local repaired=$($SHOW_LAYOUT |
1841 awk '/^repaired_unmatched_pair/ { print $2 }')
1842 [ $repaired -eq 4 ] ||
1843 error "(3) Fail to repair unmatched pair: $repaired"
1845 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 15c (registered below as LFSCK can repair unmatched
# MDT-object/OST-object pairs (3)). Runs only with at least 2 MDTs and
# with MDS1_VERSION newer than 2.5.55 but older than 2.7.55. Starts a
# directory migration in the background with fail_loc 0x1803 and
# fail_val 5 set on mds2, runs layout LFSCK with -A -r at the same time,
# and expects repaired_unmatched_pair to be 1 and
# repaired_multiple_referenced to be 0.
1848 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
1849 (( $MDS1_VERSION < $(version_code 2.7.55) )) ||
1850 skip "MDS newer than 2.7.55, LU-6475"
1851 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1852 skip "MDS older than 2.5.55, LU-3591"
1855 echo "According to current metadata migration implementation,"
1856 echo "before the old MDT-object is removed, both the new MDT-object"
1857 echo "and old MDT-object will reference the same LOV layout. Then if"
1858 echo "the layout LFSCK finds the new MDT-object by race, it will"
1859 echo "regard related OST-object(s) as multiple referenced case, and"
1860 echo "will try to create new OST-object(s) for the new MDT-object."
1861 echo "To avoid such trouble, the layout LFSCK needs to lock the old"
1862 echo "MDT-object before confirm the multiple referenced case."
1865 check_mount_and_prep
1866 $LFS mkdir -i 1 $DIR/$tdir/a1
1867 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
1868 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=1
1869 cancel_lru_locks osc
1871 echo "Inject failure stub on MDT1 to delay the migration"
1873 #define OBD_FAIL_MIGRATE_DELAY 0x1803
1874 do_facet mds2 $LCTL set_param fail_val=5 fail_loc=0x1803
1875 echo "Migrate $DIR/$tdir/a1 from MDT1 to MDT0 with delay"
# The migration runs in the background so LFSCK can be started meanwhile.
1876 $LFS migrate -m 0 $DIR/$tdir/a1 &
1879 echo "Trigger layout LFSCK to race with the migration"
1880 $START_LAYOUT -A -r || error "(1) Fail to start layout LFSCK!"
1882 wait_all_targets_blocked layout completed 2
1884 do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0
1885 local repaired=$($SHOW_LAYOUT |
1886 awk '/^repaired_unmatched_pair/ { print $2 }')
1887 [ $repaired -eq 1 ] ||
1888 error "(3) Fail to repair unmatched pair: $repaired"
1890 repaired=$($SHOW_LAYOUT |
1891 awk '/^repaired_multiple_referenced/ { print $2 }')
1892 [ $repaired -eq 0 ] ||
1893 error "(4) Unexpectedly repaird multiple references: $repaired"
1895 run_test 15c "LFSCK can repair unmatched MDT-object/OST-object pairs (3)"
# Test 16 (registered below as LFSCK can repair inconsistent
# MDT-object/OST-object owner). Skipped unless MDS1_VERSION is newer than
# 2.5.55. Writes f0, creates 100 files as the RUNAS user under d1, changes
# the owner of f0 to 1.1 while fail_loc 0x1613 is set on the MDS, runs
# layout LFSCK and expects repaired_inconsistent_owner to equal 1.
# NOTE(review): the creation of d1 is not in the lines shown here.
1898 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1899 skip "MDS older than 2.5.55, LU-3594"
1902 echo "If the OST-object's owner information does not match the owner"
1903 echo "information stored in the MDT-object, then the LFSCK trust the"
1904 echo "MDT-object and update the OST-object's owner information."
1907 check_mount_and_prep
1908 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1909 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1910 cancel_lru_locks osc
1912 # created but no setattr or write to the file.
1914 chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/d1
1915 $RUNAS createmany -o $DIR/$tdir/d1/o 100 || error "create failed"
1917 echo "Inject failure stub to skip OST-object owner changing"
1918 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1919 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
1920 chown 1.1 $DIR/$tdir/f0
1921 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1923 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1926 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1928 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1929 mdd.${MDT_DEV}.lfsck_layout |
1930 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1932 error "(2) unexpected status"
# Only f0 had its owner changed under the fail_loc, hence the expected 1.
1935 local repaired=$($SHOW_LAYOUT |
1936 awk '/^repaired_inconsistent_owner/ { print $2 }')
1937 [ $repaired -eq 1 ] ||
1938 error "(3) Fail to repair inconsistent owner: $repaired"
1940 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17 (registered below as LFSCK can repair multiple references).
# Skipped unless MDS1_VERSION is newer than 2.5.55. With fail_loc 0x1614
# set on the MDS, writes 1M to guard and creates f0 and a PFL file f1;
# both are then expected to report the size 1048576. After layout LFSCK
# reports repaired_multiple_referenced equal to 2, writing 2M to f0 and to
# f1 must leave the size of guard at 1048576.
1943 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
1944 skip "MDS older than 2.5.55, LU-3594"
1947 echo "If more than one MDT-objects reference the same OST-object,"
1948 echo "and the OST-object only recognizes one MDT-object, then the"
1949 echo "LFSCK should create new OST-objects for such non-recognized"
1953 check_mount_and_prep
1954 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1956 echo "Inject failure stub to make two MDT-objects to refernce"
1957 echo "the OST-object"
1959 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1960 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1961 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1962 cancel_lru_locks mdc
1963 cancel_lru_locks osc
1965 createmany -o $DIR/$tdir/f 1
1966 cancel_lru_locks mdc
1967 cancel_lru_locks osc
1969 $LFS setstripe -E 2M -S 256K -o 0 -E 4M -S 512K -o 0 \
1971 error "(0) Fail to create PFL $DIR/$tdir/f1"
1972 cancel_lru_locks mdc
1973 cancel_lru_locks osc
1974 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1976 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
1977 echo "$DIR/$tdir/f1 and $DIR/$tdir/guard use the same OST-objects"
# The size is taken from the fifth column of the ls -l output.
1978 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1979 [ $size -eq 1048576 ] ||
1980 error "(1.1) f0 (wrong) size should be 1048576, but got $size"
1982 size=$(ls -l $DIR/$tdir/f1 | awk '{ print $5 }')
1983 [ $size -eq 1048576 ] ||
1984 error "(1.2) f1 (wrong) size should be 1048576, but got $size"
1986 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1989 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1991 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1992 mdd.${MDT_DEV}.lfsck_layout |
1993 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1995 error "(3) unexpected status"
1998 local repaired=$($SHOW_LAYOUT |
1999 awk '/^repaired_multiple_referenced/ { print $2 }')
2000 [ $repaired -eq 2 ] ||
2001 error "(4) Fail to repair multiple references: $repaired"
2003 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
2004 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
2005 error "(5) Fail to write f0."
2006 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
2007 [ $size -eq 1048576 ] ||
2008 error "(6) guard size should be 1048576, but got $size"
2010 echo "$DIR/$tdir/f1 and $DIR/$tdir/guard should use diff OST-objects"
2011 dd if=/dev/zero of=$DIR/$tdir/f1 bs=1M count=2 ||
2012 error "(7) Fail to write f1."
2013 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
2014 [ $size -eq 1048576 ] ||
2015 error "(8) guard size should be 1048576, but got $size"
2017 run_test 17 "LFSCK can repair multiple references"
# Add cache to the local debug setting through lctl set_param; the
# command output is discarded.
2019 $LCTL set_param debug=+cache > /dev/null
# Test 18a: the MDT-object still exists but its layout EA is lost (fully or
# partly). fail_loc 0x1615 is set on the MDS(es) while the files are chown'ed,
# the file sizes are then expected to be wrong, layout LFSCK is started with
# "-r -o", and afterwards the repaired_orphan counters and the original file
# sizes are verified.
2022 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
2023 skip "MDS older than 2.5.55, LU-3336"
2026 echo "The target MDT-object is there, but related stripe information"
2027 echo "is lost or partly lost. The LFSCK should regenerate the missing"
2028 echo "layout EA entries."
2031 check_mount_and_prep
2032 $LFS mkdir -i 0 $DIR/$tdir/a1
2033 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
2034 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# "ls -il" prints the inode number first, so the file size is field 6.
2036 local saved_size1=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
# Print FID and layout before the damage is injected (output only).
2038 $LFS path2fid $DIR/$tdir/a1/f1
2039 $LFS getstripe $DIR/$tdir/a1/f1
# A second, two-stripe file is only created when at least two MDTs exist.
2041 if [ $MDSCOUNT -ge 2 ]; then
2042 $LFS mkdir -i 1 $DIR/$tdir/a2
2043 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
2044 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
2045 $LFS path2fid $DIR/$tdir/a2/f2
2046 $LFS getstripe $DIR/$tdir/a2/f2
# f3 is a PFL (composite layout) file with two components.
2049 $LFS setstripe -E 1M -S 1M -o 0 -E -1 -S 1M $DIR/$tdir/f3 ||
2050 error "(0) Fail to create PFL $DIR/$tdir/f3"
2052 dd if=/dev/zero of=$DIR/$tdir/f3 bs=1M count=2
2054 local saved_size2=$(ls -il $DIR/$tdir/f3 | awk '{ print $6 }')
2056 $LFS path2fid $DIR/$tdir/f3
2057 $LFS getstripe $DIR/$tdir/f3
2059 cancel_lru_locks osc
2061 echo "Inject failure, to make the MDT-object lost its layout EA"
2062 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
# The chown calls below run while fail_loc 0x1615 is active on the MDS.
2063 do_facet mds1 $LCTL set_param fail_loc=0x1615
2064 chown 1.1 $DIR/$tdir/a1/f1
2066 if [ $MDSCOUNT -ge 2 ]; then
2067 do_facet mds2 $LCTL set_param fail_loc=0x1615
2068 chown 1.1 $DIR/$tdir/a2/f2
2071 chown 1.1 $DIR/$tdir/f3
# Clear the fault injection again.
2076 do_facet mds1 $LCTL set_param fail_loc=0
2077 if [ $MDSCOUNT -ge 2 ]; then
2078 do_facet mds2 $LCTL set_param fail_loc=0
2081 cancel_lru_locks mdc
2082 cancel_lru_locks osc
2084 echo "The file size should be incorrect since layout EA is lost"
2085 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
2086 [ "$cur_size" != "$saved_size1" ] ||
2087 error "(1) Expect incorrect file1 size"
# NOTE(review): f2 is compared against saved_size1 (the size recorded for f1);
# no separate size is saved for f2. Both files are written with the same dd
# arguments above.
2089 if [ $MDSCOUNT -ge 2 ]; then
2090 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
2091 [ "$cur_size" != "$saved_size1" ] ||
2092 error "(2) Expect incorrect file2 size"
2095 cur_size=$(ls -il $DIR/$tdir/f3 | awk '{ print $6 }')
2096 [ "$cur_size" != "$saved_size2" ] ||
2097 error "(1.2) Expect incorrect file3 size"
2099 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2100 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
2102 for k in $(seq $MDSCOUNT); do
2103 # The LFSCK status query interval is 30 seconds. For the case
2104 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2105 # time to guarantee the status sync up.
2106 wait_update_facet mds${k} "$LCTL get_param -n \
2107 mdd.$(facet_svc mds${k}).lfsck_layout |
2108 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2109 error "(4) MDS${k} is not the expected 'completed'"
# Every OST is read once (no polling) and must already report "completed".
2112 for k in $(seq $OSTCOUNT); do
2113 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2114 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2115 awk '/^status/ { print $2 }')
2116 [ "$cur_status" == "completed" ] ||
2117 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# The repaired_orphan counter must be 3 on mds1 and, if present, 2 on mds2.
2120 local repaired=$(do_facet mds1 $LCTL get_param -n \
2121 mdd.$(facet_svc mds1).lfsck_layout |
2122 awk '/^repaired_orphan/ { print $2 }')
2123 [ $repaired -eq 3 ] ||
2124 error "(6.1) Expect 3 fixed on mds1, but got: $repaired"
2126 if [ $MDSCOUNT -ge 2 ]; then
2127 repaired=$(do_facet mds2 $LCTL get_param -n \
2128 mdd.$(facet_svc mds2).lfsck_layout |
2129 awk '/^repaired_orphan/ { print $2 }')
2130 [ $repaired -eq 2 ] ||
2131 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
# Print FID and layout again after the repair (output only).
2134 $LFS path2fid $DIR/$tdir/a1/f1
2135 $LFS getstripe $DIR/$tdir/a1/f1
2137 if [ $MDSCOUNT -ge 2 ]; then
2138 $LFS path2fid $DIR/$tdir/a2/f2
2139 $LFS getstripe $DIR/$tdir/a2/f2
2142 $LFS path2fid $DIR/$tdir/f3
2143 $LFS getstripe $DIR/$tdir/f3
2145 echo "The file size should be correct after layout LFSCK scanning"
2146 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
2147 [ "$cur_size" == "$saved_size1" ] ||
2148 error "(7) Expect file1 size $saved_size1, but got $cur_size"
2150 if [ $MDSCOUNT -ge 2 ]; then
2151 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
2152 [ "$cur_size" == "$saved_size1" ] ||
2153 error "(8) Expect file2 size $saved_size1, but got $cur_size"
# NOTE(review): the message below says "file1" although f3 is being checked.
2156 cur_size=$(ls -il $DIR/$tdir/f3 | awk '{ print $6 }')
2157 [ "$cur_size" == "$saved_size2" ] ||
2158 error "(9) Expect file1 size $saved_size2, but got $cur_size"
2160 run_test 18a "Find out orphan OST-object and repair it (1)"
# Test 18b: the MDT-object itself is lost. fail_loc 0x1616 is set on the
# MDS(es) while files are removed. A "--dryrun" layout LFSCK must only count
# the orphans (inconsistent_orphan) and leave the lost+found subdirectories
# empty; the following real run must report repaired_orphan, after which the
# "<fid>-R-0" entries under .lustre/lost+found/MDTxxxx are moved back into the
# namespace and their sizes are compared with the saved ones.
2163 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
2164 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
2165 skip "MDS older than 2.5.55, LU-3336"
2168 echo "The target MDT-object is lost. The LFSCK should re-create the"
2169 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
2170 echo "can move it back to normal namespace manually."
2173 check_mount_and_prep
2174 $LFS mkdir -i 0 $DIR/$tdir/a1
2175 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
2176 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# "ls -il" prints the inode number first, so the file size is field 6.
2177 local saved_size1=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
# The FIDs are kept because they are used below to build the lost+found
# entry names ("<fid>-R-0").
2178 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2180 $LFS getstripe $DIR/$tdir/a1/f1
2182 if [ $MDSCOUNT -ge 2 ]; then
2183 $LFS mkdir -i 1 $DIR/$tdir/a2
2184 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
2185 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
# NOTE(review): no "local" declaration for fid2 is visible here, unlike
# fid1 and fid3.
2186 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
2188 $LFS getstripe $DIR/$tdir/a2/f2
2191 $LFS setstripe -E 1M -S 1M -o 0 -E -1 -S 1M $DIR/$tdir/f3 ||
2192 error "(0) Fail to create PFL $DIR/$tdir/f3"
2194 dd if=/dev/zero of=$DIR/$tdir/f3 bs=1M count=2
2196 local saved_size2=$(ls -il $DIR/$tdir/f3 | awk '{ print $6 }')
2197 local fid3=$($LFS path2fid $DIR/$tdir/f3)
2199 $LFS getstripe $DIR/$tdir/f3
2201 cancel_lru_locks osc
2203 echo "Inject failure, to simulate the case of missing the MDT-object"
2204 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
# The rm calls below run while fail_loc 0x1616 is active on the MDS.
2205 do_facet mds1 $LCTL set_param fail_loc=0x1616
2206 rm -f $DIR/$tdir/a1/f1
2208 if [ $MDSCOUNT -ge 2 ]; then
2209 do_facet mds2 $LCTL set_param fail_loc=0x1616
2210 rm -f $DIR/$tdir/a2/f2
# Clear the fault injection again.
2218 do_facet mds1 $LCTL set_param fail_loc=0
2219 if [ $MDSCOUNT -ge 2 ]; then
2220 do_facet mds2 $LCTL set_param fail_loc=0
2223 cancel_lru_locks mdc
2224 cancel_lru_locks osc
2226 # dryrun mode only checks for orphans, it does not repair them
2227 echo "Trigger layout LFSCK --dryrun to find out orphan OST-object"
2228 $START_LAYOUT --dryrun -o -r ||
2229 error "Fail to start layout LFSCK in dryrun mode"
2230 wait_all_targets_blocked layout completed 2
# The "param" line of lfsck_layout must list exactly the options given above.
2232 local PARAMS=$($SHOW_LAYOUT | awk '/^param/ { print $2 }')
2233 [ "$PARAMS" == "dryrun,all_targets,orphan" ] ||
2234 error "Expect 'dryrun,all_targets,orphan', got '$PARAMS'"
2236 local orphans=$(do_facet mds1 $LCTL get_param -n \
2237 mdd.$(facet_svc mds1).lfsck_layout |
2238 awk '/^inconsistent_orphan/ { print $2 }')
2239 [ $orphans -eq 3 ] ||
2240 error "Expect 3 found on mds1, but got: $orphans"
2242 # orphan parents should not be created
# "ls -A" prints nothing for an empty directory, so any output is an error.
2244 for subdir in $MOUNT/.lustre/lost+found/*; do
2245 [ ! "$(ls -A $subdir)" ] || error "$subdir not empty"
2248 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2249 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2251 for k in $(seq $MDSCOUNT); do
2252 # The LFSCK status query interval is 30 seconds. For the case
2253 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2254 # time to guarantee the status sync up.
2255 wait_update_facet mds${k} "$LCTL get_param -n \
2256 mdd.$(facet_svc mds${k}).lfsck_layout |
2257 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2258 error "(2) MDS${k} is not the expected 'completed'"
# Every OST is read once (no polling) and must already report "completed".
2261 for k in $(seq $OSTCOUNT); do
2262 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2263 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2264 awk '/^status/ { print $2 }')
2265 [ "$cur_status" == "completed" ] ||
2266 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# The repaired_orphan counter must be 3 on mds1 and, if present, 2 on mds2.
2269 local repaired=$(do_facet mds1 $LCTL get_param -n \
2270 mdd.$(facet_svc mds1).lfsck_layout |
2271 awk '/^repaired_orphan/ { print $2 }')
2272 [ $repaired -eq 3 ] ||
2273 error "(4.1) Expect 3 fixed on mds1, but got: $repaired"
2275 if [ $MDSCOUNT -ge 2 ]; then
2276 repaired=$(do_facet mds2 $LCTL get_param -n \
2277 mdd.$(facet_svc mds2).lfsck_layout |
2278 awk '/^repaired_orphan/ { print $2 }')
2279 [ $repaired -eq 2 ] ||
2280 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
2283 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
2284 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
2285 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
2287 if [ $MDSCOUNT -ge 2 ]; then
2288 local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0
2289 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
# NOTE(review): this failure message reuses the "(5)" tag of the f1 move above.
2292 mv $MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0 $DIR/$tdir/f3 ||
2293 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
# Print FID and layout of the restored files (output only).
2295 $LFS path2fid $DIR/$tdir/a1/f1
2296 $LFS getstripe $DIR/$tdir/a1/f1
2298 if [ $MDSCOUNT -ge 2 ]; then
2299 $LFS path2fid $DIR/$tdir/a2/f2
2300 $LFS getstripe $DIR/$tdir/a2/f2
2303 $LFS path2fid $DIR/$tdir/f3
2304 $LFS getstripe $DIR/$tdir/f3
2306 echo "The file size should be correct after layout LFSCK scanning"
2307 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
2308 [ "$cur_size" == "$saved_size1" ] ||
2309 error "(7) Expect file1 size $saved_size1, but got $cur_size"
# NOTE(review): f2 is compared against saved_size1 (the size recorded for f1).
2311 if [ $MDSCOUNT -ge 2 ]; then
2312 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
2313 [ "$cur_size" == "$saved_size1" ] ||
2314 error "(8) Expect file2 size $saved_size1, but got $cur_size"
# NOTE(review): the message below says "file1" although f3 is being checked.
2317 cur_size=$(ls -il $DIR/$tdir/f3 | awk '{ print $6 }')
2318 [ "$cur_size" == "$saved_size2" ] ||
2319 error "(9) Expect file1 size $saved_size2, but got $cur_size"
2321 run_test 18b "Find out orphan OST-object and repair it (2)"
# Test 18c: the MDT-object is lost and the OST-object carries no parent FID.
# fail_loc 0x1617 is set on all OST nodes while the files are written, then
# fail_loc 0x1616 is set on the MDS(es) while the files are removed. After a
# layout LFSCK with "-r -o" the test checks repaired_orphan on each MDS and
# looks for "*-N-*" entries under .lustre/lost+found/MDT0000 and MDT0001.
2324 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
2325 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
2326 skip "MDS older than 2.5.55, LU-3336"
2329 echo "The target MDT-object is lost, and the OST-object FID is missing."
2330 echo "The LFSCK should re-create the MDT-object with new FID under the "
2331 echo "directory .lustre/lost+found/MDTxxxx."
2334 check_mount_and_prep
2335 $LFS mkdir -i 0 $DIR/$tdir/a1
2336 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
2338 echo "Inject failure, to simulate the case of missing parent FID"
2339 #define OBD_FAIL_LFSCK_NOPFID 0x1617
# fail_loc 0x1617 stays active on the OST nodes for all dd writes below.
2340 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x1617
2342 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
2343 $LFS getstripe $DIR/$tdir/a1/f1
2345 if [ $MDSCOUNT -ge 2 ]; then
2346 $LFS mkdir -i 1 $DIR/$tdir/a2
2347 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a2
2348 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
2349 $LFS getstripe $DIR/$tdir/a2/f2
2352 $LFS setstripe -E 1M -S 1M -o 0 -E -1 -S 1M $DIR/$tdir/f3 ||
2353 error "(0) Fail to create PFL $DIR/$tdir/f3"
2355 dd if=/dev/zero of=$DIR/$tdir/f3 bs=1M count=2
2356 $LFS getstripe $DIR/$tdir/f3
2358 cancel_lru_locks osc
2359 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
2361 echo "Inject failure, to simulate the case of missing the MDT-object"
2362 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
# The rm calls below run while fail_loc 0x1616 is active on the MDS.
2363 do_facet mds1 $LCTL set_param fail_loc=0x1616
2364 rm -f $DIR/$tdir/a1/f1
2366 if [ $MDSCOUNT -ge 2 ]; then
2367 do_facet mds2 $LCTL set_param fail_loc=0x1616
2368 rm -f $DIR/$tdir/a2/f2
# Clear the fault injection again.
2376 do_facet mds1 $LCTL set_param fail_loc=0
2377 if [ $MDSCOUNT -ge 2 ]; then
2378 do_facet mds2 $LCTL set_param fail_loc=0
2381 cancel_lru_locks mdc
2382 cancel_lru_locks osc
2384 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2385 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2387 for k in $(seq $MDSCOUNT); do
2388 # The LFSCK status query interval is 30 seconds. For the case
2389 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2390 # time to guarantee the status sync up.
2391 wait_update_facet mds${k} "$LCTL get_param -n \
2392 mdd.$(facet_svc mds${k}).lfsck_layout |
2393 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2394 error "(2) MDS${k} is not the expected 'completed'"
# Every OST is read once (no polling) and must already report "completed".
2397 for k in $(seq $OSTCOUNT); do
2398 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2399 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2400 awk '/^status/ { print $2 }')
2401 [ "$cur_status" == "completed" ] ||
2402 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): $expected is compared below, but the lines that assign it
# (embedded numbers 2406-2410) are not visible in this extract.
2405 if [ $MDSCOUNT -ge 2 ]; then
2411 local repaired=$(do_facet mds1 $LCTL get_param -n \
2412 mdd.$(facet_svc mds1).lfsck_layout |
2413 awk '/^repaired_orphan/ { print $2 }')
2414 [ $repaired -eq $expected ] ||
2415 error "(4) Expect $expected fixed on mds1, but got: $repaired"
# mds2 is expected to have repaired no orphan at all.
2417 if [ $MDSCOUNT -ge 2 ]; then
2418 repaired=$(do_facet mds2 $LCTL get_param -n \
2419 mdd.$(facet_svc mds2).lfsck_layout |
2420 awk '/^repaired_orphan/ { print $2 }')
2421 [ $repaired -eq 0 ] ||
2422 error "(5) Expect 0 fixed on mds2, but got: $repaired"
2425 ls -ail $MOUNT/.lustre/lost+found/
2427 echo "There should NOT be some stub under .lustre/lost+found/MDT0001/"
# NOTE(review): the -name pattern is unquoted, so the shell may expand it
# against the current directory before find sees it. The condition guarding
# error "(6)" (embedded number 2430) is not visible in this extract.
2428 if [ -d $MOUNT/.lustre/lost+found/MDT0001 ]; then
2429 cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name *-N-*)
2431 error "(6) .lustre/lost+found/MDT0001/ should be empty"
2434 echo "There should be some stub under .lustre/lost+found/MDT0000/"
2435 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2436 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
# At least one "*-N-*" entry must exist under MDT0000 (same unquoted pattern
# as above).
2438 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-N-*)
2439 [ ! -z "$cname" ] ||
2440 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
2442 run_test 18c "Find out orphan OST-object and repair it (3)"
# Test 18d: the layout EA of an MDT-object is corrupted while the correct
# OST-object survives as an orphan. fail_loc 0x1618 is set on $SINGLEMDS
# while f2/f4 are chown'ed and f1/f3 are removed. After layout LFSCK with
# "-r -o -c -d" the test expects repaired_orphan == 2, repaired_dangling == 0
# and the sizes of f2/f4 to equal the values saved before the damage.
2445 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
2446 skip "MDS older than 2.5.55, LU-3336"
2449 echo "The target MDT-object layout EA is corrupted, but the right"
2450 echo "OST-object is still alive as orphan. The layout LFSCK will"
2451 echo "not create new OST-object to occupy such slot."
2454 check_mount_and_prep
2456 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
# f1/f3 hold "guard", f2/f4 hold "foo"; f4 is a PFL file.
2457 echo "guard" > $DIR/$tdir/a1/f1
2458 echo "foo" > $DIR/$tdir/a1/f2
2460 echo "guard" > $DIR/$tdir/a1/f3
2461 $LFS setstripe -E 1M -S 1M -o 0 -E -1 -S 1M $DIR/$tdir/a1/f4 ||
2462 error "(0) Fail to create PFL $DIR/$tdir/a1/f4"
2463 echo "foo" > $DIR/$tdir/a1/f4
# "ls -il" prints the inode number first, so the file size is field 6.
2465 local saved_size1=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2466 local saved_size2=$(ls -il $DIR/$tdir/a1/f4 | awk '{ print $6 }')
# Print FID and layout of all four files before the damage (output only).
2467 $LFS path2fid $DIR/$tdir/a1/f1
2468 $LFS getstripe $DIR/$tdir/a1/f1
2469 $LFS path2fid $DIR/$tdir/a1/f2
2470 $LFS getstripe $DIR/$tdir/a1/f2
2471 $LFS path2fid $DIR/$tdir/a1/f3
2472 $LFS getstripe $DIR/$tdir/a1/f3
2473 $LFS path2fid $DIR/$tdir/a1/f4
2474 $LFS getstripe $DIR/$tdir/a1/f4
2475 cancel_lru_locks osc
2477 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2478 echo "to reference the same OST-object (which is f1's OST-obejct)."
2479 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2480 echo "dangling reference case, but f2's old OST-object is there."
2482 echo "The failure also makes $DIR/$tdir/a1/f3 and $DIR/$tdir/a1/f4"
2483 echo "to reference the same OST-object (which is f3's OST-obejct)."
2484 echo "Then drop $DIR/$tdir/a1/f3 and its OST-object, so f4 becomes"
2485 echo "dangling reference case, but f4's old OST-object is there."
2488 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
# The chown and rm calls below run while fail_loc 0x1618 is active.
2489 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
2490 chown 1.1 $DIR/$tdir/a1/f2
2491 chown 1.1 $DIR/$tdir/a1/f4
2492 rm -f $DIR/$tdir/a1/f1
2493 rm -f $DIR/$tdir/a1/f3
2496 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2498 echo "stopall to cleanup object cache"
2501 setupall > /dev/null
2503 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2504 $START_LAYOUT -r -o -c -d || error "(2) Fail to start LFSCK for layout!"
2506 for k in $(seq $MDSCOUNT); do
2507 # The LFSCK status query interval is 30 seconds. For the case
2508 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2509 # time to guarantee the status sync up.
2510 wait_update_facet mds${k} "$LCTL get_param -n \
2511 mdd.$(facet_svc mds${k}).lfsck_layout |
2512 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2513 error "(3) MDS${k} is not the expected 'completed'"
# Every OST is read once (no polling) and must already report "completed".
2516 for k in $(seq $OSTCOUNT); do
2517 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2518 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2519 awk '/^status/ { print $2 }')
2520 [ "$cur_status" == "completed" ] ||
2521 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
# Two orphans must have been repaired ...
2524 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2525 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2526 awk '/^repaired_orphan/ { print $2 }')
2527 [ $repaired -eq 2 ] ||
2528 error "(5) Expect 2 orphans have been fixed, but got: $repaired"
# ... and no dangling reference.
2530 repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2531 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2532 awk '/^repaired_dangling/ { print $2 }')
2533 [ $repaired -eq 0 ] ||
2534 error "(6) Expect 0 dangling has been fixed, but got: $repaired"
2536 echo "The file size should be correct after layout LFSCK scanning"
2537 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2538 [ "$cur_size" == "$saved_size1" ] ||
2539 error "(7) Expect file2 size $saved_size1, but got $cur_size"
2541 cur_size=$(ls -il $DIR/$tdir/a1/f4 | awk '{ print $6 }')
2542 [ "$cur_size" == "$saved_size2" ] ||
2543 error "(8) Expect file4 size $saved_size2, but got $cur_size"
2545 echo "The LFSCK should find back the original data."
# The file contents are only printed here, not compared.
2546 cat $DIR/$tdir/a1/f2
2547 $LFS path2fid $DIR/$tdir/a1/f2
2548 $LFS getstripe $DIR/$tdir/a1/f2
2549 cat $DIR/$tdir/a1/f4
2550 $LFS path2fid $DIR/$tdir/a1/f4
2551 $LFS getstripe $DIR/$tdir/a1/f4
2553 run_test 18d "Find out orphan OST-object and repair it (4)"
# Test 18e: same damage as test 18d (fail_loc 0x1618 while f2/f4 are chown'ed
# and f1/f3 removed), but the LFSCK is slowed down with fail_loc 0x1602
# (fail_val=10) so that, once mds1 reports "scanning-phase2", new data can be
# appended to f2/f4. After completion the test expects repaired_orphan == 2
# and exactly two "*-C-*" entries under .lustre/lost+found/MDT0000 whose sizes
# match one of the sizes saved before the damage.
2556 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
2557 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
2558 skip "MDS older than 2.5.55, LU-3336"
2561 echo "The target MDT-object layout EA slot is occpuied by some new"
2562 echo "created OST-object when repair dangling reference case. Such"
2563 echo "conflict OST-object has been modified by others. To keep the"
2564 echo "new data, the LFSCK will create a new file to refernece this"
2565 echo "old orphan OST-object."
2568 check_mount_and_prep
2570 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
# f1/f3 hold "guard", f2/f4 hold "foo"; f4 is a PFL file.
2571 echo "guard" > $DIR/$tdir/a1/f1
2572 echo "foo" > $DIR/$tdir/a1/f2
2574 echo "guard" > $DIR/$tdir/a1/f3
2575 $LFS setstripe -E 1M -S 1M -o 0 -E -1 -S 1M $DIR/$tdir/a1/f4 ||
2576 error "(0) Fail to create PFL $DIR/$tdir/a1/f4"
2577 echo "foo" > $DIR/$tdir/a1/f4
# "ls -il" prints the inode number first, so the file size is field 6.
2579 local saved_size1=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2580 local saved_size2=$(ls -il $DIR/$tdir/a1/f4 | awk '{ print $6 }')
# Print FID and layout of all four files before the damage (output only).
2582 $LFS path2fid $DIR/$tdir/a1/f1
2583 $LFS getstripe $DIR/$tdir/a1/f1
2584 $LFS path2fid $DIR/$tdir/a1/f2
2585 $LFS getstripe $DIR/$tdir/a1/f2
2586 $LFS path2fid $DIR/$tdir/a1/f3
2587 $LFS getstripe $DIR/$tdir/a1/f3
2588 $LFS path2fid $DIR/$tdir/a1/f4
2589 $LFS getstripe $DIR/$tdir/a1/f4
2590 cancel_lru_locks osc
2592 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2593 echo "to reference the same OST-object (which is f1's OST-obejct)."
2594 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2595 echo "dangling reference case, but f2's old OST-object is there."
2597 echo "Also the failure makes $DIR/$tdir/a1/f3 and $DIR/$tdir/a1/f4"
2598 echo "to reference the same OST-object (which is f3's OST-obejct)."
2599 echo "Then drop $DIR/$tdir/a1/f3 and its OST-object, so f4 becomes"
2600 echo "dangling reference case, but f4's old OST-object is there."
2603 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
# The chown and rm calls below run while fail_loc 0x1618 is active.
2604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
2605 chown 1.1 $DIR/$tdir/a1/f2
2606 chown 1.1 $DIR/$tdir/a1/f4
2607 rm -f $DIR/$tdir/a1/f1
2608 rm -f $DIR/$tdir/a1/f3
2611 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2613 echo "stopall to cleanup object cache"
2616 setupall > /dev/null
2618 #define OBD_FAIL_LFSCK_DELAY3 0x1602
# The delay injection stays active until it is cleared after the writes below.
2619 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
2621 start_full_debug_logging
2623 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2624 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
# Wait for mds1 to reach the second scanning phase before writing to f2/f4.
2626 wait_update_facet mds1 "$LCTL get_param -n \
2627 mdd.$(facet_svc mds1).lfsck_layout |
2628 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2629 error "(3) MDS1 is not the expected 'scanning-phase2'"
2631 # to guarantee all updates are synced.
2635 echo "Write new data to f2/f4 to modify the new created OST-object."
2636 echo "dummy" >> $DIR/$tdir/a1/f2 || error "write a1/f2 failed"
2637 echo "dummy" >> $DIR/$tdir/a1/f4 || error "write a1/f4 failed"
2639 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
2641 for k in $(seq $MDSCOUNT); do
2642 # The LFSCK status query interval is 30 seconds. For the case
2643 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2644 # time to guarantee the status sync up.
2645 wait_update_facet mds${k} "$LCTL get_param -n \
2646 mdd.$(facet_svc mds${k}).lfsck_layout |
2647 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2648 error "(4) MDS${k} is not the expected 'completed'"
# Every OST is read once (no polling) and must already report "completed".
2651 for k in $(seq $OSTCOUNT); do
2652 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2653 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2654 awk '/^status/ { print $2 }')
2655 [ "$cur_status" == "completed" ] ||
2656 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
2659 stop_full_debug_logging
2661 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2662 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2663 awk '/^repaired_orphan/ { print $2 }')
2664 [ $repaired -eq 2 ] ||
2665 error "(6) Expect 2 orphans have been fixed, but got: $repaired"
2667 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2668 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2669 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
# Count the "*-C-*" entries; on a mismatch they are listed before failing.
2671 local count=$(ls -l $MOUNT/.lustre/lost+found/MDT0000/*-C-* | wc -l)
2672 if [ $count -ne 2 ]; then
2673 ls -l $MOUNT/.lustre/lost+found/MDT0000/*-C-*
2674 error "(8) Expect 2 stubs under lost+found, but got $count"
2677 echo "The stub file should keep the original f2 or f4 data"
# First stub: fail only if its size matches neither of the saved sizes.
# NOTE(review): the find -name pattern is unquoted and subject to shell
# filename expansion in the current directory.
2678 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-C-* | head -n 1)
2679 local cur_size=$(ls -il $cname | awk '{ print $6 }')
2680 [ "$cur_size" != "$saved_size1" -a "$cur_size" != "$saved_size2" ] &&
2681 error "(9) Got unexpected $cur_size"
2684 $LFS path2fid $cname
2685 $LFS getstripe $cname
# Last stub: same size check as for the first one.
2687 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-C-* | tail -n 1)
2688 cur_size=$(ls -il $cname | awk '{ print $6 }')
2689 [ "$cur_size" != "$saved_size1" -a "$cur_size" != "$saved_size2" ] &&
2690 error "(10) Got unexpected $cur_size"
2693 $LFS path2fid $cname
2694 $LFS getstripe $cname
2696 echo "The f2/f4 should contains new data."
# The file contents are only printed here, not compared.
2697 cat $DIR/$tdir/a1/f2
2698 $LFS path2fid $DIR/$tdir/a1/f2
2699 $LFS getstripe $DIR/$tdir/a1/f2
2700 cat $DIR/$tdir/a1/f4
2701 $LFS path2fid $DIR/$tdir/a1/f4
2702 $LFS getstripe $DIR/$tdir/a1/f4
2704 run_test 18e "Find out orphan OST-object and repair it (5)"
# Test 18f: needs at least two OSTs. MDT-objects are lost (fail_loc 0x1616
# while the files are removed), then fail_loc 0x161c with fail_val=0 is set on
# mds1 before layout LFSCK is started. The first run is expected to end as
# "partial" on every MDS and on ost1 but "completed" on ost2, with
# repaired_orphan == 1 per MDS. A second run without the failure must end as
# "completed" everywhere with repaired_orphan == 2 per MDS.
2707 [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return
2710 echo "The target MDT-object is lost. The LFSCK should re-create the"
2711 echo "MDT-object under .lustre/lost+found/MDTxxxx. If some OST fail"
2712 echo "to verify some OST-object(s) during the first stage-scanning,"
2713 echo "the LFSCK should skip orphan OST-objects for such OST. Others"
2714 echo "should not be affected."
2717 check_mount_and_prep
# a1: single-stripe files (guard, f1); a2: a two-stripe file (f2).
2718 $LFS mkdir -i 0 $DIR/$tdir/a1
2719 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
2720 dd if=/dev/zero of=$DIR/$tdir/a1/guard bs=1M count=2
2721 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
2722 $LFS mkdir -i 0 $DIR/$tdir/a2
2723 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a2
2724 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
2725 $LFS getstripe $DIR/$tdir/a1/f1
2726 $LFS getstripe $DIR/$tdir/a2/f2
# With two or more MDTs the same file set is created under a3/a4 using
# "lfs mkdir -i 1".
2728 if [ $MDSCOUNT -ge 2 ]; then
2729 $LFS mkdir -i 1 $DIR/$tdir/a3
2730 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a3
2731 dd if=/dev/zero of=$DIR/$tdir/a3/guard bs=1M count=2
2732 dd if=/dev/zero of=$DIR/$tdir/a3/f3 bs=1M count=2
2733 $LFS mkdir -i 1 $DIR/$tdir/a4
2734 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a4
2735 dd if=/dev/zero of=$DIR/$tdir/a4/f4 bs=1M count=2
2736 $LFS getstripe $DIR/$tdir/a3/f3
2737 $LFS getstripe $DIR/$tdir/a4/f4
2740 cancel_lru_locks osc
2742 echo "Inject failure, to simulate the case of missing the MDT-object"
2743 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
# The rm calls below run while fail_loc 0x1616 is active on the MDS.
2744 do_facet mds1 $LCTL set_param fail_loc=0x1616
2745 rm -f $DIR/$tdir/a1/f1
2746 rm -f $DIR/$tdir/a2/f2
2748 if [ $MDSCOUNT -ge 2 ]; then
2749 do_facet mds2 $LCTL set_param fail_loc=0x1616
2750 rm -f $DIR/$tdir/a3/f3
2751 rm -f $DIR/$tdir/a4/f4
# Clear the fault injection again.
2757 do_facet mds1 $LCTL set_param fail_loc=0
2758 if [ $MDSCOUNT -ge 2 ]; then
2759 do_facet mds2 $LCTL set_param fail_loc=0
2762 cancel_lru_locks mdc
2763 cancel_lru_locks osc
2765 echo "Inject failure, to simulate the OST0 fail to handle"
2766 echo "MDT0 LFSCK request during the first-stage scanning."
2767 #define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c
2768 do_facet mds1 $LCTL set_param fail_loc=0x161c fail_val=0
2770 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2771 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2773 for k in $(seq $MDSCOUNT); do
2774 # The LFSCK status query interval is 30 seconds. For the case
2775 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2776 # time to guarantee the status sync up.
2777 wait_update_facet mds${k} "$LCTL get_param -n \
2778 mdd.$(facet_svc mds${k}).lfsck_layout |
2779 awk '/^status/ { print \\\$2 }'" "partial" $LTIME ||
2780 error "(2) MDS${k} is not the expected 'partial'"
# ost1 is expected to end as "partial", ost2 as "completed".
2783 wait_update_facet ost1 "$LCTL get_param -n \
2784 obdfilter.$(facet_svc ost1).lfsck_layout |
2785 awk '/^status/ { print \\\$2 }'" "partial" $LTIME || {
2786 error "(3) OST1 is not the expected 'partial'"
2789 wait_update_facet ost2 "$LCTL get_param -n \
2790 obdfilter.$(facet_svc ost2).lfsck_layout |
2791 awk '/^status/ { print \\\$2 }'" "completed" $LTIME || {
2792 error "(4) OST2 is not the expected 'completed'"
2795 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
# After the partial run one orphan must have been repaired per MDS.
2797 local repaired=$(do_facet mds1 $LCTL get_param -n \
2798 mdd.$(facet_svc mds1).lfsck_layout |
2799 awk '/^repaired_orphan/ { print $2 }')
2800 [ $repaired -eq 1 ] ||
2801 error "(5) Expect 1 fixed on mds{1}, but got: $repaired"
2803 if [ $MDSCOUNT -ge 2 ]; then
2804 repaired=$(do_facet mds2 $LCTL get_param -n \
2805 mdd.$(facet_svc mds2).lfsck_layout |
2806 awk '/^repaired_orphan/ { print $2 }')
2807 [ $repaired -eq 1 ] ||
2808 error "(6) Expect 1 fixed on mds{2}, but got: $repaired"
2811 echo "Trigger layout LFSCK on all devices again to cleanup"
2812 $START_LAYOUT -r -o || error "(7) Fail to start LFSCK for layout!"
2814 for k in $(seq $MDSCOUNT); do
2815 # The LFSCK status query interval is 30 seconds. For the case
2816 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2817 # time to guarantee the status sync up.
2818 wait_update_facet mds${k} "$LCTL get_param -n \
2819 mdd.$(facet_svc mds${k}).lfsck_layout |
2820 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2821 error "(8) MDS${k} is not the expected 'completed'"
# NOTE(review): no "local" declaration for cur_status is visible in this block.
2824 for k in $(seq $OSTCOUNT); do
2825 cur_status=$(do_facet ost${k} $LCTL get_param -n \
2826 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2827 awk '/^status/ { print $2 }')
2828 [ "$cur_status" == "completed" ] ||
2829 error "(9) OST${k} Expect 'completed', but got '$cur_status'"
# After the second run two orphans must have been repaired per MDS.
# NOTE(review): "repaired" is declared with "local" a second time here.
2833 local repaired=$(do_facet mds1 $LCTL get_param -n \
2834 mdd.$(facet_svc mds1).lfsck_layout |
2835 awk '/^repaired_orphan/ { print $2 }')
2836 [ $repaired -eq 2 ] ||
2837 error "(10) Expect 2 fixed on mds{1}, but got: $repaired"
2839 if [ $MDSCOUNT -ge 2 ]; then
2840 repaired=$(do_facet mds2 $LCTL get_param -n \
2841 mdd.$(facet_svc mds2).lfsck_layout |
2842 awk '/^repaired_orphan/ { print $2 }')
2843 [ $repaired -eq 2 ] ||
2844 error "(11) Expect 2 fixed on mds{2}, but got: $repaired"
2847 run_test 18f "Skip the failed OST(s) when handle orphan OST-objects"
# Test 18g: the MDT-object is lost while its OI mapping is kept (fail_loc
# 0x162e on mds1 while the file is removed). The file is created with
# "setstripe -c -1" and written with count=$OSTCOUNT blocks of 1M. After
# layout LFSCK with "-r -o" the test expects repaired_orphan == $OSTCOUNT on
# mds1 and moves "<fid>-R-0" from .lustre/lost+found/MDT0000 back into place.
2850 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
2853 echo "The target MDT-object is lost, but related OI mapping is there"
2854 echo "The LFSCK should recreate the lost MDT-object without affected"
2855 echo "by the stale OI mapping."
2858 check_mount_and_prep
2859 $LFS mkdir -i 0 $DIR/$tdir/a1
2860 $LFS setstripe -c -1 -i 0 -S 1M $DIR/$tdir/a1
2861 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=$OSTCOUNT
# The FID is kept because it is used below to build the lost+found entry name.
2862 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2864 $LFS getstripe $DIR/$tdir/a1/f1
2865 cancel_lru_locks osc
2867 echo "Inject failure to simulate lost MDT-object but keep OI mapping"
2868 #define OBD_FAIL_LFSCK_LOST_MDTOBJ2 0x162e
# The rm below runs while fail_loc 0x162e is active on mds1.
2869 do_facet mds1 $LCTL set_param fail_loc=0x162e
2870 rm -f $DIR/$tdir/a1/f1
2872 do_facet mds1 $LCTL set_param fail_loc=0
2873 cancel_lru_locks mdc
2874 cancel_lru_locks osc
2876 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2877 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2879 for k in $(seq $MDSCOUNT); do
2880 # The LFSCK status query interval is 30 seconds. For the case
2881 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2882 # time to guarantee the status sync up.
2883 wait_update_facet mds${k} "$LCTL get_param -n \
2884 mdd.$(facet_svc mds${k}).lfsck_layout |
2885 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2886 error "(2) MDS${k} is not the expected 'completed'"
# Every OST is read once (no polling) and must already report "completed".
2889 for k in $(seq $OSTCOUNT); do
2890 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2891 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2892 awk '/^status/ { print $2 }')
2893 [ "$cur_status" == "completed" ] ||
2894 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# The number of repaired orphans must equal the number of OSTs.
2897 local repaired=$(do_facet mds1 $LCTL get_param -n \
2898 mdd.$(facet_svc mds1).lfsck_layout |
2899 awk '/^repaired_orphan/ { print $2 }')
2900 [ $repaired -eq $OSTCOUNT ] ||
2901 error "(4) Expect $OSTCOUNT fixed, but got: $repaired"
2903 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
2904 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
2905 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
# Print FID and layout of the restored file (output only).
2907 $LFS path2fid $DIR/$tdir/a1/f1
2908 $LFS getstripe $DIR/$tdir/a1/f1
2910 run_test 18g "Find out orphan OST-object and repair it (7)"
# Test 18h: a PFL file gets a bad extent range (fail_loc 0x162f on $SINGLEMDS
# while the file is chown'ed). A write to the damaged file must fail; after
# layout LFSCK with "-r -o" the test expects repaired_orphan == 2, the file
# content to be identical to the "guard" copy taken beforehand, and a write to
# succeed again.
2914 echo "The PFL extent crashed. During the first cycle LFSCK scanning,"
2915 echo "the layout LFSCK will keep the bad PFL file(s) there without"
2916 echo "scanning its OST-object(s). Then in the second stage scanning,"
2917 echo "the OST will return related OST-object(s) to the MDT as orphan."
2918 echo "And then the LFSCK on the MDT can rebuild the PFL extent with"
2919 echo "the 'orphan(s)' stripe information."
2922 check_mount_and_prep
2924 $LFS setstripe -E 2M -S 1M -c 1 -E -1 $DIR/$tdir/f0 ||
2925 error "(0) Fail to create PFL $DIR/$tdir/f0"
# Write test-framework.sh at the start of f0 and again at offset 2M
# (bs=1M seek=2).
2927 cat $LUSTRE/tests/test-framework.sh > $DIR/$tdir/f0 ||
2928 error "(1.1) Fail to write $DIR/$tdir/f0"
2930 dd if=$LUSTRE/tests/test-framework.sh of=$DIR/$tdir/f0 bs=1M seek=2 ||
2931 error "(1.2) Fail to write $DIR/$tdir/f0"
# Reference copy used for the data comparison after the repair.
2933 cp $DIR/$tdir/f0 $DIR/$tdir/guard
2935 echo "Inject failure stub to simulate bad PFL extent range"
2936 #define OBD_FAIL_LFSCK_BAD_PFL_RANGE 0x162f
# The chown below runs while fail_loc 0x162f is active.
2937 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162f
2939 chown 1.1 $DIR/$tdir/f0
2941 cancel_lru_locks mdc
2942 cancel_lru_locks osc
2943 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# A successful dd here means the layout was not damaged as intended.
2945 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1 &&
2946 error "(2) Write to bad PFL file should fail"
2948 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
2949 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
2951 for k in $(seq $MDSCOUNT); do
2952 # The LFSCK status query interval is 30 seconds. For the case
2953 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2954 # time to guarantee the status sync up.
2955 wait_update_facet mds${k} "$LCTL get_param -n \
2956 mdd.$(facet_svc mds${k}).lfsck_layout |
2957 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2958 error "(4.1) MDS${k} is not the expected 'completed'"
# NOTE(review): no "local" declaration for cur_status is visible in this block.
2961 for k in $(seq $OSTCOUNT); do
2962 cur_status=$(do_facet ost${k} $LCTL get_param -n \
2963 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2964 awk '/^status/ { print $2 }')
2965 [ "$cur_status" == "completed" ] ||
2966 error "(4.2) OST${k} Expect 'completed', but got '$cur_status'"
2970 local repaired=$($SHOW_LAYOUT |
2971 awk '/^repaired_orphan/ { print $2 }')
2972 [ $repaired -eq 2 ] ||
2973 error "(5) Fail to repair crashed PFL range: $repaired"
2975 echo "Data in $DIR/$tdir/f0 should not be broken"
2976 diff $DIR/$tdir/f0 $DIR/$tdir/guard ||
2977 error "(6) Data in $DIR/$tdir/f0 is broken"
2979 echo "Write should succeed after LFSCK repairing the bad PFL range"
2980 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1 ||
2981 error "(7) Write should succeed after LFSCK"
2983 run_test 18h "LFSCK can repair crashed PFL extent range"
# Top-level statement run on the local node; its output is discarded.
# NOTE(review): presumably removes "cache" from the debug mask -- confirm.
2985 $LCTL set_param debug=-cache > /dev/null
# Body of the test registered below as 19a: with parent-FID verification
# enabled on OST0000 and a local fail_loc set, reads must be refused.
2988 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
2989 skip "MDS older than 2.5.55, LU-3951"
2991 check_mount_and_prep
2992 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Keep verification off on the OST nodes while the files are created.
2994 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param -n \
2995 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 0
# a0 uses the directory default layout, a1 is a PFL file, a2 is a bystander.
2997 echo "foo1" > $DIR/$tdir/a0
2998 $LFS setstripe -E 512K -S 512K -o 0 -E -1 -S 1M $DIR/$tdir/a1 ||
2999 error "(0) Fail to create PFL $DIR/$tdir/a1"
3000 echo "foo2" > $DIR/$tdir/a1
3001 echo "guard" > $DIR/$tdir/a2
3002 cancel_lru_locks osc
3004 echo "Inject failure, then client will offer wrong parent FID when read"
3005 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param -n \
3006 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
# This fail_loc is set locally (no do_facet/do_nodes), i.e. on the client.
3008 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
3009 $LCTL set_param fail_loc=0x1619
# Negative checks: a successful read is the failure case.
3011 echo "Read RPC with wrong parent FID should be denied"
3012 cat $DIR/$tdir/a0 && error "(3.1) Read a0 should be denied!"
3013 cat $DIR/$tdir/a1 && error "(3.2) Read a1 should be denied!"
3014 $LCTL set_param fail_loc=0
3016 run_test 19a "OST-object inconsistency self detect"
# Body of the test registered below as 19b: create OST-objects under a
# fail_loc that gives them a wrong parent back-pointer, then check that
# reads with verification enabled repair the parent FID on the OST.
3019 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
3020 skip "MDS older than 2.5.55, LU-3951"
3022 check_mount_and_prep
3023 $LFS setstripe -c 1 -i 0 $DIR/$tdir
3025 echo "Inject failure stub to make the OST-object to back point to"
3026 echo "non-exist MDT-object"
3028 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param -n \
3029 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 0
# f0 (plain) and f1 (PFL) are both written while the stub is active.
3031 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
3032 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x1611
3033 echo "foo1" > $DIR/$tdir/f0
3034 $LFS setstripe -E 1M -S 1M -o 0 -E 4M -S 256K $DIR/$tdir/f1 ||
3035 error "(0) Fail to create PFL $DIR/$tdir/f1"
3036 echo "foo2" > $DIR/$tdir/f1
3037 cancel_lru_locks osc
3038 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
3040 do_facet ost1 $LCTL set_param -n \
3041 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 0
3042 echo "Nothing should be fixed since self detect and repair is disabled"
3043 local repaired=$(do_facet ost1 $LCTL get_param -n \
3044 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
3045 awk '/^repaired/ { print $2 }')
3046 [ $repaired -eq 0 ] ||
3047 error "(1) Expected 0 repaired, but got $repaired"
3049 echo "Read RPC with right parent FID should be accepted,"
3050 echo "and cause parent FID on OST to be fixed"
3052 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param -n \
3053 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
3055 cat $DIR/$tdir/f0 || error "(2.1) Read f0 should not be denied!"
3056 cat $DIR/$tdir/f1 || error "(2.2) Read f1 should not be denied!"
# Two files were read, so two repairs are required; the message states
# the same number the check enforces.
3058 repaired=$(do_facet ost1 $LCTL get_param -n \
3059 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
3060 awk '/^repaired/ { print $2 }')
3061 [ $repaired -eq 2 ] ||
3062 error "(3) Expected 2 repaired, but got $repaired"
3064 run_test 19b "OST-object inconsistency self repair"
# Values the 20a/20b checks below compare against "$LFS getstripe -L"
# output: a layout with a LOV EA hole versus one without.
3066 PATTERN_WITH_HOLE="40000001"
3067 PATTERN_WITHOUT_HOLE="raid0"
# Opening part of the test registered further down as 20a: skip
# conditions, description, and creation of the striped test files.
3070 [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return
3071 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
3072 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
3073 skip "MDS older than 2.5.55, LU-4887"
3076 echo "The target MDT-object and some of its OST-object are lost."
3077 echo "The LFSCK should find out the left OST-objects and re-create"
3078 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
3079 echo "with the partial OST-objects (LOV EA hole)."
3081 echo "New client can access the file with LOV EA hole via normal"
3082 echo "system tools or commands without crash the system."
3084 echo "For old client, even though it cannot access the file with"
3085 echo "LOV EA hole, it should not cause the system crash."
3088 check_mount_and_prep
3089 $LFS mkdir -i 0 $DIR/$tdir/a1
# Three stripes when more than two OSTs exist; the two-stripe setstripe
# below is for the remaining case.
3090 if [ $OSTCOUNT -gt 2 ]; then
3091 $LFS setstripe -c 3 -i 0 -S 1M $DIR/$tdir/a1
3094 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a1
3098 # 256 blocks on the stripe0.
3099 # 1 block on the stripe1 for 2 OSTs case.
3100 # 256 blocks on the stripe1 for other cases.
3101 # 1 block on the stripe2 if OSTs > 2
3102 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
3103 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
3104 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# The FIDs are used later to build the lost+found names "<fid>-R-0".
3106 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
3107 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
3108 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
3111 $LFS getstripe $DIR/$tdir/a1/f0
3113 $LFS getstripe $DIR/$tdir/a1/f1
3115 $LFS getstripe $DIR/$tdir/a1/f2
# A fourth file is only created when a third stripe exists.
3117 if [ $OSTCOUNT -gt 2 ]; then
3118 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
3119 fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
3121 $LFS getstripe $DIR/$tdir/a1/f3
3124 cancel_lru_locks osc
# Failure injection for test 20a: each file is unlinked under a different
# fail_loc/fail_val combination on mds1, then the client is unmounted and
# the fault settings are cleared.
3126 echo "Inject failure..."
3127 echo "To simulate f0 lost MDT-object"
3128 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
3129 do_facet mds1 $LCTL set_param fail_loc=0x1616
3130 rm -f $DIR/$tdir/a1/f0
3132 echo "To simulate f1 lost MDT-object and OST-object0"
3133 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
3134 do_facet mds1 $LCTL set_param fail_loc=0x161a
3135 rm -f $DIR/$tdir/a1/f1
# NOTE(review): fail_val appears to select which stripe index is lost
# (1 here, 2 below), judging by the echo text -- confirm.
3137 echo "To simulate f2 lost MDT-object and OST-object1"
3138 do_facet mds1 $LCTL set_param fail_val=1
3139 rm -f $DIR/$tdir/a1/f2
3141 if [ $OSTCOUNT -gt 2 ]; then
3142 echo "To simulate f3 lost MDT-object and OST-object2"
3143 do_facet mds1 $LCTL set_param fail_val=2
3144 rm -f $DIR/$tdir/a1/f3
3147 umount_client $MOUNT
3150 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
# LFSCK run for test 20a: start layout LFSCK, wait for completion on the
# MDTs, verify the OSTs, then check the repaired-orphan count on mds1.
3152 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
3153 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
3155 for k in $(seq $MDSCOUNT); do
3156 # The LFSCK status query interval is 30 seconds. For the case
3157 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
3158 # time to guarantee the status sync up.
3159 wait_update_facet mds${k} "$LCTL get_param -n \
3160 mdd.$(facet_svc mds${k}).lfsck_layout |
3161 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
3162 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
3165 for k in $(seq $OSTCOUNT); do
3166 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
3167 obdfilter.$(facet_svc ost${k}).lfsck_layout |
3168 awk '/^status/ { print $2 }')
3169 [ "$cur_status" == "completed" ] ||
3170 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected count is 9 with more than two OSTs; the check for 4 belongs
# to the other case.
3173 local repaired=$(do_facet mds1 $LCTL get_param -n \
3174 mdd.$(facet_svc mds1).lfsck_layout |
3175 awk '/^repaired_orphan/ { print $2 }')
3176 if [ $OSTCOUNT -gt 2 ]; then
3177 [ $repaired -eq 9 ] ||
3178 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
3180 [ $repaired -eq 4 ] ||
3181 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
# Test 20a, checks on the recovered f0: remount the client and verify
# that the file has no hole and supports normal operations.
3184 mount_client $MOUNT || error "(5.0) Fail to start client!"
3186 LOV_PATTERN_F_HOLE=0x40000000
3189 # ${fid0}-R-0 is the old f0
3191 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
3192 echo "Check $name, which is the old f0"
3194 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
3196 local pattern=$($LFS getstripe -L $name)
3197 [[ "$pattern" = "$PATTERN_WITHOUT_HOLE" ]] ||
3198 error "(5.2) NOT expect pattern flag hole, but got $pattern"
# Stripe count must match how the directory was striped (3 or 2).
3200 local stripes=$($LFS getstripe -c $name)
3201 if [ $OSTCOUNT -gt 2 ]; then
3202 [ $stripes -eq 3 ] ||
3203 error "(5.3.1) expect the stripe count is 3, but got $stripes"
3205 [ $stripes -eq 2 ] ||
3206 error "(5.3.2) expect the stripe count is 2, but got $stripes"
3209 local size=$(stat $name | awk '/Size:/ { print $2 }')
3210 [ $size -eq $((4096 * $bcount)) ] ||
3211 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
# Read, append, chown, touch and unlink must all succeed.
3213 cat $name > /dev/null || error "(5.5) cannot read $name"
3215 echo "dummy" >> $name || error "(5.6) cannot write $name"
3217 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
3219 touch $name || error "(5.8) cannot touch $name"
3221 rm -f $name || error "(5.9) cannot unlink $name"
# Test 20a, checks on the recovered f1 (stripe0 lost): the layout must
# carry the hole pattern, I/O on the hole must fail, the rest must work.
3224 # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2)
3226 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
3227 if [ $OSTCOUNT -gt 2 ]; then
3228 echo "Check $name, it contains the old f1's stripe1 and stripe2"
3230 echo "Check $name, it contains the old f1's stripe1"
3233 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
3235 pattern=$($LFS getstripe -L $name)
3236 [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
3237 error "(6.2) expect pattern flag hole, but got $pattern"
3239 stripes=$($LFS getstripe -c $name)
3240 if [ $OSTCOUNT -gt 2 ]; then
3241 [ $stripes -eq 3 ] ||
3242 error "(6.3.1) expect the stripe count is 3, but got $stripes"
3244 [ $stripes -eq 2 ] ||
3245 error "(6.3.2) expect the stripe count is 2, but got $stripes"
3248 size=$(stat $name | awk '/Size:/ { print $2 }')
3249 [ $size -eq $((4096 * $bcount)) ] ||
3250 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
3252 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# dd with conv=sync,noerror keeps going past read errors; the number of
# "Input/output error" lines is the number of unreadable 4096-byte blocks.
3254 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
3255 bs=4096 2>&1 | grep "Input/output error" | wc -l)
3258 [ $failures -eq 256 ] ||
3259 error "(6.6) expect 256 IO failures, but get $failures"
3261 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
3262 [ $size -eq $((4096 * $bcount)) ] ||
3263 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
# Block 0 is in the lost stripe; block 300 is in a surviving one.
3265 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
3266 error "(6.8) write to the LOV EA hole should fail"
3268 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
3269 error "(6.9) write to normal stripe should NOT fail"
3271 echo "foo" >> $name && error "(6.10) append write $name should fail"
3273 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
3275 touch $name || error "(6.12) cannot touch $name"
3277 rm -f $name || error "(6.13) cannot unlink $name"
# Test 20a, checks on the recovered f2 (stripe1 lost). The first group of
# checks (labels *.1 / 7.6-7.8.3) is for more than two OSTs; the second
# group (labels *.2) is for the two-OST case.
3280 # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2)
3282 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
3283 if [ $OSTCOUNT -gt 2 ]; then
3284 echo "Check $name, it contains the old f2's stripe0 and stripe2"
3286 echo "Check $name, it contains the old f2's stripe0"
3289 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
3291 pattern=$($LFS getstripe -L $name)
3292 [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
3293 error "(7.2) expect pattern flag hole, but got $pattern"
3295 stripes=$($LFS getstripe -c $name)
3296 size=$(stat $name | awk '/Size:/ { print $2 }')
3297 if [ $OSTCOUNT -gt 2 ]; then
3298 [ $stripes -eq 3 ] ||
3299 error "(7.3.1) expect the stripe count is 3, but got $stripes"
3301 [ $size -eq $((4096 * $bcount)) ] ||
3302 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
3304 cat $name > /dev/null &&
3305 error "(7.5.1) normal read $name should fail"
3307 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
3308 bs=4096 2>&1 | grep "Input/output error" | wc -l)
3310 [ $failures -eq 256 ] ||
3311 error "(7.6) expect 256 IO failures, but get $failures"
3313 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
3314 [ $size -eq $((4096 * $bcount)) ] ||
3315 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
# Here block 300 is in the lost stripe1 and block 0 is in stripe0.
3317 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
3318 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
3320 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
3321 error "(7.8.1) write to normal stripe should NOT fail"
3323 echo "foo" >> $name &&
3324 error "(7.8.3) append write $name should fail"
3326 chown $RUNAS_ID:$RUNAS_GID $name ||
3327 error "(7.9.1) cannot chown on $name"
3329 touch $name || error "(7.10.1) cannot touch $name"
# Two-OST case: only stripe0's 256 blocks contribute to the file size.
3331 [ $stripes -eq 2 ] ||
3332 error "(7.3.2) expect the stripe count is 2, but got $stripes"
3335 [ $size -eq $((4096 * (256 + 0))) ] ||
3336 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
3338 cat $name > /dev/null &&
3339 error "(7.5.2) normal read $name should fail"
3341 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
3342 bs=4096 2>&1 | grep "Input/output error" | wc -l)
3343 [ $failures -eq 256 ] ||
3344 error "(7.6.2) expect 256 IO failures, but get $failures"
3347 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
3348 [ $size -eq $((4096 * $bcount)) ] ||
3349 error "(7.7.2) expect the size $((4096 * $bcount)), got $size"
3351 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
3352 seek=256 && error "(7.8.2) write to the LOV EA hole should fail"
3354 chown $RUNAS_ID:$RUNAS_GID $name ||
3355 error "(7.9.2) cannot chown on $name"
3357 touch $name || error "(7.10.2) cannot touch $name"
3360 rm -f $name || error "(7.11) cannot unlink $name"
# Test 20a, checks on the recovered f3 (stripe2 lost). f3 only exists with
# more than two OSTs, so the test ends here otherwise.
3362 [ $OSTCOUNT -le 2 ] && return
3365 # ${fid3}-R-0 should contains the old f3's stripe0 and stripe1
3367 name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
3368 echo "Check $name, which contains the old f3's stripe0 and stripe1"
3370 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
3372 pattern=$($LFS getstripe -L $name)
3373 [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
3374 error "(8.2) expect pattern flag hole, but got $pattern"
3376 stripes=$($LFS getstripe -c $name)
3377 [ $stripes -eq 3 ] ||
3378 error "(8.3) expect the stripe count is 3, but got $stripes"
# Only the 256 + 256 blocks of stripe0 and stripe1 count towards the size.
3380 size=$(stat $name | awk '/Size:/ { print $2 }')
3382 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
3383 error "(8.4) expect the size $((4096 * 512)), but got $size"
3385 cat $name > /dev/null &&
3386 error "(8.5) normal read $name should fail"
3388 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
3389 bs=4096 2>&1 | grep "Input/output error" | wc -l)
3391 [ $failures -eq 256 ] ||
3392 error "(8.6) expect 256 IO failures, but get $failures"
3395 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
3396 [ $size -eq $((4096 * $bcount)) ] ||
3397 error "(8.7) expect the size $((4096 * $bcount)), but got $size"
# Block 512 is the first block of the lost stripe2.
3399 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
3400 seek=512 && error "(8.8) write to the LOV EA hole should fail"
3402 chown $RUNAS_ID:$RUNAS_GID $name ||
3403 error "(8.9) cannot chown on $name"
3405 touch $name || error "(8.10) cannot touch $name"
3407 rm -f $name || error "(8.11) cannot unlink $name"
3409 run_test 20a "Handle the orphan with dummy LOV EA slot properly"
# Opening part of the test registered further down as 20b (PFL variant of
# 20a): skip conditions, description, and creation of three PFL files.
3412 [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return
3413 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
3414 (( $MDS1_VERSION > $(version_code 2.5.55) )) ||
3415 skip "MDS older than 2.5.55, LU-4887"
3418 echo "The target MDT-object and some of its OST-object are lost."
3419 echo "The LFSCK should find out the left OST-objects and re-create"
3420 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
3421 echo "with the partial OST-objects (LOV EA hole)."
3423 echo "New client can access the file with LOV EA hole via normal"
3424 echo "system tools or commands without crash the system - PFL case."
3427 check_mount_and_prep
# Each file has two components ([0, 2M) and [2M, EOF)), two stripes each.
3429 $LFS setstripe -E 2M -S 1M -c 2 -E -1 -S 1M -c 2 $DIR/$tdir/f0 ||
3430 error "(0) Fail to create PFL file $DIR/$tdir/f0"
3431 $LFS setstripe -E 2M -S 1M -c 2 -E -1 -S 1M -c 2 $DIR/$tdir/f1 ||
3432 error "(1) Fail to create PFL file $DIR/$tdir/f1"
3433 $LFS setstripe -E 2M -S 1M -c 2 -E -1 -S 1M -c 2 $DIR/$tdir/f2 ||
3434 error "(2) Fail to create PFL file $DIR/$tdir/f2"
# 769 blocks of 4096 bytes: 3 MiB plus one block.
3436 local bcount=$((256 * 3 + 1))
3438 dd if=/dev/zero of=$DIR/$tdir/f0 bs=4096 count=$bcount
3439 dd if=/dev/zero of=$DIR/$tdir/f1 bs=4096 count=$bcount
3440 dd if=/dev/zero of=$DIR/$tdir/f2 bs=4096 count=$bcount
# The FIDs are used later to build the lost+found names "<fid>-R-0".
3442 local fid0=$($LFS path2fid $DIR/$tdir/f0)
3443 local fid1=$($LFS path2fid $DIR/$tdir/f1)
3444 local fid2=$($LFS path2fid $DIR/$tdir/f2)
3447 $LFS getstripe $DIR/$tdir/f0
3449 $LFS getstripe $DIR/$tdir/f1
3451 $LFS getstripe $DIR/$tdir/f2
3453 cancel_lru_locks mdc
3454 cancel_lru_locks osc
# Failure injection for test 20b: set the fail_loc/fail_val that simulates
# each kind of loss on $SINGLEMDS, then clear both settings.
3456 echo "Inject failure..."
3457 echo "To simulate f0 lost MDT-object"
3458 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
3459 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1616
3462 echo "To simulate the case of f1 lost MDT-object and "
3463 echo "the first OST-object in each PFL component"
3464 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
3465 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161a
3468 echo "To simulate the case of f2 lost MDT-object and "
3469 echo "the second OST-object in each PFL component"
3470 do_facet $SINGLEMDS $LCTL set_param fail_val=1
3475 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
# LFSCK run for test 20b: start layout LFSCK, wait for completion on the
# MDTs, verify the OSTs, then require 8 repaired orphans on mds1.
3477 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
3478 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
3480 for k in $(seq $MDSCOUNT); do
3481 # The LFSCK status query interval is 30 seconds. For the case
3482 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
3483 # time to guarantee the status sync up.
3484 wait_update_facet mds${k} "$LCTL get_param -n \
3485 mdd.$(facet_svc mds${k}).lfsck_layout |
3486 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
3487 error "(4) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
3490 for k in $(seq $OSTCOUNT); do
3491 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
3492 obdfilter.$(facet_svc ost${k}).lfsck_layout |
3493 awk '/^status/ { print $2 }')
3494 [ "$cur_status" == "completed" ] ||
3495 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
3498 local repaired=$(do_facet mds1 $LCTL get_param -n \
3499 mdd.$(facet_svc mds1).lfsck_layout |
3500 awk '/^repaired_orphan/ { print $2 }')
3501 [ $repaired -eq 8 ] ||
3502 error "(6) Expect 8 fixed on mds1, but got: $repaired"
# Test 20b, checks on the recovered f0: both components must be free of
# holes, keep their stripe counts and extents, and allow normal access.
3505 # ${fid0}-R-0 is the old f0
3507 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
3508 echo "Check $name, which is the old f0"
3510 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
# -I1 / -I2 select the first and second component respectively.
3512 local pattern=$($LFS getstripe -L -I1 $name)
3513 [[ "$pattern" = "$PATTERN_WITHOUT_HOLE" ]] ||
3514 error "(7.2.1) NOT expect pattern flag hole, but got $pattern"
3516 pattern=$($LFS getstripe -L -I2 $name)
3517 [[ "$pattern" = "$PATTERN_WITHOUT_HOLE" ]] ||
3518 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
3520 local stripes=$($LFS getstripe -c -I1 $name)
3521 [ $stripes -eq 2 ] ||
3522 error "(7.3.1) expect 2 stripes, but got $stripes"
3524 stripes=$($LFS getstripe -c -I2 $name)
3525 [ $stripes -eq 2 ] ||
3526 error "(7.3.2) expect 2 stripes, but got $stripes"
# Extents must be [0, 2097152) for COMP1 and [2097152, EOF) for COMP2.
3528 local e_start=$($LFS getstripe -I1 $name |
3529 awk '/lcme_extent.e_start:/ { print $2 }')
3530 [ $e_start -eq 0 ] ||
3531 error "(7.4.1) expect the COMP1 start at 0, got $e_start"
3533 local e_end=$($LFS getstripe -I1 $name |
3534 awk '/lcme_extent.e_end:/ { print $2 }')
3535 [ $e_end -eq 2097152 ] ||
3536 error "(7.4.2) expect the COMP1 end at 2097152, got $e_end"
3538 e_start=$($LFS getstripe -I2 $name |
3539 awk '/lcme_extent.e_start:/ { print $2 }')
3540 [ $e_start -eq 2097152 ] ||
3541 error "(7.5.1) expect the COMP2 start at 2097152, got $e_start"
3543 e_end=$($LFS getstripe -I2 $name |
3544 awk '/lcme_extent.e_end:/ { print $2 }')
3545 [ "$e_end" = "EOF" ] ||
3546 error "(7.5.2) expect the COMP2 end at (EOF), got $e_end"
3548 local size=$(stat $name | awk '/Size:/ { print $2 }')
3549 [ $size -eq $((4096 * $bcount)) ] ||
3550 error "(7.6) expect the size $((4096 * $bcount)), but got $size"
# Read, append, chown, touch and unlink must all succeed.
3552 cat $name > /dev/null || error "(7.7) cannot read $name"
3554 echo "dummy" >> $name || error "(7.8) cannot write $name"
3556 chown $RUNAS_ID:$RUNAS_GID $name || error "(7.9) cannot chown on $name"
3558 touch $name || error "(7.10) cannot touch $name"
3560 rm -f $name || error "(7.11) cannot unlink $name"
# Test 20b, checks on the recovered f1 (first OST-object of each component
# lost): both components must carry the hole pattern, keep their layout,
# fail I/O on the hole and still allow I/O on the surviving stripe.
3563 # ${fid1}-R-0 contains the old f1's second stripe in each COMP
3565 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
3566 echo "Check $name, it contains f1's second OST-object in each COMP"
3568 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
3570 pattern=$($LFS getstripe -L -I1 $name)
3571 [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
3572 error "(8.2.1) expect pattern flag hole, but got $pattern"
3574 pattern=$($LFS getstripe -L -I2 $name)
3575 [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
3576 error "(8.2.2) expect pattern flag hole, but got $pattern"
# Distinct labels (8.3.1 / 8.3.2) so a failure identifies the component.
3578 stripes=$($LFS getstripe -c -I1 $name)
3579 [ $stripes -eq 2 ] ||
3580 error "(8.3.1) expect 2 stripes, but got $stripes"
3582 stripes=$($LFS getstripe -c -I2 $name)
3583 [ $stripes -eq 2 ] ||
3584 error "(8.3.2) expect 2 stripes, but got $stripes"
3586 e_start=$($LFS getstripe -I1 $name |
3587 awk '/lcme_extent.e_start:/ { print $2 }')
3588 [ $e_start -eq 0 ] ||
3589 error "(8.4.1) expect the COMP1 start at 0, got $e_start"
3591 e_end=$($LFS getstripe -I1 $name |
3592 awk '/lcme_extent.e_end:/ { print $2 }')
3593 [ $e_end -eq 2097152 ] ||
3594 error "(8.4.2) expect the COMP1 end at 2097152, got $e_end"
3596 e_start=$($LFS getstripe -I2 $name |
3597 awk '/lcme_extent.e_start:/ { print $2 }')
3598 [ $e_start -eq 2097152 ] ||
3599 error "(8.5.1) expect the COMP2 start at 2097152, got $e_start"
3601 e_end=$($LFS getstripe -I2 $name |
3602 awk '/lcme_extent.e_end:/ { print $2 }')
3603 [ "$e_end" = "EOF" ] ||
3604 error "(8.5.2) expect the COMP2 end at (EOF), got $e_end"
3606 size=$(stat $name | awk '/Size:/ { print $2 }')
3607 [ $size -eq $((4096 * $bcount)) ] ||
3608 error "(8.6) expect the size $((4096 * $bcount)), but got $size"
3610 cat $name > /dev/null && error "(8.7) normal read $name should fail"
# dd with conv=sync,noerror keeps going past read errors; the number of
# "Input/output error" lines is the number of unreadable 4096-byte blocks.
3612 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
3613 bs=4096 2>&1 | grep "Input/output error" | wc -l)
3615 # The first stripe in each COMP was lost
3616 [ $failures -eq 512 ] ||
3617 error "(8.8) expect 512 IO failures, but get $failures"
3619 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
3620 [ $size -eq $((4096 * $bcount)) ] ||
3621 error "(8.9) expect the size $((4096 * $bcount)), but got $size"
# Block 0 is in the lost stripe; block 300 is in the surviving one.
3623 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
3624 error "(8.10) write to the LOV EA hole should fail"
3626 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
3627 error "(8.11) write to normal stripe should NOT fail"
3629 echo "foo" >> $name && error "(8.12) append write $name should fail"
3631 chown $RUNAS_ID:$RUNAS_GID $name || error "(8.13) cannot chown on $name"
3633 touch $name || error "(8.14) cannot touch $name"
3635 rm -f $name || error "(8.15) cannot unlink $name"
# Test 20b, checks on the recovered f2 (second OST-object of each
# component lost), followed by the registration of the test. Error labels
# are unique and the reported expectations match the enforced values.
3638 # ${fid2}-R-0 contains the old f2's first stripe in each COMP
3640 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
3641 echo "Check $name, it contains f2's first stripe in each COMP"
3643 $LFS getstripe -v $name || error "(9.1) cannot getstripe on $name"
3645 pattern=$($LFS getstripe -L -I1 $name)
3646 [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
3647 error "(9.2.1) expect pattern flag hole, but got $pattern"
3649 pattern=$($LFS getstripe -L -I2 $name)
3650 [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
3651 error "(9.2.2) expect pattern flag hole, but got $pattern"
3653 stripes=$($LFS getstripe -c -I1 $name)
3654 [ $stripes -eq 2 ] ||
3655 error "(9.3.1) expect 2 stripes, but got $stripes"
3657 stripes=$($LFS getstripe -c -I2 $name)
3658 [ $stripes -eq 2 ] ||
3659 error "(9.3.2) expect 2 stripes, but got $stripes"
3661 e_start=$($LFS getstripe -I1 $name |
3662 awk '/lcme_extent.e_start:/ { print $2 }')
3663 [ $e_start -eq 0 ] ||
3664 error "(9.4.1) expect the COMP1 start at 0, got $e_start"
3666 e_end=$($LFS getstripe -I1 $name |
3667 awk '/lcme_extent.e_end:/ { print $2 }')
3668 [ $e_end -eq 2097152 ] ||
3669 error "(9.4.2) expect the COMP1 end at 2097152, got $e_end"
3671 e_start=$($LFS getstripe -I2 $name |
3672 awk '/lcme_extent.e_start:/ { print $2 }')
3673 [ $e_start -eq 2097152 ] ||
3674 error "(9.5.1) expect the COMP2 start at 2097152, got $e_start"
3676 e_end=$($LFS getstripe -I2 $name |
3677 awk '/lcme_extent.e_end:/ { print $2 }')
3678 [ "$e_end" = "EOF" ] ||
3679 error "(9.5.2) expect the COMP2 end at (EOF), got $e_end"
3681 size=$(stat $name | awk '/Size:/ { print $2 }')
3682 # The second stripe in COMP was lost, so we do not know there
3683 # have ever been some data before. 'stat' will regard it as
3684 # no data on the lost stripe.
# NOTE(review): the check below still requires the full original size,
# which seems at odds with the comment above -- confirm which is intended.
3686 [ $size -eq $((4096 * $bcount)) ] ||
3687 error "(9.6) expect size $((4096 * $bcount)), but got $size"
3689 cat $name > /dev/null &&
3690 error "(9.7) normal read $name should fail"
3692 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
3693 bs=4096 2>&1 | grep "Input/output error" | wc -l)
3694 [ $failures -eq 512 ] ||
3695 error "(9.8) expect 512 IO failures, but get $failures"
3697 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
3698 # The second stripe in COMP was lost, so we do not know there
3699 # have ever been some data before. Since 'dd' skip failure,
3700 # it will regard the lost stripe contains data.
3702 [ $size -eq $((4096 * $bcount)) ] ||
3703 error "(9.9) expect the size $((4096 * $bcount)), but got $size"
# Block 300 is in the lost second stripe; block 0 is in the surviving one.
3705 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
3706 seek=300 && error "(9.10) write to the LOV EA hole should fail"
3708 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
3709 error "(9.11) write to normal stripe should NOT fail"
3711 echo "foo" >> $name &&
3712 error "(9.12) append write $name should fail"
3714 chown $RUNAS_ID:$RUNAS_GID $name ||
3715 error "(9.13) cannot chown on $name"
3717 touch $name || error "(9.14) cannot touch $name"
3719 rm -f $name || error "(9.15) cannot unlink $name"
3721 run_test 20b "Handle the orphan with dummy LOV EA slot properly - PFL case"
# Body of the test registered below as 21: start LFSCK without naming a
# type and check that both the namespace and layout components are running.
3724 (( $MDS1_VERSION > $(version_code 2.5.59) )) ||
3725 skip "MDS older than 2.5.59, LU-4887"
3727 check_mount_and_prep
3728 createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
# NOTE(review): "-s 1" presumably throttles the scan so that it is still
# in phase 1 when the status is sampled below -- confirm.
3730 echo "Start all LFSCK components by default (-s 1)"
3731 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -s 1 -r ||
3732 error "Fail to start LFSCK"
3734 echo "namespace LFSCK should be in 'scanning-phase1' status"
3735 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
3736 [ "$STATUS" == "scanning-phase1" ] ||
3737 error "Expect namespace 'scanning-phase1', but got '$STATUS'"
3739 echo "layout LFSCK should be in 'scanning-phase1' status"
3740 STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
3741 [ "$STATUS" == "scanning-phase1" ] ||
3742 error "Expect layout 'scanning-phase1', but got '$STATUS'"
3744 echo "Stop all LFSCK components by default"
3745 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 ||
3746 error "Fail to stop LFSCK"
3748 run_test 21 "run all LFSCK components by default"
# Body of the test registered below as 22a: create a directory whose ".."
# entry points at a parent that is then removed, and check that namespace
# LFSCK repairs exactly one unmatched pair.
3751 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
3752 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
3753 skip "MDS older than 2.6.50, LU-5511"
# The inner quotes are escaped so the message really prints "..".
3756 echo "The parent_A references the child directory via some name entry,"
3757 echo "but the child directory back references another parent_B via its"
3758 echo "\"..\" name entry. The parent_B does not exist. Then the namespace"
3759 echo "LFSCK will repair the child directory's \"..\" name entry."
3762 check_mount_and_prep
3764 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
3765 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
3767 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
3768 echo "The dummy's dotdot name entry references the guard."
3769 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
3770 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
3771 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
3772 error "(3) Fail to mkdir on MDT0"
3773 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Removing guard leaves dummy's ".." pointing at a non-existent parent.
3775 rmdir $DIR/$tdir/guard || error "(4) Fail to rmdir $DIR/$tdir/guard"
3777 echo "Trigger namespace LFSCK to repair unmatched pairs"
3778 $START_NAMESPACE -A -r ||
3779 error "(5) Fail to start LFSCK for namespace"
3781 wait_all_targets_blocked namespace completed 6
3783 local repaired=$($SHOW_NAMESPACE |
3784 awk '/^unmatched_pairs_repaired/ { print $2 }')
3785 [ $repaired -eq 1 ] ||
3786 error "(7) Fail to repair unmatched pairs: $repaired"
3788 echo "'ls' should success after namespace LFSCK repairing"
3789 ls -ail $DIR/$tdir/foo/dummy > /dev/null ||
3790 error "(8) ls should success."
3792 run_test 22a "LFSCK can repair unmatched pairs (1)"
# Body of the test registered below as 22b: like 22a, but the wrong parent
# still exists, so the linkEA is wrong too. fid2path must fail before the
# namespace LFSCK run and resolve to the real path afterwards.
3795 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
3796 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
3797 skip "MDS older than 2.6.50, LU-5511"
# The inner quotes are escaped so the message really prints "..".
3800 echo "The parent_A references the child directory via the name entry_B,"
3801 echo "but the child directory back references another parent_C via its"
3802 echo "\"..\" name entry. The parent_C exists, but there is no the name"
3803 echo "entry_B under the parent_C. Then the namespace LFSCK will repair"
3804 echo "the child directory's \"..\" name entry and its linkEA."
3807 check_mount_and_prep
3809 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
3810 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
3812 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
3813 echo "and bad linkEA. The dummy's dotdot name entry references the"
3814 echo "guard. The dummy's linkEA references n non-exist name entry."
3815 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
3816 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
3817 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
3818 error "(3) Fail to mkdir on MDT0"
3819 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
3821 local dummyfid=$($LFS path2fid $DIR/$tdir/foo/dummy)
3822 echo "fid2path should NOT work on the dummy's FID $dummyfid"
3823 local dummyname=$($LFS fid2path $DIR $dummyfid)
3824 [ "$dummyname" != "$DIR/$tdir/foo/dummy" ] ||
3825 error "(4) fid2path works unexpectedly."
3827 echo "Trigger namespace LFSCK to repair unmatched pairs"
3828 $START_NAMESPACE -A -r ||
3829 error "(5) Fail to start LFSCK for namespace"
3831 wait_all_targets_blocked namespace completed 6
3833 local repaired=$($SHOW_NAMESPACE |
3834 awk '/^unmatched_pairs_repaired/ { print $2 }')
3835 [ $repaired -eq 1 ] ||
3836 error "(7) Fail to repair unmatched pairs: $repaired"
# dummyname is already declared local above; only reassign it here.
3838 echo "fid2path should work on the dummy's FID $dummyfid after LFSCK"
3839 dummyname=$($LFS fid2path $DIR $dummyfid)
3840 [ "$dummyname" == "$DIR/$tdir/foo/dummy" ] ||
3841 error "(8) fid2path does not work"
3843 run_test 22b "LFSCK can repair unmatched pairs (2)"
# Body of the test registered below as 23a: leave a name entry whose
# MDT-object is gone, then run namespace LFSCK twice -- first without and
# then with the -C option -- and check when 'ls' on the entry starts to work.
3846 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
3847 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
3848 skip "MDS older than 2.6.50, LU-5512"
3851 echo "The name entry is there, but the MDT-object for such name "
3852 echo "entry does not exist. The namespace LFSCK should find out "
3853 echo "and repair the inconsistency as required."
3856 check_mount_and_prep
3858 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
3859 $LFS mkdir -i 1 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT1"
# The rmdir runs while the stub is active on mds2, leaving the entry behind.
3861 echo "Inject failure stub on MDT1 to simulate dangling name entry"
3862 #define OBD_FAIL_LFSCK_DANGLING2 0x1620
3863 do_facet mds2 $LCTL set_param fail_loc=0x1620
3864 rmdir $DIR/$tdir/d0/d1 || error "(3) Fail to rmdir d1"
3865 do_facet mds2 $LCTL set_param fail_loc=0
3867 echo "'ls' should fail because of dangling name entry"
3868 ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(4) ls should fail."
3870 echo "Trigger namespace LFSCK to find out dangling name entry"
3871 $START_NAMESPACE -A -r ||
3872 error "(5) Fail to start LFSCK for namespace"
3874 wait_all_targets_blocked namespace completed 6
3876 local repaired=$($SHOW_NAMESPACE |
3877 awk '/^dangling_repaired/ { print $2 }')
3878 [ $repaired -eq 1 ] ||
3879 error "(7) Fail to repair dangling name entry: $repaired"
3881 echo "'ls' should fail because not re-create MDT-object by default"
3882 ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(8) ls should fail."
# Second run adds -C; per the messages, this is what re-creates the object.
3884 echo "Trigger namespace LFSCK again to repair dangling name entry"
3885 $START_NAMESPACE -A -r -C ||
3886 error "(9) Fail to start LFSCK for namespace"
3888 wait_all_targets_blocked namespace completed 10
3890 repaired=$($SHOW_NAMESPACE |
3891 awk '/^dangling_repaired/ { print $2 }')
3892 [ $repaired -eq 1 ] ||
3893 error "(11) Fail to repair dangling name entry: $repaired"
3895 echo "'ls' should success after namespace LFSCK repairing"
3896 ls -ail $DIR/$tdir/d0/d1 > /dev/null || error "(12) ls should success."
3898 run_test 23a "LFSCK can repair dangling name entry (1)"
# Test 23b: a hard link "foo" to f0 is created while a fail injection is
# armed with f1's OID as fail_val; f1 is then unlinked so that "foo" dangles.
# The namespace LFSCK (started with -C) is expected to report one dangling
# repair and one multiple-linked repair, after which "foo" must read back
# f0's content ("dummy").
# Skipped when MDS1 is not newer than 2.6.50.
3901 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
3902 skip "MDS older than 2.6.50, LU-5512"
3905 echo "The objectA has multiple hard links, one of them corresponding"
3906 echo "to the name entry_B. But there is something wrong for the name"
3907 echo "entry_B and cause entry_B to references non-exist object_C."
3908 echo "In the first-stage scanning, the LFSCK will think the entry_B"
3909 echo "as dangling, and re-create the lost object_C. When the LFSCK"
3910 echo "comes to the second-stage scanning, it will find that the"
3911 echo "former re-creating object_C is not proper, and will try to"
3912 echo "replace the object_C with the real object_A."
3915 check_mount_and_prep
# The bare path2fid calls below only print FIDs into the test log.
3917 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
3918 $LFS path2fid $DIR/$tdir/d0
# Ten filler files t0..t9; they are unlinked again before f1 is removed
# (see the LU-7429 comment further down).
3920 createmany -o $DIR/$tdir/d0/t 10 || error "(1.5) Fail to creatmany"
3922 echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
3923 $LFS path2fid $DIR/$tdir/d0/f0
3925 echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"
3926 $LFS path2fid $DIR/$tdir/d0/f1
# First ':'-separated field of each FID, i.e. everything before the OID.
3928 local SEQ0=$($LFS path2fid $DIR/$tdir/d0/f0 | awk -F':' '{print $1}')
3929 local SEQ1=$($LFS path2fid $DIR/$tdir/d0/f1 | awk -F':' '{print $1}')
3931 if [ "$SEQ0" != "$SEQ1" ]; then
3932 # To guarantee that the f0 and f1 are in the same FID seq
3933 rm -f $DIR/$tdir/d0/f0 ||
3934 error "(3.1) Fail to unlink $DIR/$tdir/d0/f0"
3935 echo "dummy" > $DIR/$tdir/d0/f0 ||
3936 error "(3.2) Fail to touch on MDT0"
3937 $LFS path2fid $DIR/$tdir/d0/f0
# Second FID field of f1 (the OID), converted to decimal before it is handed
# to fail_val.
3940 local OID=$($LFS path2fid $DIR/$tdir/d0/f1 | awk -F':' '{print $2}')
3941 OID=$(printf %d $OID)
3943 echo "Inject failure stub on MDT0 to simulate dangling name entry"
# fail_val carries f1's OID while the hard link is created; both knobs are
# reset right after the ln.
3944 #define OBD_FAIL_LFSCK_DANGLING3 0x1621
3945 do_facet $SINGLEMDS $LCTL set_param fail_val=$OID fail_loc=0x1621
3946 ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
3947 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
3949 # If there is creation after the dangling injection, it may re-use
3950 # the just released local object (inode) that is referenced by the
3951 # dangling name entry. It will fail the dangling injection.
3952 # So before deleting the target object for the dangling name entry,
3953 # remove some other objects to avoid the target object being reused
3954 # by some potential creations. LU-7429
3955 unlinkmany $DIR/$tdir/d0/t 10 || error "(5.0) Fail to unlinkmany"
3957 rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"
3959 echo "'ls' should fail because of dangling name entry"
3960 ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
3961 error "(6) ls should fail."
3963 echo "Trigger namespace LFSCK to find out dangling name entry"
3964 $START_NAMESPACE -r -C ||
3965 error "(7) Fail to start LFSCK for namespace"
# Poll the status line of lfsck_namespace on $SINGLEMDS until it reads
# "completed"; 32 is presumably the timeout in seconds - TODO confirm.
3967 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3968 mdd.${MDT_DEV}.lfsck_namespace |
3969 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3971 error "(8) unexpected status"
3974 local repaired=$($SHOW_NAMESPACE |
3975 awk '/^dangling_repaired/ { print $2 }')
3976 [ $repaired -eq 1 ] ||
3977 error "(9) Fail to repair dangling name entry: $repaired"
# The same variable is reused for the multiple_linked_repaired counter.
3979 repaired=$($SHOW_NAMESPACE |
3980 awk '/^multiple_linked_repaired/ { print $2 }')
3981 [ $repaired -eq 1 ] ||
3982 error "(10) Fail to drop the former created object: $repaired"
# Final check: "foo" must again read f0's content.
3984 local data=$(cat $DIR/$tdir/d0/foo)
3985 [ "$data" == "dummy" ] ||
3986 error "(11) The $DIR/$tdir/d0/foo is not recovered: $data"
3988 run_test 23b "LFSCK can repair dangling name entry (2)"
# Cleanup steps that appear to belong to test 23c: the "(10)" label fills the
# gap between that test's steps (9) and (11), and stop_full_debug_logging
# pairs with the start_full_debug_logging call made there.
# NOTE(review): the helper's name/header is not visible here - confirm.
# Order: clear the fail injection on $SINGLEMDS, wait for the namespace LFSCK
# status to read "completed", then stop full debug logging.
3991 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
3992 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3993 mdd.${MDT_DEV}.lfsck_namespace |
3994 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3996 error "(10) unexpected status"
3999 stop_full_debug_logging
# Test 23c: same dangling-entry injection as 23b, but the re-created object
# is modified before the LFSCK can swap it for the real one, so "foo" must
# NOT end up with f0's content ("dummy").
# Skipped when MDS1 is not newer than 2.6.50.
4003 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4004 skip "MDS older than 2.6.50, LU-5512"
4007 echo "The objectA has multiple hard links, one of them corresponding"
4008 echo "to the name entry_B. But there is something wrong for the name"
4009 echo "entry_B and cause entry_B to references non-exist object_C."
4010 echo "In the first-stage scanning, the LFSCK will think the entry_B"
4011 echo "as dangling, and re-create the lost object_C. And then others"
4012 echo "modified the re-created object_C. When the LFSCK comes to the"
4013 echo "second-stage scanning, it will find that the former re-creating"
4014 echo "object_C maybe wrong and try to replace the object_C with the"
4015 echo "real object_A. But because object_C has been modified, so the"
4016 echo "LFSCK cannot replace it."
4019 start_full_debug_logging
4021 check_mount_and_prep
# FIDs of the parent and both files are captured and echoed for the log.
4023 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
4024 parent_fid="$($LFS path2fid $DIR/$tdir/d0)"
4025 echo "parent_fid=$parent_fid"
4027 createmany -o $DIR/$tdir/d0/t 10 || error "(1.5) Fail to creatmany"
4029 echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
4030 f0_fid="$($LFS path2fid $DIR/$tdir/d0/f0)"
4031 echo "f0_fid=$f0_fid"
4033 echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"
4034 f1_fid="$($LFS path2fid $DIR/$tdir/d0/f1)"
4035 echo "f1_fid=$f1_fid"
# Compare the sequence part of the two FIDs, i.e. everything before the first
# ':'. This previously read ${fid_f0/:.*/} and ${fid_f1/:.*/}: those names are
# never assigned (the variables are f0_fid/f1_fid) and ${var/pat/} takes a
# glob, not a regex, so both sides were always empty and the guard below
# could never run.
4037 if [ "${f0_fid%%:*}" != "${f1_fid%%:*}" ]; then
4038 # To guarantee that the f0 and f1 are in the same FID seq
4039 rm -f $DIR/$tdir/d0/f0 ||
4040 error "(3.1) Fail to unlink $DIR/$tdir/d0/f0"
4041 echo "dummy" > $DIR/$tdir/d0/f0 ||
4042 error "(3.2) Fail to touch on MDT0"
4043 f0_fid="$($LFS path2fid $DIR/$tdir/d0/f0)"
4044 echo "f0_fid=$f0_fid (replaced)"
# Second ':'-separated field of f1's FID (the OID), passed on as fail_val.
4047 local oid=$(awk -F':' '{ printf $2 }' <<< $f1_fid)
4049 echo "Inject failure stub on MDT0 to simulate dangling name entry"
4050 #define OBD_FAIL_LFSCK_DANGLING3 0x1621
4051 do_facet $SINGLEMDS $LCTL set_param fail_val=$oid fail_loc=0x1621
4052 ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
4053 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
4055 # If there is creation after the dangling injection, it may re-use
4056 # the just released local object (inode) that is referenced by the
4057 # dangling name entry. It will fail the dangling injection.
4058 # So before deleting the target object for the dangling name entry,
4059 # remove some other objects to avoid the target object being reused
4060 # by some potential creations. LU-7429
4061 unlinkmany $DIR/$tdir/d0/t 10 || error "(5.0) Fail to unlinkmany"
4063 rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"
4065 echo "'ls' should fail because of dangling name entry"
4066 ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
4067 error "(6) ls should fail."
# A second fail_loc (with fail_val=10) is armed before the LFSCK starts;
# presumably it delays the scan so the write below lands in between -
# TODO confirm the unit of fail_val.
4069 #define OBD_FAIL_LFSCK_DELAY3 0x1602
4070 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
4072 echo "Trigger namespace LFSCK to find out dangling name entry"
4073 $START_NAMESPACE -r -C ||
4074 error "(7) Fail to start LFSCK for namespace"
# Wait until "foo" stats as a zero-length file (the freshly re-created
# object); the fallback branch tolerates an already-repaired link.
4076 wait_update_facet client "stat -c%s $DIR/$tdir/d0/foo" "0" $LTIME || {
4077 # While unexpected by the test, it is valid for LFSCK to repair
4078 # the link to the original object before any data is written.
4079 local size=$(stat -c %s $DIR/$tdir/d0/foo)
4081 if [ "$size" = "6" -a "$(<$DIR/$tdir/d0/foo)" = "dummy" ]; then
4082 log "LFSCK repaired file prematurely"
4087 stat $DIR/$tdir/d0/foo
4089 error "(8) unexpected size"
# Modify the re-created object so the LFSCK can no longer replace it.
4092 echo "data" >> $DIR/$tdir/d0/foo || error "(9) Fail to write"
4093 cancel_lru_locks osc
4097 local repaired=$($SHOW_NAMESPACE |
4098 awk '/^dangling_repaired/ { print $2 }')
4099 [ $repaired -eq 1 ] ||
4100 error "(11) Fail to repair dangling name entry: $repaired"
4102 local data=$(cat $DIR/$tdir/d0/foo)
4103 [ "$data" != "dummy" ] ||
4104 error "(12) The $DIR/$tdir/d0/foo should not be recovered"
4106 run_test 23c "LFSCK can repair dangling name entry (3)"
# Test 24: two MDT-objects carry a linkEA entry for the same name while the
# name entry references only one of them. The namespace LFSCK must report
# one multiple_referenced repair and the unrecognized object must show up
# under .lustre/lost+found/MDT0000/ with a name built from its own FID and
# the parent FID.
# Skipped with fewer than two MDTs, when FILESET is set, or when MDS1 is not
# newer than 2.6.50.
4109 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
4110 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
4111 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4112 skip "MDS older than 2.6.50, LU-5513"
4115 echo "Two MDT-objects back reference the same name entry via their"
4116 echo "each own linkEA entry, but the name entry only references one"
4117 echo "MDT-object. The namespace LFSCK will remove the linkEA entry"
4118 echo "for the MDT-object that is not recognized. If such MDT-object"
4119 echo "has no other linkEA entry after the removing, then the LFSCK"
4120 echo "will add it as orphan under the .lustre/lost+found/MDTxxxx/."
4123 check_mount_and_prep
# d0 is created on MDT index 1; guard and dummy are plain mkdirs beneath it.
# The bare path2fid calls only print the FIDs into the log.
4125 $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
4127 mkdir $DIR/$tdir/d0/guard || error "(1) Fail to mkdir guard"
4128 $LFS path2fid $DIR/$tdir/d0/guard
4130 mkdir $DIR/$tdir/d0/dummy || error "(2) Fail to mkdir dummy"
4131 $LFS path2fid $DIR/$tdir/d0/dummy
# pfid becomes the parent-FID component of the orphan name checked at step
# (11); whether guard's or dummy's FID is used depends on the MDT backend
# type.
4134 if [ $mds1_FSTYPE != ldiskfs ]; then
4135 pfid=$($LFS path2fid $DIR/$tdir/d0/guard)
4137 pfid=$($LFS path2fid $DIR/$tdir/d0/dummy)
4140 touch $DIR/$tdir/d0/guard/foo ||
4141 error "(3) Fail to touch $DIR/$tdir/d0/guard/foo"
4143 echo "Inject failure stub on MDT0 to simulate the case that"
4144 echo "the $DIR/$tdir/d0/dummy/foo has the 'bad' linkEA entry"
4145 echo "that references $DIR/$tdir/d0/guard/foo."
4146 echo "Then remove the name entry $DIR/$tdir/d0/dummy/foo."
4147 echo "So the MDT-object $DIR/$tdir/d0/dummy/foo will be left"
4148 echo "there with the same linkEA entry as another MDT-object"
4149 echo "$DIR/$tdir/d0/guard/foo has"
# The fail_loc stays armed across both the mkdir and the rmdir of dummy/foo;
# the child FID is recorded in between, while the name still resolves.
4151 #define OBD_FAIL_LFSCK_MUL_REF 0x1622
4152 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1622
4153 $LFS mkdir -i 0 $DIR/$tdir/d0/dummy/foo ||
4154 error "(4) Fail to mkdir $DIR/$tdir/d0/dummy/foo"
4155 $LFS path2fid $DIR/$tdir/d0/dummy/foo
4156 local cfid=$($LFS path2fid $DIR/$tdir/d0/dummy/foo)
4157 rmdir $DIR/$tdir/d0/dummy/foo ||
4158 error "(5) Fail to remove $DIR/$tdir/d0/dummy/foo name entry"
4159 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
4161 echo "stat $DIR/$tdir/d0/dummy/foo should fail"
4162 stat $DIR/$tdir/d0/dummy/foo > /dev/null 2>&1 &&
4163 error "(6) stat successfully unexpectedly"
4165 echo "Trigger namespace LFSCK to repair multiple-referenced name entry"
4166 $START_NAMESPACE -A -r ||
4167 error "(7) Fail to start LFSCK for namespace"
4169 wait_all_targets_blocked namespace completed 8
4171 local repaired=$($SHOW_NAMESPACE |
4172 awk '/^multiple_referenced_repaired/ { print $2 }')
4173 [ $repaired -eq 1 ] ||
4174 error "(9) Fail to repair multiple referenced name entry: $repaired"
4176 echo "There should be an orphan under .lustre/lost+found/MDT0000/"
4177 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
4178 error "(10) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
# Expected orphan name: <child FID>-<parent FID>-D-0.
4180 local cname="$cfid-$pfid-D-0"
4181 ls -ail $MOUNT/.lustre/lost+found/MDT0000/$cname ||
4182 error "(11) .lustre/lost+found/MDT0000/ should not be empty"
4184 run_test 24 "LFSCK can repair multiple-referenced name entry"
# Test 25: a file is created while a fail injection is armed so that the
# type recorded in its name entry is wrong; the namespace LFSCK must report
# exactly one bad_file_type repair and the directory must list cleanly.
# Runs only on an ldiskfs MDT and when MDS1 is newer than 2.6.50.
4187 [[ $mds1_FSTYPE == ldiskfs ]] || skip "only ldiskfs fixes dirent type"
4188 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4189 skip "MDS older than 2.6.50, LU-5515"
4192 echo "The file type in the name entry does not match the file type"
4193 echo "claimed by the referenced object. Then the LFSCK will update"
4194 echo "the file type in the name entry."
4197 check_mount_and_prep
4199 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
4201 echo "Inject failure stub on MDT0 to simulate the case that"
4202 echo "the file type stored in the name entry is wrong."
# Injection is active only for the single touch below.
4204 #define OBD_FAIL_LFSCK_BAD_TYPE 0x1623
4205 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1623
4206 touch $DIR/$tdir/d0/foo || error "(2) Fail to touch $DIR/$tdir/d0/foo"
4207 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
4209 echo "Trigger namespace LFSCK to repair bad file type in the name entry"
4210 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace"
# Poll the status line of lfsck_namespace on $SINGLEMDS until it reads
# "completed"; 32 is presumably the timeout in seconds - TODO confirm.
4212 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4213 mdd.${MDT_DEV}.lfsck_namespace |
4214 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4216 error "(4) unexpected status"
4219 local repaired=$($SHOW_NAMESPACE |
4220 awk '/^bad_file_type_repaired/ { print $2 }')
4221 [ $repaired -eq 1 ] ||
4222 error "(5) Fail to repair bad file type in name entry: $repaired"
4224 ls -ail $DIR/$tdir/d0 || error "(6) Fail to 'ls' the $DIR/$tdir/d0"
4226 run_test 25 "LFSCK can repair bad file type in the name entry"
# Test 26a: a regular file "foo" (with a second hard link "dummy") loses its
# name entry while the object and its linkEA stay. The namespace LFSCK must
# report one lost_dirent repair, "foo" must be listable again, and its FID
# must be unchanged.
# Skipped when MDS1 is not newer than 2.6.50.
4229 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4230 skip "MDS older than 2.6.50, LU-5516"
4233 echo "The local name entry back referenced by the MDT-object is lost."
4234 echo "The namespace LFSCK will add the missing local name entry back"
4235 echo "to the normal namespace."
4238 check_mount_and_prep
4240 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
4241 touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
# Remember the FID so it can be compared after the repair.
4242 local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)
4244 ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
4245 error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
4247 echo "Inject failure stub on MDT0 to simulate the case that"
4248 echo "foo's name entry will be removed, but the foo's object"
4249 echo "and its linkEA are kept in the system."
# Injection is active only for the rm below.
4251 #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
4252 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
4253 rm -f $DIR/$tdir/d0/foo || error "(4) Fail to unlink $DIR/$tdir/d0/foo"
4254 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
4256 ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
4257 error "(5) 'ls' should fail"
# Message corrected from "remote" to "local": this test covers the local
# name entry, as its description and run_test title say.
4259 echo "Trigger namespace LFSCK to repair the missing local name entry"
4260 $START_NAMESPACE -r -A ||
4261 error "(6) Fail to start LFSCK for namespace"
4263 wait_all_targets_blocked namespace completed 7
4265 local repaired=$($SHOW_NAMESPACE |
4266 awk '/^lost_dirent_repaired/ { print $2 }')
4267 [ $repaired -eq 1 ] ||
4268 error "(8) Fail to repair lost dirent: $repaired"
4270 ls -ail $DIR/$tdir/d0/foo ||
4271 error "(9) Fail to 'ls' $DIR/$tdir/d0/foo"
# The restored name must resolve to the original object.
4273 local foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
4274 [ "$foofid" == "$foofid2" ] ||
4275 error "(10) foo's FID changed: $foofid, $foofid2"
4277 run_test 26a "LFSCK can add the missing local name entry back to the namespace"
# Test 26b: remote variant of 26a. Directory "foo" is created on MDT index 0
# under a parent on MDT index 1, its name entry is removed under fail
# injection, and the namespace LFSCK must report one lost_dirent repair and
# bring "foo" back with an unchanged FID.
# Skipped with fewer than two MDTs or when MDS1 is not newer than 2.6.50.
4280 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
4281 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4282 skip "MDS older than 2.6.50, LU-5516"
4285 echo "The remote name entry back referenced by the MDT-object is lost."
4286 echo "The namespace LFSCK will add the missing remote name entry back"
4287 echo "to the normal namespace."
4290 check_mount_and_prep
4292 $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
4293 $LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"
# Remember the FID so it can be compared after the repair.
4294 local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)
4296 echo "Inject failure stub on MDT0 to simulate the case that"
4297 echo "foo's name entry will be removed, but the foo's object"
4298 echo "and its linkEA are kept in the system."
# Injection is active only for the rmdir below.
4300 #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
4301 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
4302 rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
4303 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
4305 ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
4306 error "(4) 'ls' should fail"
4308 echo "Trigger namespace LFSCK to repair the missing remote name entry"
4309 $START_NAMESPACE -r -A ||
4310 error "(5) Fail to start LFSCK for namespace"
4312 wait_all_targets_blocked namespace completed 6
4314 local repaired=$($SHOW_NAMESPACE |
4315 awk '/^lost_dirent_repaired/ { print $2 }')
4316 [ $repaired -eq 1 ] ||
4317 error "(7) Fail to repair lost dirent: $repaired"
4319 ls -ail $DIR/$tdir/d0/foo ||
4320 error "(8) Fail to 'ls' $DIR/$tdir/d0/foo"
# The restored name must resolve to the original object.
4322 local foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
4323 [ "$foofid" == "$foofid2" ] ||
4324 error "(9) foo's FID changed: $foofid, $foofid2"
4326 run_test 26b "LFSCK can add the missing remote name entry back to the namespace"
# Test 27a: both names of a file are removed under fail injection and the
# parent directory d0 is deleted afterwards. The namespace LFSCK must report
# one lost_dirent repair and leave an entry whose name matches *-P-* under
# .lustre/lost+found/MDT0000/.
# Skipped when FILESET is set or when MDS1 is not newer than 2.6.50.
4329 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
4330 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4331 skip "MDS older than 2.6.50, LU-5516"
4334 echo "The local parent referenced by the MDT-object linkEA is lost."
4335 echo "The namespace LFSCK will re-create the lost parent as orphan."
4338 check_mount_and_prep
4340 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
4341 touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
4342 ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
4343 error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
4345 echo "Inject failure stub on MDT0 to simulate the case that"
4346 echo "foo's name entry will be removed, but the foo's object"
4347 echo "and its linkEA are kept in the system. And then remove"
4348 echo "another hard link and the parent directory."
# Both unlinks run with the injection armed; the parent is removed only
# after fail_loc has been cleared.
4350 #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
4351 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
4352 rm -f $DIR/$tdir/d0/foo ||
4353 error "(4) Fail to unlink $DIR/$tdir/d0/foo"
4354 rm -f $DIR/$tdir/d0/dummy ||
4355 error "(5) Fail to unlink $DIR/$tdir/d0/dummy"
4356 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
4358 rm -rf $DIR/$tdir/d0 || error "(5) Fail to unlink the dir d0"
4359 ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 && error "(6) 'ls' should fail"
4361 echo "Trigger namespace LFSCK to repair the lost parent"
4362 $START_NAMESPACE -r -A ||
4363 error "(6) Fail to start LFSCK for namespace"
4365 wait_all_targets_blocked namespace completed 7
4367 local repaired=$($SHOW_NAMESPACE |
4368 awk '/^lost_dirent_repaired/ { print $2 }')
4369 [ $repaired -eq 1 ] ||
4370 error "(8) Fail to repair lost dirent: $repaired"
4372 echo "There should be an orphan under .lustre/lost+found/MDT0000/"
4373 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
4374 error "(9) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
4376 ls -ail $MOUNT/.lustre/lost+found/MDT0000/
# The -name pattern is quoted so that find receives it verbatim; unquoted,
# the shell would expand *-P-* against the current directory first and
# could hand find a different (or multi-word) argument.
4378 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-P-*")
4379 [ ! -z "$cname" ] ||
4380 error "(10) .lustre/lost+found/MDT0000/ should not be empty"
4382 run_test 27a "LFSCK can recreate the lost local parent directory as orphan"
# Test 27b: remote variant of 27a. Directory "foo" on MDT index 0 loses its
# name entry under fail injection and its parent d0 (MDT index 1) is then
# removed. The namespace LFSCK must report one lost_dirent repair and leave
# an entry whose name matches *-P-* under .lustre/lost+found/MDT0001/.
# Skipped with fewer than two MDTs, when FILESET is set, or when MDS1 is not
# newer than 2.6.50.
4385 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
4386 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
4387 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4388 skip "MDS older than 2.6.50, LU-5516"
4391 echo "The remote parent referenced by the MDT-object linkEA is lost."
4392 echo "The namespace LFSCK will re-create the lost parent as orphan."
4395 check_mount_and_prep
4397 $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
4398 $LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"
# Prints d0's FID into the log only.
4400 $LFS path2fid $DIR/$tdir/d0
4402 echo "Inject failure stub on MDT0 to simulate the case that"
4403 echo "foo's name entry will be removed, but the foo's object"
4404 echo "and its linkEA are kept in the system. And then remove"
4405 echo "the parent directory."
# Only the rmdir of foo runs with the injection armed; the parent is removed
# after fail_loc has been cleared.
4407 #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
4408 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
4409 rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
4410 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
4412 rmdir $DIR/$tdir/d0 || error "(4) Fail to unlink the dir d0"
4413 ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 && error "(5) 'ls' should fail"
4415 echo "Trigger namespace LFSCK to repair the missing remote name entry"
4416 $START_NAMESPACE -r -A ||
4417 error "(6) Fail to start LFSCK for namespace"
4419 wait_all_targets_blocked namespace completed 7
4421 local repaired=$($SHOW_NAMESPACE |
4422 awk '/^lost_dirent_repaired/ { print $2 }')
4423 [ $repaired -eq 1 ] ||
4424 error "(8) Fail to repair lost dirent: $repaired"
4426 ls -ail $MOUNT/.lustre/lost+found/
4428 echo "There should be an orphan under .lustre/lost+found/MDT0001/"
4429 [ -d $MOUNT/.lustre/lost+found/MDT0001 ] ||
4430 error "(9) $MOUNT/.lustre/lost+found/MDT0001/ should be there"
4432 ls -ail $MOUNT/.lustre/lost+found/MDT0001/
# The -name pattern is quoted so that find receives it verbatim; unquoted,
# the shell would expand *-P-* against the current directory first and
# could hand find a different (or multi-word) argument.
4434 cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name "*-P-*")
4435 [ ! -z "$cname" ] ||
4436 error "(10) .lustre/lost+found/MDT0001/ should not be empty"
4438 run_test 27b "LFSCK can recreate the lost remote parent directory as orphan"
# Test 28: two cross-MDT name entries are removed under fail injection, then
# a namespace LFSCK is run while a second fail_loc is armed on mds2. The
# first run must end "partial" on mds1 with 0 lost_dirent repairs and
# "completed" on mds2 with 1; a second, undisturbed run must then show 1
# repair on mds1 and 0 on mds2.
# Skipped with fewer than two MDTs or when MDS1 is not newer than 2.6.50.
4441 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
4442 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4443 skip "MDS older than 2.6.50, LU-5506"
4446 echo "The target name entry is lost. The LFSCK should insert the"
4447 echo "orphan MDT-object under .lustre/lost+found/MDTxxxx. But if"
4448 echo "the MDT (on which the orphan MDT-object resides) has ever"
4449 echo "failed to respond some name entry verification during the"
4450 echo "first stage-scanning, then the LFSCK should skip to handle"
4451 echo "orphan MDT-object on this MDT. But other MDTs should not"
4455 check_mount_and_prep
# Cross-MDT layout: d1 on MDT index 0 with children a1/a2 on index 1, and
# d2 on index 1 with children a1/a2 on index 0.
# NOTE(review): unlike the setup steps of the neighbouring tests, these six
# mkdir calls are not followed by an error check.
4456 $LFS mkdir -i 0 $DIR/$tdir/d1
4457 $LFS mkdir -i 1 $DIR/$tdir/d1/a1
4458 $LFS mkdir -i 1 $DIR/$tdir/d1/a2
4460 $LFS mkdir -i 1 $DIR/$tdir/d2
4461 $LFS mkdir -i 0 $DIR/$tdir/d2/a1
4462 $LFS mkdir -i 0 $DIR/$tdir/d2/a2
4464 echo "Inject failure stub on MDT0 to simulate the case that"
4465 echo "d1/a1's name entry will be removed, but the d1/a1's object"
4466 echo "and its linkEA are kept in the system. And the case that"
4467 echo "d2/a2's name entry will be removed, but the d2/a2's object"
4468 echo "and its linkEA are kept in the system."
# The injection is armed on both MDS facets for the two rmdir calls and
# cleared on both afterwards.
4470 #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
4471 do_facet mds1 $LCTL set_param fail_loc=0x1624
4472 do_facet mds2 $LCTL set_param fail_loc=0x1624
4473 rmdir $DIR/$tdir/d1/a1 || error "(1) Fail to rmdir $DIR/$tdir/d1/a1"
4474 rmdir $DIR/$tdir/d2/a2 || error "(2) Fail to rmdir $DIR/$tdir/d2/a2"
4475 do_facet mds1 $LCTL set_param fail_loc=0
4476 do_facet mds2 $LCTL set_param fail_loc=0
4478 cancel_lru_locks mdc
4479 cancel_lru_locks osc
4481 echo "Inject failure, to simulate the MDT0 fail to handle"
4482 echo "MDT1 LFSCK request during the first-stage scanning."
# This fail_loc is set on mds2 and stays armed until both status waits
# below have finished.
4483 #define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c
4484 do_facet mds2 $LCTL set_param fail_loc=0x161c fail_val=0
4486 echo "Trigger namespace LFSCK on all devices to find out orphan object"
4487 $START_NAMESPACE -r -A ||
4488 error "(3) Fail to start LFSCK for namespace"
# Expected end states of the disturbed run: "partial" on mds1, "completed"
# on mds2.
4490 wait_update_facet mds1 "$LCTL get_param -n \
4491 mdd.$(facet_svc mds1).lfsck_namespace |
4492 awk '/^status/ { print \\\$2 }'" "partial" 32 || {
4493 error "(4) mds1 is not the expected 'partial'"
4496 wait_update_facet mds2 "$LCTL get_param -n \
4497 mdd.$(facet_svc mds2).lfsck_namespace |
4498 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4499 error "(5) mds2 is not the expected 'completed'"
4502 do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0
# lost_dirent_repaired after the disturbed run: 0 on mds1, 1 on mds2.
4504 local repaired=$(do_facet mds1 $LCTL get_param -n \
4505 mdd.$(facet_svc mds1).lfsck_namespace |
4506 awk '/^lost_dirent_repaired/ { print $2 }')
4507 [ $repaired -eq 0 ] ||
4508 error "(6) Expect 0 fixed on mds1, but got: $repaired"
4510 repaired=$(do_facet mds2 $LCTL get_param -n \
4511 mdd.$(facet_svc mds2).lfsck_namespace |
4512 awk '/^lost_dirent_repaired/ { print $2 }')
4513 [ $repaired -eq 1 ] ||
4514 error "(7) Expect 1 fixed on mds2, but got: $repaired"
4516 echo "Trigger namespace LFSCK on all devices again to cleanup"
4517 $START_NAMESPACE -r -A ||
4518 error "(8) Fail to start LFSCK for namespace"
4520 wait_all_targets_blocked namespace completed 9
# lost_dirent_repaired after the clean rerun: 1 on mds1, 0 on mds2.
4522 local repaired=$(do_facet mds1 $LCTL get_param -n \
4523 mdd.$(facet_svc mds1).lfsck_namespace |
4524 awk '/^lost_dirent_repaired/ { print $2 }')
4525 [ $repaired -eq 1 ] ||
4526 error "(10) Expect 1 fixed on mds1, but got: $repaired"
4528 repaired=$(do_facet mds2 $LCTL get_param -n \
4529 mdd.$(facet_svc mds2).lfsck_namespace |
4530 awk '/^lost_dirent_repaired/ { print $2 }')
4531 [ $repaired -eq 0 ] ||
4532 error "(11) Expect 0 fixed on mds2, but got: $repaired"
4534 run_test 28 "Skip the failed MDT(s) when handle orphan MDT-objects"
# Test 29a (currently disabled - its run_test line at the end is commented
# out): a hard link is created under fail injection so that stat reports an
# nlink of 3 for a file with two names; the namespace LFSCK is then expected
# to report one nlinks repair and bring the count back to 2.
# Skipped when MDS1 is not newer than 2.6.50.
4537 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4538 skip "MDS older than 2.6.50, LU-5517"
4541 echo "The object's nlink attribute is larger than the object's known"
4542 echo "name entries count. The LFSCK will repair the object's nlink"
4543 echo "attribute to match the known name entries count"
4546 check_mount_and_prep
4548 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
4549 touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
4551 echo "Inject failure stub on MDT0 to simulate the case that foo's"
4552 echo "nlink attribute is larger than its name entries count."
# Injection is active only for the ln below.
4554 #define OBD_FAIL_LFSCK_MORE_NLINK 0x1625
4555 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1625
4556 ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
4557 error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
4558 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Client metadata locks are dropped before stat, presumably so the link
# count is re-read from the MDS rather than served from cache.
4560 cancel_lru_locks mdc
4561 local count=$(stat --format=%h $DIR/$tdir/d0/foo)
4562 [ $count -eq 3 ] || error "(4) Cannot inject error: $count"
4564 echo "Trigger namespace LFSCK to repair the nlink count"
4565 $START_NAMESPACE -r -A ||
4566 error "(5) Fail to start LFSCK for namespace"
4568 wait_all_targets_blocked namespace completed 6
4570 local repaired=$($SHOW_NAMESPACE |
4571 awk '/^nlinks_repaired/ { print $2 }')
4572 [ $repaired -eq 1 ] ||
4573 error "(7) Fail to repair nlink count: $repaired"
4575 cancel_lru_locks mdc
4576 count=$(stat --format=%h $DIR/$tdir/d0/foo)
4577 [ $count -eq 2 ] || error "(8) Fail to repair nlink count: $count"
4579 # Disable 29a, we only allow nlink to be updated if the known linkEA
4580 # entries is larger than nlink count.
4582 #run_test 29a "LFSCK can repair bad nlink count (1)"
# Test 29b: a hard link is created under fail injection so that stat still
# reports an nlink of 1 for a file with two names; the namespace LFSCK must
# report one nlinks repair and raise the count to 2.
# Skipped when MDS1 is not newer than 2.6.50.
4585 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4586 skip "MDS older than 2.6.50, LU-5517"
4589 echo "The object's nlink attribute is smaller than the object's known"
4590 echo "name entries count. The LFSCK will repair the object's nlink"
4591 echo "attribute to match the known name entries count"
4594 check_mount_and_prep
4596 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
4597 touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
4599 echo "Inject failure stub on MDT0 to simulate the case that foo's"
4600 echo "nlink attribute is smaller than its name entries count."
# Injection is active only for the ln below.
4602 #define OBD_FAIL_LFSCK_LESS_NLINK 0x1626
4603 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1626
4604 ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
4605 error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
4606 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Client metadata locks are dropped before stat, presumably so the link
# count is re-read from the MDS rather than served from cache.
4608 cancel_lru_locks mdc
4609 local count=$(stat --format=%h $DIR/$tdir/d0/foo)
4610 [ $count -eq 1 ] || error "(4) Cannot inject error: $count"
4612 echo "Trigger namespace LFSCK to repair the nlink count"
4613 $START_NAMESPACE -r -A ||
4614 error "(5) Fail to start LFSCK for namespace"
4616 wait_all_targets_blocked namespace completed 6
4618 local repaired=$($SHOW_NAMESPACE |
4619 awk '/^nlinks_repaired/ { print $2 }')
4620 [ $repaired -eq 1 ] ||
4621 error "(7) Fail to repair nlink count: $repaired"
4623 cancel_lru_locks mdc
4624 count=$(stat --format=%h $DIR/$tdir/d0/foo)
4625 [ $count -eq 2 ] || error "(8) Fail to repair nlink count: $count"
4627 run_test 29b "LFSCK can repair bad nlink count (2)"
# Test 29c: 150 hard links are created to one file, 100 of them are removed
# again, and the namespace LFSCK must then report one linkea_overflow_cleared
# and zero nlinks repairs. With two or more MDTs the test also checks that
# migrating the directory holding the file fails before the LFSCK run (FID
# unchanged) and succeeds after it (FID changed).
# Skipped when MDS1 is not newer than 2.6.50.
4631 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4632 skip "MDS older than 2.6.50, LU-5517"
4635 echo "The namespace LFSCK will create many hard links to the target"
4636 echo "file as to exceed the linkEA size limitation. Under such case"
4637 echo "the linkEA will be marked as overflow that will prevent the"
4638 echo "target file to be migrated. Then remove some hard links to"
4639 echo "make the left hard links to be held within the linkEA size"
4640 echo "limitation. But before the namespace LFSCK adding all the"
4641 echo "missed linkEA entries back, the overflow mark (timestamp)"
4642 echo "will not be cleared."
4645 check_mount_and_prep
# guard/ holds the target file f0; foo/ (placed on the highest MDT index)
# receives the hard links.
4647 mkdir -p $DIR/$tdir/guard || error "(0.1) Fail to mkdir"
4648 $LFS mkdir -i $((MDSCOUNT - 1)) $DIR/$tdir/foo ||
4649 error "(0.2) Fail to mkdir"
4650 touch $DIR/$tdir/guard/f0 || error "(1) Fail to create"
# FID before any migration attempt; compared against later FIDs below.
4651 local oldfid=$($LFS path2fid $DIR/$tdir/guard/f0)
4653 # define MAX_LINKEA_SIZE 4096
4654 # sizeof(link_ea_header) = 24
4655 # sizeof(link_ea_entry) = 18
4656 # nlink_min=$(((MAX_LINKEA_SIZE - sizeof(link_ea_header)) /
4657 # (sizeof(link_ea_entry) + name_length))
4658 # If the average name length is 12 bytes, then 150 hard links
4659 # is totally enough to overflow the linkEA
4660 echo "Create 150 hard links should succeed although the linkEA overflow"
4661 createmany -l $DIR/$tdir/guard/f0 $DIR/$tdir/foo/ttttttttttt 150 ||
4662 error "(2) Fail to hard link"
4664 cancel_lru_locks mdc
# The migrate checks are only made when at least two MDTs exist.
4665 if [ $MDSCOUNT -ge 2 ]; then
4666 $LFS migrate -m 1 $DIR/$tdir/guard 2>/dev/null &&
4667 error "(3.1) Migrate should fail"
4669 echo "The object with linkEA overflow should NOT be migrated"
4670 local newfid=$($LFS path2fid $DIR/$tdir/guard/f0)
4671 [ "$newfid" == "$oldfid" ] ||
4672 error "(3.2) Migrate should fail: $newfid != $oldfid"
4675 # Remove 100 hard links, then the linkEA should have space
4676 # to hold the missed linkEA entries.
4677 echo "Remove 100 hard links to save space for the missed linkEA entries"
4678 unlinkmany $DIR/$tdir/foo/ttttttttttt 100 || error "(4) Fail to unlink"
4680 if [ $MDSCOUNT -ge 2 ]; then
4681 $LFS migrate -m 1 $DIR/$tdir/guard 2>/dev/null &&
4682 error "(5.1) Migrate should fail"
4684 # The overflow timestamp is still there, so migration will fail.
4685 local newfid=$($LFS path2fid $DIR/$tdir/guard/f0)
4686 [ "$newfid" == "$oldfid" ] ||
4687 error "(5.2) Migrate should fail: $newfid != $oldfid"
4690 # sleep 3 seconds to guarantee that the overflow is recognized
4693 echo "Trigger namespace LFSCK to clear the overflow timestamp"
4694 $START_NAMESPACE -r -A ||
4695 error "(6) Fail to start LFSCK for namespace"
4697 wait_all_targets_blocked namespace completed 7
4699 local repaired=$($SHOW_NAMESPACE |
4700 awk '/^linkea_overflow_cleared/ { print $2 }')
4701 [ $repaired -eq 1 ] ||
4702 error "(8) Fail to clear linkea overflow: $repaired"
# The same variable is reused for the nlinks_repaired counter, which must
# stay at zero here.
4704 repaired=$($SHOW_NAMESPACE |
4705 awk '/^nlinks_repaired/ { print $2 }')
4706 [ $repaired -eq 0 ] ||
4707 error "(9) Unexpected nlink repaired: $repaired"
4709 if [ $MDSCOUNT -ge 2 ]; then
4710 $LFS migrate -m 1 $DIR/$tdir/guard 2>/dev/null ||
4711 error "(10.1) Migrate failure"
4713 # Migration should succeed after clear the overflow timestamp.
4714 local newfid=$($LFS path2fid $DIR/$tdir/guard/f0)
4715 [ "$newfid" != "$oldfid" ] ||
4716 error "(10.2) Migrate should succeed"
4718 ls -l $DIR/$tdir/foo > /dev/null ||
4719 error "(11) 'ls' failed after migration"
# Remove the target file and the remaining links.
4722 rm -f $DIR/$tdir/guard/f0 || error "(12) Fail to unlink f0"
4723 rm -rf $DIR/$tdir/foo || error "(13) Fail to rmdir foo"
4725 run_test 29c "verify linkEA size limitation"
# test 30: after e2fsck has moved objects whose name entries were removed
# into the backend /lost+found, the namespace LFSCK must bring them back
# into the client-visible namespace or into .lustre/lost+found/MDT0000/.
# Skipped unless mds1 is ldiskfs, FILESET is empty and the MDS is newer
# than 2.6.50.
4728 [[ $mds1_FSTYPE == ldiskfs ]] || skip "only ldiskfs has lost+found"
4729 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
4730 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4731 skip "MDS older than 2.6.50, LU-5518"
4734 echo "The namespace LFSCK will move the orphans from backend"
4735 echo "/lost+found directory to normal client visible namespace"
4736 echo "or to global visible ./lustre/lost+found/MDTxxxx/ directory"
4739 check_mount_and_prep
4741 $LFS mkdir -i 0 $DIR/$tdir/foo || error "(1) Fail to mkdir foo"
4742 touch $DIR/$tdir/foo/f0 || error "(2) Fail to touch f0"
4744 echo "Inject failure stub on MDT0 to simulate the case that"
4745 echo "directory d0 has no linkEA entry, then the LFSCK will"
4746 echo "move it into .lustre/lost+found/MDTxxxx/ later."
4748 #define OBD_FAIL_LFSCK_NO_LINKEA 0x161d
4749 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
4750 mkdir $DIR/$tdir/foo/d0 || error "(3) Fail to mkdir d0"
4751 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Remember the FIDs of foo (parent) and d0 (child); they form the name under
# which d0 is expected to show up in .lustre/lost+found/MDT0000/ below.
4753 local pfid=$($LFS path2fid $DIR/$tdir/foo)
4754 local cfid=$($LFS path2fid $DIR/$tdir/foo/d0)
4756 touch $DIR/$tdir/foo/d0/f1 || error "(4) Fail to touch f1"
4757 mkdir $DIR/$tdir/foo/d0/d1 || error "(5) Fail to mkdir d1"
4759 echo "Inject failure stub on MDT0 to simulate the case that the"
4760 echo "object's name entry will be removed, but not destroy the"
4761 echo "object. Then backend e2fsck will handle it as orphan and"
4762 echo "add them into the backend /lost+found directory."
4764 #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
4765 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
4766 rmdir $DIR/$tdir/foo/d0/d1 || error "(6) Fail to rmdir d1"
4767 rm -f $DIR/$tdir/foo/d0/f1 || error "(7) Fail to unlink f1"
4768 rmdir $DIR/$tdir/foo/d0 || error "(8) Fail to rmdir d0"
4769 rm -f $DIR/$tdir/foo/f0 || error "(9) Fail to unlink f0"
4770 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Take the client and MDT0 down so that e2fsck can be run on the MDT device.
4772 umount_client $MOUNT || error "(10) Fail to stop client!"
4774 stop $SINGLEMDS || error "(11) Fail to stop MDT0"
4777 run_e2fsck $(facet_host $SINGLEMDS) $MDT_DEVNAME "-y" ||
4778 error "(12) Fail to run e2fsck"
# Bring MDT0 back with the "noscrub" mount options before starting LFSCK.
4780 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
4781 error "(13) Fail to start MDT0"
4783 echo "Trigger namespace LFSCK to recover backend orphans"
4784 $START_NAMESPACE -r -A ||
4785 error "(14) Fail to start LFSCK for namespace"
4787 wait_all_targets_blocked namespace completed 15
# At least 4 objects must have been moved out of the local lost+found
# (presumably d1, f1, d0 and f0 removed above - TODO confirm).
4789 local repaired=$($SHOW_NAMESPACE |
4790 awk '/^local_lost_found_moved/ { print $2 }')
4791 [ $repaired -ge 4 ] ||
4792 error "(16) Fail to recover backend orphans: $repaired"
4794 mount_client $MOUNT || error "(17) Fail to start client!"
4796 stat $DIR/$tdir/foo/f0 || error "(18) f0 is not recovered"
4798 ls -ail $MOUNT/.lustre/lost+found/
4800 echo "d0 should become orphan under .lustre/lost+found/MDT0000/"
4801 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
4802 error "(19) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
4804 ls -ail $MOUNT/.lustre/lost+found/MDT0000/
4806 local cname=$MOUNT/.lustre/lost+found/MDT0000/${cfid}-${pfid}-D-0
# The path string itself is never empty, so check that the recovered
# directory really exists rather than testing the string with -z.
4807 [ -d "$cname" ] || error "(20) d0 is not recovered"
4809 stat ${cname}/d1 || error "(21) d1 is not recovered"
4810 stat ${cname}/f1 || error "(22) f1 is not recovered"
4812 run_test 30 "LFSCK can recover the orphans from backend /lost+found"
# test 31a: create MDSCOUNT sub-directories under a striped directory while
# fail_loc 0x1628 (OBD_FAIL_LFSCK_BAD_NAME_HASH) with fail_val=0 is set, then
# check that the namespace LFSCK reports name_hash_repaired >= 1 and that
# every entry can still be stat'ed and removed after a client remount.
# Needs at least 2 MDTs and an MDS newer than 2.6.50.
4815 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
4816 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4817 skip "MDS older than 2.6.50, LU-5519"
4820 echo "For the name entry under a striped directory, if the name"
4821 echo "hash does not match the shard, then the LFSCK will repair"
4822 echo "the bad name entry"
4825 check_mount_and_prep
4827 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4828 error "(1) Fail to create striped directory"
4830 echo "Inject failure stub on client to simulate the case that"
4831 echo "some name entry should be inserted into other non-first"
4832 echo "shard, but inserted into the first shard by wrong"
# The fail_loc is set with plain $LCTL (no do_facet), unlike the MDT-side
# injections used elsewhere in this file.
4834 #define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
4835 $LCTL set_param fail_loc=0x1628 fail_val=0
4836 createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT ||
4837 error "(2) Fail to create file under striped directory"
4838 $LCTL set_param fail_loc=0 fail_val=0
4840 echo "Trigger namespace LFSCK to repair bad name hash"
4841 $START_NAMESPACE -r -A ||
4842 error "(3) Fail to start LFSCK for namespace"
4844 wait_all_targets_blocked namespace completed 4
# The counter is read from $SINGLEMDS via $SHOW_NAMESPACE.
4846 local repaired=$($SHOW_NAMESPACE |
4847 awk '/^name_hash_repaired/ { print $2 }')
4848 [ $repaired -ge 1 ] ||
4849 error "(5) Fail to repair bad name hash: $repaired"
# Count what "lfs find -H badtype" reports for the striped directory.
# NOTE(review): the condition guarding the error below is not visible
# here - confirm which count is expected.
4851 local rc=$($LFS find -H badtype $DIR/$tdir/striped_dir | wc -l)
4853 error "Fail to find flag bad type: $rc"
# Remount the client before re-checking the entries.
4855 umount_client $MOUNT || error "(6) umount failed"
4856 mount_client $MOUNT || error "(7) mount failed"
# Every directory created above must be stat-able and removable.
4858 for ((i = 0; i < $MDSCOUNT; i++)); do
4859 stat $DIR/$tdir/striped_dir/d$i ||
4860 error "(8) Fail to stat d$i after LFSCK"
4861 rmdir $DIR/$tdir/striped_dir/d$i ||
4862 error "(9) Fail to unlink d$i after LFSCK"
4865 rmdir $DIR/$tdir/striped_dir ||
4866 error "(10) Fail to remove the striped directory after LFSCK"
4868 run_test 31a "The LFSCK can find/repair the name entry with bad name hash (1)"
# test 31b: same scenario as the bad-name-hash test with fail_val=0, but
# here fail_val=1 is used, MDSCOUNT * 5 sub-directories are created, and the
# name_hash_repaired counter is read from mds2 instead of $SINGLEMDS.
# Needs at least 2 MDTs and an MDS newer than 2.6.50.
4871 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
4872 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4873 skip "MDS older than 2.6.50, LU-5519"
4876 echo "For the name entry under a striped directory, if the name"
4877 echo "hash does not match the shard, then the LFSCK will repair"
4878 echo "the bad name entry"
4881 check_mount_and_prep
4883 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4884 error "(1) Fail to create striped directory"
4886 echo "Inject failure stub on client to simulate the case that"
4887 echo "some name entry should be inserted into other non-second"
4888 echo "shard, but inserted into the secod shard by wrong"
4890 #define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
4891 $LCTL set_param fail_loc=0x1628 fail_val=1
4892 createmany -d $DIR/$tdir/striped_dir/d $((MDSCOUNT * 5)) ||
4893 error "(2) Fail to create file under striped directory"
4894 $LCTL set_param fail_loc=0 fail_val=0
4896 echo "Trigger namespace LFSCK to repair bad name hash"
4897 $START_NAMESPACE -r -A ||
4898 error "(3) Fail to start LFSCK for namespace"
4900 wait_all_targets_blocked namespace completed 4
# Query mds2 directly; $SHOW_NAMESPACE would only report for $SINGLEMDS.
4902 local repaired=$(do_facet mds2 $LCTL get_param -n \
4903 mdd.$(facet_svc mds2).lfsck_namespace |
4904 awk '/^name_hash_repaired/ { print $2 }')
4905 echo "repaired $repaired name entries with bad hash"
4906 [ $repaired -ge 1 ] ||
4907 error "(5) Fail to repair bad name hash: $repaired"
# Remount the client before re-checking the entries.
4909 umount_client $MOUNT || error "(6) umount failed"
4910 mount_client $MOUNT || error "(7) mount failed"
# Every directory created above must be stat-able and removable.
4912 for ((i = 0; i < $((MDSCOUNT * 5)); i++)); do
4913 stat $DIR/$tdir/striped_dir/d$i ||
4914 error "(8) Fail to stat d$i after LFSCK"
4915 rmdir $DIR/$tdir/striped_dir/d$i ||
4916 error "(9) Fail to unlink d$i after LFSCK"
4919 rmdir $DIR/$tdir/striped_dir ||
4920 error "(10) Fail to remove the striped directory after LFSCK"
4922 run_test 31b "The LFSCK can find/repair the name entry with bad name hash (2)"
# test 31c: create a striped directory while fail_loc 0x1629
# (OBD_FAIL_LFSCK_LOST_MASTER_LMV) is set on $SINGLEMDS, leave it untouched,
# and check that the namespace LFSCK reports exactly one
# striped_dirs_repaired and that the directory lists empty and can be
# removed after a client remount.
# Needs at least 2 MDTs and an MDS newer than 2.6.50.
4925 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
4926 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4927 skip "MDS older than 2.6.50, LU-5519"
4930 echo "For some reason, the master MDT-object of the striped directory"
4931 echo "may lost its master LMV EA. If nobody created files under the"
4932 echo "master directly after the master LMV EA lost, then the LFSCK"
4933 echo "should re-generate the master LMV EA."
4936 check_mount_and_prep
4938 echo "Inject failure stub on MDT0 to simulate the case that the"
4939 echo "master MDT-object of the striped directory lost the LMV EA."
4941 #define OBD_FAIL_LFSCK_LOST_MASTER_LMV 0x1629
4942 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
4943 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4944 error "(1) Fail to create striped directory"
4945 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
4947 echo "Trigger namespace LFSCK to re-generate master LMV EA"
4948 $START_NAMESPACE -r -A ||
4949 error "(2) Fail to start LFSCK for namespace"
4951 wait_all_targets_blocked namespace completed 3
4953 local repaired=$($SHOW_NAMESPACE |
4954 awk '/^striped_dirs_repaired/ { print $2 }')
4955 [ $repaired -eq 1 ] ||
4956 error "(4) Fail to re-generate master LMV EA: $repaired"
# "lfs find -H lostlmv" must report exactly one entry for the directory.
4958 local rc=$($LFS find -H lostlmv $DIR/$tdir/striped_dir | wc -l)
4959 [ $rc -eq 1 ] || error "Fail to find flag lost LMV: $rc"
4961 umount_client $MOUNT || error "(5) umount failed"
4962 mount_client $MOUNT || error "(6) mount failed"
# Nothing was created in the directory, so its listing must be empty.
4964 local empty=$(ls $DIR/$tdir/striped_dir/)
4965 [ -z "$empty" ] || error "(7) The master LMV EA is not repaired: $empty"
4967 rmdir $DIR/$tdir/striped_dir ||
4968 error "(8) Fail to remove the striped directory after LFSCK"
4970 run_test 31c "Re-generate the lost master LMV EA for striped directory"
# test 31d: like the lost-master-LMV test, but a file is created in the
# broken striped directory before LFSCK runs. The namespace LFSCK is then
# expected to repair nothing (striped_dirs_repaired == 0); the existing file
# must stay reachable, new creations must fail, and the directory can be
# removed only after "chattr -i".
# Needs at least 2 MDTs and an MDS newer than 2.6.50.
4973 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
4974 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
4975 skip "MDS older than 2.6.50, LU-5519"
4978 echo "For some reason, the master MDT-object of the striped directory"
4979 echo "may lost its master LMV EA. If somebody created files under the"
4980 echo "master directly after the master LMV EA lost, then the LFSCK"
4981 echo "should NOT re-generate the master LMV EA, instead, it should"
4982 echo "change the broken striped dirctory as read-only to prevent"
4983 echo "further damage"
4986 check_mount_and_prep
4988 echo "Inject failure stub on MDT0 to simulate the case that the"
4989 echo "master MDT-object of the striped directory lost the LMV EA."
4991 #define OBD_FAIL_LFSCK_LOST_MASTER_LMV 0x1629
4992 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
4993 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4994 error "(1) Fail to create striped directory"
4995 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0
# Remount the client, then modify the broken directory before LFSCK runs.
4997 umount_client $MOUNT || error "(2) umount failed"
4998 mount_client $MOUNT || error "(3) mount failed"
5000 touch $DIR/$tdir/striped_dir/dummy ||
5001 error "(4) Fail to touch under broken striped directory"
5003 echo "Trigger namespace LFSCK to find out the inconsistency"
5004 $START_NAMESPACE -r -A ||
5005 error "(5) Fail to start LFSCK for namespace"
5007 wait_all_targets_blocked namespace completed 6
# No striped directory may be reported as repaired in this scenario.
5009 local repaired=$($SHOW_NAMESPACE |
5010 awk '/^striped_dirs_repaired/ { print $2 }')
5011 [ $repaired -eq 0 ] ||
5012 error "(7) Re-generate master LMV EA unexpected: $repaired"
5014 stat $DIR/$tdir/striped_dir/dummy ||
5015 error "(8) Fail to stat $DIR/$tdir/striped_dir/dummy"
# Inverted check: a successful touch here is the failure case.
5017 touch $DIR/$tdir/striped_dir/foo &&
5018 error "(9) The broken striped directory should be read-only"
# Clear the immutable attribute so that the directory can be removed.
5020 chattr -i $DIR/$tdir/striped_dir ||
5021 error "(10) Fail to chattr on the broken striped directory"
5023 rmdir $DIR/$tdir/striped_dir ||
5024 error "(11) Fail to remove the striped directory after LFSCK"
5026 run_test 31d "Set broken striped directory (modified after broken) as read-only"
# test 31e: create a striped directory while fail_loc 0x162a
# (OBD_FAIL_LFSCK_LOST_SLAVE_LMV) with fail_val=0 is set on $SINGLEMDS, then
# check that the namespace LFSCK reports exactly one striped_shards_repaired
# on $SINGLEMDS and that the directory can be removed afterwards.
# Needs at least 2 MDTs and an MDS newer than 2.6.50.
5029 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
5030 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
5031 skip "MDS older than 2.6.50, LU-5519"
5034 echo "For some reason, the slave MDT-object of the striped directory"
5035 echo "may lost its slave LMV EA. The LFSCK should re-generate the"
5036 echo "slave LMV EA."
5039 check_mount_and_prep
5041 echo "Inject failure stub on MDT0 to simulate the case that the"
5042 echo "slave MDT-object (that resides on the same MDT as the master"
5043 echo "MDT-object resides on) lost the LMV EA."
5045 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
5046 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=0
5047 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
5048 error "(1) Fail to create striped directory"
5049 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
5051 echo "Trigger namespace LFSCK to re-generate slave LMV EA"
5052 $START_NAMESPACE -r -A ||
5053 error "(2) Fail to start LFSCK for namespace"
5055 wait_all_targets_blocked namespace completed 3
# The counter is read from $SINGLEMDS via $SHOW_NAMESPACE.
5057 local repaired=$($SHOW_NAMESPACE |
5058 awk '/^striped_shards_repaired/ { print $2 }')
5059 [ $repaired -eq 1 ] ||
5060 error "(4) Fail to re-generate slave LMV EA: $repaired"
5062 rmdir $DIR/$tdir/striped_dir ||
5063 error "(5) Fail to remove the striped directory after LFSCK"
5065 run_test 31e "Re-generate the lost slave LMV EA for striped directory (1)"
# test 31f: variant of the lost-slave-LMV test that uses fail_val=1 and
# reads the striped_shards_repaired counter from mds2 rather than from
# $SINGLEMDS; exactly one repair is expected.
# Needs at least 2 MDTs and an MDS newer than 2.6.50.
5068 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
5069 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
5070 skip "MDS older than 2.6.50, LU-5519"
5073 echo "For some reason, the slave MDT-object of the striped directory"
5074 echo "may lost its slave LMV EA. The LFSCK should re-generate the"
5075 echo "slave LMV EA."
5078 check_mount_and_prep
5080 echo "Inject failure stub on MDT0 to simulate the case that the"
5081 echo "slave MDT-object (that resides on different MDT as the master"
5082 echo "MDT-object resides on) lost the LMV EA."
5084 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
5085 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=1
5086 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
5087 error "(1) Fail to create striped directory"
5088 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
5090 echo "Trigger namespace LFSCK to re-generate slave LMV EA"
5091 $START_NAMESPACE -r -A ||
5092 error "(2) Fail to start LFSCK for namespace"
5094 wait_all_targets_blocked namespace completed 3
# Query mds2 directly for its own lfsck_namespace statistics.
5096 local repaired=$(do_facet mds2 $LCTL get_param -n \
5097 mdd.$(facet_svc mds2).lfsck_namespace |
5098 awk '/^striped_shards_repaired/ { print $2 }')
5099 [ $repaired -eq 1 ] ||
5100 error "(4) Fail to re-generate slave LMV EA: $repaired"
5102 rmdir $DIR/$tdir/striped_dir ||
5103 error "(5) Fail to remove the striped directory after LFSCK"
5105 run_test 31f "Re-generate the lost slave LMV EA for striped directory (2)"
# test 31g: create a striped directory while fail_loc 0x162b
# (OBD_FAIL_LFSCK_BAD_SLAVE_LMV) is set on $SINGLEMDS, then check that the
# namespace LFSCK reports exactly one striped_shards_repaired and that,
# after a client remount, a file can be created and removed in the
# directory and the directory itself can be removed.
# Needs at least 2 MDTs and an MDS newer than 2.6.50.
5108 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
5109 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
5110 skip "MDS older than 2.6.50, LU-5519"
5113 echo "For some reason, the stripe index in the slave LMV EA is"
5114 echo "corrupted. The LFSCK should repair the slave LMV EA."
5117 check_mount_and_prep
5119 echo "Inject failure stub on MDT0 to simulate the case that the"
5120 echo "slave LMV EA on the first shard of the striped directory"
5121 echo "claims the same index as the second shard claims"
5123 #define OBD_FAIL_LFSCK_BAD_SLAVE_LMV 0x162b
5124 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162b fail_val=0
5125 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
5126 error "(1) Fail to create striped directory"
5127 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
5129 echo "Trigger namespace LFSCK to repair the slave LMV EA"
5130 $START_NAMESPACE -r -A ||
5131 error "(2) Fail to start LFSCK for namespace"
5133 wait_all_targets_blocked namespace completed 3
5135 local repaired=$($SHOW_NAMESPACE |
5136 awk '/^striped_shards_repaired/ { print $2 }')
5137 [ $repaired -eq 1 ] ||
5138 error "(4) Fail to repair slave LMV EA: $repaired"
# Remount the client, then confirm the directory is usable again.
5140 umount_client $MOUNT || error "(5) umount failed"
5141 mount_client $MOUNT || error "(6) mount failed"
5143 touch $DIR/$tdir/striped_dir/foo ||
5144 error "(7) Fail to touch file after the LFSCK"
5146 rm -f $DIR/$tdir/striped_dir/foo ||
5147 error "(8) Fail to unlink file after the LFSCK"
5149 rmdir $DIR/$tdir/striped_dir ||
5150 error "(9) Fail to remove the striped directory after LFSCK"
5152 run_test 31g "Repair the corrupted slave LMV EA"
# test 31h: create a striped directory while fail_loc 0x162c
# (OBD_FAIL_LFSCK_BAD_SLAVE_NAME) is set on $SINGLEMDS, then check that the
# namespace LFSCK reports exactly one dirent_repaired and that, after a
# client remount, a file can be created and removed in the directory and
# the directory itself can be removed.
# Needs at least 2 MDTs and an MDS newer than 2.6.50.
5155 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
5156 (( $MDS1_VERSION > $(version_code 2.6.50) )) ||
5157 skip "MDS older than 2.6.50, LU-5519"
5160 echo "For some reason, the shard's name entry in the striped"
5161 echo "directory may be corrupted. The LFSCK should repair the"
5162 echo "bad shard's name entry."
5165 check_mount_and_prep
5167 echo "Inject failure stub on MDT0 to simulate the case that the"
5168 echo "first shard's name entry in the striped directory claims"
5169 echo "the same index as the second shard's name entry claims."
5171 #define OBD_FAIL_LFSCK_BAD_SLAVE_NAME 0x162c
5172 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162c fail_val=0
5173 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
5174 error "(1) Fail to create striped directory"
5175 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
5177 echo "Trigger namespace LFSCK to repair the shard's name entry"
5178 $START_NAMESPACE -r -A ||
5179 error "(2) Fail to start LFSCK for namespace"
5181 wait_all_targets_blocked namespace completed 3
5183 local repaired=$($SHOW_NAMESPACE |
5184 awk '/^dirent_repaired/ { print $2 }')
5185 [ $repaired -eq 1 ] ||
5186 error "(4) Fail to repair shard's name entry: $repaired"
# Remount the client, then confirm the directory is usable again.
5188 umount_client $MOUNT || error "(5) umount failed"
5189 mount_client $MOUNT || error "(6) mount failed"
5191 touch $DIR/$tdir/striped_dir/foo ||
5192 error "(7) Fail to touch file after the LFSCK"
5194 rm -f $DIR/$tdir/striped_dir/foo ||
5195 error "(8) Fail to unlink file after the LFSCK"
5197 rmdir $DIR/$tdir/striped_dir ||
5198 error "(9) Fail to remove the striped directory after LFSCK"
5200 run_test 31h "Repair the corrupted shard's name entry"
# test 32a: start a layout LFSCK, stop ost1 while the LFSCK on $SINGLEMDS is
# still in scanning-phase1, and verify that lfsck_stop still succeeds; ost1
# is restarted at the end.
5205 umount_client $MOUNT
# NOTE(review): fail_val=3 with ENGINE_DELAY presumably slows the LFSCK
# engine so that the status check below still sees phase 1 - confirm.
5207 #define OBD_FAIL_LFSCK_ENGINE_DELAY 0x162d
5208 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x162d
5209 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
5211 local STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
5212 [ "$STATUS" == "scanning-phase1" ] ||
5213 error "(2) Expect 'scanning-phase1', but got '$STATUS'"
5216 stop ost1 > /dev/null || error "(3) Fail to stop OST1!"
# Clear the injected delay before asking the LFSCK to stop.
5218 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
5222 $STOP_LFSCK || error "(4) Fail to stop LFSCK!"
5224 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
5225 error "(5) Fail to start ost1"
5227 run_test 32a "stop LFSCK when some OST failed"
# test 32b: start a namespace LFSCK on all targets, stop mds2 while the
# LFSCK on $SINGLEMDS is in scanning-phase1, and verify that lfsck_stop
# still succeeds; mds2 is restarted at the end. Needs at least 2 MDTs.
5231 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
# One directory on MDT index 1 holding two directories striped over all
# MDTs, created before the client is unmounted.
5234 $LFS mkdir -i 1 $DIR/$tdir/dp ||
5235 error "(1) Fail to create $DIR/$tdir/dp"
5236 $LFS mkdir -i 0 -c $MDSCOUNT $DIR/$tdir/dp/dc1 ||
5237 error "(2) Fail to create $DIR/$tdir/dp/dc1"
5238 $LFS mkdir -i 0 -c $MDSCOUNT $DIR/$tdir/dp/dc2 ||
5239 error "(3) Fail to create $DIR/$tdir/dp/dc2"
5240 umount_client $MOUNT
5242 #define OBD_FAIL_LFSCK_ENGINE_DELAY 0x162d
5243 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x162d
5244 $START_NAMESPACE -r -A || error "(4) Fail to start LFSCK for namespace!"
# Wait (the last argument, 32, is passed to wait_update_facet as the limit)
# until the status line on $SINGLEMDS reads scanning-phase1.
5246 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
5247 mdd.${MDT_DEV}.lfsck_namespace |
5248 awk '/^status/ { print \\\$2 }'" "scanning-phase1" 32 || {
5250 error "(5) unexpected status"
5254 stop mds2 > /dev/null || error "(6) Fail to stop MDT2!"
# Clear the injected delay before asking the LFSCK to stop.
5256 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
5260 $STOP_LFSCK || error "(7) Fail to stop LFSCK!"
5262 start mds2 $(mdsdevname 2) $MOUNT_OPTS_NOSCRUB > /dev/null ||
5263 error "(8) Fail to start MDT2"
5265 run_test 32b "stop LFSCK when some MDT failed"
# test 33: run a layout LFSCK and a namespace LFSCK with specific options and
# check that the "param" line of the corresponding statistics on $SINGLEMDS
# reflects them.
# Layout LFSCK started with --dryrun -o -r: the param line is expected to
# read "dryrun,all_targets,orphan".
5271 $START_LAYOUT --dryrun -o -r ||
5272 error "(1) Fail to start layout LFSCK"
5273 wait_all_targets_blocked layout completed 2
5275 local PARAMS=$($SHOW_LAYOUT | awk '/^param/ { print $2 }')
5276 [ "$PARAMS" == "dryrun,all_targets,orphan" ] ||
5277 error "(3) Expect 'dryrun,all_targets,orphan', got '$PARAMS'"
# Namespace LFSCK started with -e abort -A -r: the param line is expected
# to read "failout,all_targets".
5279 $START_NAMESPACE -e abort -A -r ||
5280 error "(4) Fail to start namespace LFSCK"
5281 wait_all_targets_blocked namespace completed 5
5283 PARAMS=$($SHOW_NAMESPACE | awk '/^param/ { print $2 }')
5284 [ "$PARAMS" == "failout,all_targets" ] ||
5285 error "(6) Expect 'failout,all_targets', got '$PARAMS'"
5287 run_test 33 "check LFSCK paramters"
# test 34: create a directory on MDT index 1 while fail_loc 0x1630
# (OBD_FAIL_LFSCK_NO_AGENTOBJ) is set on $SINGLEMDS, then run the namespace
# LFSCK twice: the first run must report dirent_repaired == 1, the second
# run must report 0. Needs at least 2 MDTs and a ZFS backend on mds1.
5291 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
5292 [ "$mds1_FSTYPE" != zfs ] && skip "Only valid for ZFS backend"
5296 #define OBD_FAIL_LFSCK_NO_AGENTOBJ 0x1630
5297 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1630
5298 $LFS mkdir -i 1 $DIR/$tdir/dummy ||
5299 error "(1) Fail to create $DIR/$tdir/dummy"
5301 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# First pass: started without -A, and only $SINGLEMDS is polled for
# "completed".
5302 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
5303 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
5304 mdd.${MDT_DEV}.lfsck_namespace |
5305 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
5307 error "(3) unexpected status"
5310 local repaired=$($SHOW_NAMESPACE |
5311 awk '/^dirent_repaired/ { print $2 }')
5312 [ $repaired -eq 1 ] ||
5313 error "(4) Fail to repair the lost agent object: $repaired"
# Second pass: nothing should be left to repair.
5315 $START_NAMESPACE -r || error "(5) Fail to start LFSCK for namespace!"
5316 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
5317 mdd.${MDT_DEV}.lfsck_namespace |
5318 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
5320 error "(6) unexpected status"
5323 repaired=$($SHOW_NAMESPACE | awk '/^dirent_repaired/ { print $2 }')
5324 [ $repaired -eq 0 ] ||
5325 error "(7) Unexpected repairing: $repaired"
5327 run_test 34 "LFSCK can rebuild the lost agent object"
# test 35: create a directory on MDT index 1 while fail_loc 0x1631
# (OBD_FAIL_LFSCK_NO_AGENTENT) is set on mds2, then run the namespace LFSCK
# on all targets twice: the first run must report agent_entries_repaired == 1
# on mds2, the second run (after setupall) must report 0.
# Needs at least 2 MDTs.
5331 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
5335 #define OBD_FAIL_LFSCK_NO_AGENTENT 0x1631
5336 do_facet mds2 $LCTL set_param fail_loc=0x1631
5337 $LFS mkdir -i 1 $DIR/$tdir/dummy ||
5338 error "(1) Fail to create $DIR/$tdir/dummy"
5341 do_facet mds2 $LCTL set_param fail_loc=0
5342 $START_NAMESPACE -A -r || error "(2) Fail to start LFSCK for namespace!"
# Only mds2 is polled here, so the failure message names MDS2 explicitly
# (no loop variable "k" is set in this test).
5343 wait_update_facet mds2 "$LCTL get_param -n \
5344 mdd.$(facet_svc mds2).lfsck_namespace |
5345 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
5346 error "(3) MDS2 is not the expected 'completed'"
5348 local repaired=$(do_facet mds2 $LCTL get_param -n \
5349 mdd.$(facet_svc mds2).lfsck_namespace |
5350 awk '/^agent_entries_repaired/ { print $2 }')
5351 [ $repaired -eq 1 ] ||
5352 error "(4) Fail to repair the lost agent entry: $repaired"
5354 echo "stopall to cleanup object cache"
5357 setupall > /dev/null
# Second pass: nothing should be left to repair on mds2.
5359 $START_NAMESPACE -A -r || error "(5) Fail to start LFSCK for namespace!"
5360 wait_update_facet mds2 "$LCTL get_param -n \
5361 mdd.$(facet_svc mds2).lfsck_namespace |
5362 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
5363 error "(6) MDS2 is not the expected 'completed'"
5365 repaired=$(do_facet mds2 $LCTL get_param -n \
5366 mdd.$(facet_svc mds2).lfsck_namespace |
5367 awk '/^agent_entries_repaired/ { print $2 }')
5368 [ $repaired -eq 0 ] ||
5369 error "(7) Unexpected repairing: $repaired"
5371 run_test 35 "LFSCK can rebuild the lost agent entry"
# test 36a: create three mirrored files (three mirrors each), write and
# resync them, then split one mirror off each file with "-d" while fail_loc
# 0x1616 (OBD_FAIL_LFSCK_LOST_MDTOBJ) is set on mds1. A layout LFSCK with
# "-r -o" must then report repaired_orphan == 9 on mds1 and every file must
# have three mirrors again. Needs at least 3 OSTs.
5374 [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return
5377 echo "The target MDT-object's LOV EA corrupted as to lose one of the "
5378 echo "mirrors information. The layout LFSCK should rebuild the LOV EA "
5379 echo "with the PFID EA of related OST-object(s) belong to the mirror."
5382 check_mount_and_prep
# Print the grant parameters now and register the same dump (plus lfs df)
# with stack_trap (framework helper; presumably run at test exit - confirm).
5386 lctl get_param osc.*.*grant*
5387 stack_trap "lfs df $DIR; lfs df -i $DIR; lctl get_param osc.*.*grant*"
# Same three-mirror layout for f0, f1 and f2; each mirror has two components
# placed on explicitly chosen OST indices.
5389 $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
5390 -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f0 ||
5391 error "(0) Fail to create mirror file $DIR/$tdir/f0"
5392 $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
5393 -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f1 ||
5394 error "(1) Fail to create mirror file $DIR/$tdir/f1"
5395 $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
5396 -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f2 ||
5397 error "(2) Fail to create mirror file $DIR/$tdir/f2"
# Write 4MB to each file, then resync its mirrors.
5399 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 ||
5400 error "(3) Fail to write $DIR/$tdir/f0"
5401 dd if=/dev/zero of=$DIR/$tdir/f1 bs=1M count=4 ||
5402 error "(4) Fail to write $DIR/$tdir/f1"
5403 dd if=/dev/zero of=$DIR/$tdir/f2 bs=1M count=4 ||
5404 error "(5) Fail to write $DIR/$tdir/f2"
5406 $LFS mirror resync $DIR/$tdir/f0 ||
5407 error "(6) Fail to resync $DIR/$tdir/f0"
5408 $LFS mirror resync $DIR/$tdir/f1 ||
5409 error "(7) Fail to resync $DIR/$tdir/f1"
5410 $LFS mirror resync $DIR/$tdir/f2 ||
5411 error "(8) Fail to resync $DIR/$tdir/f2"
5413 cancel_lru_locks mdc
5414 cancel_lru_locks osc
5416 $LFS getstripe $DIR/$tdir/f0 ||
5417 error "(9) Fail to getstripe for $DIR/$tdir/f0"
5418 $LFS getstripe $DIR/$tdir/f1 ||
5419 error "(10) Fail to getstripe for $DIR/$tdir/f1"
5420 $LFS getstripe $DIR/$tdir/f2 ||
5421 error "(11) Fail to getstripe for $DIR/$tdir/f2"
5423 echo "Inject failure, to simulate the case of missing one mirror in LOV"
5424 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
5425 do_facet mds1 $LCTL set_param fail_loc=0x1616
# A different mirror id is split off (and destroyed, -d) from each file.
5427 $LFS mirror split --mirror-id 1 -d $DIR/$tdir/f0 ||
5428 error "(12) Fail to split 1st mirror from $DIR/$tdir/f0"
5429 $LFS mirror split --mirror-id 2 -d $DIR/$tdir/f1 ||
5430 error "(13) Fail to split 2nd mirror from $DIR/$tdir/f1"
5431 $LFS mirror split --mirror-id 3 -d $DIR/$tdir/f2 ||
5432 error "(14) Fail to split 3rd mirror from $DIR/$tdir/f2"
5436 do_facet mds1 $LCTL set_param fail_loc=0
# Inverted checks: finding the split mirror id in the layout is the failure.
5438 $LFS getstripe $DIR/$tdir/f0 | grep "lcme_mirror_id:.*1" &&
5439 error "(15) The 1st of mirror is not destroyed"
5440 $LFS getstripe $DIR/$tdir/f1 | grep "lcme_mirror_id:.*2" &&
5441 error "(16) The 2nd of mirror is not destroyed"
5442 $LFS getstripe $DIR/$tdir/f2 | grep "lcme_mirror_id:.*3" &&
5443 error "(17) The 3rd of mirror is not destroyed"
# Each file must be down to two mirrors before the LFSCK runs.
5447 mirrors=$($LFS getstripe -N $DIR/$tdir/f0)
5448 [ $mirrors -eq 2 ] || error "(18) $DIR/$tdir/f0 has $mirrors mirrors"
5449 mirrors=$($LFS getstripe -N $DIR/$tdir/f1)
5450 [ $mirrors -eq 2 ] || error "(19) $DIR/$tdir/f1 has $mirrors mirrors"
5451 mirrors=$($LFS getstripe -N $DIR/$tdir/f2)
5452 [ $mirrors -eq 2 ] || error "(20) $DIR/$tdir/f2 has $mirrors mirrors"
5454 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
5455 $START_LAYOUT -r -o || error "(21) Fail to start LFSCK for layout!"
5457 for k in $(seq $MDSCOUNT); do
5458 # The LFSCK status query interval is 30 seconds. For the case
5459 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
5460 # time to guarantee the status sync up.
5461 wait_update_facet mds${k} "$LCTL get_param -n \
5462 mdd.$(facet_svc mds${k}).lfsck_layout |
5463 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
5464 error "(22) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting, after all MDTs completed.
5467 for k in $(seq $OSTCOUNT); do
5468 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
5469 obdfilter.$(facet_svc ost${k}).lfsck_layout |
5470 awk '/^status/ { print $2 }')
5471 [ "$cur_status" == "completed" ] ||
5472 error "(23) OST${k} Expect 'completed', but got '$cur_status'"
# 9 repaired orphans expected (presumably 3 OST-objects per split mirror
# for each of the 3 files - TODO confirm).
5475 local repaired=$(do_facet mds1 $LCTL get_param -n \
5476 mdd.$(facet_svc mds1).lfsck_layout |
5477 awk '/^repaired_orphan/ { print $2 }')
5478 [ $repaired -eq 9 ] ||
5479 error "(24) Expect 9 fixed on mds1, but got: $repaired"
# Each file must be back to three mirrors ...
5481 mirrors=$($LFS getstripe -N $DIR/$tdir/f0)
5482 [ $mirrors -eq 3 ] || error "(25) $DIR/$tdir/f0 has $mirrors mirrors"
5483 mirrors=$($LFS getstripe -N $DIR/$tdir/f1)
5484 [ $mirrors -eq 3 ] || error "(26) $DIR/$tdir/f1 has $mirrors mirrors"
5485 mirrors=$($LFS getstripe -N $DIR/$tdir/f2)
5486 [ $mirrors -eq 3 ] || error "(27) $DIR/$tdir/f2 has $mirrors mirrors"
# ... and the mirror id that was split off must be present again; the full
# layout is printed before failing to help diagnosis.
5488 $LFS getstripe $DIR/$tdir/f0 | grep "lcme_mirror_id:.*1" || {
5489 $LFS getstripe $DIR/$tdir/f0
5490 error "(28) The 1st of mirror is not recovered"
5493 $LFS getstripe $DIR/$tdir/f1 | grep "lcme_mirror_id:.*2" || {
5494 $LFS getstripe $DIR/$tdir/f1
5495 error "(29) The 2nd of mirror is not recovered"
5498 $LFS getstripe $DIR/$tdir/f2 | grep "lcme_mirror_id:.*3" || {
5499 $LFS getstripe $DIR/$tdir/f2
5500 error "(30) The 3rd of mirror is not recovered"
5503 run_test 36a "rebuild LOV EA for mirrored file (1)"
# test 36b: create one file with three mirrors, write and resync it, then
# remove it while fail_loc 0x1616 (OBD_FAIL_LFSCK_LOST_MDTOBJ) is set on
# mds1. A layout LFSCK with "-r -o" must report repaired_orphan == 9 on mds1
# and the file must reappear as .lustre/lost+found/MDT0000/<fid>-R-0 with
# three mirrors and six components.
# Needs at least 3 OSTs; skipped when FILESET is set.
5506 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
5507 [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return
5510 echo "The mirrored file lost its MDT-object, but relatd OST-objects "
5511 echo "are still there. The layout LFSCK should rebuild the LOV EA "
5512 echo "with the PFID EA of related OST-object(s) belong to the file. "
5515 check_mount_and_prep
5517 $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
5518 -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f0 ||
5519 error "(0) Fail to create mirror file $DIR/$tdir/f0"
# Keep the FID: the recovered object is looked up by it after f0 is removed.
5521 local fid=$($LFS path2fid $DIR/$tdir/f0)
5523 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 ||
5524 error "(1) Fail to write $DIR/$tdir/f0"
5525 $LFS mirror resync $DIR/$tdir/f0 ||
5526 error "(2) Fail to resync $DIR/$tdir/f0"
5528 cancel_lru_locks mdc
5529 cancel_lru_locks osc
5531 $LFS getstripe $DIR/$tdir/f0 ||
5532 error "(3) Fail to getstripe for $DIR/$tdir/f0"
5534 echo "Inject failure, to simulate the case of missing the MDT-object"
5535 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
5536 do_facet mds1 $LCTL set_param fail_loc=0x1616
5537 rm -f $DIR/$tdir/f0 || error "(4) Fail to remove $DIR/$tdir/f0"
5541 do_facet mds1 $LCTL set_param fail_loc=0
5543 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
5544 $START_LAYOUT -r -o || error "(5) Fail to start LFSCK for layout!"
5546 for k in $(seq $MDSCOUNT); do
5547 # The LFSCK status query interval is 30 seconds. For the case
5548 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
5549 # time to guarantee the status sync up.
5550 wait_update_facet mds${k} "$LCTL get_param -n \
5551 mdd.$(facet_svc mds${k}).lfsck_layout |
5552 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
5553 error "(6) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting, after all MDTs completed.
5556 for k in $(seq $OSTCOUNT); do
5557 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
5558 obdfilter.$(facet_svc ost${k}).lfsck_layout |
5559 awk '/^status/ { print $2 }')
5560 [ "$cur_status" == "completed" ] ||
5561 error "(7) OST${k} Expect 'completed', but got '$cur_status'"
# 9 repaired orphans expected (presumably 3 OST-objects for each of the
# 3 mirrors - TODO confirm).
5564 local count=$(do_facet mds1 $LCTL get_param -n \
5565 mdd.$(facet_svc mds1).lfsck_layout |
5566 awk '/^repaired_orphan/ { print $2 }')
5567 [ $count -eq 9 ] || error "(8) Expect 9 fixed on mds1, but got: $count"
# The recovered file is expected under lost+found, named after its old FID;
# "count" is reused below for the mirror and component counts.
5569 local name=$MOUNT/.lustre/lost+found/MDT0000/${fid}-R-0
5570 count=$($LFS getstripe --mirror-count $name)
5571 [ $count -eq 3 ] || error "(9) $DIR/$tdir/f0 has $count mirrors"
5573 count=$($LFS getstripe --component-count $name)
5574 [ $count -eq 6 ] || error "(10) $DIR/$tdir/f0 has $count components"
# All three mirror ids must be present; the full layout is printed before
# failing to help diagnosis.
5576 $LFS getstripe $name | grep "lcme_mirror_id:.*1" || {
5577 $LFS getstripe $name
5578 error "(11) The 1st of mirror is not recovered"
5581 $LFS getstripe $name | grep "lcme_mirror_id:.*2" || {
5582 $LFS getstripe $name
5583 error "(12) The 2nd of mirror is not recovered"
5586 $LFS getstripe $name | grep "lcme_mirror_id:.*3" || {
5587 $LFS getstripe $name
5588 error "(13) The 3rd of mirror is not recovered"
5591 run_test 36b "rebuild LOV EA for mirrored file (2)"
# test_36c: a mirrored file is written and resynced, then written again so
# that one mirror is stale. Its MDT-object is then dropped with fail_loc
# 0x1616, and the layout LFSCK is expected to rebuild the LOV EA (mirror
# count, component count and per-component lcme_flags) for the orphan
# OST-objects under .lustre/lost+found/MDT0000.
5594 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
5595 [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return
5598 echo "The mirrored file has been modified, not resynced yet, then "
5599 echo "lost its MDT-object, but relatd OST-objects are still there. "
5600 echo "The layout LFSCK should rebuild the LOV EA and relatd status "
5601 echo "with the PFID EA of related OST-object(s) belong to the file. "
5604 check_mount_and_prep
# Two mirrors (-N), each with two components placed on explicit OSTs
# (0,1 + 2 and 1,2 + 0); presumably 2+1+2+1 = 6 OST-objects in total,
# which is what the repaired_orphan check further down expects.
5606 $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
5608 error "(0) Fail to create mirror file $DIR/$tdir/f0"
5610 local fid=$($LFS path2fid $DIR/$tdir/f0)
5612 # The 1st dd && resync makes all related OST-objects have been written
5613 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 ||
5614 error "(1.1) Fail to write $DIR/$tdir/f0"
5615 $LFS mirror resync $DIR/$tdir/f0 ||
5616 error "(1.2) Fail to resync $DIR/$tdir/f0"
5617 # The 2nd dd makes one mirror to be stale
5618 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 ||
5619 error "(1.3) Fail to write $DIR/$tdir/f0"
5621 cancel_lru_locks mdc
5622 cancel_lru_locks osc
5624 $LFS getstripe $DIR/$tdir/f0 ||
5625 error "(2) Fail to getstripe for $DIR/$tdir/f0"
# Remember the lcme_flags seen in the first and in the last 10 lines of the
# getstripe output, to compare against the layout rebuilt by LFSCK.
5627 local saved_flags1=$($LFS getstripe $DIR/$tdir/f0 | head -n 10 |
5628 awk '/lcme_flags/ { print $2 }')
5629 local saved_flags2=$($LFS getstripe $DIR/$tdir/f0 | tail -n 10 |
5630 awk '/lcme_flags/ { print $2 }')
5632 echo "Inject failure, to simulate the case of missing the MDT-object"
5633 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
5634 do_facet mds1 $LCTL set_param fail_loc=0x1616
5635 rm -f $DIR/$tdir/f0 || error "(3) Fail to remove $DIR/$tdir/f0"
5639 do_facet mds1 $LCTL set_param fail_loc=0
5641 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
5642 $START_LAYOUT -r -o || error "(4) Fail to start LFSCK for layout!"
5644 for k in $(seq $MDSCOUNT); do
5645 # The LFSCK status query interval is 30 seconds. For the case
5646 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
5647 # time to guarantee the status sync up.
5648 wait_update_facet mds${k} "$LCTL get_param -n \
5649 mdd.$(facet_svc mds${k}).lfsck_layout |
5650 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
5651 error "(5) MDS${k} is not the expected 'completed'"
5654 for k in $(seq $OSTCOUNT); do
5655 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
5656 obdfilter.$(facet_svc ost${k}).lfsck_layout |
5657 awk '/^status/ { print $2 }')
5658 [ "$cur_status" == "completed" ] ||
5659 error "(6) OST${k} Expect 'completed', but got '$cur_status'"
5662 local count=$(do_facet mds1 $LCTL get_param -n \
5663 mdd.$(facet_svc mds1).lfsck_layout |
5664 awk '/^repaired_orphan/ { print $2 }')
# The message now reports the value that is actually tested (6); it used to
# say 9, which did not match the condition.
5665 [ $count -eq 6 ] || error "(7) Expect 6 fixed on mds1, but got: $count"
# The rebuilt file is looked up by the original FID under lost+found.
5667 local name=$MOUNT/.lustre/lost+found/MDT0000/${fid}-R-0
5668 count=$($LFS getstripe --mirror-count $name)
5669 [ $count -eq 2 ] || error "(8) $DIR/$tdir/f0 has $count mirrors"
5671 count=$($LFS getstripe --component-count $name)
5672 [ $count -eq 4 ] || error "(9) $DIR/$tdir/f0 has $count components"
5674 local flags=$($LFS getstripe $name | head -n 10 |
5675 awk '/lcme_flags/ { print $2 }')
5676 [ "$flags" == "$saved_flags1" ] || {
5677 $LFS getstripe $name
5678 error "(10) expect flags $saved_flags1, got $flags"
5681 flags=$($LFS getstripe $name | tail -n 10 |
5682 awk '/lcme_flags/ { print $2 }')
5683 [ "$flags" == "$saved_flags2" ] || {
5684 $LFS getstripe $name
5685 error "(11) expect flags $saved_flags2, got $flags"
5688 run_test 36c "rebuild LOV EA for mirrored file (3)"
# test_37: create directory d0 on MDT0 and pass it to a background multiop
# (D_c) -- presumably to keep it open -- then start the namespace LFSCK with
# -r -A and wait (wait_all_targets_blocked) for the "completed" status.
5694 local t_dir="$DIR/$tdir/d0"
5695 check_mount_and_prep
5697 $LFS mkdir -i 0 $t_dir || error "(2) Fail to mkdir $t_dir on MDT0"
5698 multiop_bg_pause $t_dir D_c || { error "multiop failed: $?"; return 1; }
# NOTE(review): the assignment of $PID is not among the lines visible here,
# and the kill below is only reached if error returns rather than exits --
# confirm against the framework's error() before relying on this cleanup.
5702 $START_NAMESPACE -r -A || {
5703 error "(3) Fail to start LFSCK for namespace!"; kill -USR1 $PID; }
5705 wait_all_targets_blocked namespace completed 4
5710 run_test 37 "LFSCK must skip a ORPHAN"
# test_38: create a foreign-layout file (type "none", flags 0xda05, value
# "<uuid1>@<uuid2>"), verify the LOV EA fields printed by getstripe, run the
# namespace and layout LFSCK expecting zero repairs from both, then verify
# the same fields again, check that restriping, reading and writing are all
# rejected, and finally remove the file.
5714 [[ "$MDS1_VERSION" -le $(version_code 2.12.51) ]] &&
5715 skip "Need MDS version newer than 2.12.51"
5717 test_mkdir $DIR/$tdir
5718 local uuid1=$(cat /proc/sys/kernel/random/uuid)
5719 local uuid2=$(cat /proc/sys/kernel/random/uuid)
5721 # create foreign file
5722 $LFS setstripe --foreign=none --flags 0xda05 \
5723 -x "${uuid1}@${uuid2}" $DIR/$tdir/$tfile ||
5724 error "$DIR/$tdir/$tfile: create failed"
5726 $LFS getstripe -v $DIR/$tdir/$tfile |
5727 grep "lfm_magic:.*0x0BD70BD0" ||
5728 error "$DIR/$tdir/$tfile: invalid LOV EA foreign magic"
5729 # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length)
# (presumably 73 is the length of the "<uuid1>@<uuid2>" value -- confirm)
5730 $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_length:.*73" ||
5731 error "$DIR/$tdir/$tfile: invalid LOV EA foreign size"
5732 $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*none" ||
5733 error "$DIR/$tdir/$tfile: invalid LOV EA foreign type"
5734 $LFS getstripe -v $DIR/$tdir/$tfile |
5735 grep "lfm_flags:.*0x0000DA05" ||
5736 error "$DIR/$tdir/$tfile: invalid LOV EA foreign flags"
5737 $LFS getstripe $DIR/$tdir/$tfile |
5738 grep "lfm_value:.*${uuid1}@${uuid2}" ||
5739 error "$DIR/$tdir/$tfile: invalid LOV EA foreign value"
5741 # modify striping should fail
5742 $LFS setstripe -c 2 $DIR/$tdir/$tfile &&
5743 error "$DIR/$tdir/$tfile: setstripe should fail"
5745 $START_NAMESPACE -r -A || error "Fail to start LFSCK for namespace"
5747 wait_all_targets_blocked namespace completed 1
5749 # check that "global" namespace_repaired == 0 !!!
5750 local repaired=$(do_facet mds1 \
5751 "$LCTL lfsck_query -t all -M ${FSNAME}-MDT0000 |
5752 awk '/^namespace_repaired/ { print \\\$2 }'")
5753 [ $repaired -eq 0 ] ||
5754 error "(2) Expect no namespace repair, but got: $repaired"
5756 $START_LAYOUT -A -r || error "Fail to start LFSCK for layout"
5758 wait_all_targets_blocked layout completed 2
5760 # check that "global" layout_repaired == 0 !!!
5761 local repaired=$(do_facet mds1 \
5762 "$LCTL lfsck_query -t all -M ${FSNAME}-MDT0000 |
5763 awk '/^layout_repaired/ { print \\\$2 }'")
5764 [ $repaired -eq 0 ] ||
5765 error "(2) Expect no layout repair, but got: $repaired"
5767 echo "post-lfsck checks of foreign file"
5769 $LFS getstripe -v $DIR/$tdir/$tfile |
5770 grep "lfm_magic:.*0x0BD70BD0" ||
5771 error "$DIR/$tdir/$tfile: invalid LOV EA foreign magic"
5772 # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length)
5773 $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_length:.*73" ||
5774 error "$DIR/$tdir/$tfile: invalid LOV EA foreign size"
5775 $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*none" ||
5776 error "$DIR/$tdir/$tfile: invalid LOV EA foreign type"
5777 $LFS getstripe -v $DIR/$tdir/$tfile |
5778 grep "lfm_flags:.*0x0000DA05" ||
5779 error "$DIR/$tdir/$tfile: invalid LOV EA foreign flags"
5780 $LFS getstripe $DIR/$tdir/$tfile |
5781 grep "lfm_value:.*${uuid1}@${uuid2}" ||
5782 error "$DIR/$tdir/$tfile: invalid LOV EA foreign value"
5784 # modify striping should fail
5785 $LFS setstripe -c 2 $DIR/$tdir/$tfile &&
5786 error "$DIR/$tdir/$tfile: setstripe should fail"
# Reading must be rejected. The failure is reported through error; before,
# the bare message string itself was executed as a command when cat succeeded.
5789 cat $DIR/$tdir/$tfile && error "$DIR/$tdir/$tfile: read should fail"
5790 cat /etc/passwd > $DIR/$tdir/$tfile &&
5791 error "$DIR/$tdir/$tfile: write should fail"
5793 #remove foreign file
5794 rm $DIR/$tdir/$tfile ||
5795 error "$DIR/$tdir/$tfile: remove of foreign file has failed"
5797 run_test 38 "LFSCK does not break foreign file and reverse is also true"
# test_39: create a foreign directory (type "none", flags 0xda05, value
# "<uuid1>@<uuid2>"), verify the LMV EA fields printed by getdirstripe, check
# that creating a file inside it is rejected while chmod and chown succeed,
# run the namespace and layout LFSCK expecting zero repairs from both, then
# repeat the same checks and finally rmdir the foreign directory.
5801 [[ "$MDS1_VERSION" -le $(version_code 2.12.51) ]] &&
5802 skip "Need MDS version newer than 2.12.51"
5804 test_mkdir $DIR/$tdir
5805 local uuid1=$(cat /proc/sys/kernel/random/uuid)
5806 local uuid2=$(cat /proc/sys/kernel/random/uuid)
5808 # create foreign dir
5809 $LFS mkdir --foreign=none --xattr="${uuid1}@${uuid2}" --flags=0xda05 \
5810 $DIR/$tdir/${tdir}2 ||
5811 error "$DIR/$tdir/${tdir}2: create failed"
5813 $LFS getdirstripe -v $DIR/$tdir/${tdir}2 |
5814 grep "lfm_magic:.*0x0CD50CD0" ||
5815 error "$DIR/$tdir/${tdir}2: invalid LMV EA magic"
5816 # lfm_length is LMV EA size - sizeof(lfm_magic) - sizeof(lfm_length)
5817 # - sizeof(lfm_type) - sizeof(lfm_flags)
5818 $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_length:.*73" ||
5819 error "$DIR/$tdir/${tdir}2: invalid LMV EA size"
5820 $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*none" ||
5821 error "$DIR/$tdir/${tdir}2: invalid LMV EA type"
5822 $LFS getdirstripe -v $DIR/$tdir/${tdir}2 |
5823 grep "lfm_flags:.*0x0000DA05" ||
5824 error "$DIR/$tdir/${tdir}2: invalid LMV EA flags"
5825 $LFS getdirstripe $DIR/$tdir/${tdir}2 |
5826 grep "lfm_value.*${uuid1}@${uuid2}" ||
5827 error "$DIR/$tdir/${tdir}2: invalid LMV EA value"
5829 # file create in dir should fail
# (reported through error; the bare message string used to be executed as a
# command when touch unexpectedly succeeded)
5830 touch $DIR/$tdir/${tdir}2/$tfile &&
5831 error "$DIR/${tdir}2: file create should fail"
5834 chmod 777 $DIR/$tdir/${tdir}2 ||
5835 error "$DIR/${tdir}2: chmod failed"
5838 chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/${tdir}2 ||
5839 error "$DIR/${tdir}2: chown failed"
5841 $START_NAMESPACE -r -A || error "Fail to start LFSCK for namespace"
5843 wait_all_targets_blocked namespace completed 1
5845 # check that "global" namespace_repaired == 0 !!!
5846 local repaired=$(do_facet mds1 \
5847 "$LCTL lfsck_query -t all -M ${FSNAME}-MDT0000 |
5848 awk '/^namespace_repaired/ { print \\\$2 }'")
5849 [ $repaired -eq 0 ] ||
5850 error "(2) Expect nothing to be repaired, but got: $repaired"
5852 $START_LAYOUT -A -r || error "Fail to start LFSCK for layout"
5854 wait_all_targets_blocked layout completed 2
5856 # check that "global" layout_repaired == 0 !!!
5857 local repaired=$(do_facet mds1 \
5858 "$LCTL lfsck_query -t all -M ${FSNAME}-MDT0000 |
5859 awk '/^layout_repaired/ { print \\\$2 }'")
5860 [ $repaired -eq 0 ] ||
5861 error "(2) Expect no layout repair, but got: $repaired"
5863 echo "post-lfsck checks of foreign dir"
5865 $LFS getdirstripe -v $DIR/$tdir/${tdir}2 |
5866 grep "lfm_magic:.*0x0CD50CD0" ||
5867 error "$DIR/$tdir/${tdir}2: invalid LMV EA magic"
5868 # lfm_length is LMV EA size - sizeof(lfm_magic) - sizeof(lfm_length)
5869 # - sizeof(lfm_type) - sizeof(lfm_flags)
5870 $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_length:.*73" ||
5871 error "$DIR/$tdir/${tdir}2: invalid LMV EA size"
5872 $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*none" ||
5873 error "$DIR/$tdir/${tdir}2: invalid LMV EA type"
5874 $LFS getdirstripe -v $DIR/$tdir/${tdir}2 |
5875 grep "lfm_flags:.*0x0000DA05" ||
5876 error "$DIR/$tdir/${tdir}2: invalid LMV EA flags"
5877 $LFS getdirstripe $DIR/$tdir/${tdir}2 |
5878 grep "lfm_value.*${uuid1}@${uuid2}" ||
5879 error "$DIR/$tdir/${tdir}2: invalid LMV EA value"
5881 # file create in dir should fail
5882 touch $DIR/$tdir/${tdir}2/$tfile &&
5883 error "$DIR/${tdir}2: file create should fail"
5886 chmod 777 $DIR/$tdir/${tdir}2 ||
5887 error "$DIR/${tdir}2: chmod failed"
5890 chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/${tdir}2 ||
5891 error "$DIR/${tdir}2: chown failed"
5894 rmdir $DIR/$tdir/${tdir}2 ||
5895 error "$DIR/$tdir/${tdir}2: remove of foreign dir has failed"
5897 run_test 39 "LFSCK does not break foreign dir and reverse is also true"
# test_40a: create dir1 on MDT index 1 with a three-component default layout,
# create f1 inside it, migrate dir1 to MDT index 0, run the layout LFSCK on
# ${MDT_DEV}, and require that f1's layout (as reported by get_layout_param)
# is the same before and after.
#
# Every setup step is checked explicitly: if, for example, the migrate
# failed silently, the final layout comparison could pass without the
# scenario ever having been exercised.
5900 [[ $MDSCOUNT -ge 2 ]] || skip "needs >= 2 MDTs"
5902 check_mount_and_prep
5903 $LFS mkdir -i 1 $DIR/$tdir/dir1 ||
	error "(1.1) Fail to mkdir $DIR/$tdir/dir1 on MDT1"
5904 $LFS setstripe -E 1M -c1 -S 1M -E 128M -c2 -S 4M -E eof $DIR/$tdir/dir1 ||
	error "(1.2) Fail to setstripe on $DIR/$tdir/dir1"
5906 touch $DIR/$tdir/dir1/f1 ||
	error "(1.3) Fail to create $DIR/$tdir/dir1/f1"
5907 local layout1=$(get_layout_param $DIR/$tdir/dir1/f1)
5909 echo "Migrate $DIR/$tdir/dir1 from MDT1 to MDT0"
5910 $LFS migrate -m 0 $DIR/$tdir/dir1 ||
	error "(1.4) Fail to migrate $DIR/$tdir/dir1 to MDT0"
5912 echo "trigger LFSCK for layout"
5913 do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t layout -r ||
	error "(1.5) Fail to start LFSCK for layout!"
5915 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
5916 mdd.${MDT_DEV}.lfsck_layout |
5917 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
5919 error "(2) unexpected status"
5922 local layout2=$(get_layout_param $DIR/$tdir/dir1/f1)
5924 [[ "$layout1" == "$layout2" ]] || error "layout lost after lfsck"
5926 run_test 40a "LFSCK correctly fixes lmm_oi in composite layout"
5928 # Put back the MDS/OST sizing and OST count saved before the tests ran.
5929 MDSSIZE=$SAVED_MDSSIZE
5930 OSTSIZE=$SAVED_OSTSIZE
5931 OSTCOUNT=$SAVED_OSTCOUNT
5933 # Last step: clean up and set up again with REFORMAT=yes.
5934 REFORMAT=yes cleanup_and_setup_lustre
5937 check_and_cleanup_lustre