# Environment setup and skip-list construction for the LFSCK sanity tests.
# NOTE(review): every line of this listing carries a leading number and some
# lines are not visible (e.g. the tail of an '&&' chain below); the comments
# added here describe only what is shown.
3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
# Seed the always-skipped test list from the environment.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
# When SLOW is "no", EXCEPT_SLOW is set to the empty string.
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Default LUSTRE to the parent of the directory that contains this script.
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Load the shared test framework.
15 . $LUSTRE/tests/test-framework.sh
# Load the configuration file; ${CONFIG:=...} also assigns the default path
# to CONFIG when it is unset or empty.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with exit status 0 when require_dsh_mds fails.
20 require_dsh_mds || exit 0
# Remember the incoming sizes and OST count (presumably restored later; the
# restore is not visible in this listing).
24 SAVED_MDSSIZE=${MDSSIZE}
25 SAVED_OSTSIZE=${OSTSIZE}
26 SAVED_OSTCOUNT=${OSTCOUNT}
27 # use small MDS + OST size to speed formatting time
28 # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
31 # no need too many OSTs, to reduce the format/start/stop overhead
# Cap the number of OSTs at 4.
32 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
34 # build up a clean test environment.
# MDS older than 2.3.60: call skip, then check_and_cleanup_lustre. The final
# command of this '&&' chain is not visible in this listing.
# The gate that follows it adds test 2c to ALWAYS_EXCEPT when the MDS version
# is <= 2.4.90.
38 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
39 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
42 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
43 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
# ost1 older than 2.5.55: skip tests 11 through 21.
45 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
46 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
# MDS older than 2.6.50: skip 2d, 2e, 3 and 22 through 31.
48 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
49 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27 28 29 30 31"
51 # DNE does not support striped directory on zfs-based backend yet.
52 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
53 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 31"
# Device names and pre-built command strings shared by the tests below.
# The command strings are later expanded unquoted (e.g. "$START_NAMESPACE -r"),
# so they rely on word splitting to become a command plus arguments.
57 MDT_DEV="${FSNAME}-MDT0000"
58 OST_DEV="${FSNAME}-OST0000"
# ${SINGLEMDS//mds/} removes every occurrence of "mds" from SINGLEMDS; the
# remainder is handed to mdsdevname (presumably an MDS index - confirm).
59 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# Start LFSCK of type "namespace" on MDT_DEV via the SINGLEMDS facet.
60 START_NAMESPACE="do_facet $SINGLEMDS \
61 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
# Start LFSCK of type "layout" on MDT_DEV via the SINGLEMDS facet.
62 START_LAYOUT="do_facet $SINGLEMDS \
63 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
# Start LFSCK of type "layout" on OST_DEV via the ost1 facet.
64 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
# Stop LFSCK on MDT_DEV.
65 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the mdd.<MDT_DEV>.lfsck_namespace parameter (-n: value only).
66 SHOW_NAMESPACE="do_facet $SINGLEMDS \
67 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
# Print the mdd.<MDT_DEV>.lfsck_layout parameter.
68 SHOW_LAYOUT="do_facet $SINGLEMDS \
69 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
# Print the obdfilter.<OST_DEV>.lfsck_layout parameter via ost1.
70 SHOW_LAYOUT_ON_OST="do_facet ost1 \
71 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Mount option strings passed to "start" by later tests; the second one adds
# "noscrub".
72 MOUNT_OPTS_SCRUB="-o user_xattr"
73 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
# Body of a file-population helper; its header, the assignments of nfiles,
# ndirs and igif, and the closing fi/done lines are not visible in this
# listing.
82 echo "preparing... $nfiles * $ndirs files will be created $(date)."
# NOTE(review): $igif is unquoted. When it is empty the test collapses to
# "[ ! -z ]", which evaluates false, so the branch is skipped as intended;
# '[ -n "$igif" ]' would express this more directly.
83 if [ ! -z $igif ]; then
84 #define OBD_FAIL_FID_IGIF 0x1504
85 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
# Copy the test scripts into the test directory as sample content.
88 cp $LUSTRE/tests/*.sh $DIR/$tdir/
89 if [ $ndirs -gt 0 ]; then
# createmany -d / -m presumably create $ndirs directories d0.. and files f0..
# (helper not defined here - confirm).
90 createmany -d $DIR/$tdir/d $ndirs
91 createmany -m $DIR/$tdir/f $ndirs
92 if [ $nfiles -gt 0 ]; then
# Populate each d<i> with $nfiles entries; output is discarded and a failure
# is reported through error.
93 for ((i = 0; i < $ndirs; i++)); do
94 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
95 /dev/null || error "createmany $nfiles"
98 createmany -d $DIR/$tdir/e $ndirs
# With igif set: create "dummy" while fail_loc 0x1504 is still armed, then
# clear fail_loc.
101 if [ ! -z $igif ]; then
102 touch $DIR/$tdir/dummy
103 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
106 echo "prepared $(date)."
# Body of the test registered below as test 0 ("Control LFSCK manually").
# The function header, closing braces and any preparation calls are not
# visible in this listing.
# Arm fail_loc 0x1600 with fail_val=3 (presumably slows the scan so that its
# intermediate state can be observed - confirm).
112 #define OBD_FAIL_LFSCK_DELAY1 0x1600
113 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
# Start a namespace LFSCK with -r (the later message "(11) Fail to reset
# LFSCK!" indicates -r means reset).
114 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
# Dump the LFSCK state once; a failure to read it is an error.
116 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
# The second field of the "status" line must be scanning-phase1.
118 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
119 [ "$STATUS" == "scanning-phase1" ] ||
120 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the scan and confirm the status changes to "stopped".
122 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
124 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
125 [ "$STATUS" == "stopped" ] ||
126 error "(6) Expect 'stopped', but got '$STATUS'"
# Start again without -r and expect scanning-phase1 once more.
128 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
130 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
131 [ "$STATUS" == "scanning-phase1" ] ||
132 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection, then wait for the status to read "completed"
# (32 is presumably a timeout in seconds). Inside the double quotes \\\$2
# becomes the literal text \$2, so awk's $2 survives one more round of shell
# evaluation.
134 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
135 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
136 mdd.${MDT_DEV}.lfsck_namespace |
137 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
139 error "(9) unexpected status"
# The updated_phase1 counter must be 0.
142 local repaired=$($SHOW_NAMESPACE |
143 awk '/^updated_phase1/ { print $2 }')
144 [ $repaired -eq 0 ] ||
145 error "(10) Expect nothing to be repaired, but got: $repaired"
# Record success_count, run a full reset scan, and expect the counter to have
# grown by exactly one.
147 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
148 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
149 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
150 mdd.${MDT_DEV}.lfsck_namespace |
151 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
153 error "(12) unexpected status"
156 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
157 [ $((scanned1 + 1)) -eq $scanned2 ] ||
158 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
# Final shutdown check referenced as LU-3649.
160 echo "stopall, should NOT crash LU-3649"
161 stopall || error "(14) Fail to stopall"
163 run_test 0 "Control LFSCK manually"
# Body of the test registered below as test 1a; the function header, closing
# lines and any preparation/unmount steps are not visible in this listing.
# Only runs when the MDS backend is ldiskfs. Note that "return" is reached
# only if skip itself returns 0.
166 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
167 skip "OI Scrub not implemented for ZFS" && return
# Create "dummy" while fail_loc 0x1501 is armed, then clear fail_loc.
171 #define OBD_FAIL_FID_INDIR 0x1501
172 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
173 touch $DIR/$tdir/dummy
175 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a reset namespace scan and wait for status "completed" (32 is
# presumably a timeout in seconds).
177 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
178 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
179 mdd.${MDT_DEV}.lfsck_namespace |
180 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
182 error "(4) unexpected status"
# Read dirent_repaired; when that line is absent, fall back to updated_phase1.
185 local repaired=$($SHOW_NAMESPACE |
186 awk '/^dirent_repaired/ { print $2 }')
187 # for interop with old server
188 [ -z "$repaired" ] &&
189 repaired=$($SHOW_NAMESPACE |
190 awk '/^updated_phase1/ { print $2 }')
# Exactly one repair is expected.
192 [ $repaired -eq 1 ] ||
193 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
195 mount_client $MOUNT || error "(6) Fail to start client!"
# With fail_loc 0x1505 armed, listing the directory must still succeed;
# fail_loc is cleared afterwards.
197 #define OBD_FAIL_FID_LOOKUP 0x1505
198 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
199 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
201 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
203 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
# Body of the test registered below as test 1b; the function header, closing
# lines and any preparation/unmount steps are not visible in this listing.
# Only runs when the MDS backend is ldiskfs ("return" is reached only if skip
# returns 0).
207 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
208 skip "OI Scrub not implemented for ZFS" && return
# Create "dummy" while fail_loc 0x1502 is armed, then clear fail_loc.
212 #define OBD_FAIL_FID_INLMA 0x1502
213 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
214 touch $DIR/$tdir/dummy
216 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Keep fail_loc 0x1506 armed for the duration of the scan.
218 #define OBD_FAIL_FID_NOLMA 0x1506
219 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
220 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
# Wait for status "completed" (32 is presumably a timeout in seconds).
221 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
222 mdd.${MDT_DEV}.lfsck_namespace |
223 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
225 error "(4) unexpected status"
# Read dirent_repaired; when that line is absent, fall back to updated_phase1.
228 local repaired=$($SHOW_NAMESPACE |
229 awk '/^dirent_repaired/ { print $2 }')
230 # for interop with old server
231 [ -z "$repaired" ] &&
232 repaired=$($SHOW_NAMESPACE |
233 awk '/^updated_phase1/ { print $2 }')
# Exactly one repair is expected.
235 [ $repaired -eq 1 ] ||
236 error "(5) Fail to repair the missing FID-in-LMA: $repaired"
238 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
239 mount_client $MOUNT || error "(6) Fail to start client!"
# With fail_loc 0x1505 armed, stat of "dummy" must still succeed; fail_loc is
# cleared afterwards.
241 #define OBD_FAIL_FID_LOOKUP 0x1505
242 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
243 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
245 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
247 run_test 1b "LFSCK can find out and repair the missing FID-in-LMA"
# Body of the test registered below as test 2a; the function header, closing
# lines and any preparation/unmount steps are not visible in this listing.
# Create "dummy" while fail_loc 0x1603 is armed, then clear fail_loc.
252 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
253 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
254 touch $DIR/$tdir/dummy
256 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a reset namespace scan and wait for status "completed" (32 is
# presumably a timeout in seconds).
258 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
259 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
260 mdd.${MDT_DEV}.lfsck_namespace |
261 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
263 error "(4) unexpected status"
# Read linkea_repaired; when that line is absent, fall back to updated_phase2.
266 local repaired=$($SHOW_NAMESPACE |
267 awk '/^linkea_repaired/ { print $2 }')
268 # for interop with old server
269 [ -z "$repaired" ] &&
270 repaired=$($SHOW_NAMESPACE |
271 awk '/^updated_phase2/ { print $2 }')
# Exactly one repair is expected.
273 [ $repaired -eq 1 ] ||
274 error "(5) Fail to repair crashed linkEA: $repaired"
276 mount_client $MOUNT || error "(6) Fail to start client!"
# The stat output must contain "Links: 1".
278 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
279 error "(7) Fail to stat $DIR/$tdir/dummy"
# Round trip: path -> FID -> path must give back the original path.
281 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
282 local dummyname=$($LFS fid2path $DIR $dummyfid)
283 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
284 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
286 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
# Body of the test registered below as test 2b; the function header, closing
# lines and any preparation/unmount steps are not visible in this listing.
# Create "dummy" while fail_loc 0x1604 is armed, then clear fail_loc.
292 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
293 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
294 touch $DIR/$tdir/dummy
296 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a reset namespace scan and wait for status "completed" (32 is
# presumably a timeout in seconds).
298 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
299 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
300 mdd.${MDT_DEV}.lfsck_namespace |
301 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
303 error "(4) unexpected status"
# The updated_phase2 counter must be exactly 1.
306 local repaired=$($SHOW_NAMESPACE |
307 awk '/^updated_phase2/ { print $2 }')
308 [ $repaired -eq 1 ] ||
309 error "(5) Fail to repair crashed linkEA: $repaired"
311 mount_client $MOUNT || error "(6) Fail to start client!"
# The stat output must contain "Links: 1".
313 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
314 error "(7) Fail to stat $DIR/$tdir/dummy"
# Round trip: path -> FID -> path must give back the original path.
316 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
317 local dummyname=$($LFS fid2path $DIR $dummyfid)
318 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
319 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
321 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
# Body of the test registered below as test 2c; the function header, closing
# lines and any preparation/unmount steps are not visible in this listing.
# Create "dummy" while fail_loc 0x1605 is armed, then clear fail_loc.
327 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
328 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
329 touch $DIR/$tdir/dummy
331 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a reset namespace scan and wait for status "completed" (32 is
# presumably a timeout in seconds).
333 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
334 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
335 mdd.${MDT_DEV}.lfsck_namespace |
336 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
338 error "(4) unexpected status"
# The updated_phase2 counter must be exactly 1.
341 local repaired=$($SHOW_NAMESPACE |
342 awk '/^updated_phase2/ { print $2 }')
343 [ $repaired -eq 1 ] ||
344 error "(5) Fail to repair crashed linkEA: $repaired"
346 mount_client $MOUNT || error "(6) Fail to start client!"
# The stat output must contain "Links: 1".
348 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
349 error "(7) Fail to stat $DIR/$tdir/dummy"
# Round trip: path -> FID -> path must give back the original path.
351 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
352 local dummyname=$($LFS fid2path $DIR $dummyfid)
353 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
354 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
356 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
# Body of the test registered below as test 2d; the function header, closing
# lines and any preparation/unmount steps are not visible in this listing.
# Create "dummy" while fail_loc 0x161d is armed, then clear fail_loc.
362 #define OBD_FAIL_LFSCK_NO_LINKEA 0x161d
363 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
364 touch $DIR/$tdir/dummy
366 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a reset namespace scan and wait for status "completed" (32 is
# presumably a timeout in seconds).
368 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
369 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
370 mdd.${MDT_DEV}.lfsck_namespace |
371 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
373 error "(4) unexpected status"
# The linkea_repaired counter must be exactly 1 (no fallback counter is read
# in this test).
376 local repaired=$($SHOW_NAMESPACE |
377 awk '/^linkea_repaired/ { print $2 }')
378 [ $repaired -eq 1 ] ||
379 error "(5) Fail to repair crashed linkEA: $repaired"
381 mount_client $MOUNT || error "(6) Fail to start client!"
# The stat output must contain "Links: 1".
383 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
384 error "(7) Fail to stat $DIR/$tdir/dummy"
# Round trip: path -> FID -> path must give back the original path.
386 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
387 local dummyname=$($LFS fid2path $DIR $dummyfid)
388 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
389 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
391 run_test 2d "LFSCK can recover the missing linkEA entry"
# Body of the test registered below as test 2e; the function header, closing
# lines and any preparation steps are not visible in this listing.
# Needs at least two MDSes ("return" is reached only if skip returns 0).
395 [ $MDSCOUNT -lt 2 ] &&
396 skip "We need at least 2 MDSes for this test" && return
# Create d0 with "-i 1" (per the error text: on MDT1).
400 $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT1"
# Create d0/d1 with "-i 0" (on MDT0) while fail_loc 0x1603 is armed, then
# clear fail_loc.
402 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
403 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
404 $LFS mkdir -i 0 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT0"
405 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Reset scan with the extra -A flag (presumably "all MDTs" - confirm against
# the lctl lfsck_start documentation), then wait for status "completed".
407 $START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace!"
408 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
409 mdd.${MDT_DEV}.lfsck_namespace |
410 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
412 error "(4) unexpected status"
# The linkea_repaired counter must be exactly 1.
415 local repaired=$($SHOW_NAMESPACE |
416 awk '/^linkea_repaired/ { print $2 }')
417 [ $repaired -eq 1 ] ||
418 error "(5) Fail to repair crashed linkEA: $repaired"
# Round trip: path -> FID -> path must give back the original path of d0/d1.
420 local fid=$($LFS path2fid $DIR/$tdir/d0/d1)
421 local name=$($LFS fid2path $DIR $fid)
422 [ "$name" == "$DIR/$tdir/d0/d1" ] ||
423 error "(6) Fail to repair linkEA: $fid $name"
425 run_test 2e "namespace LFSCK can verify remote object linkEA"
# Body of the test registered below as test 3; the function header, closing
# lines and the step that creates d0/f0 and d0/f1 are not visible in this
# listing.
# Hard-link two existing files from d0 into a new "dummy" directory.
431 mkdir $DIR/$tdir/dummy || error "(1) Fail to mkdir"
432 ln $DIR/$tdir/d0/f0 $DIR/$tdir/dummy/f0 || error "(2) Fail to hardlink"
433 ln $DIR/$tdir/d0/f1 $DIR/$tdir/dummy/f1 || error "(3) Fail to hardlink"
# Create "edir" with "-i 0" and two files inside it.
435 $LFS mkdir -i 0 $DIR/$tdir/edir || error "(4) Fail to mkdir"
436 touch $DIR/$tdir/edir/f0 || error "(5) Fail to touch"
437 touch $DIR/$tdir/edir/f1 || error "(6) Fail to touch"
# Hard-link f0 -> w0 while fail_loc 0x1603 is armed.
439 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
440 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
441 ln $DIR/$tdir/edir/f0 $DIR/$tdir/edir/w0 || error "(7) Fail to hardlink"
# Hard-link f1 -> w1 while fail_loc 0x1604 is armed, then clear fail_loc.
443 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
444 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
445 ln $DIR/$tdir/edir/f1 $DIR/$tdir/edir/w1 || error "(8) Fail to hardlink"
447 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a reset namespace scan and wait for status "completed" (32 is
# presumably a timeout in seconds).
449 $START_NAMESPACE -r || error "(9) Fail to start LFSCK for namespace!"
450 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
451 mdd.${MDT_DEV}.lfsck_namespace |
452 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
454 error "(10) unexpected status"
# checked_phase2 must be at least 4.
457 local checked=$($SHOW_NAMESPACE |
458 awk '/^checked_phase2/ { print $2 }')
459 [ $checked -ge 4 ] ||
460 error "(11) Fail to check multiple-linked object: $checked"
# multiple_linked_repaired must be at least 2.
462 local repaired=$($SHOW_NAMESPACE |
463 awk '/^multiple_linked_repaired/ { print $2 }')
464 [ $repaired -ge 2 ] ||
465 error "(12) Fail to repair multiple-linked object: $repaired"
467 run_test 3 "LFSCK can verify multiple-linked objects"
# Body of the test registered below as test 4; the function header, closing
# lines and any preparation steps are not visible in this listing.
# Only runs when the MDS backend is ldiskfs ("return" is reached only if skip
# returns 0).
471 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
472 skip "OI Scrub not implemented for ZFS" && return
# Unmount the client and stop the MDS before the backup/restore.
475 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
476 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
478 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
# Restart the MDS with the "noscrub" mount options.
479 echo "start $SINGLEMDS with disabling OI scrub"
480 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
481 error "(2) Fail to start MDS!"
# Arm fail_loc 0x1601 with fail_val=1, start a reset scan, and wait until
# the "flags" line reads "inconsistent".
483 #define OBD_FAIL_LFSCK_DELAY2 0x1601
484 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
485 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
486 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
487 mdd.${MDT_DEV}.lfsck_namespace |
488 awk '/^flags/ { print \\\$2 }'" "inconsistent" 32 || {
490 error "(5) unexpected status"
# At that point the scan must still be in scanning-phase1.
493 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
494 [ "$STATUS" == "scanning-phase1" ] ||
495 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection and wait for status "completed".
497 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
498 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
499 mdd.${MDT_DEV}.lfsck_namespace |
500 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
502 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): no "local FLAGS" is visible in this block, so this assignment
# may write a global - confirm.
505 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
506 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Read dirent_repaired; when that line is absent, fall back to updated_phase1.
508 local repaired=$($SHOW_NAMESPACE |
509 awk '/^dirent_repaired/ { print $2 }')
510 # for interop with old server
511 [ -z "$repaired" ] &&
512 repaired=$($SHOW_NAMESPACE |
513 awk '/^updated_phase1/ { print $2 }')
# At least 9 repairs are expected.
515 [ $repaired -ge 9 ] ||
516 error "(9) Fail to re-generate FID-in-dirent: $repaired"
518 mount_client $MOUNT || error "(10) Fail to start client!"
# With fail_loc 0x1505 armed, listing the directory must still succeed.
520 #define OBD_FAIL_FID_LOOKUP 0x1505
521 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
522 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
523 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
525 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
# Body of the test registered below as test 5; the function header, closing
# lines and any preparation steps are not visible in this listing.
# Only runs when the MDS backend is ldiskfs ("return" is reached only if skip
# returns 0).
529 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
530 skip "OI Scrub not implemented for ZFS" && return
# Unmount the client and stop the MDS before the backup/restore.
533 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
534 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
# Unlike test 4, mds_backup_restore receives a second argument "1" (its
# meaning is defined by the helper, which is not visible here).
536 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
# Restart the MDS with the "noscrub" mount options.
537 echo "start $SINGLEMDS with disabling OI scrub"
538 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
539 error "(2) Fail to start MDS!"
# Arm fail_loc 0x1601 with fail_val=1, start a reset scan, and wait until
# the "flags" line reads "inconsistent,upgrade".
541 #define OBD_FAIL_LFSCK_DELAY2 0x1601
542 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
543 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
544 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
545 mdd.${MDT_DEV}.lfsck_namespace |
546 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 32 || {
548 error "(5) unexpected status"
# At that point the scan must still be in scanning-phase1.
551 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
552 [ "$STATUS" == "scanning-phase1" ] ||
553 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection and wait for status "completed".
555 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
556 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
557 mdd.${MDT_DEV}.lfsck_namespace |
558 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
560 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): no "local FLAGS" is visible in this block, so this assignment
# may write a global - confirm.
563 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
564 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Read dirent_repaired; when that line is absent, fall back to updated_phase1.
566 local repaired=$($SHOW_NAMESPACE |
567 awk '/^dirent_repaired/ { print $2 }')
568 # for interop with old server
569 [ -z "$repaired" ] &&
570 repaired=$($SHOW_NAMESPACE |
571 awk '/^updated_phase1/ { print $2 }')
# At least 2 repairs are expected.
573 [ $repaired -ge 2 ] ||
574 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
576 mount_client $MOUNT || error "(10) Fail to start client!"
# With fail_loc 0x1505 armed, both stat of "dummy" and listing the directory
# must still succeed; fail_loc is cleared afterwards.
578 #define OBD_FAIL_FID_LOOKUP 0x1505
579 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
580 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
582 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
584 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Round trip: path -> FID -> path must give back the original path.
585 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
586 local dummyname=$($LFS fid2path $DIR $dummyfid)
587 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
588 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
590 run_test 5 "LFSCK can handle IGIF object upgrading"
# Body of the test registered below as test 6a; the function header, closing
# lines, the sleep announced by a comment below, and the tails of two
# pipelines are not visible in this listing.
# Arm fail_loc 0x1600 with fail_val=1 and start a reset scan; it must report
# scanning-phase1.
595 #define OBD_FAIL_LFSCK_DELAY1 0x1600
596 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
597 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
599 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
600 [ "$STATUS" == "scanning-phase1" ] ||
601 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
603 # Sleep 3 sec to guarantee at least one object processed by LFSCK
605 # Fail the LFSCK to guarantee there is at least one checkpoint
# fail_loc is set to 0x80001608, i.e. 0x1608 with the top bit also set
# (presumably a one-shot flag - confirm); then wait for status "failed".
606 #define OBD_FAIL_LFSCK_FATAL1 0x1608
607 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
608 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
609 mdd.${MDT_DEV}.lfsck_namespace |
610 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
612 error "(4) unexpected status"
# POS0 is taken from the last_checkpoint_position line; the last stage of
# this pipeline is not visible in this listing.
615 local POS0=$($SHOW_NAMESPACE |
616 awk '/^last_checkpoint_position/ { print $2 }' |
619 #define OBD_FAIL_LFSCK_DELAY1 0x1600
# Re-arm the delay and restart without -r; the scan must again report
# scanning-phase1.
620 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
621 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
623 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
624 [ "$STATUS" == "scanning-phase1" ] ||
625 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# POS1 is taken from the latest_start_position line (pipeline tail not
# visible); the check that follows requires POS0 < POS1, i.e. the restarted
# scan began beyond the recorded checkpoint.
627 local POS1=$($SHOW_NAMESPACE |
628 awk '/^latest_start_position/ { print $2 }' |
630 [[ $POS0 -lt $POS1 ]] ||
631 error "(7) Expect larger than: $POS0, but got $POS1"
# Clear the fault injection and wait for status "completed".
633 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
634 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
635 mdd.${MDT_DEV}.lfsck_namespace |
636 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
638 error "(8) unexpected status"
641 run_test 6a "LFSCK resumes from last checkpoint (1)"
# Body of the test registered below as test 6b; the function header, closing
# lines, the sleep announced by a comment below, the tails of two pipelines
# and the else/fi of the final comparison are not visible in this listing.
# Arm fail_loc 0x1601 with fail_val=1 and start a reset scan; it must report
# scanning-phase1.
646 #define OBD_FAIL_LFSCK_DELAY2 0x1601
647 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
648 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
650 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
651 [ "$STATUS" == "scanning-phase1" ] ||
652 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
654 # Sleep 5 sec to guarantee that we are in the directory scanning
656 # Fail the LFSCK to guarantee there is at least one checkpoint
# fail_loc is set to 0x80001609, i.e. 0x1609 with the top bit also set
# (presumably a one-shot flag - confirm); then wait for status "failed".
657 #define OBD_FAIL_LFSCK_FATAL2 0x1609
658 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
659 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
660 mdd.${MDT_DEV}.lfsck_namespace |
661 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
663 error "(4) unexpected status"
# O_POS0 / D_POS0 come from fields 2 and 4 of the last_checkpoint_position
# line (the last stage of the O_POS0 pipeline is not visible). Judging by the
# names these are presumably the object and directory positions - confirm.
666 local O_POS0=$($SHOW_NAMESPACE |
667 awk '/^last_checkpoint_position/ { print $2 }' |
670 local D_POS0=$($SHOW_NAMESPACE |
671 awk '/^last_checkpoint_position/ { print $4 }')
# Re-arm the delay and restart without -r; the scan must again report
# scanning-phase1.
673 #define OBD_FAIL_LFSCK_DELAY2 0x1601
674 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
675 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
677 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
678 [ "$STATUS" == "scanning-phase1" ] ||
679 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# O_POS1 / D_POS1 come from fields 2 and 4 of the latest_start_position line
# (the last stage of the O_POS1 pipeline is not visible).
681 local O_POS1=$($SHOW_NAMESPACE |
682 awk '/^latest_start_position/ { print $2 }' |
684 local D_POS1=$($SHOW_NAMESPACE |
685 awk '/^latest_start_position/ { print $4 }')
# If either D_POS value is "N/A", compare the O_POS values (7.1). The D_POS
# comparison (7.2) is presumably the else branch; the else/fi lines are not
# visible in this listing.
687 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
688 [[ $O_POS0 -lt $O_POS1 ]] ||
689 error "(7.1) $O_POS1 is not larger than $O_POS0"
691 [[ $D_POS0 -lt $D_POS1 ]] ||
692 error "(7.2) $D_POS1 is not larger than $D_POS0"
# Clear the fault injection and wait for status "completed".
695 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
696 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
697 mdd.${MDT_DEV}.lfsck_namespace |
698 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
700 error "(8) unexpected status"
703 run_test 6b "LFSCK resumes from last checkpoint (2)"
# Body of the test registered below as test 7a; the function header, closing
# lines and the sleep announced by a comment below are not visible in this
# listing.
# Arm fail_loc 0x1601 with fail_val=1 and start a reset scan; it must report
# scanning-phase1.
710 #define OBD_FAIL_LFSCK_DELAY2 0x1601
711 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
712 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
714 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
715 [ "$STATUS" == "scanning-phase1" ] ||
716 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
718 # Sleep 3 sec to guarantee at least one object processed by LFSCK
# Stop the MDS while the scan has not been stopped explicitly.
720 echo "stop $SINGLEMDS"
721 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
# Clear the fault injection, then restart the MDS with the scrub-enabled
# mount options.
723 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
724 echo "start $SINGLEMDS"
725 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
726 error "(5) Fail to start MDS!"
# No explicit lfsck_start is issued here; the status is nevertheless expected
# to reach "completed" (30 is presumably a timeout in seconds).
728 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
729 mdd.${MDT_DEV}.lfsck_namespace |
730 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
732 error "(6) unexpected status"
735 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
# Body of the test registered below as test 7b; the function header, closing
# lines (including the loop's "done") and any preparation steps are not
# visible in this listing.
# Create dummy0..dummy19 while fail_loc 0x1604 is armed.
741 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
742 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
743 for ((i = 0; i < 20; i++)); do
744 touch $DIR/$tdir/dummy${i}
# Arm fail_loc 0x1602 with fail_val=1, start a reset scan, and wait until
# the status reads "scanning-phase2".
747 #define OBD_FAIL_LFSCK_DELAY3 0x1602
748 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
749 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
750 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
751 mdd.${MDT_DEV}.lfsck_namespace |
752 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
754 error "(4) unexpected status"
# Stop the MDS while the scan is in phase 2.
758 echo "stop $SINGLEMDS"
759 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
# Clear the fault injection, then restart the MDS with the scrub-enabled
# mount options.
761 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
762 echo "start $SINGLEMDS"
763 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
764 error "(6) Fail to start MDS!"
# No explicit lfsck_start is issued here; the status is nevertheless expected
# to reach "completed" (30 is presumably a timeout in seconds).
766 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
767 mdd.${MDT_DEV}.lfsck_namespace |
768 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
770 error "(7) unexpected status"
773 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
# Body of the test registered below as test 8 ("LFSCK state machine"). The
# function header, closing lines, setup after formatall, sleeps, and the
# initialisation/increment of "timer" are not visible in this listing.
# Reformat everything, discarding the output.
778 formatall > /dev/null
# A fresh system must report status "init".
784 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
785 [ "$STATUS" == "init" ] ||
786 error "(2) Expect 'init', but got '$STATUS'"
# Create one directory with fail_loc 0x1603 armed ...
788 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
789 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
790 mkdir $DIR/$tdir/crashed
# ... and dummy0..dummy4 with fail_loc 0x1604 armed (the loop's "done" is not
# visible).
792 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
793 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
794 for ((i = 0; i < 5; i++)); do
795 touch $DIR/$tdir/dummy${i}
798 umount_client $MOUNT || error "(3) Fail to stop client!"
# init -> scanning-phase1: start with fail_loc 0x1601 / fail_val=2 armed.
800 #define OBD_FAIL_LFSCK_DELAY2 0x1601
801 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
802 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
804 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
805 [ "$STATUS" == "scanning-phase1" ] ||
806 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
# scanning-phase1 -> stopped.
808 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
810 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
811 [ "$STATUS" == "stopped" ] ||
812 error "(7) Expect 'stopped', but got '$STATUS'"
# stopped -> scanning-phase1.
814 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
816 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
817 [ "$STATUS" == "scanning-phase1" ] ||
818 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
# scanning-phase1 -> failed: fail_loc 0x80001609 (0x1609 with the top bit
# also set), then wait for status "failed".
820 #define OBD_FAIL_LFSCK_FATAL2 0x1609
821 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
822 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
823 mdd.${MDT_DEV}.lfsck_namespace |
824 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
826 error "(10) unexpected status"
# failed -> scanning-phase1, with fail_loc 0x1600 armed (fail_val is not
# changed by this set_param call).
829 #define OBD_FAIL_LFSCK_DELAY1 0x1600
830 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
831 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
833 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
834 [ "$STATUS" == "scanning-phase1" ] ||
835 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
# Arm fail_loc 0x160a, then stop the MDS.
837 #define OBD_FAIL_LFSCK_CRASH 0x160a
838 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
841 echo "stop $SINGLEMDS"
842 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
# Arm fail_loc 0x160b (named NO_AUTO above) before restarting the MDS.
844 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
845 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
847 echo "start $SINGLEMDS"
848 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
849 error "(14) Fail to start MDS!"
# Poll mdt.<MDT_DEV>.recovery_status until it is no longer "RECOVERING" or
# until timer reaches timeout. The initialisation and increment of "timer"
# and the loop's "done" are not visible in this listing.
851 local timeout=$(max_recovery_time)
854 while [ $timer -lt $timeout ]; do
855 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
856 mdt.${MDT_DEV}.recovery_status |
857 awk '/^status/ { print \\\$2 }'")
858 [ "$STATUS" != "RECOVERING" ] && break;
# NOTE(review): "!=" is a string comparison; it detects the timeout only when
# timer ends up textually equal to timeout.
863 [ $timer != $timeout ] ||
864 error "(14.1) recovery timeout"
# After the restart the LFSCK status must be "crashed".
866 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
867 [ "$STATUS" == "crashed" ] ||
868 error "(15) Expect 'crashed', but got '$STATUS'"
# crashed -> scanning-phase1, with fail_loc 0x1601 armed.
870 #define OBD_FAIL_LFSCK_DELAY2 0x1601
871 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
872 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
874 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
875 [ "$STATUS" == "scanning-phase1" ] ||
876 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the MDS during the scan (this time without the 0x160a injection).
878 echo "stop $SINGLEMDS"
879 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
881 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
882 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
884 echo "start $SINGLEMDS"
885 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
886 error "(19) Fail to start MDS!"
# Second recovery wait, same shape as the first (timer reset/increment and
# "done" not visible).
889 while [ $timer -lt $timeout ]; do
890 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
891 mdt.${MDT_DEV}.recovery_status |
892 awk '/^status/ { print \\\$2 }'")
893 [ "$STATUS" != "RECOVERING" ] && break;
898 [ $timer != $timeout ] ||
899 error "(19.1) recovery timeout"
# After this restart the LFSCK status must be "paused".
901 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
902 [ "$STATUS" == "paused" ] ||
903 error "(20) Expect 'paused', but got '$STATUS'"
# paused -> scanning-phase2: arm fail_loc 0x1602 / fail_val=2, start, and
# wait for "scanning-phase2".
905 #define OBD_FAIL_LFSCK_DELAY3 0x1602
906 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
908 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
909 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
910 mdd.${MDT_DEV}.lfsck_namespace |
911 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
913 error "(22) unexpected status"
# In phase 2 the flags must read exactly "scanned-once,inconsistent".
916 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
917 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
918 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
# scanning-phase2 -> completed once the fault injection is cleared.
920 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
921 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
922 mdd.${MDT_DEV}.lfsck_namespace |
923 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
925 error "(24) unexpected status"
# After completion the flags field must be empty.
928 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
929 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
931 run_test 8 "LFSCK state machine"
# Body of the test registered below as test 9a ("LFSCK speed control (1)").
# The function header, closing lines, the definitions of RUN_TIME1, RUN_TIME2
# and TIME_DIFF, the sleeps between measurements, and the else branch of the
# final backend check are not visible in this listing.
# Skip when /proc/cpuinfo has no line matching "processor.*: 1", i.e. no
# second CPU is listed (the rest of this if-block is not visible).
934 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
935 skip "Testing on UP system, the speed may be inaccurate."
# First stage: reset scan with "-s 100" (BASE_SPEED1 is passed as the speed
# limit).
941 local BASE_SPEED1=100
943 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
946 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
947 [ "$STATUS" == "scanning-phase1" ] ||
948 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
# Sample the reported average phase-1 speed.
950 local SPEED=$($SHOW_NAMESPACE |
951 awk '/^average_speed_phase1/ { print $2 }')
953 # There may be time error, normally it should be less than 2 seconds.
954 # We allow another 20% schedule error.
956 # MAX_MARGIN = 1.2 = 12 / 10
# Upper bound (integer arithmetic, evaluated left to right):
# BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / RUN_TIME1 * 12 / 10.
957 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
958 RUN_TIME1 * 12 / 10))
959 [ $SPEED -lt $MAX_SPEED ] ||
960 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
# Second stage: raise the limit to 300 through lfsck_speed_limit.
963 local BASE_SPEED2=300
965 do_facet $SINGLEMDS \
966 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
# Re-sample the average speed and compute a lower bound: the time-weighted
# mean of both limits, with TIME_DIFF subtracted from each run time, scaled
# by 8/10.
969 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
970 # MIN_MARGIN = 0.8 = 8 / 10
971 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
972 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
973 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# Too slow: on a non-ldiskfs backend this is reported via error_ignore with
# LU-5624. The (5.2) message below belongs to a command in the else branch
# that is not visible in this listing.
974 [ $SPEED -gt $MIN_SPEED ] || {
975 if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
976 error_ignore LU-5624 \
977 "(5.1) Got speed $SPEED, expected more than $MIN_SPEED"
980 "(5.2) Got speed $SPEED, expected more than $MIN_SPEED"
984 # MAX_MARGIN = 1.2 = 12 / 10
# Upper bound for the combined run: same weighted mean with TIME_DIFF added,
# scaled by 12/10.
985 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
986 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
987 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
988 [ $SPEED -lt $MAX_SPEED ] ||
989 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the limit to 0 (presumably "no limit" - confirm) and wait for the scan
# to reach "completed".
991 do_facet $SINGLEMDS \
992 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
994 wait_update_facet $SINGLEMDS \
995 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
996 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
997 error "(7) Failed to get expected 'completed'"
999 run_test 9a "LFSCK speed control (1)"
# Body of the test registered below as test 9b ("LFSCK speed control (2)").
# The function header, closing lines, the definitions of RUN_TIME1, RUN_TIME2
# and TIME_DIFF, the sleeps between measurements, the loop's "done" and the
# else branch of the final backend check are not visible in this listing.
# Skip when /proc/cpuinfo has no line matching "processor.*: 1", i.e. no
# second CPU is listed (the rest of this if-block is not visible).
1002 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
1003 skip "Testing on UP system, the speed may be inaccurate."
# Create 50 directories, 50 files, and 50 files inside each directory while
# fail_loc 0x1604 is armed.
1009 echo "Preparing another 50 * 50 files (with error) at $(date)."
1010 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1011 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
1012 createmany -d $DIR/$tdir/d 50
1013 createmany -m $DIR/$tdir/f 50
1014 for ((i = 0; i < 50; i++)); do
1015 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
# With fail_loc 0x160c armed (named NO_DOUBLESCAN above), a reset scan is
# expected to end in status "stopped" (10 is presumably a timeout in seconds).
1018 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
1019 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
1020 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
1021 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1022 mdd.${MDT_DEV}.lfsck_namespace |
1023 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
1025 error "(5) unexpected status"
1028 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1029 echo "Prepared at $(date)."
# First stage: restart (no -r) with "-s 50"; the scan must now be in
# scanning-phase2.
1031 local BASE_SPEED1=50
1033 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
1036 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1037 [ "$STATUS" == "scanning-phase2" ] ||
1038 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
# Sample the reported average phase-2 speed.
1040 local SPEED=$($SHOW_NAMESPACE |
1041 awk '/^average_speed_phase2/ { print $2 }')
1042 # There may be time error, normally it should be less than 2 seconds.
1043 # We allow another 20% schedule error.
1045 # MAX_MARGIN = 1.2 = 12 / 10
# Upper bound (integer arithmetic, evaluated left to right):
# BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / RUN_TIME1 * 12 / 10.
1046 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
1047 RUN_TIME1 * 12 / 10))
1048 [ $SPEED -lt $MAX_SPEED ] ||
1049 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
1051 # adjust speed limit
1052 local BASE_SPEED2=150
1054 do_facet $SINGLEMDS \
1055 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
# Re-sample the average speed and compute a lower bound: the time-weighted
# mean of both limits, with TIME_DIFF subtracted from each run time, scaled
# by 8/10.
1058 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
1059 # MIN_MARGIN = 0.8 = 8 / 10
1060 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
1061 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
1062 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# Too slow: on a non-ldiskfs backend this is reported via error_ignore with
# LU-5624. The (9.2) message below belongs to a command in the else branch
# that is not visible in this listing.
1063 [ $SPEED -gt $MIN_SPEED ] || {
1064 if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
1065 error_ignore LU-5624 \
1066 "(9.1) Got speed $SPEED, expected more than $MIN_SPEED"
1069 "(9.2) Got speed $SPEED, expected more than $MIN_SPEED"
1073 # MAX_MARGIN = 1.2 = 12 / 10
# Upper bound for the combined run: same weighted mean with TIME_DIFF added,
# scaled by 12/10.
1074 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
1075 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
1076 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
1077 [ $SPEED -lt $MAX_SPEED ] ||
1078 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the limit to 0 (presumably "no limit" - confirm) and wait for the scan
# to reach "completed".
1080 do_facet $SINGLEMDS \
1081 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1082 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1083 mdd.${MDT_DEV}.lfsck_namespace |
1084 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1086 error "(11) unexpected status"
1089 run_test 9b "LFSCK speed control (2)"
# Body of test 10 (registered by the run_test line that closes this block):
# ordinary file operations must keep working while the namespace LFSCK
# reports scanning-phase1.
# NOTE(review): the function header, closing braces and done/sleep lines are
# not visible in this view - confirm against the complete file.
#
# Only run when the MDS backend is ldiskfs.
1093 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
1094 skip "lookup(..)/linkea on ZFS issue" && return
# Even-numbered d${i}/f${i} entries are created under fail_loc 0x1603.
1098 echo "Preparing more files with error at $(date)."
1099 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
1100 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
1102 for ((i = 0; i < 1000; i = $((i+2)))); do
1103 mkdir -p $DIR/$tdir/d${i}
1104 touch $DIR/$tdir/f${i}
1105 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
# Odd-numbered d${i}/f${i} entries are created under fail_loc 0x1604.
1108 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1109 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
1111 for ((i = 1; i < 1000; i = $((i+2)))); do
1112 mkdir -p $DIR/$tdir/d${i}
1113 touch $DIR/$tdir/f${i}
1114 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
# Clear the fail_loc once the tree has been prepared.
1117 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1118 echo "Prepared at $(date)."
# Give f200 a second name inside d200.
1120 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
# Remount the client before the scan is started.
1122 umount_client $MOUNT
1123 mount_client $MOUNT || error "(3) Fail to start client!"
# Start the scan with -r -s 100 and check that it reports phase 1.
1125 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
1128 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1129 [ "$STATUS" == "scanning-phase1" ] ||
1130 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Exercise ls, touch, mkdir, unlink, rm -rf, mv, hard link and symlink on
# the tree; each failure is reported with its own error number.
1132 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
1134 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
1136 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
1138 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
1140 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
1142 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
1144 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
1146 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
1147 error "(14) Fail to softlink!"
# The scan must still report phase 1 after the operations above.
1149 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1150 [ "$STATUS" == "scanning-phase1" ] ||
1151 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# Set the speed limit to 0 and wait for the status to read "completed".
1153 do_facet $SINGLEMDS \
1154 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1155 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1156 mdd.${MDT_DEV}.lfsck_namespace |
1157 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1159 error "(16) unexpected status"
1162 run_test 10 "System is available during LFSCK scanning"
# ost_remove_lastid: mount the backing filesystem of ost${ost} via
# mount_fstype, remove O/${idx}/LAST_ID and O/${idx}/d0/0 below that
# facet's mount point, then unmount it via unmount_fstype.
# Returns 1 if the mount step fails and 2 if the unmount step fails.
# NOTE(review): the assignments of `ost` and `idx` and the end of the
# function are not visible in this view; the caller in test 11a passes
# "1 0", so they are presumably $1 and $2 - confirm.
1165 ost_remove_lastid() {
# Prefix used to run the removal on the OST facet.
1168 local rcmd="do_facet ost${ost}"
1170 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1172 # step 1: local mount
1173 mount_fstype ost${ost} || return 1
1174 # step 2: remove the specified LAST_ID
1175 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
# step 3: unmount again
1177 unmount_fstype ost${ost} || return 2
# Body of test 11a (registered by the run_test line that closes this block):
# after LAST_ID has been removed from ost1, a layout LFSCK started on the
# OST is expected to flag crashed_lastid and then finish with empty flags.
# NOTE(review): the function header, closing braces and some lines between
# the visible ones are not shown in this view.
1181 check_mount_and_prep
1182 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1183 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
# Remove LAST_ID (arguments "1 0"), then start ost1 with
# $MOUNT_OPTS_NOSCRUB.
1188 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1190 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1191 error "(2) Fail to start ost1"
# NOTE(review): fail_loc 0x160e with fail_val=3 presumably delays the scan
# so that the crashed_lastid flag can be observed below - confirm.
1193 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1194 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1196 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1197 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
# Wait for the "flags" field of the OST layout LFSCK to read crashed_lastid.
1199 wait_update_facet ost1 "$LCTL get_param -n \
1200 obdfilter.${OST_DEV}.lfsck_layout |
1201 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1203 error "(5) unexpected status"
# Clear fail_val/fail_loc and wait for the scan to report "completed".
1206 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1208 wait_update_facet ost1 "$LCTL get_param -n \
1209 obdfilter.${OST_DEV}.lfsck_layout |
1210 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1212 error "(6) unexpected status"
# The flags field must be empty once the scan has completed.
1215 echo "the LAST_ID(s) should have been rebuilt"
1216 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1217 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1219 run_test 11a "LFSCK can rebuild lost last_id"
# Body of test 11b (registered by the run_test line that closes this block):
# the on-disk LAST_ID of ost1 is left behind the in-memory value, and the
# layout LFSCK on the OST is expected to bring it back to that value.
# NOTE(review): the function header, closing braces, done/sleep lines and
# some other short lines are not visible in this view.
1222 check_mount_and_prep
1223 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1225 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1226 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1227 do_facet ost1 $LCTL set_param fail_loc=0x160d
# Create 32 more files than the value returned by
# precreated_ost_obj_count 0 0.
1229 local count=$(precreated_ost_obj_count 0 0)
1231 createmany -o $DIR/$tdir/f $((count + 32))
# lastid1: the last_id entry of ost1 whose line matches the sequence read
# from the MDT's osp prealloc_last_seq (second ':'-separated field).
1233 local proc_path="${FSNAME}-OST0000-osc-MDT0000"
1234 local seq=$(do_facet mds1 $LCTL get_param -n \
1235 osp.${proc_path}.prealloc_last_seq)
1236 local lastid1=$(do_facet ost1 "lctl get_param -n \
1237 obdfilter.${ost1_svc}.last_id" | grep $seq |
1238 awk -F: '{ print $2 }')
# Restart ost1 with fail_loc 0x215 set on the facet.
1240 umount_client $MOUNT
1241 stop ost1 || error "(1) Fail to stop ost1"
1243 #define OBD_FAIL_OST_ENOSPC 0x215
1244 do_facet ost1 $LCTL set_param fail_loc=0x215
1246 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1247 error "(2) Fail to start ost1"
# Retry up to 60 times until a last_id value for $seq can be read back.
1249 for ((i = 0; i < 60; i++)); do
1250 lastid2=$(do_facet ost1 "lctl get_param -n \
1251 obdfilter.${ost1_svc}.last_id" | grep $seq |
1252 awk -F: '{ print $2 }')
1253 [ ! -z $lastid2 ] && break;
# The value read after the restart must be smaller than the one before it.
1257 echo "the on-disk LAST_ID should be smaller than the expected one"
1258 [ $lastid1 -gt $lastid2 ] ||
1259 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1261 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1262 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1264 wait_update_facet ost1 "$LCTL get_param -n \
1265 obdfilter.${OST_DEV}.lfsck_layout |
1266 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1268 error "(6) unexpected status"
# Restart ost1 once more and expect last_id for $seq to equal lastid1 again;
# on failure the raw last_id output is printed before reporting the error.
1271 stop ost1 || error "(7) Fail to stop ost1"
1273 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1274 error "(8) Fail to start ost1"
1276 echo "the on-disk LAST_ID should have been rebuilt"
1277 wait_update_facet ost1 "$LCTL get_param -n \
1278 obdfilter.${ost1_svc}.last_id | grep $seq |
1279 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
1280 do_facet ost1 $LCTL get_param -n \
1281 obdfilter.${ost1_svc}.last_id
1282 error "(9) expect lastid1 $seq:$lastid1"
# Clear the fail_loc and stop all targets.
1285 do_facet ost1 $LCTL set_param fail_loc=0
1286 stopall || error "(10) Fail to stopall"
1288 run_test 11b "LFSCK can rebuild crashed last_id"
# Body of test 12 (registered by the run_test line that closes this block):
# a single lctl command with -A must start and stop the namespace and the
# layout LFSCK on every target.
# NOTE(review): the function header, closing brace and the "done" lines of
# the loops are not visible in this view.
1291 [ $MDSCOUNT -lt 2 ] &&
1292 skip "We need at least 2 MDSes for test_12" && return
# Create one directory per MDT (index k - 1) holding 100 files each.
1294 check_mount_and_prep
1295 for k in $(seq $MDSCOUNT); do
1296 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1297 createmany -o $DIR/$tdir/${k}/f 100 ||
1298 error "(0) Fail to create 100 files."
# --- namespace LFSCK: start with -s 1, check, stop, check, restart ---
1301 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1302 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1303 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1305 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1306 for k in $(seq $MDSCOUNT); do
1307 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1308 mdd.$(facet_svc mds${k}).lfsck_namespace |
1309 awk '/^status/ { print $2 }')
1310 [ "$STATUS" == "scanning-phase1" ] ||
1311 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1314 echo "Stop namespace LFSCK on all targets by single lctl command."
1315 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1316 error "(4) Fail to stop LFSCK on all devices!"
1318 echo "All the LFSCK targets should be in 'stopped' status."
1319 for k in $(seq $MDSCOUNT); do
1320 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1321 mdd.$(facet_svc mds${k}).lfsck_namespace |
1322 awk '/^status/ { print $2 }')
1323 [ "$STATUS" == "stopped" ] ||
1324 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
1327 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1328 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1329 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1331 echo "All the LFSCK targets should be in 'completed' status."
1332 for k in $(seq $MDSCOUNT); do
1333 wait_update_facet mds${k} "$LCTL get_param -n \
1334 mdd.$(facet_svc mds${k}).lfsck_namespace |
1335 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1336 error "(7) MDS${k} is not the expected 'completed'"
# --- layout LFSCK: same sequence, bracketed by start_full_debug_logging
# and stop_full_debug_logging; OSTs are checked as well after the stop ---
1339 start_full_debug_logging
1341 echo "Start layout LFSCK on all targets by single command (-s 1)."
1342 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1343 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1345 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1346 for k in $(seq $MDSCOUNT); do
1347 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1348 mdd.$(facet_svc mds${k}).lfsck_layout |
1349 awk '/^status/ { print $2 }')
1350 [ "$STATUS" == "scanning-phase1" ] ||
1351 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1354 echo "Stop layout LFSCK on all targets by single lctl command."
1355 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1356 error "(10) Fail to stop LFSCK on all devices!"
1358 echo "All the LFSCK targets should be in 'stopped' status."
1359 for k in $(seq $MDSCOUNT); do
1360 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1361 mdd.$(facet_svc mds${k}).lfsck_layout |
1362 awk '/^status/ { print $2 }')
1363 [ "$STATUS" == "stopped" ] ||
1364 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# The layout LFSCK state is also read from every OST (obdfilter.*).
1367 for k in $(seq $OSTCOUNT); do
1368 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1369 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1370 awk '/^status/ { print $2 }')
1371 [ "$STATUS" == "stopped" ] ||
1372 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1375 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1376 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1377 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1379 echo "All the LFSCK targets should be in 'completed' status."
1380 for k in $(seq $MDSCOUNT); do
1381 # The LFSCK status query interval is 30 seconds. In case some
1382 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough
1383 # to guarantee that the status has been synced.
1384 wait_update_facet mds${k} "$LCTL get_param -n \
1385 mdd.$(facet_svc mds${k}).lfsck_layout |
1386 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1387 error "(14) MDS${k} is not the expected 'completed'"
1390 stop_full_debug_logging
1392 run_test 12 "single command to trigger LFSCK on all devices"
# Body of test 13 (registered by the run_test line that closes this block):
# 32 files are created under fail_loc 0x160f, and the layout LFSCK is then
# expected to report exactly 32 in its repaired_others counter.
# NOTE(review): the function header and closing braces are not visible in
# this view.
1396 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1397 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1398 echo "MDT-object FID."
1401 check_mount_and_prep
# The fail_loc is active only while the 32 files are being created.
1403 echo "Inject failure stub to simulate bad lmm_oi"
1404 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1405 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1406 createmany -o $DIR/$tdir/f 32
1407 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run the layout LFSCK and wait for it to report "completed".
1409 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1410 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1412 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1413 mdd.${MDT_DEV}.lfsck_layout |
1414 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1416 error "(2) unexpected status"
# One repair is expected per file created above.
1419 local repaired=$($SHOW_LAYOUT |
1420 awk '/^repaired_others/ { print $2 }')
1421 [ $repaired -eq 32 ] ||
1422 error "(3) Fail to repair crashed lmm_oi: $repaired"
1424 run_test 13 "LFSCK can repair crashed lmm_oi"
# Body of test 14 (registered by the run_test line that closes this block):
# files are created while fail_loc 0x1610 is set on ost1; a first layout
# LFSCK must count the dangling references without making the file usable,
# and a second run started with -c must make "stat" succeed.
# NOTE(review): the function header, closing braces and a few short lines
# are not visible in this view.
1428 echo "The OST-object referenced by the MDT-object should be there;"
1429 echo "otherwise, the LFSCK should re-create the missing OST-object."
1432 check_mount_and_prep
1433 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Create count + 31 files plus "guard" while the fail_loc is active.
1435 echo "Inject failure stub to simulate dangling referenced MDT-object"
1436 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1437 do_facet ost1 $LCTL set_param fail_loc=0x1610
1438 local count=$(precreated_ost_obj_count 0 0)
1440 createmany -o $DIR/$tdir/f $((count + 31))
1441 touch $DIR/$tdir/guard
1442 do_facet ost1 $LCTL set_param fail_loc=0
1444 start_full_debug_logging
1446 # exhaust other pre-created dangling cases
1447 count=$(precreated_ost_obj_count 0 0)
1448 createmany -o $DIR/$tdir/a $count ||
1449 error "(0) Fail to create $count files."
# Listing the directory is expected to fail at this point.
1451 echo "'ls' should fail because of dangling referenced MDT-object"
1452 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
# First run (without -c): at least 32 dangling references must be counted.
1454 echo "Trigger layout LFSCK to find out dangling reference"
1455 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1457 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1458 mdd.${MDT_DEV}.lfsck_layout |
1459 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1461 error "(3) unexpected status"
1464 local repaired=$($SHOW_LAYOUT |
1465 awk '/^repaired_dangling/ { print $2 }')
1466 [ $repaired -ge 32 ] ||
1467 error "(4) Fail to repair dangling reference: $repaired"
# "stat" on guard is still expected to fail after the first run.
1469 echo "'stat' should fail because of not repair dangling by default"
1470 stat $DIR/$tdir/guard > /dev/null 2>&1 && error "(5) stat should fail"
# Second run, started with the additional -c option.
1472 echo "Trigger layout LFSCK to repair dangling reference"
1473 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1475 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1476 mdd.${MDT_DEV}.lfsck_layout |
1477 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1479 error "(7) unexpected status"
1482 # There may be some async LFSCK updates in processing, wait for
1483 # a while until the target reparation has been done. LU-4970.
# Poll "stat" on the client until the Size field of guard reads 0; on
# failure the raw stat output is printed before the error is reported.
1485 echo "'stat' should success after layout LFSCK repairing"
1486 wait_update_facet client "stat $DIR/$tdir/guard |
1487 awk '/Size/ { print \\\$2 }'" "0" 32 || {
1488 stat $DIR/$tdir/guard
1490 error "(8) unexpected size"
# The counter must again show at least 32 after the second run.
1493 repaired=$($SHOW_LAYOUT |
1494 awk '/^repaired_dangling/ { print $2 }')
1495 [ $repaired -ge 32 ] ||
1496 error "(9) Fail to repair dangling reference: $repaired"
1498 stop_full_debug_logging
1500 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Body of test 15a (registered by the run_test line that closes this block):
# one file is written while fail_loc 0x1611 is set on ost1, and the layout
# LFSCK is then expected to report exactly 1 in repaired_unmatched_pair.
# NOTE(review): the function header and closing braces are not visible in
# this view.
1504 echo "If the OST-object referenced by the MDT-object back points"
1505 echo "to some non-exist MDT-object, then the LFSCK should repair"
1506 echo "the OST-object to back point to the right MDT-object."
1509 check_mount_and_prep
1510 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1512 echo "Inject failure stub to make the OST-object to back point to"
1513 echo "non-exist MDT-object."
1514 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
# Write f0 under the fail_loc and cancel the osc LRU locks before the
# fail_loc is cleared again.
1516 do_facet ost1 $LCTL set_param fail_loc=0x1611
1517 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1518 cancel_lru_locks osc
1519 do_facet ost1 $LCTL set_param fail_loc=0
# Run the layout LFSCK and wait for it to report "completed".
1521 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1522 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1524 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1525 mdd.${MDT_DEV}.lfsck_layout |
1526 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1528 error "(2) unexpected status"
# Exactly one unmatched pair (f0) is expected to be repaired.
1531 local repaired=$($SHOW_LAYOUT |
1532 awk '/^repaired_unmatched_pair/ { print $2 }')
1533 [ $repaired -eq 1 ] ||
1534 error "(3) Fail to repair unmatched pair: $repaired"
1536 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Body of test 15b (registered by the run_test line that closes this block):
# like 15a, but "guard" is written first without any fail_loc and f0 is then
# written under fail_loc 0x1612; exactly 1 repaired_unmatched_pair is
# expected afterwards.
# NOTE(review): the function header and closing braces are not visible in
# this view.
1540 echo "If the OST-object referenced by the MDT-object back points"
1541 echo "to other MDT-object that doesn't recognize the OST-object,"
1542 echo "then the LFSCK should repair it to back point to the right"
1543 echo "MDT-object (the first one)."
1546 check_mount_and_prep
1547 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# guard is written before the fail_loc is set.
1548 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1549 cancel_lru_locks osc
1551 echo "Inject failure stub to make the OST-object to back point to"
1552 echo "other MDT-object"
1554 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
# f0 is written while the fail_loc is active on ost1.
1555 do_facet ost1 $LCTL set_param fail_loc=0x1612
1556 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1557 cancel_lru_locks osc
1558 do_facet ost1 $LCTL set_param fail_loc=0
# Run the layout LFSCK and wait for it to report "completed".
1560 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1561 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1563 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1564 mdd.${MDT_DEV}.lfsck_layout |
1565 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1567 error "(2) unexpected status"
# Exactly one unmatched pair is expected to be repaired.
1570 local repaired=$($SHOW_LAYOUT |
1571 awk '/^repaired_unmatched_pair/ { print $2 }')
1572 [ $repaired -eq 1 ] ||
1573 error "(3) Fail to repair unmatched pair: $repaired"
1575 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Body of test 15c (registered by the run_test line that closes this block):
# a layout LFSCK is started on all targets while a directory migration is
# delayed by fail_loc 0x1803 on mds2; afterwards 1 unmatched pair and 0
# multiple-referenced repairs are expected.
# NOTE(review): the function header, closing brace, the loop's "done" and
# the definition of LTIME are not visible in this view.
1578 [ $MDSCOUNT -lt 2 ] &&
1579 skip "We need at least 2 MDSes for this test" && return
1582 echo "According to current metadata migration implementation,"
1583 echo "before the old MDT-object is removed, both the new MDT-object"
1584 echo "and old MDT-object will reference the same LOV layout. Then if"
1585 echo "the layout LFSCK finds the new MDT-object by race, it will"
1586 echo "regard related OST-object(s) as multiple referenced case, and"
1587 echo "will try to create new OST-object(s) for the new MDT-object."
1588 echo "To avoid such trouble, the layout LFSCK needs to lock the old"
1589 echo "MDT-object before confirm the multiple referenced case."
# a1 is created with "lfs mkdir -i 1" and holds one 1 MiB file.
1592 check_mount_and_prep
1593 $LFS mkdir -i 1 $DIR/$tdir/a1
1594 $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
1595 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=1
1596 cancel_lru_locks osc
1598 echo "Inject failure stub on MDT1 to delay the migration"
1600 #define OBD_FAIL_MIGRATE_DELAY 0x1803
1601 do_facet mds2 $LCTL set_param fail_val=5 fail_loc=0x1803
# The migration is put in the background so that the LFSCK below can be
# started while it is still running.
1602 echo "Migrate $DIR/$tdir/a1 from MDT1 to MDT0 with delay"
1603 $LFS mv -M 0 $DIR/$tdir/a1 &
1606 echo "Trigger layout LFSCK to race with the migration"
1607 $START_LAYOUT -A -r || error "(1) Fail to start layout LFSCK!"
1609 for k in $(seq $MDSCOUNT); do
1610 # The LFSCK status query interval is 30 seconds. In case some
1611 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough
1612 # to guarantee that the status has been synced.
1613 wait_update_facet mds${k} "$LCTL get_param -n \
1614 mdd.$(facet_svc mds${k}).lfsck_layout |
1615 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1616 error "(2) MDS${k} is not the expected 'completed'"
# Clear the fail_loc/fail_val on mds2, then check both repair counters.
1619 do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0
1620 local repaired=$($SHOW_LAYOUT |
1621 awk '/^repaired_unmatched_pair/ { print $2 }')
1622 [ $repaired -eq 1 ] ||
1623 error "(3) Fail to repair unmatched pair: $repaired"
1625 repaired=$($SHOW_LAYOUT |
1626 awk '/^repaired_multiple_referenced/ { print $2 }')
1627 [ $repaired -eq 0 ] ||
1628 error "(4) Unexpectedly repaird multiple references: $repaired"
1630 run_test 15c "LFSCK can repair unmatched MDT-object/OST-object pairs (3)"
# Body of test 16 (registered by the run_test line that closes this block):
# f0 is chown'ed while fail_loc 0x1613 is set on the MDS, and the layout
# LFSCK is then expected to report exactly 1 in repaired_inconsistent_owner.
# NOTE(review): the function header and closing braces are not visible in
# this view.
1634 echo "If the OST-object's owner information does not match the owner"
1635 echo "information stored in the MDT-object, then the LFSCK trust the"
1636 echo "MDT-object and update the OST-object's owner information."
1639 check_mount_and_prep
1640 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1641 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1642 cancel_lru_locks osc
# The ownership change to 1.1 happens only while the fail_loc is active.
1644 echo "Inject failure stub to skip OST-object owner changing"
1645 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1646 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
1647 chown 1.1 $DIR/$tdir/f0
1648 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run the layout LFSCK and wait for it to report "completed".
1650 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1653 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1655 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1656 mdd.${MDT_DEV}.lfsck_layout |
1657 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1659 error "(2) unexpected status"
# Exactly one owner repair (f0) is expected.
1662 local repaired=$($SHOW_LAYOUT |
1663 awk '/^repaired_inconsistent_owner/ { print $2 }')
1664 [ $repaired -eq 1 ] ||
1665 error "(3) Fail to repair inconsistent owner: $repaired"
1667 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Body of test 17 (registered by the run_test line that closes this block):
# under fail_loc 0x1614 the files guard and f0 end up sharing OST-objects;
# the layout LFSCK is expected to report 1 repaired_multiple_referenced, and
# a later write to f0 must leave the size of guard unchanged.
# NOTE(review): the function header, closing braces and one line of the
# introductory echo text are not visible in this view.
1671 echo "If more than one MDT-objects reference the same OST-object,"
1672 echo "and the OST-object only recognizes one MDT-object, then the"
1673 echo "LFSCK should create new OST-objects for such non-recognized"
1677 check_mount_and_prep
1678 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1680 echo "Inject failure stub to make two MDT-objects to refernce"
1681 echo "the OST-object"
1683 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1684 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
# guard receives 1 MiB of data; f0 is created afterwards with no data.
1686 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1687 cancel_lru_locks osc
1689 createmany -o $DIR/$tdir/f 1
1691 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1693 cancel_lru_locks mdc
1694 cancel_lru_locks osc
# f0 is expected to show the 1 MiB size that was written to guard
# (size is field 5 of "ls -l").
1696 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
1697 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1698 [ $size -eq 1048576 ] ||
1699 error "(1) f0 (wrong) size should be 1048576, but got $size"
# Run the layout LFSCK and wait for it to report "completed".
1701 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1704 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1706 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1707 mdd.${MDT_DEV}.lfsck_layout |
1708 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1710 error "(3) unexpected status"
1713 local repaired=$($SHOW_LAYOUT |
1714 awk '/^repaired_multiple_referenced/ { print $2 }')
1715 [ $repaired -eq 1 ] ||
1716 error "(4) Fail to repair multiple references: $repaired"
# Writing 2 MiB to f0 must not change the 1 MiB size of guard.
1718 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
1719 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1720 error "(5) Fail to write f0."
1721 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1722 [ $size -eq 1048576 ] ||
1723 error "(6) guard size should be 1048576, but got $size"
1725 run_test 17 "LFSCK can repair multiple references"
# Top-level statement executed between the test definitions; its output is
# discarded.
# NOTE(review): "debug=+cache" presumably adds the "cache" flag to the local
# Lustre debug mask for the tests that follow - confirm.
1727 $LCTL set_param debug=+cache > /dev/null
# Body of test 18a (registered by the run_test line that closes this block):
# files are chown'ed while fail_loc 0x1615 is set on the MDS(es); the
# reported file size is expected to be wrong afterwards and to be correct
# again after a layout LFSCK started with -r -o.
# NOTE(review): the function header, closing brace, fi/done lines and the
# definition of LTIME are not visible in this view.
1731 echo "The target MDT-object is there, but related stripe information"
1732 echo "is lost or partly lost. The LFSCK should regenerate the missing"
1733 echo "layout EA entries."
# a1/f1: "lfs mkdir -i 0", stripe count 1, 2 MiB of data.
1736 check_mount_and_prep
1737 $LFS mkdir -i 0 $DIR/$tdir/a1
1738 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1739 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# Reference size: field 6 of "ls -il" (the inode column shifts the size
# from field 5 to field 6).
1741 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1743 $LFS path2fid $DIR/$tdir/a1/f1
1744 $LFS getstripe $DIR/$tdir/a1/f1
# With two or more MDSes, a2/f2 is added: "lfs mkdir -i 1", stripe count 2.
1746 if [ $MDSCOUNT -ge 2 ]; then
1747 $LFS mkdir -i 1 $DIR/$tdir/a2
1748 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
1749 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1750 $LFS path2fid $DIR/$tdir/a2/f2
1751 $LFS getstripe $DIR/$tdir/a2/f2
1754 cancel_lru_locks osc
# The chown calls run while the fail_loc is active on the respective MDS.
1756 echo "Inject failure, to make the MDT-object lost its layout EA"
1757 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1758 do_facet mds1 $LCTL set_param fail_loc=0x1615
1759 chown 1.1 $DIR/$tdir/a1/f1
1761 if [ $MDSCOUNT -ge 2 ]; then
1762 do_facet mds2 $LCTL set_param fail_loc=0x1615
1763 chown 1.1 $DIR/$tdir/a2/f2
# Clear the fail_loc on every MDS that had it set.
1769 do_facet mds1 $LCTL set_param fail_loc=0
1770 if [ $MDSCOUNT -ge 2 ]; then
1771 do_facet mds2 $LCTL set_param fail_loc=0
1774 cancel_lru_locks mdc
1775 cancel_lru_locks osc
# Both files are compared against saved_size (each was written with 2 MiB).
1777 echo "The file size should be incorrect since layout EA is lost"
1778 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1779 [ "$cur_size" != "$saved_size" ] ||
1780 error "(1) Expect incorrect file1 size"
1782 if [ $MDSCOUNT -ge 2 ]; then
1783 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1784 [ "$cur_size" != "$saved_size" ] ||
1785 error "(2) Expect incorrect file2 size"
1788 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1789 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
1791 for k in $(seq $MDSCOUNT); do
1792 # The LFSCK status query interval is 30 seconds. In case some
1793 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough
1794 # to guarantee that the status has been synced.
1795 wait_update_facet mds${k} "$LCTL get_param -n \
1796 mdd.$(facet_svc mds${k}).lfsck_layout |
1797 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1798 error "(4) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" too (read once, without polling).
1801 for k in $(seq $OSTCOUNT); do
1802 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1803 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1804 awk '/^status/ { print $2 }')
1805 [ "$cur_status" == "completed" ] ||
1806 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan counts: 1 on mds1 and, if present, 2 on mds2.
1809 local repaired=$(do_facet mds1 $LCTL get_param -n \
1810 mdd.$(facet_svc mds1).lfsck_layout |
1811 awk '/^repaired_orphan/ { print $2 }')
1812 [ $repaired -eq 1 ] ||
1813 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
1815 if [ $MDSCOUNT -ge 2 ]; then
1816 repaired=$(do_facet mds2 $LCTL get_param -n \
1817 mdd.$(facet_svc mds2).lfsck_layout |
1818 awk '/^repaired_orphan/ { print $2 }')
1819 [ $repaired -eq 2 ] ||
1820 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
# Print FID and striping again after the scan.
1823 $LFS path2fid $DIR/$tdir/a1/f1
1824 $LFS getstripe $DIR/$tdir/a1/f1
1826 if [ $MDSCOUNT -ge 2 ]; then
1827 $LFS path2fid $DIR/$tdir/a2/f2
1828 $LFS getstripe $DIR/$tdir/a2/f2
# The sizes must match saved_size again.
1831 echo "The file size should be correct after layout LFSCK scanning"
1832 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1833 [ "$cur_size" == "$saved_size" ] ||
1834 error "(7) Expect file1 size $saved_size, but got $cur_size"
1836 if [ $MDSCOUNT -ge 2 ]; then
1837 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1838 [ "$cur_size" == "$saved_size" ] ||
1839 error "(8) Expect file2 size $saved_size, but got $cur_size"
1842 run_test 18a "Find out orphan OST-object and repair it (1)"
# Body of test 18b (registered by the run_test line that closes this block):
# files are removed while fail_loc 0x1616 is set on the MDS(es); after a
# layout LFSCK started with -r -o, entries named ${fid}-R-0 are moved from
# .lustre/lost+found/MDTxxxx back to their old paths and their sizes are
# compared with the size saved before the removal.
# NOTE(review): the function header, closing brace, fi/done lines and the
# definition of LTIME are not visible in this view.
1846 echo "The target MDT-object is lost. The LFSCK should re-create the"
1847 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1848 echo "can move it back to normal namespace manually."
# a1/f1: "lfs mkdir -i 0", stripe count 1, 2 MiB of data; size and FID are
# recorded for the checks further down.
1851 check_mount_and_prep
1852 $LFS mkdir -i 0 $DIR/$tdir/a1
1853 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1854 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1855 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1856 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1858 $LFS getstripe $DIR/$tdir/a1/f1
# With two or more MDSes, a2/f2 is added: "lfs mkdir -i 1", stripe count 2.
1860 if [ $MDSCOUNT -ge 2 ]; then
1861 $LFS mkdir -i 1 $DIR/$tdir/a2
1862 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
1863 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1864 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1866 $LFS getstripe $DIR/$tdir/a2/f2
1869 cancel_lru_locks osc
# The rm calls run while the fail_loc is active on the respective MDS.
1871 echo "Inject failure, to simulate the case of missing the MDT-object"
1872 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1873 do_facet mds1 $LCTL set_param fail_loc=0x1616
1874 rm -f $DIR/$tdir/a1/f1
1876 if [ $MDSCOUNT -ge 2 ]; then
1877 do_facet mds2 $LCTL set_param fail_loc=0x1616
1878 rm -f $DIR/$tdir/a2/f2
# Clear the fail_loc on every MDS that had it set.
1884 do_facet mds1 $LCTL set_param fail_loc=0
1885 if [ $MDSCOUNT -ge 2 ]; then
1886 do_facet mds2 $LCTL set_param fail_loc=0
1889 cancel_lru_locks mdc
1890 cancel_lru_locks osc
1892 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1893 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1895 for k in $(seq $MDSCOUNT); do
1896 # The LFSCK status query interval is 30 seconds. In case some
1897 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough
1898 # to guarantee that the status has been synced.
1899 wait_update_facet mds${k} "$LCTL get_param -n \
1900 mdd.$(facet_svc mds${k}).lfsck_layout |
1901 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1902 error "(2) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" too (read once, without polling).
1905 for k in $(seq $OSTCOUNT); do
1906 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1907 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1908 awk '/^status/ { print $2 }')
1909 [ "$cur_status" == "completed" ] ||
1910 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan counts: 1 on mds1 and, if present, 2 on mds2.
1913 local repaired=$(do_facet mds1 $LCTL get_param -n \
1914 mdd.$(facet_svc mds1).lfsck_layout |
1915 awk '/^repaired_orphan/ { print $2 }')
1916 [ $repaired -eq 1 ] ||
1917 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
1919 if [ $MDSCOUNT -ge 2 ]; then
1920 repaired=$(do_facet mds2 $LCTL get_param -n \
1921 mdd.$(facet_svc mds2).lfsck_layout |
1922 awk '/^repaired_orphan/ { print $2 }')
1923 [ $repaired -eq 2 ] ||
1924 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
# Move the entries named after the saved FIDs back to their old paths.
1927 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
1928 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
1929 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
1931 if [ $MDSCOUNT -ge 2 ]; then
1932 local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0
1933 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
# Print FID and striping of the restored files.
1936 $LFS path2fid $DIR/$tdir/a1/f1
1937 $LFS getstripe $DIR/$tdir/a1/f1
1939 if [ $MDSCOUNT -ge 2 ]; then
1940 $LFS path2fid $DIR/$tdir/a2/f2
1941 $LFS getstripe $DIR/$tdir/a2/f2
# Both restored files must show the size recorded before the removal.
1944 echo "The file size should be correct after layout LFSCK scanning"
1945 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1946 [ "$cur_size" == "$saved_size" ] ||
1947 error "(7) Expect file1 size $saved_size, but got $cur_size"
1949 if [ $MDSCOUNT -ge 2 ]; then
1950 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1951 [ "$cur_size" == "$saved_size" ] ||
1952 error "(8) Expect file2 size $saved_size, but got $cur_size"
1955 run_test 18b "Find out orphan OST-object and repair it (2)"
# test_18c: orphan OST-object handling when the MDT-object is lost and the
# OST-object carries no parent FID (scenario as echoed below).
# Flow: create the files while fail_loc 0x1617 is set on ost1, remove them
# while fail_loc 0x1616 is set on the MDS(es), run layout LFSCK, then check
# the repaired_orphan counters and the *-N-* stubs under .lustre/lost+found.
# NOTE(review): the embedded line numbers skip values, so some statements
# (e.g. the assignment of $expected) are not visible in this listing.
1959 echo "The target MDT-object is lost, and the OST-object FID is missing."
1960 echo "The LFSCK should re-create the MDT-object with new FID under the "
1961 echo "directory .lustre/lost+found/MDTxxxx."
1964 check_mount_and_prep
1965 $LFS mkdir -i 0 $DIR/$tdir/a1
1966 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1968 echo "Inject failure, to simulate the case of missing parent FID"
1969 #define OBD_FAIL_LFSCK_NOPFID 0x1617
1970 do_facet ost1 $LCTL set_param fail_loc=0x1617
1972 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1973 $LFS getstripe $DIR/$tdir/a1/f1
# With two or more MDTs, build the same layout in a directory on MDT index 1.
1975 if [ $MDSCOUNT -ge 2 ]; then
1976 $LFS mkdir -i 1 $DIR/$tdir/a2
1977 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a2
1978 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1979 $LFS getstripe $DIR/$tdir/a2/f2
1982 cancel_lru_locks osc
1984 echo "Inject failure, to simulate the case of missing the MDT-object"
1985 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1986 do_facet mds1 $LCTL set_param fail_loc=0x1616
1987 rm -f $DIR/$tdir/a1/f1
1989 if [ $MDSCOUNT -ge 2 ]; then
1990 do_facet mds2 $LCTL set_param fail_loc=0x1616
1991 rm -f $DIR/$tdir/a2/f2
# Clear the fault injection on the MDS(es) before starting LFSCK.
1997 do_facet mds1 $LCTL set_param fail_loc=0
1998 if [ $MDSCOUNT -ge 2 ]; then
1999 do_facet mds2 $LCTL set_param fail_loc=0
2002 cancel_lru_locks mdc
2003 cancel_lru_locks osc
2005 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2006 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
# Poll every MDT until its lfsck_layout status line reads "completed".
2008 for k in $(seq $MDSCOUNT); do
2009 # The LFSCK status query internal is 30 seconds. For the case
2010 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2011 # time to guarantee the status sync up.
2012 wait_update_facet mds${k} "$LCTL get_param -n \
2013 mdd.$(facet_svc mds${k}).lfsck_layout |
2014 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2015 error "(2) MDS${k} is not the expected 'completed'"
# Each OST status is read once (no polling) and must already be "completed".
2018 for k in $(seq $OSTCOUNT); do
2019 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2020 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2021 awk '/^status/ { print $2 }')
2022 [ "$cur_status" == "completed" ] ||
2023 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
2026 if [ $MDSCOUNT -ge 2 ]; then
# mds1's repaired_orphan counter must equal $expected; mds2's must be 0.
2032 local repaired=$(do_facet mds1 $LCTL get_param -n \
2033 mdd.$(facet_svc mds1).lfsck_layout |
2034 awk '/^repaired_orphan/ { print $2 }')
2035 [ $repaired -eq $expected ] ||
2036 error "(4) Expect $expected fixed on mds1, but got: $repaired"
2038 if [ $MDSCOUNT -ge 2 ]; then
2039 repaired=$(do_facet mds2 $LCTL get_param -n \
2040 mdd.$(facet_svc mds2).lfsck_layout |
2041 awk '/^repaired_orphan/ { print $2 }')
2042 [ $repaired -eq 0 ] ||
2043 error "(5) Expect 0 fixed on mds2, but got: $repaired"
2046 ls -ail $MOUNT/.lustre/lost+found/
# NOTE(review): the -name pattern *-N-* is unquoted in both find calls below,
# so the shell may expand it against the current directory before find runs.
2048 echo "There should NOT be some stub under .lustre/lost+found/MDT0001/"
2049 if [ -d $MOUNT/.lustre/lost+found/MDT0001 ]; then
2050 cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name *-N-*)
2052 error "(6) .lustre/lost+found/MDT0001/ should be empty"
2055 echo "There should be some stub under .lustre/lost+found/MDT0000/"
2056 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2057 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
2059 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-N-*)
2060 [ ! -z "$cname" ] ||
2061 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
2063 run_test 18c "Find out orphan OST-object and repair it (3)"
# test_18d: orphan OST-object whose layout EA slot is held by a newly
# created OST-object that was never modified (scenario as echoed below).
# Passes when repaired_orphan on $SINGLEMDS is 1 and the size of f2 is back
# to the value saved before the fault injection.
2067 echo "The target MDT-object layout EA slot is occpuied by some new"
2068 echo "created OST-object when repair dangling reference case. Such"
2069 echo "conflict OST-object has never been modified. Then when found"
2070 echo "the orphan OST-object, LFSCK will replace it with the orphan"
2074 check_mount_and_prep
2076 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2077 echo "guard" > $DIR/$tdir/a1/f1
2078 echo "foo" > $DIR/$tdir/a1/f2
# Remember f2's size (6th column of "ls -il") for the later comparisons.
2079 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2080 $LFS path2fid $DIR/$tdir/a1/f1
2081 $LFS getstripe $DIR/$tdir/a1/f1
2082 $LFS path2fid $DIR/$tdir/a1/f2
2083 $LFS getstripe $DIR/$tdir/a1/f2
2084 cancel_lru_locks osc
2086 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2087 echo "to reference the same OST-object (which is f1's OST-obejct)."
2088 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2089 echo "dangling reference case, but f2's old OST-object is there."
2092 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
2093 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
2094 chown 1.1 $DIR/$tdir/a1/f2
2095 rm -f $DIR/$tdir/a1/f1
2098 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2100 echo "stopall to cleanup object cache"
2103 setupall > /dev/null
2105 echo "The file size should be incorrect since dangling referenced"
2106 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2107 [ "$cur_size" != "$saved_size" ] ||
2108 error "(1) Expect incorrect file2 size"
# Set the DELAY3 fail_loc (fail_val=5) on $SINGLEMDS, start LFSCK, wait for
# mds1 to report "scanning-phase2", then clear the injection.
2110 #define OBD_FAIL_LFSCK_DELAY3 0x1602
2111 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
2113 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2114 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
2116 wait_update_facet mds1 "$LCTL get_param -n \
2117 mdd.$(facet_svc mds1).lfsck_layout |
2118 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2119 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
2121 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
# Poll every MDT until its lfsck_layout status line reads "completed".
2123 for k in $(seq $MDSCOUNT); do
2124 # The LFSCK status query internal is 30 seconds. For the case
2125 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2126 # time to guarantee the status sync up.
2127 wait_update_facet mds${k} "$LCTL get_param -n \
2128 mdd.$(facet_svc mds${k}).lfsck_layout |
2129 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2130 error "(3) MDS${k} is not the expected 'completed'"
# Each OST status is read once (no polling) and must already be "completed".
2133 for k in $(seq $OSTCOUNT); do
2134 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2135 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2136 awk '/^status/ { print $2 }')
2137 [ "$cur_status" == "completed" ] ||
2138 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
2141 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2142 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2143 awk '/^repaired_orphan/ { print $2 }')
2144 [ $repaired -eq 1 ] ||
2145 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
2147 echo "The file size should be correct after layout LFSCK scanning"
2148 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2149 [ "$cur_size" == "$saved_size" ] ||
2150 error "(6) Expect file2 size $saved_size, but got $cur_size"
2152 echo "The LFSCK should find back the original data."
2153 cat $DIR/$tdir/a1/f2
2154 $LFS path2fid $DIR/$tdir/a1/f2
2155 $LFS getstripe $DIR/$tdir/a1/f2
2157 run_test 18d "Find out orphan OST-object and repair it (4)"
# test_18e: like test 18d, but the conflicting new OST-object is modified
# (f2 is appended to) while LFSCK sits in scanning-phase2 (scenario as
# echoed below).
# Passes when repaired_orphan on $SINGLEMDS is 1 and a *-C-* stub under
# .lustre/lost+found/MDT0000/ has the size f2 had before the fault injection.
2161 echo "The target MDT-object layout EA slot is occpuied by some new"
2162 echo "created OST-object when repair dangling reference case. Such"
2163 echo "conflict OST-object has been modified by others. To keep the"
2164 echo "new data, the LFSCK will create a new file to refernece this"
2165 echo "old orphan OST-object."
2168 check_mount_and_prep
2170 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2171 echo "guard" > $DIR/$tdir/a1/f1
2172 echo "foo" > $DIR/$tdir/a1/f2
# Remember f2's size (6th column of "ls -il") for the later comparisons.
2173 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2174 $LFS path2fid $DIR/$tdir/a1/f1
2175 $LFS getstripe $DIR/$tdir/a1/f1
2176 $LFS path2fid $DIR/$tdir/a1/f2
2177 $LFS getstripe $DIR/$tdir/a1/f2
2178 cancel_lru_locks osc
2180 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2181 echo "to reference the same OST-object (which is f1's OST-obejct)."
2182 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2183 echo "dangling reference case, but f2's old OST-object is there."
2186 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
2187 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
2188 chown 1.1 $DIR/$tdir/a1/f2
2189 rm -f $DIR/$tdir/a1/f1
2192 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2194 echo "stopall to cleanup object cache"
2197 setupall > /dev/null
2199 echo "The file size should be incorrect since dangling referenced"
2200 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2201 [ "$cur_size" != "$saved_size" ] ||
2202 error "(1) Expect incorrect file2 size"
# Set the DELAY3 fail_loc (fail_val=10) on $SINGLEMDS so that f2 can be
# written below while mds1 reports "scanning-phase2".
2204 #define OBD_FAIL_LFSCK_DELAY3 0x1602
2205 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
2207 start_full_debug_logging
2209 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2210 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
2212 wait_update_facet mds1 "$LCTL get_param -n \
2213 mdd.$(facet_svc mds1).lfsck_layout |
2214 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2215 error "(3) MDS1 is not the expected 'scanning-phase2'"
2217 # to guarantee all updates are synced.
2221 echo "Write new data to f2 to modify the new created OST-object."
2222 echo "dummy" >> $DIR/$tdir/a1/f2
2224 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
# Poll every MDT until its lfsck_layout status line reads "completed".
2226 for k in $(seq $MDSCOUNT); do
2227 # The LFSCK status query internal is 30 seconds. For the case
2228 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2229 # time to guarantee the status sync up.
2230 wait_update_facet mds${k} "$LCTL get_param -n \
2231 mdd.$(facet_svc mds${k}).lfsck_layout |
2232 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2233 error "(4) MDS${k} is not the expected 'completed'"
# Each OST status is read once (no polling) and must already be "completed".
2236 for k in $(seq $OSTCOUNT); do
2237 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2238 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2239 awk '/^status/ { print $2 }')
2240 [ "$cur_status" == "completed" ] ||
2241 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
2244 stop_full_debug_logging
2246 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2247 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2248 awk '/^repaired_orphan/ { print $2 }')
2249 [ $repaired -eq 1 ] ||
2250 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2252 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2253 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2254 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
# NOTE(review): the -name pattern *-C-* is unquoted, so the shell may expand
# it against the current directory before find runs; cname is also not
# declared local in the visible lines.
2256 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-C-*)
2257 [ ! -z "$cname" ] ||
2258 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
2260 echo "The stub file should keep the original f2 data"
2261 cur_size=$(ls -il $cname | awk '{ print $6 }')
2262 [ "$cur_size" == "$saved_size" ] ||
2263 error "(9) Expect file2 size $saved_size, but got $cur_size"
2266 $LFS path2fid $cname
2267 $LFS getstripe $cname
2269 echo "The f2 should contains new data."
2270 cat $DIR/$tdir/a1/f2
2271 $LFS path2fid $DIR/$tdir/a1/f2
2272 $LFS getstripe $DIR/$tdir/a1/f2
2274 run_test 18e "Find out orphan OST-object and repair it (5)"
# test_18f: orphan handling when one OST fails during first-stage scanning
# (scenario as echoed below). Skipped when fewer than 2 OSTs are configured.
# First LFSCK run, with fail_loc 0x161c set on mds1: every MDT and ost1 must
# report "partial", ost2 "completed", and each checked MDT repaired_orphan 1.
# Second run, without the injection: everything must report "completed" and
# each checked MDT repaired_orphan 2.
2277 [ $OSTCOUNT -lt 2 ] &&
2278 skip "The test needs at least 2 OSTs" && return
2281 echo "The target MDT-object is lost. The LFSCK should re-create the"
2282 echo "MDT-object under .lustre/lost+found/MDTxxxx. If some OST fail"
2283 echo "to verify some OST-object(s) during the first stage-scanning,"
2284 echo "the LFSCK should skip orphan OST-objects for such OST. Others"
2285 echo "should not be affected."
2288 check_mount_and_prep
# a1 uses a 1-stripe layout, a2 a 2-stripe layout; both directories are
# created on MDT index 0.
2289 $LFS mkdir -i 0 $DIR/$tdir/a1
2290 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2291 dd if=/dev/zero of=$DIR/$tdir/a1/guard bs=1M count=2
2292 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
2293 $LFS mkdir -i 0 $DIR/$tdir/a2
2294 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a2
2295 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
2296 $LFS getstripe $DIR/$tdir/a1/f1
2297 $LFS getstripe $DIR/$tdir/a2/f2
# With two or more MDTs, build the same pair of directories on MDT index 1.
2299 if [ $MDSCOUNT -ge 2 ]; then
2300 $LFS mkdir -i 1 $DIR/$tdir/a3
2301 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a3
2302 dd if=/dev/zero of=$DIR/$tdir/a3/guard bs=1M count=2
2303 dd if=/dev/zero of=$DIR/$tdir/a3/f3 bs=1M count=2
2304 $LFS mkdir -i 1 $DIR/$tdir/a4
2305 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a4
2306 dd if=/dev/zero of=$DIR/$tdir/a4/f4 bs=1M count=2
2307 $LFS getstripe $DIR/$tdir/a3/f3
2308 $LFS getstripe $DIR/$tdir/a4/f4
2311 cancel_lru_locks osc
2313 echo "Inject failure, to simulate the case of missing the MDT-object"
2314 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2315 do_facet mds1 $LCTL set_param fail_loc=0x1616
2316 rm -f $DIR/$tdir/a1/f1
2317 rm -f $DIR/$tdir/a2/f2
2319 if [ $MDSCOUNT -ge 2 ]; then
2320 do_facet mds2 $LCTL set_param fail_loc=0x1616
2321 rm -f $DIR/$tdir/a3/f3
2322 rm -f $DIR/$tdir/a4/f4
# Clear the fault injection on the MDS(es) before starting LFSCK.
2328 do_facet mds1 $LCTL set_param fail_loc=0
2329 if [ $MDSCOUNT -ge 2 ]; then
2330 do_facet mds2 $LCTL set_param fail_loc=0
2333 cancel_lru_locks mdc
2334 cancel_lru_locks osc
2336 echo "Inject failure, to simulate the OST0 fail to handle"
2337 echo "MDT0 LFSCK request during the first-stage scanning."
2338 #define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c
2339 do_facet mds1 $LCTL set_param fail_loc=0x161c fail_val=0
2341 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2342 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
# First run: every MDT is expected to end up in "partial".
2344 for k in $(seq $MDSCOUNT); do
2345 # The LFSCK status query internal is 30 seconds. For the case
2346 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2347 # time to guarantee the status sync up.
2348 wait_update_facet mds${k} "$LCTL get_param -n \
2349 mdd.$(facet_svc mds${k}).lfsck_layout |
2350 awk '/^status/ { print \\\$2 }'" "partial" $LTIME ||
2351 error "(2) MDS${k} is not the expected 'partial'"
2354 wait_update_facet ost1 "$LCTL get_param -n \
2355 obdfilter.$(facet_svc ost1).lfsck_layout |
2356 awk '/^status/ { print \\\$2 }'" "partial" $LTIME || {
2357 error "(3) OST1 is not the expected 'partial'"
2360 wait_update_facet ost2 "$LCTL get_param -n \
2361 obdfilter.$(facet_svc ost2).lfsck_layout |
2362 awk '/^status/ { print \\\$2 }'" "completed" $LTIME || {
2363 error "(4) OST2 is not the expected 'completed'"
2366 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2368 local repaired=$(do_facet mds1 $LCTL get_param -n \
2369 mdd.$(facet_svc mds1).lfsck_layout |
2370 awk '/^repaired_orphan/ { print $2 }')
2371 [ $repaired -eq 1 ] ||
2372 error "(5) Expect 1 fixed on mds{1}, but got: $repaired"
2374 if [ $MDSCOUNT -ge 2 ]; then
2375 repaired=$(do_facet mds2 $LCTL get_param -n \
2376 mdd.$(facet_svc mds2).lfsck_layout |
2377 awk '/^repaired_orphan/ { print $2 }')
2378 [ $repaired -eq 1 ] ||
2379 error "(6) Expect 1 fixed on mds{2}, but got: $repaired"
# Second run, with the fault injection cleared above.
2382 echo "Trigger layout LFSCK on all devices again to cleanup"
2383 $START_LAYOUT -r -o || error "(7) Fail to start LFSCK for layout!"
2385 for k in $(seq $MDSCOUNT); do
2386 # The LFSCK status query internal is 30 seconds. For the case
2387 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2388 # time to guarantee the status sync up.
2389 wait_update_facet mds${k} "$LCTL get_param -n \
2390 mdd.$(facet_svc mds${k}).lfsck_layout |
2391 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2392 error "(8) MDS${k} is not the expected 'completed'"
# NOTE(review): cur_status is assigned here without a visible "local".
2395 for k in $(seq $OSTCOUNT); do
2396 cur_status=$(do_facet ost${k} $LCTL get_param -n \
2397 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2398 awk '/^status/ { print $2 }')
2399 [ "$cur_status" == "completed" ] ||
2400 error "(9) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): "repaired" was already declared local earlier in this block.
2404 local repaired=$(do_facet mds1 $LCTL get_param -n \
2405 mdd.$(facet_svc mds1).lfsck_layout |
2406 awk '/^repaired_orphan/ { print $2 }')
2407 [ $repaired -eq 2 ] ||
2408 error "(10) Expect 2 fixed on mds{1}, but got: $repaired"
2410 if [ $MDSCOUNT -ge 2 ]; then
2411 repaired=$(do_facet mds2 $LCTL get_param -n \
2412 mdd.$(facet_svc mds2).lfsck_layout |
2413 awk '/^repaired_orphan/ { print $2 }')
2414 [ $repaired -eq 2 ] ||
2415 error "(11) Expect 2 fixed on mds{2}, but got: $repaired"
2418 run_test 18f "Skip the failed OST(s) when handle orphan OST-objects"
# Runs between tests 18f and 19a on the local node; output is discarded.
# Presumably removes the "cache" flag from the Lustre debug mask - TODO confirm.
2420 $LCTL set_param debug=-cache > /dev/null
# test_19a: with lfsck_verify_pfid set to 1 on OST0000, a read issued while
# fail_loc 0x1619 is set on the local (client) node must fail; a successful
# "cat" is reported as an error.
2423 check_mount_and_prep
2424 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2426 echo "foo" > $DIR/$tdir/a0
2427 echo "guard" > $DIR/$tdir/a1
2428 cancel_lru_locks osc
2430 echo "Inject failure, then client will offer wrong parent FID when read"
2431 do_facet ost1 $LCTL set_param -n \
2432 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2433 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
2434 $LCTL set_param fail_loc=0x1619
2436 echo "Read RPC with wrong parent FID should be denied"
2437 cat $DIR/$tdir/a0 && error "(3) Read should be denied!"
# Clear the client-side fault injection.
2438 $LCTL set_param fail_loc=0
2440 run_test 19a "OST-object inconsistency self detect"
# test_19b: f0 is written while fail_loc 0x1611 is set on ost1. The
# "repaired" value read from lfsck_verify_pfid on OST0000 must be 0 before
# the parameter is set to 1, and 1 after f0 has been read successfully.
2443 check_mount_and_prep
2444 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2446 echo "Inject failure stub to make the OST-object to back point to"
2447 echo "non-exist MDT-object"
2449 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
2450 do_facet ost1 $LCTL set_param fail_loc=0x1611
2451 echo "foo" > $DIR/$tdir/f0
2452 cancel_lru_locks osc
2453 do_facet ost1 $LCTL set_param fail_loc=0
2455 echo "Nothing should be fixed since self detect and repair is disabled"
2456 local repaired=$(do_facet ost1 $LCTL get_param -n \
2457 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2458 awk '/^repaired/ { print $2 }')
2459 [ $repaired -eq 0 ] ||
2460 error "(1) Expected 0 repaired, but got $repaired"
2462 echo "Read RPC with right parent FID should be accepted,"
2463 echo "and cause parent FID on OST to be fixed"
2465 do_facet ost1 $LCTL set_param -n \
2466 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2467 cat $DIR/$tdir/f0 || error "(2) Read should not be denied!"
# Re-read the counter after the successful read above.
2469 repaired=$(do_facet ost1 $LCTL get_param -n \
2470 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2471 awk '/^repaired/ { print $2 }')
2472 [ $repaired -eq 1 ] ||
2473 error "(3) Expected 1 repaired, but got $repaired"
2475 run_test 19b "OST-object inconsistency self repair"
# test_20: files whose MDT-object and some OST-objects are lost must be
# re-created under .lustre/lost+found/MDT0000/ as ${fid}-R-0, possibly with a
# LOV EA hole (scenario as echoed below). Skipped with fewer than 2 OSTs.
# Flow: write f0..f2 (and f3 with more than 2 OSTs), remove them under
# different fail_loc/fail_val settings on mds1, unmount the client, run
# layout LFSCK while ost1 has fail_loc 0x161b set, remount, then check
# pattern flag, stripe count, size and I/O behaviour of every recovered file.
# NOTE(review): the embedded line numbers skip values, so some statements
# (e.g. the assignment of $bcount and the else/fi lines) are not visible here.
2478 [ $OSTCOUNT -lt 2 ] &&
2479 skip "The test needs at least 2 OSTs" && return
2482 echo "The target MDT-object and some of its OST-object are lost."
2483 echo "The LFSCK should find out the left OST-objects and re-create"
2484 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2485 echo "with the partial OST-objects (LOV EA hole)."
2487 echo "New client can access the file with LOV EA hole via normal"
2488 echo "system tools or commands without crash the system."
2490 echo "For old client, even though it cannot access the file with"
2491 echo "LOV EA hole, it should not cause the system crash."
2494 check_mount_and_prep
2495 $LFS mkdir -i 0 $DIR/$tdir/a1
# Stripe count is 3 when more than 2 OSTs exist; the 2-stripe setstripe below
# is presumably the else branch.
2496 if [ $OSTCOUNT -gt 2 ]; then
2497 $LFS setstripe -c 3 -i 0 -S 1M $DIR/$tdir/a1
2500 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a1
2504 # 256 blocks on the stripe0.
2505 # 1 block on the stripe1 for 2 OSTs case.
2506 # 256 blocks on the stripe1 for other cases.
2507 # 1 block on the stripe2 if OSTs > 2
2508 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2509 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2510 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# The FIDs are recorded now because the lost+found names checked later are
# built from them.
2512 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2513 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2514 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
2517 $LFS getstripe $DIR/$tdir/a1/f0
2519 $LFS getstripe $DIR/$tdir/a1/f1
2521 $LFS getstripe $DIR/$tdir/a1/f2
# NOTE(review): fid3 is assigned without a visible "local".
2523 if [ $OSTCOUNT -gt 2 ]; then
2524 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
2525 fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2527 $LFS getstripe $DIR/$tdir/a1/f3
2530 cancel_lru_locks osc
2532 echo "Inject failure..."
2533 echo "To simulate f0 lost MDT-object"
2534 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2535 do_facet mds1 $LCTL set_param fail_loc=0x1616
2536 rm -f $DIR/$tdir/a1/f0
2538 echo "To simulate f1 lost MDT-object and OST-object0"
2539 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
2540 do_facet mds1 $LCTL set_param fail_loc=0x161a
2541 rm -f $DIR/$tdir/a1/f1
# fail_loc stays 0x161a for the removals below; only fail_val changes.
2543 echo "To simulate f2 lost MDT-object and OST-object1"
2544 do_facet mds1 $LCTL set_param fail_val=1
2545 rm -f $DIR/$tdir/a1/f2
2547 if [ $OSTCOUNT -gt 2 ]; then
2548 echo "To simulate f3 lost MDT-object and OST-object2"
2549 do_facet mds1 $LCTL set_param fail_val=2
2550 rm -f $DIR/$tdir/a1/f3
2553 umount_client $MOUNT
2556 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2558 echo "Inject failure to slow down the LFSCK on OST0"
2559 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2560 do_facet ost1 $LCTL set_param fail_loc=0x161b
2562 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2563 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2566 do_facet ost1 $LCTL set_param fail_loc=0
# Poll every MDT until its lfsck_layout status line reads "completed".
2568 for k in $(seq $MDSCOUNT); do
2569 # The LFSCK status query internal is 30 seconds. For the case
2570 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2571 # time to guarantee the status sync up.
2572 wait_update_facet mds${k} "$LCTL get_param -n \
2573 mdd.$(facet_svc mds${k}).lfsck_layout |
2574 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2575 error "(2) MDS${k} is not the expected 'completed'"
# Each OST status is read once (no polling) and must already be "completed".
2578 for k in $(seq $OSTCOUNT); do
2579 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2580 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2581 awk '/^status/ { print $2 }')
2582 [ "$cur_status" == "completed" ] ||
2583 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan on mds1: 9 with more than 2 OSTs, otherwise 4.
2586 local repaired=$(do_facet mds1 $LCTL get_param -n \
2587 mdd.$(facet_svc mds1).lfsck_layout |
2588 awk '/^repaired_orphan/ { print $2 }')
2589 if [ $OSTCOUNT -gt 2 ]; then
2590 [ $repaired -eq 9 ] ||
2591 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2593 [ $repaired -eq 4 ] ||
2594 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
2597 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Bit tested against the "lfs getstripe -L" output in the checks below.
2599 LOV_PATTERN_F_HOLE=0x40000000
# --- Checks on the recovered f0: no hole flag, full stripe count, fully
# readable and writable.
2602 # ${fid0}-R-0 is the old f0
2604 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
2605 echo "Check $name, which is the old f0"
2607 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
2609 local pattern=0x$($LFS getstripe -L $name)
2610 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2611 error "(5.2) NOT expect pattern flag hole, but got $pattern"
2613 local stripes=$($LFS getstripe -c $name)
2614 if [ $OSTCOUNT -gt 2 ]; then
2615 [ $stripes -eq 3 ] ||
2616 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2618 [ $stripes -eq 2 ] ||
2619 error "(5.3.2) expect the stripe count is 2, but got $stripes"
2622 local size=$(stat $name | awk '/Size:/ { print $2 }')
2623 [ $size -eq $((4096 * $bcount)) ] ||
2624 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
2626 cat $name > /dev/null || error "(5.5) cannot read $name"
2628 echo "dummy" >> $name || error "(5.6) cannot write $name"
2630 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2632 touch $name || error "(5.8) cannot touch $name"
2634 rm -f $name || error "(5.9) cannot unlink $name"
# --- Checks on the recovered f1: hole flag set, plain read and append must
# fail, a write at block offset 300 must succeed.
2637 # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2639 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
2640 if [ $OSTCOUNT -gt 2 ]; then
2641 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2643 echo "Check $name, it contains the old f1's stripe1"
2646 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
2648 pattern=0x$($LFS getstripe -L $name)
2649 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2650 error "(6.2) expect pattern flag hole, but got $pattern"
2652 stripes=$($LFS getstripe -c $name)
2653 if [ $OSTCOUNT -gt 2 ]; then
2654 [ $stripes -eq 3 ] ||
2655 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2657 [ $stripes -eq 2 ] ||
2658 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2661 size=$(stat $name | awk '/Size:/ { print $2 }')
2662 [ $size -eq $((4096 * $bcount)) ] ||
2663 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
2665 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# Copy with conv=sync,noerror so dd keeps going past read errors, and count
# the "Input/output error" lines it prints.
2667 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2668 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2671 [ $failures -eq 256 ] ||
2672 error "(6.6) expect 256 IO failures, but get $failures"
2674 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2675 [ $size -eq $((4096 * $bcount)) ] ||
2676 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
2678 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2679 error "(6.8) write to the LOV EA hole should fail"
2681 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2682 error "(6.9) write to normal stripe should NOT fail"
2684 echo "foo" >> $name && error "(6.10) append write $name should fail"
2686 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2688 touch $name || error "(6.12) cannot touch $name"
2690 rm -f $name || error "(6.13) cannot unlink $name"
# --- Checks on the recovered f2. With more than 2 OSTs the file must carry
# the hole flag (checks 7.x.1); the 7.x.2 checks expect a hole-free,
# single-stripe file and are presumably the else branch for exactly 2 OSTs.
2693 # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2695 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
2696 if [ $OSTCOUNT -gt 2 ]; then
2697 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2699 echo "Check $name, it contains the old f2's stripe0"
2702 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
2704 pattern=0x$($LFS getstripe -L $name)
2705 stripes=$($LFS getstripe -c $name)
2706 size=$(stat $name | awk '/Size:/ { print $2 }')
2707 if [ $OSTCOUNT -gt 2 ]; then
2708 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2709 error "(7.2.1) expect pattern flag hole, but got $pattern"
2711 [ $stripes -eq 3 ] ||
2712 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2714 [ $size -eq $((4096 * $bcount)) ] ||
2715 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
2717 cat $name > /dev/null &&
2718 error "(7.5.1) normal read $name should fail"
2720 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2721 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2723 [ $failures -eq 256 ] ||
2724 error "(7.6) expect 256 IO failures, but get $failures"
2726 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2727 [ $size -eq $((4096 * $bcount)) ] ||
2728 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
2730 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2731 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
2733 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2734 error "(7.8.1) write to normal stripe should NOT fail"
2736 echo "foo" >> $name &&
2737 error "(7.8.3) append write $name should fail"
2739 chown $RUNAS_ID:$RUNAS_GID $name ||
2740 error "(7.9.1) cannot chown on $name"
2742 touch $name || error "(7.10.1) cannot touch $name"
2744 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2745 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2747 [ $stripes -eq 1 ] ||
2748 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2751 [ $size -eq $((4096 * (256 + 0))) ] ||
2752 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2754 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2756 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2758 chown $RUNAS_ID:$RUNAS_GID $name ||
2759 error "(7.9.2) cannot chown on $name"
2761 touch $name || error "(7.10.2) cannot touch $name"
2764 rm -f $name || error "(7.11) cannot unlink $name"
# f3 exists only with more than 2 OSTs; otherwise the test ends here.
2766 [ $OSTCOUNT -le 2 ] && return
# --- Checks on the recovered f3: no hole flag, 2 stripes, fully readable and
# writable.
2769 # ${fid3}-R-0 should contains the old f3's stripe0 and stripe1
2771 name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
2772 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2774 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
2776 pattern=0x$($LFS getstripe -L $name)
2777 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2778 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2780 stripes=$($LFS getstripe -c $name)
2781 # LFSCK does not know the old f3 had 3 stripes.
2782 # It only tries to find as much as possible.
2783 # The stripe count depends on the last stripe's offset.
2784 [ $stripes -eq 2 ] ||
2785 error "(8.3) expect the stripe count is 2, but got $stripes"
2787 size=$(stat $name | awk '/Size:/ { print $2 }')
2789 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2790 error "(8.4) expect the size $((4096 * 512)), but got $size"
2792 cat $name > /dev/null || error "(8.5) cannot read $name"
2794 echo "dummy" >> $name || error "(8.6) cannot write $name"
2796 chown $RUNAS_ID:$RUNAS_GID $name ||
2797 error "(8.7) cannot chown on $name"
2799 touch $name || error "(8.8) cannot touch $name"
2801 rm -f $name || error "(8.9) cannot unlink $name"
2803 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# test_21: start LFSCK on MDT0000 without a -t type option and check that
# both the namespace and the layout component report "scanning-phase1", then
# stop it. Skipped when the MDS version is older than 2.5.59.
2806 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] &&
2807 skip "ignore the test if MDS is older than 2.5.59" && return
2809 check_mount_and_prep
2810 createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
# Started with "-s 1" (presumably a speed limit, so that the scan is still
# in phase 1 when the status is sampled below - TODO confirm).
2812 echo "Start all LFSCK components by default (-s 1)"
2813 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -s 1 -r ||
2814 error "Fail to start LFSCK"
2816 echo "namespace LFSCK should be in 'scanning-phase1' status"
2817 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
2818 [ "$STATUS" == "scanning-phase1" ] ||
2819 error "Expect namespace 'scanning-phase1', but got '$STATUS'"
2821 echo "layout LFSCK should be in 'scanning-phase1' status"
2822 STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
2823 [ "$STATUS" == "scanning-phase1" ] ||
2824 error "Expect layout 'scanning-phase1', but got '$STATUS'"
2826 echo "Stop all LFSCK components by default"
2827 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 ||
2828 error "Fail to stop LFSCK"
2830 run_test 21 "run all LFSCK components by default"
# test_22a: namespace LFSCK must repair a child directory whose ".." entry
# references a parent that no longer exists (scenario as echoed below).
# Skipped with fewer than 2 MDSes. Passes when unmatched_pairs_repaired is 1
# and "ls" on the child directory succeeds afterwards.
2833 [ $MDSCOUNT -lt 2 ] &&
2834 skip "We need at least 2 MDSes for this test" && return
# NOTE(review): in the echo lines below, the inner double quotes around ..
# close and reopen the string, so the output shows .. without quotes.
2837 echo "The parent_A references the child directory via some name entry,"
2838 echo "but the child directory back references another parent_B via its"
2839 echo "".." name entry. The parent_B does not exist. Then the namespace"
2840 echo "LFSCK will repair the child directory's ".." name entry."
2843 check_mount_and_prep
2845 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
2846 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
2848 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
2849 echo "The dummy's dotdot name entry references the guard."
2850 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
2851 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
2852 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
2853 error "(3) Fail to mkdir on MDT0"
2854 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Remove "guard", the directory dummy's ".." was made to reference.
2856 rmdir $DIR/$tdir/guard || error "(4) Fail to rmdir $DIR/$tdir/guard"
2858 echo "Trigger namespace LFSCK to repair unmatched pairs"
2859 $START_NAMESPACE -A -r ||
2860 error "(5) Fail to start LFSCK for namespace"
2862 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2863 mdd.${MDT_DEV}.lfsck_namespace |
2864 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2866 error "(6) unexpected status"
2869 local repaired=$($SHOW_NAMESPACE |
2870 awk '/^unmatched_pairs_repaired/ { print $2 }')
2871 [ $repaired -eq 1 ] ||
2872 error "(7) Fail to repair unmatched pairs: $repaired"
2874 echo "'ls' should success after namespace LFSCK repairing"
2875 ls -ail $DIR/$tdir/foo/dummy > /dev/null ||
2876 error "(8) ls should success."
2878 run_test 22a "LFSCK can repair unmatched pairs (1)"
# test_22b: namespace LFSCK must repair a child directory whose ".." entry
# and linkEA point at an existing parent that holds no matching name entry
# (scenario as echoed below). Skipped with fewer than 2 MDSes.
# Passes when unmatched_pairs_repaired is 1 and "lfs fid2path" on the
# child's FID returns $DIR/$tdir/foo/dummy only after the LFSCK run.
2881 [ $MDSCOUNT -lt 2 ] &&
2882 skip "We need at least 2 MDSes for this test" && return
# NOTE(review): in the echo lines below, the inner double quotes around ..
# close and reopen the string, so the output shows .. without quotes.
2885 echo "The parent_A references the child directory via the name entry_B,"
2886 echo "but the child directory back references another parent_C via its"
2887 echo "".." name entry. The parent_C exists, but there is no the name"
2888 echo "entry_B under the parent_C. Then the namespace LFSCK will repair"
2889 echo "the child directory's ".." name entry and its linkEA."
2892 check_mount_and_prep
2894 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
2895 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
2897 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
2898 echo "and bad linkEA. The dummy's dotdot name entry references the"
2899 echo "guard. The dummy's linkEA references n non-exist name entry."
2900 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
2901 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
2902 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
2903 error "(3) Fail to mkdir on MDT0"
2904 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Before LFSCK, fid2path must NOT resolve to the real path.
2906 local dummyfid=$($LFS path2fid $DIR/$tdir/foo/dummy)
2907 echo "fid2path should NOT work on the dummy's FID $dummyfid"
2908 local dummyname=$($LFS fid2path $DIR $dummyfid)
2909 [ "$dummyname" != "$DIR/$tdir/foo/dummy" ] ||
2910 error "(4) fid2path works unexpectedly."
2912 echo "Trigger namespace LFSCK to repair unmatched pairs"
2913 $START_NAMESPACE -A -r ||
2914 error "(5) Fail to start LFSCK for namespace"
2916 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2917 mdd.${MDT_DEV}.lfsck_namespace |
2918 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2920 error "(6) unexpected status"
2923 local repaired=$($SHOW_NAMESPACE |
2924 awk '/^unmatched_pairs_repaired/ { print $2 }')
2925 [ $repaired -eq 1 ] ||
2926 error "(7) Fail to repair unmatched pairs: $repaired"
# After LFSCK, fid2path must resolve to the real path.
# NOTE(review): "dummyname" was already declared local earlier in this block.
2928 echo "fid2path should work on the dummy's FID $dummyfid after LFSCK"
2929 local dummyname=$($LFS fid2path $DIR $dummyfid)
2930 [ "$dummyname" == "$DIR/$tdir/foo/dummy" ] ||
2931 error "(8) fid2path does not work"
2933 run_test 22b "LFSCK can repair unmatched pairs (2)"
# test_23a: remove a remote directory while fail_loc 0x1620
# (OBD_FAIL_LFSCK_DANGLING2) is set on mds2, leaving a dangling name entry.
# A first namespace LFSCK run must count one dangling entry while 'ls' still
# fails; a second run with -C must make the entry usable again.
# Skipped when fewer than 2 MDTs are configured.
test_23a() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "The name entry is there, but the MDT-object for such name "
	echo "entry does not exist. The namespace LFSCK should find out "
	echo "and repair the inconsistency as required."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
	$LFS mkdir -i 1 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT1"

	echo "Inject failure stub on MDT1 to simulate dangling name entry"
	#define OBD_FAIL_LFSCK_DANGLING2 0x1620
	do_facet mds2 $LCTL set_param fail_loc=0x1620
	rmdir $DIR/$tdir/d0/d1 || error "(3) Fail to rmdir d1"
	do_facet mds2 $LCTL set_param fail_loc=0

	echo "'ls' should fail because of dangling name entry"
	ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(4) ls should fail."

	echo "Trigger namespace LFSCK to find out dangling name entry"
	$START_NAMESPACE -A -r ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^dangling_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(7) Fail to repair dangling name entry: $repaired"

	echo "'ls' should fail because not re-create MDT-object by default"
	ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(8) ls should fail."

	echo "Trigger namespace LFSCK again to repair dangling name entry"
	$START_NAMESPACE -A -r -C ||
		error "(9) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(10) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^dangling_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(11) Fail to repair dangling name entry: $repaired"

	echo "'ls' should success after namespace LFSCK repairing"
	ls -ail $DIR/$tdir/d0/d1 > /dev/null || error "(12) ls should success."
}
run_test 23a "LFSCK can repair dangling name entry (1)"
# test_23b: hard-link f0 as "foo" while fail_loc 0x1621
# (OBD_FAIL_LFSCK_DANGLING3) is set so that "foo" cannot be listed, then run
# namespace LFSCK with -C. Expects one dangling repair, one multiple-linked
# repair, and "foo" to read back the content of f0 ("dummy").
test_23b() {
	echo "The objectA has multiple hard links, one of them corresponding"
	echo "to the name entry_B. But there is something wrong for the name"
	echo "entry_B and cause entry_B to references non-exist object_C."
	echo "In the first-stage scanning, the LFSCK will think the entry_B"
	echo "as dangling, and re-create the lost object_C. When the LFSCK"
	echo "comes to the second-stage scanning, it will find that the"
	echo "former re-creating object_C is not proper, and will try to"
	echo "replace the object_C with the real object_A."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
	echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
	echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"

	echo "Inject failure stub on MDT0 to simulate dangling name entry"
	#define OBD_FAIL_LFSCK_DANGLING3 0x1621
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
	ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"

	echo "'ls' should fail because of dangling name entry"
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(6) ls should fail."

	echo "Trigger namespace LFSCK to find out dangling name entry"
	$START_NAMESPACE -r -C ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^dangling_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(9) Fail to repair dangling name entry: $repaired"

	repaired=$($SHOW_NAMESPACE |
		   awk '/^multiple_linked_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(10) Fail to drop the former created object: $repaired"

	local data

	data=$(cat $DIR/$tdir/d0/foo)
	[ "$data" == "dummy" ] ||
		error "(11) The $DIR/$tdir/d0/foo is not recovered: $data"
}
run_test 23b "LFSCK can repair dangling name entry (2)"
# test_23c: same injection as test_23b (fail_loc 0x1621), but LFSCK is slowed
# with fail_loc 0x1602 (OBD_FAIL_LFSCK_DELAY3, fail_val=10) so the test can
# append to "foo" once it shows up with size 0. After LFSCK completes, one
# dangling repair is expected and "foo" must NOT contain f0's data.
test_23c() {
	echo "The objectA has multiple hard links, one of them corresponding"
	echo "to the name entry_B. But there is something wrong for the name"
	echo "entry_B and cause entry_B to references non-exist object_C."
	echo "In the first-stage scanning, the LFSCK will think the entry_B"
	echo "as dangling, and re-create the lost object_C. And then others"
	echo "modified the re-created object_C. When the LFSCK comes to the"
	echo "second-stage scanning, it will find that the former re-creating"
	echo "object_C maybe wrong and try to replace the object_C with the"
	echo "real object_A. But because object_C has been modified, so the"
	echo "LFSCK cannot replace it."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
	echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
	echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"

	echo "Inject failure stub on MDT0 to simulate dangling name entry"
	#define OBD_FAIL_LFSCK_DANGLING3 0x1621
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
	ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"

	echo "'ls' should fail because of dangling name entry"
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(6) ls should fail."

	#define OBD_FAIL_LFSCK_DELAY3 0x1602
	do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602

	echo "Trigger namespace LFSCK to find out dangling name entry"
	$START_NAMESPACE -r -C ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet client "stat $DIR/$tdir/d0/foo |
		awk '/Size/ { print \\\$2 }'" "0" 32 || {
		# Show the file that was being polled; the previous diagnostic
		# stat'ed $DIR/$tdir/guard, which this test never creates.
		stat $DIR/$tdir/d0/foo
		$SHOW_NAMESPACE
		error "(8) unexpected size"
	}

	echo "data" >> $DIR/$tdir/d0/foo || error "(9) Fail to write"
	cancel_lru_locks osc

	# Remove the delay so the LFSCK run can finish.
	do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(10) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^dangling_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(11) Fail to repair dangling name entry: $repaired"

	local data

	data=$(cat $DIR/$tdir/d0/foo)
	[ "$data" != "dummy" ] ||
		error "(12) The $DIR/$tdir/d0/foo should not be recovered"
}
run_test 23c "LFSCK can repair dangling name entry (3)"
# test_24: create and remove d0/dummy/foo while fail_loc 0x1622
# (OBD_FAIL_LFSCK_MUL_REF) is set, then run namespace LFSCK on all targets.
# Expects one multiple-referenced repair and an orphan named
# "<child FID>-<parent FID>-D-0" under .lustre/lost+found/MDT0000/.
# Skipped when fewer than 2 MDTs are configured.
test_24() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "Two MDT-objects back reference the same name entry via their"
	echo "each own linkEA entry, but the name entry only references one"
	echo "MDT-object. The namespace LFSCK will remove the linkEA entry"
	echo "for the MDT-object that is not recognized. If such MDT-object"
	echo "has no other linkEA entry after the removing, then the LFSCK"
	echo "will add it as orphan under the .lustre/lost+found/MDTxxxx/."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"

	mkdir $DIR/$tdir/d0/guard || error "(1) Fail to mkdir guard"
	$LFS path2fid $DIR/$tdir/d0/guard

	mkdir $DIR/$tdir/d0/dummy || error "(2) Fail to mkdir dummy"
	$LFS path2fid $DIR/$tdir/d0/dummy

	# Parent FID used in the expected orphan name; which directory it comes
	# from depends on the MDT backend type. Declared local so it does not
	# leak into later tests.
	local pfid
	if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
		pfid=$($LFS path2fid $DIR/$tdir/d0/guard)
	else
		pfid=$($LFS path2fid $DIR/$tdir/d0/dummy)
	fi

	touch $DIR/$tdir/d0/guard/foo ||
		error "(3) Fail to touch $DIR/$tdir/d0/guard/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "the $DIR/$tdir/d0/dummy/foo has the 'bad' linkEA entry"
	echo "that references $DIR/$tdir/d0/guard/foo."
	echo "Then remove the name entry $DIR/$tdir/d0/dummy/foo."
	echo "So the MDT-object $DIR/$tdir/d0/dummy/foo will be left"
	echo "there with the same linkEA entry as another MDT-object"
	echo "$DIR/$tdir/d0/guard/foo has"

	#define OBD_FAIL_LFSCK_MUL_REF 0x1622
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1622
	$LFS mkdir -i 0 $DIR/$tdir/d0/dummy/foo ||
		error "(4) Fail to mkdir $DIR/$tdir/d0/dummy/foo"
	$LFS path2fid $DIR/$tdir/d0/dummy/foo
	local cfid
	cfid=$($LFS path2fid $DIR/$tdir/d0/dummy/foo)
	rmdir $DIR/$tdir/d0/dummy/foo ||
		error "(5) Fail to remove $DIR/$tdir/d0/dummy/foo name entry"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	echo "stat $DIR/$tdir/d0/dummy/foo should fail"
	stat $DIR/$tdir/d0/dummy/foo > /dev/null 2>&1 &&
		error "(6) stat successfully unexpectedly"

	echo "Trigger namespace LFSCK to repair multiple-referenced name entry"
	$START_NAMESPACE -A -r ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^multiple_referenced_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
	error "(9) Fail to repair multiple referenced name entry: $repaired"

	echo "There should be an orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(10) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	local cname="$cfid-$pfid-D-0"
	ls -ail $MOUNT/.lustre/lost+found/MDT0000/$cname ||
		error "(11) .lustre/lost+found/MDT0000/ should not be empty"
}
run_test 24 "LFSCK can repair multiple-referenced name entry"
# test_25: create a file while fail_loc 0x1623 (OBD_FAIL_LFSCK_BAD_TYPE) is
# set, then run namespace LFSCK and expect exactly one bad_file_type repair
# and a working 'ls' of the parent directory.
# Skipped unless the MDT backend is ldiskfs.
test_25() {
	[ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
		skip "Only support to inject failure on ldiskfs" && return

	echo "The file type in the name entry does not match the file type"
	echo "claimed by the referenced object. Then the LFSCK will update"
	echo "the file type in the name entry."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "the file type stored in the name entry is wrong."

	#define OBD_FAIL_LFSCK_BAD_TYPE 0x1623
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1623
	touch $DIR/$tdir/d0/foo || error "(2) Fail to touch $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	echo "Trigger namespace LFSCK to repair bad file type in the name entry"
	$START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(4) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^bad_file_type_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
	error "(5) Fail to repair bad file type in name entry: $repaired"

	ls -ail $DIR/$tdir/d0 || error "(6) Fail to 'ls' the $DIR/$tdir/d0"
}
run_test 25 "LFSCK can repair bad file type in the name entry"
# test_26a: unlink "foo" while fail_loc 0x1624 (OBD_FAIL_LFSCK_NO_NAMEENTRY)
# is set, then run namespace LFSCK on all targets. Expects one lost_dirent
# repair, "foo" to be listable again, and its FID to be unchanged.
test_26a() {
	echo "The local name entry back referenced by the MDT-object is lost."
	echo "The namespace LFSCK will add the missing local name entry back"
	echo "to the normal namespace."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	local foofid
	foofid=$($LFS path2fid $DIR/$tdir/d0/foo)

	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rm -f $DIR/$tdir/d0/foo || error "(4) Fail to unlink $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# The 'error' call was missing here, so a successful 'ls' tried to run
	# the message as a command instead of failing the test.
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(5) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	ls -ail $DIR/$tdir/d0/foo ||
		error "(9) Fail to 'ls' $DIR/$tdir/d0/foo"

	local foofid2
	foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
	[ "$foofid" == "$foofid2" ] ||
		error "(10) foo's FID changed: $foofid, $foofid2"
}
run_test 26a "LFSCK can add the missing local name entry back to the namespace"
# test_26b: remote-directory variant of test_26a. rmdir "foo" (on MDT0,
# parent on MDT1) while fail_loc 0x1624 (OBD_FAIL_LFSCK_NO_NAMEENTRY) is set,
# then run namespace LFSCK on all targets. Expects one lost_dirent repair,
# "foo" to be listable again, and its FID to be unchanged.
# Skipped when fewer than 2 MDTs are configured.
test_26b() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "The remote name entry back referenced by the MDT-object is lost."
	echo "The namespace LFSCK will add the missing remote name entry back"
	echo "to the normal namespace."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	$LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"
	local foofid
	foofid=$($LFS path2fid $DIR/$tdir/d0/foo)

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# The 'error' call was missing here, so a successful 'ls' tried to run
	# the message as a command instead of failing the test.
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(4) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(7) Fail to repair lost dirent: $repaired"

	ls -ail $DIR/$tdir/d0/foo ||
		error "(8) Fail to 'ls' $DIR/$tdir/d0/foo"

	local foofid2
	foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
	[ "$foofid" == "$foofid2" ] ||
		error "(9) foo's FID changed: $foofid, $foofid2"
}
run_test 26b "LFSCK can add the missing remote name entry back to the namespace"
# test_27a: unlink both hard links of a file while fail_loc 0x1624
# (OBD_FAIL_LFSCK_NO_NAMEENTRY) is set, remove the parent directory, then
# run namespace LFSCK on all targets. Expects one lost_dirent repair and an
# entry matching "*-P-*" under .lustre/lost+found/MDT0000/.
test_27a() {
	echo "The local parent referenced by the MDT-object linkEA is lost."
	echo "The namespace LFSCK will re-create the lost parent as orphan."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system. And then remove"
	echo "another hard link and the parent directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rm -f $DIR/$tdir/d0/foo ||
		error "(4) Fail to unlink $DIR/$tdir/d0/foo"
	rm -f $DIR/$tdir/d0/dummy ||
		error "(5) Fail to unlink $DIR/$tdir/d0/dummy"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -rf $DIR/$tdir/d0 || error "(5) Fail to unlink the dir d0"
	# The 'error' call was missing here, so a successful 'ls' tried to run
	# the message as a command instead of failing the test.
	ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 &&
		error "(6) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the lost parent"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	echo "There should be an orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(9) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0000/

	# Quote the pattern so the shell cannot expand it against the current
	# directory before find sees it.
	local cname
	cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-P-*")
	[ -n "$cname" ] ||
		error "(10) .lustre/lost+found/MDT0000/ should not be empty"
}
run_test 27a "LFSCK can recreate the lost local parent directory as orphan"
# test_27b: remote variant of test_27a. rmdir "foo" (on MDT0, parent on MDT1)
# while fail_loc 0x1624 (OBD_FAIL_LFSCK_NO_NAMEENTRY) is set, remove the
# parent, then run namespace LFSCK on all targets. Expects one lost_dirent
# repair and an entry matching "*-P-*" under .lustre/lost+found/MDT0001/.
# Skipped when fewer than 2 MDTs are configured.
test_27b() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "The remote parent referenced by the MDT-object linkEA is lost."
	echo "The namespace LFSCK will re-create the lost parent as orphan."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	$LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"

	$LFS path2fid $DIR/$tdir/d0

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system. And then remove"
	echo "the parent directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rmdir $DIR/$tdir/d0 || error "(4) Fail to unlink the dir d0"
	# The 'error' call was missing here, so a successful 'ls' tried to run
	# the message as a command instead of failing the test.
	ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 &&
		error "(5) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	ls -ail $MOUNT/.lustre/lost+found/

	echo "There should be an orphan under .lustre/lost+found/MDT0001/"
	[ -d $MOUNT/.lustre/lost+found/MDT0001 ] ||
		error "(9) $MOUNT/.lustre/lost+found/MDT0001/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0001/

	# Quote the pattern so the shell cannot expand it against the current
	# directory before find sees it.
	local cname
	cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name "*-P-*")
	[ -n "$cname" ] ||
		error "(10) .lustre/lost+found/MDT0001/ should not be empty"
}
run_test 27b "LFSCK can recreate the lost remote parent directory as orphan"
# test_28: remove d1/a1 and d2/a2 while fail_loc 0x1624
# (OBD_FAIL_LFSCK_NO_NAMEENTRY) is set on both mds1 and mds2, then run
# namespace LFSCK on all targets with fail_loc 0x161c
# (OBD_FAIL_LFSCK_BAD_NETWORK) set on mds2. The first run must end "partial"
# on mds1 (0 lost dirents repaired) and "completed" on mds2 (1 repaired).
# A second run without the fault must complete everywhere, with 1 repaired
# on mds1 and 0 on mds2.
# Skipped when fewer than 2 MDTs are configured.
test_28() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "The target name entry is lost. The LFSCK should insert the"
	echo "orphan MDT-object under .lustre/lost+found/MDTxxxx. But if"
	echo "the MDT (on which the orphan MDT-object resides) has ever"
	echo "failed to respond some name entry verification during the"
	echo "first stage-scanning, then the LFSCK should skip to handle"
	echo "orphan MDT-object on this MDT. But other MDTs should not"
	# Completes the sentence above, which was left unfinished.
	echo "be affected."

	check_mount_and_prep
	$LFS mkdir -i 0 $DIR/$tdir/d1
	$LFS mkdir -i 1 $DIR/$tdir/d1/a1
	$LFS mkdir -i 1 $DIR/$tdir/d1/a2

	$LFS mkdir -i 1 $DIR/$tdir/d2
	$LFS mkdir -i 0 $DIR/$tdir/d2/a1
	$LFS mkdir -i 0 $DIR/$tdir/d2/a2

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "d1/a1's name entry will be removed, but the d1/a1's object"
	echo "and its linkEA are kept in the system. And the case that"
	echo "d2/a2's name entry will be removed, but the d2/a2's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet mds1 $LCTL set_param fail_loc=0x1624
	do_facet mds2 $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d1/a1 || error "(1) Fail to rmdir $DIR/$tdir/d1/a1"
	rmdir $DIR/$tdir/d2/a2 || error "(2) Fail to rmdir $DIR/$tdir/d2/a2"
	do_facet mds1 $LCTL set_param fail_loc=0
	do_facet mds2 $LCTL set_param fail_loc=0

	cancel_lru_locks mdc
	cancel_lru_locks osc

	echo "Inject failure, to simulate the MDT0 fail to handle"
	echo "MDT1 LFSCK request during the first-stage scanning."
	#define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c
	do_facet mds2 $LCTL set_param fail_loc=0x161c fail_val=0

	echo "Trigger namespace LFSCK on all devices to find out orphan object"
	$START_NAMESPACE -r -A ||
		error "(3) Fail to start LFSCK for namespace"

	wait_update_facet mds1 "$LCTL get_param -n \
		mdd.$(facet_svc mds1).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "partial" 32 ||
		error "(4) mds1 is not the expected 'partial'"

	wait_update_facet mds2 "$LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 ||
		error "(5) mds2 is not the expected 'completed'"

	do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0

	local repaired

	repaired=$(do_facet mds1 $LCTL get_param -n \
		   mdd.$(facet_svc mds1).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 0 ] ||
		error "(6) Expect 0 fixed on mds1, but got: $repaired"

	repaired=$(do_facet mds2 $LCTL get_param -n \
		   mdd.$(facet_svc mds2).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(7) Expect 1 fixed on mds2, but got: $repaired"

	echo "Trigger namespace LFSCK on all devices again to cleanup"
	$START_NAMESPACE -r -A ||
		error "(8) Fail to start LFSCK for namespace"

	local k

	for k in $(seq $MDSCOUNT); do
		# The LFSCK status query interval is 30 seconds. For the case
		# of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
		# time to guarantee the status sync up.
		wait_update_facet mds${k} "$LCTL get_param -n \
			mdd.$(facet_svc mds${k}).lfsck_namespace |
			awk '/^status/ { print \\\$2 }'" "completed" 32 ||
			error "(9) MDS${k} is not the expected 'completed'"
	done

	repaired=$(do_facet mds1 $LCTL get_param -n \
		   mdd.$(facet_svc mds1).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(10) Expect 1 fixed on mds1, but got: $repaired"

	repaired=$(do_facet mds2 $LCTL get_param -n \
		   mdd.$(facet_svc mds2).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 0 ] ||
		error "(11) Expect 0 fixed on mds2, but got: $repaired"
}
run_test 28 "Skip the failed MDT(s) when handle orphan MDT-objects"
# test_29a: hard-link "foo" while fail_loc 0x1625
# (OBD_FAIL_LFSCK_MORE_NLINK) is set and confirm stat reports 3 links, then
# run namespace LFSCK on all targets. Expects one nlinks repair and a link
# count of 2 afterwards.
test_29a() {
	echo "The object's nlink attribute is larger than the object's known"
	echo "name entries count. The LFSCK will repair the object's nlink"
	echo "attribute to match the known name entries count"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"

	echo "Inject failure stub on MDT0 to simulate the case that foo's"
	echo "nlink attribute is larger than its name entries count."

	#define OBD_FAIL_LFSCK_MORE_NLINK 0x1625
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1625
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	cancel_lru_locks mdc
	local count
	count=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count" -eq 3 ] || error "(4) Cannot inject error: $count"

	echo "Trigger namespace LFSCK to repair the nlink count"
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^nlinks_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(7) Fail to repair nlink count: $repaired"

	cancel_lru_locks mdc
	count=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count" -eq 2 ] || error "(8) Fail to repair nlink count: $count"
}
run_test 29a "LFSCK can repair bad nlink count (1)"
# test_29b: hard-link "foo" while fail_loc 0x1626
# (OBD_FAIL_LFSCK_LESS_NLINK) is set and confirm stat reports 1 link, then
# run namespace LFSCK on all targets. Expects one nlinks repair and a link
# count of 2 afterwards.
test_29b() {
	echo "The object's nlink attribute is smaller than the object's known"
	echo "name entries count. The LFSCK will repair the object's nlink"
	echo "attribute to match the known name entries count"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"

	echo "Inject failure stub on MDT0 to simulate the case that foo's"
	echo "nlink attribute is smaller than its name entries count."

	#define OBD_FAIL_LFSCK_LESS_NLINK 0x1626
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1626
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	cancel_lru_locks mdc
	local count
	count=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count" -eq 1 ] || error "(4) Cannot inject error: $count"

	echo "Trigger namespace LFSCK to repair the nlink count"
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	local repaired

	repaired=$($SHOW_NAMESPACE |
		   awk '/^nlinks_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(7) Fail to repair nlink count: $repaired"

	cancel_lru_locks mdc
	count=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count" -eq 2 ] || error "(8) Fail to repair nlink count: $count"
}
run_test 29b "LFSCK can repair bad nlink count (2)"
# test_29c: add a second hard link while fail_loc 0x1627
# (OBD_FAIL_LFSCK_LINKEA_OVERFLOW) is set, so stat reports 3 links but
# fid2path lists only 2 paths. Namespace LFSCK is then run with the fail_loc
# still set and must NOT repair nlink; both counts must be unchanged.
test_29c() {
	echo "There are too many hard links to the object, and exceeds the"
	echo "object's linkEA limitation, as to NOT all the known name entries"
	echo "will be recorded in the linkEA. Under such case, LFSCK should"
	echo "skip the nlink verification for this object."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's hard links exceed the object's linkEA limitation."

	#define OBD_FAIL_LFSCK_LINKEA_OVERFLOW 0x1627
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1627
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h2 ||
		error "(4) Fail to hard link to $DIR/$tdir/d0/foo"

	cancel_lru_locks mdc

	local count1
	count1=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count1" -eq 3 ] || error "(5) Stat failure: $count1"

	local foofid
	foofid=$($LFS path2fid $DIR/$tdir/d0/foo)
	$LFS fid2path $DIR $foofid
	local count2
	count2=$($LFS fid2path $DIR $foofid | wc -l)
	# The 'error' call was missing here, so a failed injection tried to
	# run the message as a command instead of failing the test.
	[ "$count2" -eq 2 ] || error "(6) Fail to inject error: $count2"

	echo "Trigger namespace LFSCK to repair the nlink count"
	$START_NAMESPACE -r -A ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so the log shows why it did not finish.
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	}

	# The fail_loc is only cleared once the LFSCK run has completed.
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
	local repaired
	repaired=$($SHOW_NAMESPACE |
		   awk '/^nlinks_repaired/ { print $2 }')
	[ "$repaired" -eq 0 ] ||
		error "(9) Repair nlink count unexpcetedly: $repaired"

	cancel_lru_locks mdc

	count1=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count1" -eq 3 ] || error "(10) Stat failure: $count1"

	count2=$($LFS fid2path $DIR $foofid | wc -l)
	[ "$count2" -eq 2 ] ||
		error "(11) Repaired something unexpectedly: $count2"
}
run_test 29c "Not verify nlink attr if hark links exceed linkEA limitation"
3725 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
3726 skip "Only support backend /lost+found for ldiskfs" && return
3729 echo "The namespace LFSCK will move the orphans from backend"
3730 echo "/lost+found directory to normal client visible namespace"
3731 echo "or to global visible ./lustre/lost+found/MDTxxxx/ directory"
3734 check_mount_and_prep
3736 $LFS mkdir -i 0 $DIR/$tdir/foo || error "(1) Fail to mkdir foo"
3737 touch $DIR/$tdir/foo/f0 || error "(2) Fail to touch f1"
3739 echo "Inject failure stub on MDT0 to simulate the case that"
3740 echo "directory d0 has no linkEA entry, then the LFSCK will"
3741 echo "move it into .lustre/lost+found/MDTxxxx/ later."
3743 #define OBD_FAIL_LFSCK_NO_LINKEA 0x161d
3744 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
3745 mkdir $DIR/$tdir/foo/d0 || error "(3) Fail to mkdir d0"
3746 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
3748 touch $DIR/$tdir/foo/d0/f1 || error "(4) Fail to touch f1"
3749 mkdir $DIR/$tdir/foo/d0/d1 || error "(5) Fail to mkdir d1"
3751 echo "Inject failure stub on MDT0 to simulate the case that the"
3752 echo "object's name entry will be removed, but not destroy the"
3753 echo "object. Then backend e2fsck will handle it as orphan and"
3754 echo "add them into the backend /lost+found directory."
3756 #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
3757 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
3758 rmdir $DIR/$tdir/foo/d0/d1 || error "(6) Fail to rmdir d1"
3759 rm -f $DIR/$tdir/foo/d0/f1 || error "(7) Fail to unlink f1"
3760 rmdir $DIR/$tdir/foo/d0 || error "(8) Fail to rmdir d0"
3761 rm -f $DIR/$tdir/foo/f0 || error "(9) Fail to unlink f0"
3762 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
3764 umount_client $MOUNT || error "(10) Fail to stop client!"
3766 stop $SINGLEMDS || error "(11) Fail to stop MDT0"
3769 run_e2fsck $(facet_host $SINGLEMDS) $MDT_DEVNAME "-y" ||
3770 error "(12) Fail to run e2fsck"
3772 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
3773 error "(13) Fail to start MDT0"
3775 echo "Trigger namespace LFSCK to recover backend orphans"
3776 $START_NAMESPACE -r -A ||
3777 error "(14) Fail to start LFSCK for namespace"
3779 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3780 mdd.${MDT_DEV}.lfsck_namespace |
3781 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3783 error "(15) unexpected status"
3786 local repaired=$($SHOW_NAMESPACE |
3787 awk '/^local_lost_found_moved/ { print $2 }')
3788 [ $repaired -ge 4 ] ||
3789 error "(16) Fail to recover backend orphans: $repaired"
3791 mount_client $MOUNT || error "(17) Fail to start client!"
3793 stat $DIR/$tdir/foo/f0 || "(18) f0 is not recovered"
3795 ls -ail $MOUNT/.lustre/lost+found/
3797 echo "d0 should become orphan under .lustre/lost+found/MDT0000/"
3798 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
3799 error "(19) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
3801 ls -ail $MOUNT/.lustre/lost+found/MDT0000/
# Quote the pattern so that find, not the shell, performs the matching; an
# unquoted glob would be expanded against the current directory first.
3803 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-*-D-*")
3804 [ ! -z "$cname" ] || error "(20) d0 is not recovered"
3806 stat ${cname}/d1 || error "(21) d0 is not recovered"
3807 stat ${cname}/f1 || error "(22) f1 is not recovered"
3809 run_test 30 "LFSCK can recover the orphans from backend /lost+found"
# Test 31a: a name entry placed in the wrong shard of a striped directory
# (bad name hash, injected with fail_val=0) must be repaired by namespace
# LFSCK. Skipped when fewer than 2 MDTs are configured.
3812 [ $MDSCOUNT -lt 2 ] &&
3813 skip "The test needs at least 2 MDTs" && return
3816 echo "For the name entry under a striped directory, if the name"
3817 echo "hash does not match the shard, then the LFSCK will repair"
3818 echo "the bad name entry"
3821 check_mount_and_prep
# Create the striped directory used by the test.
3823 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3824 error "(1) Fail to create striped directory"
3826 echo "Inject failure stub on client to simulate the case that"
3827 echo "some name entry should be inserted into other non-first"
3828 echo "shard, but inserted into the first shard by wrong"
# The fail_loc is set with a plain local $LCTL call (no do_facet), the
# sub-directories are created while it is active, then it is cleared.
3830 #define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
3831 $LCTL set_param fail_loc=0x1628 fail_val=0
3832 createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT ||
3833 error "(2) Fail to create file under striped directory"
3834 $LCTL set_param fail_loc=0 fail_val=0
3836 echo "Trigger namespace LFSCK to repair bad name hash"
3837 $START_NAMESPACE -r -A ||
3838 error "(3) Fail to start LFSCK for namespace"
# Wait until the namespace LFSCK status on $SINGLEMDS reads "completed"
# (32 is presumably the wait limit in seconds - confirm in test-framework.sh).
3840 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3841 mdd.${MDT_DEV}.lfsck_namespace |
3842 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3844 error "(4) unexpected status"
# At least one name hash repair must be reported in the LFSCK statistics.
3847 local repaired=$($SHOW_NAMESPACE |
3848 awk '/^name_hash_repaired/ { print $2 }')
3849 [ $repaired -ge 1 ] ||
3850 error "(5) Fail to repair bad name hash: $repaired"
# Remount the client before verifying the entries.
3852 umount_client $MOUNT || error "(6) umount failed"
3853 mount_client $MOUNT || error "(7) mount failed"
# Every created sub-directory d$i must be reachable and removable.
3855 for ((i = 0; i < $MDSCOUNT; i++)); do
3856 stat $DIR/$tdir/striped_dir/d$i ||
3857 error "(8) Fail to stat d$i after LFSCK"
3858 rmdir $DIR/$tdir/striped_dir/d$i ||
3859 error "(9) Fail to unlink d$i after LFSCK"
3862 rmdir $DIR/$tdir/striped_dir ||
3863 error "(10) Fail to remove the striped directory after LFSCK"
3865 run_test 31a "The LFSCK can find/repair the name entry with bad name hash (1)"
# Test 31b: same scenario as a bad name hash under a striped directory, but
# injected with fail_val=1 and verified through the LFSCK statistics of mds2.
# Skipped when fewer than 2 MDTs are configured.
3868 [ $MDSCOUNT -lt 2 ] &&
3869 skip "The test needs at least 2 MDTs" && return
3872 echo "For the name entry under a striped directory, if the name"
3873 echo "hash does not match the shard, then the LFSCK will repair"
3874 echo "the bad name entry"
3877 check_mount_and_prep
# Create the striped directory used by the test.
3879 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3880 error "(1) Fail to create striped directory"
3882 echo "Inject failure stub on client to simulate the case that"
3883 echo "some name entry should be inserted into other non-second"
3884 echo "shard, but inserted into the secod shard by wrong"
# The fail_loc is set with a plain local $LCTL call (no do_facet), the
# sub-directories are created while it is active, then it is cleared.
3886 #define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
3887 $LCTL set_param fail_loc=0x1628 fail_val=1
3888 createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT ||
3889 error "(2) Fail to create file under striped directory"
3890 $LCTL set_param fail_loc=0 fail_val=0
3892 echo "Trigger namespace LFSCK to repair bad name hash"
3893 $START_NAMESPACE -r -A ||
3894 error "(3) Fail to start LFSCK for namespace"
# Wait until the namespace LFSCK status on mds2 reads "completed"
# (32 is presumably the wait limit in seconds - confirm in test-framework.sh).
3896 wait_update_facet mds2 "$LCTL get_param -n \
3897 mdd.$(facet_svc mds2).lfsck_namespace |
3898 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
3899 error "(4) unexpected status"
# The repair counter is read from mds2, not from $SINGLEMDS.
3901 local repaired=$(do_facet mds2 $LCTL get_param -n \
3902 mdd.$(facet_svc mds2).lfsck_namespace |
3903 awk '/^name_hash_repaired/ { print $2 }')
3904 [ $repaired -ge 1 ] ||
3905 error "(5) Fail to repair bad name hash: $repaired"
# Remount the client before verifying the entries.
3907 umount_client $MOUNT || error "(6) umount failed"
3908 mount_client $MOUNT || error "(7) mount failed"
# Every created sub-directory d$i must be reachable and removable.
3910 for ((i = 0; i < $MDSCOUNT; i++)); do
3911 stat $DIR/$tdir/striped_dir/d$i ||
3912 error "(8) Fail to stat d$i after LFSCK"
3913 rmdir $DIR/$tdir/striped_dir/d$i ||
3914 error "(9) Fail to unlink d$i after LFSCK"
3917 rmdir $DIR/$tdir/striped_dir ||
3918 error "(10) Fail to remove the striped directory after LFSCK"
3920 run_test 31b "The LFSCK can find/repair the name entry with bad name hash (2)"
# Test 31c: the master LMV EA of a striped directory is lost at creation time
# and nothing is created under it afterwards; namespace LFSCK is expected to
# re-generate the EA. Skipped when fewer than 2 MDTs are configured.
3923 [ $MDSCOUNT -lt 2 ] &&
3924 skip "The test needs at least 2 MDTs" && return
3927 echo "For some reason, the master MDT-object of the striped directory"
3928 echo "may lost its master LMV EA. If nobody created files under the"
3929 echo "master directly after the master LMV EA lost, then the LFSCK"
3930 echo "should re-generate the master LMV EA."
3933 check_mount_and_prep
3935 echo "Inject failure stub on MDT0 to simulate the case that the"
3936 echo "master MDT-object of the striped directory lost the LMV EA."
# The fail_loc is active on $SINGLEMDS only while the striped directory is
# being created, and is cleared right after.
3938 #define OBD_FAIL_LFSCK_LOST_MASTER_LMV 0x1629
3939 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
3940 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3941 error "(1) Fail to create striped directory"
3942 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
3944 echo "Trigger namespace LFSCK to re-generate master LMV EA"
3945 $START_NAMESPACE -r -A ||
3946 error "(2) Fail to start LFSCK for namespace"
# Wait until the namespace LFSCK status on $SINGLEMDS reads "completed"
# (32 is presumably the wait limit in seconds - confirm in test-framework.sh).
3948 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3949 mdd.${MDT_DEV}.lfsck_namespace |
3950 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3952 error "(3) unexpected status"
# Exactly one striped directory repair must be reported.
3955 local repaired=$($SHOW_NAMESPACE |
3956 awk '/^striped_dirs_repaired/ { print $2 }')
3957 [ $repaired -eq 1 ] ||
3958 error "(4) Fail to re-generate master LMV EA: $repaired"
# Remount the client before inspecting the directory.
3960 umount_client $MOUNT || error "(5) umount failed"
3961 mount_client $MOUNT || error "(6) mount failed"
# The repaired directory must list as empty and must be removable.
3963 local empty=$(ls $DIR/$tdir/striped_dir/)
3964 [ -z "$empty" ] || error "(7) The master LMV EA is not repaired: $empty"
3966 rmdir $DIR/$tdir/striped_dir ||
3967 error "(8) Fail to remove the striped directory after LFSCK"
3969 run_test 31c "Re-generate the lost master LMV EA for striped directory"
# Test 31d: the master LMV EA of a striped directory is lost and a file is
# created under it afterwards; namespace LFSCK must not re-generate the EA and
# the directory is expected to reject new files. Skipped when fewer than
# 2 MDTs are configured.
3972 [ $MDSCOUNT -lt 2 ] &&
3973 skip "The test needs at least 2 MDTs" && return
3976 echo "For some reason, the master MDT-object of the striped directory"
3977 echo "may lost its master LMV EA. If somebody created files under the"
3978 echo "master directly after the master LMV EA lost, then the LFSCK"
3979 echo "should NOT re-generate the master LMV EA, instead, it should"
3980 echo "change the broken striped dirctory as read-only to prevent"
3981 echo "further damage"
3984 check_mount_and_prep
3986 echo "Inject failure stub on MDT0 to simulate the case that the"
3987 echo "master MDT-object of the striped directory lost the LMV EA."
# The fail_loc is active on $SINGLEMDS only while the striped directory is
# being created, and is cleared right after.
3989 #define OBD_FAIL_LFSCK_LOST_MASTER_LMV 0x1629
3990 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
3991 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3992 error "(1) Fail to create striped directory"
3993 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0
# Remount the client, then modify the broken directory before LFSCK runs.
3995 umount_client $MOUNT || error "(2) umount failed"
3996 mount_client $MOUNT || error "(3) mount failed"
3998 touch $DIR/$tdir/striped_dir/dummy ||
3999 error "(4) Fail to touch under broken striped directory"
4001 echo "Trigger namespace LFSCK to find out the inconsistency"
4002 $START_NAMESPACE -r -A ||
4003 error "(5) Fail to start LFSCK for namespace"
# Wait until the namespace LFSCK status on $SINGLEMDS reads "completed"
# (32 is presumably the wait limit in seconds - confirm in test-framework.sh).
4005 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4006 mdd.${MDT_DEV}.lfsck_namespace |
4007 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4009 error "(6) unexpected status"
# No striped directory repair may be reported in this scenario.
4012 local repaired=$($SHOW_NAMESPACE |
4013 awk '/^striped_dirs_repaired/ { print $2 }')
4014 [ $repaired -eq 0 ] ||
4015 error "(7) Re-generate master LMV EA unexpected: $repaired"
# The existing file must still be visible ...
4017 stat $DIR/$tdir/striped_dir/dummy ||
4018 error "(8) Fail to stat $DIR/$tdir/striped_dir/dummy"
# ... but creating a new one must fail (a successful touch is the error).
4020 touch $DIR/$tdir/striped_dir/foo &&
4021 error "(9) The broken striped directory should be read-only"
# Drop the immutable flag before removing the directory.
4023 chattr -i $DIR/$tdir/striped_dir ||
4024 error "(10) Fail to chattr on the broken striped directory"
4026 rmdir $DIR/$tdir/striped_dir ||
4027 error "(11) Fail to remove the striped directory after LFSCK"
4029 run_test 31d "Set broken striped directory (modified after broken) as read-only"
# Test 31e: a slave object of a striped directory loses its slave LMV EA
# (injected with fail_val=0); namespace LFSCK is expected to re-generate it.
# Skipped when fewer than 2 MDTs are configured.
4032 [ $MDSCOUNT -lt 2 ] &&
4033 skip "The test needs at least 2 MDTs" && return
4036 echo "For some reason, the slave MDT-object of the striped directory"
4037 echo "may lost its slave LMV EA. The LFSCK should re-generate the"
4038 echo "slave LMV EA."
4041 check_mount_and_prep
4043 echo "Inject failure stub on MDT0 to simulate the case that the"
4044 echo "slave MDT-object (that resides on the same MDT as the master"
4045 echo "MDT-object resides on) lost the LMV EA."
# The fail_loc is active on $SINGLEMDS only while the striped directory is
# being created, and is cleared right after.
4047 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
4048 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=0
4049 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4050 error "(1) Fail to create striped directory"
4051 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4053 echo "Trigger namespace LFSCK to re-generate slave LMV EA"
4054 $START_NAMESPACE -r -A ||
4055 error "(2) Fail to start LFSCK for namespace"
# Wait until the namespace LFSCK status on $SINGLEMDS reads "completed"
# (32 is presumably the wait limit in seconds - confirm in test-framework.sh).
4057 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4058 mdd.${MDT_DEV}.lfsck_namespace |
4059 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4061 error "(3) unexpected status"
# Exactly one shard repair must be reported on $SINGLEMDS.
4064 local repaired=$($SHOW_NAMESPACE |
4065 awk '/^striped_shards_repaired/ { print $2 }')
4066 [ $repaired -eq 1 ] ||
4067 error "(4) Fail to re-generate slave LMV EA: $repaired"
4069 rmdir $DIR/$tdir/striped_dir ||
4070 error "(5) Fail to remove the striped directory after LFSCK"
4072 run_test 31e "Re-generate the lost slave LMV EA for striped directory (1)"
# Test 31f: a slave object of a striped directory loses its slave LMV EA
# (injected with fail_val=1); the repair is verified through the LFSCK
# statistics of mds2. Skipped when fewer than 2 MDTs are configured.
4075 [ $MDSCOUNT -lt 2 ] &&
4076 skip "The test needs at least 2 MDTs" && return
4079 echo "For some reason, the slave MDT-object of the striped directory"
4080 echo "may lost its slave LMV EA. The LFSCK should re-generate the"
4081 echo "slave LMV EA."
4084 check_mount_and_prep
4086 echo "Inject failure stub on MDT0 to simulate the case that the"
4087 echo "slave MDT-object (that resides on differnt MDT as the master"
4088 echo "MDT-object resides on) lost the LMV EA."
# The fail_loc is active on $SINGLEMDS only while the striped directory is
# being created, and is cleared right after.
4090 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
4091 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=1
4092 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4093 error "(1) Fail to create striped directory"
4094 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4096 echo "Trigger namespace LFSCK to re-generate slave LMV EA"
4097 $START_NAMESPACE -r -A ||
4098 error "(2) Fail to start LFSCK for namespace"
# Wait until the namespace LFSCK status on mds2 reads "completed"
# (32 is presumably the wait limit in seconds - confirm in test-framework.sh).
4100 wait_update_facet mds2 "$LCTL get_param -n \
4101 mdd.$(facet_svc mds2).lfsck_namespace |
4102 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
4103 error "(3) unexpected status"
# Exactly one shard repair must be reported on mds2.
4105 local repaired=$(do_facet mds2 $LCTL get_param -n \
4106 mdd.$(facet_svc mds2).lfsck_namespace |
4107 awk '/^striped_shards_repaired/ { print $2 }')
4108 [ $repaired -eq 1 ] ||
4109 error "(4) Fail to re-generate slave LMV EA: $repaired"
4111 rmdir $DIR/$tdir/striped_dir ||
4112 error "(5) Fail to remove the striped directory after LFSCK"
4114 run_test 31f "Re-generate the lost slave LMV EA for striped directory (2)"
# Test 31g: the stripe index stored in a slave LMV EA is corrupted at
# creation time; namespace LFSCK is expected to repair it, after which the
# striped directory must be usable again. Skipped when fewer than 2 MDTs are
# configured.
4117 [ $MDSCOUNT -lt 2 ] &&
4118 skip "The test needs at least 2 MDTs" && return
4121 echo "For some reason, the stripe index in the slave LMV EA is"
4122 echo "corrupted. The LFSCK should repair the slave LMV EA."
4125 check_mount_and_prep
4127 echo "Inject failure stub on MDT0 to simulate the case that the"
4128 echo "slave LMV EA on the first shard of the striped directory"
4129 echo "claims the same index as the second shard claims"
# The fail_loc is active on $SINGLEMDS only while the striped directory is
# being created, and is cleared right after.
4131 #define OBD_FAIL_LFSCK_BAD_SLAVE_LMV 0x162b
4132 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162b fail_val=0
4133 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4134 error "(1) Fail to create striped directory"
4135 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4137 echo "Trigger namespace LFSCK to repair the slave LMV EA"
4138 $START_NAMESPACE -r -A ||
4139 error "(2) Fail to start LFSCK for namespace"
# Wait until the namespace LFSCK status on $SINGLEMDS reads "completed"
# (32 is presumably the wait limit in seconds - confirm in test-framework.sh).
4141 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4142 mdd.${MDT_DEV}.lfsck_namespace |
4143 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4145 error "(3) unexpected status"
# Exactly one shard repair must be reported.
4148 local repaired=$($SHOW_NAMESPACE |
4149 awk '/^striped_shards_repaired/ { print $2 }')
4150 [ $repaired -eq 1 ] ||
4151 error "(4) Fail to repair slave LMV EA: $repaired"
# Remount the client, then check that create/unlink/rmdir work.
4153 umount_client $MOUNT || error "(5) umount failed"
4154 mount_client $MOUNT || error "(6) mount failed"
4156 touch $DIR/$tdir/striped_dir/foo ||
4157 error "(7) Fail to touch file after the LFSCK"
4159 rm -f $DIR/$tdir/striped_dir/foo ||
4160 error "(8) Fail to unlink file after the LFSCK"
4162 rmdir $DIR/$tdir/striped_dir ||
4163 error "(9) Fail to remove the striped directory after LFSCK"
4165 run_test 31g "Repair the corrupted slave LMV EA"
# Test 31h: a shard's name entry in a striped directory is corrupted at
# creation time; namespace LFSCK is expected to repair the entry, after which
# the striped directory must be usable again. Skipped when fewer than 2 MDTs
# are configured.
4168 [ $MDSCOUNT -lt 2 ] &&
4169 skip "The test needs at least 2 MDTs" && return
4172 echo "For some reason, the shard's name entry in the striped"
4173 echo "directory may be corrupted. The LFSCK should repair the"
4174 echo "bad shard's name entry."
4177 check_mount_and_prep
4179 echo "Inject failure stub on MDT0 to simulate the case that the"
4180 echo "first shard's name entry in the striped directory claims"
4181 echo "the same index as the second shard's name entry claims."
# The fail_loc is active on $SINGLEMDS only while the striped directory is
# being created, and is cleared right after.
4183 #define OBD_FAIL_LFSCK_BAD_SLAVE_NAME 0x162c
4184 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162c fail_val=0
4185 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4186 error "(1) Fail to create striped directory"
4187 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4189 echo "Trigger namespace LFSCK to repair the shard's name entry"
4190 $START_NAMESPACE -r -A ||
4191 error "(2) Fail to start LFSCK for namespace"
# Wait until the namespace LFSCK status on $SINGLEMDS reads "completed"
# (32 is presumably the wait limit in seconds - confirm in test-framework.sh).
4193 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4194 mdd.${MDT_DEV}.lfsck_namespace |
4195 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4197 error "(3) unexpected status"
# Exactly one directory entry repair must be reported.
4200 local repaired=$($SHOW_NAMESPACE |
4201 awk '/^dirent_repaired/ { print $2 }')
4202 [ $repaired -eq 1 ] ||
4203 error "(4) Fail to repair shard's name entry: $repaired"
# Remount the client, then check that create/unlink/rmdir work.
4205 umount_client $MOUNT || error "(5) umount failed"
4206 mount_client $MOUNT || error "(6) mount failed"
4208 touch $DIR/$tdir/striped_dir/foo ||
4209 error "(7) Fail to touch file after the LFSCK"
4211 rm -f $DIR/$tdir/striped_dir/foo ||
4212 error "(8) Fail to unlink file after the LFSCK"
4214 rmdir $DIR/$tdir/striped_dir ||
4215 error "(9) Fail to remove the striped directory after LFSCK"
4217 run_test 31h "Repair the corrupted shard's name entry"
4219 # Restore the MDS/OST sizes and the OST count that were saved in the
# SAVED_* variables near the top of this script.
4220 MDSSIZE=${SAVED_MDSSIZE}
4221 OSTSIZE=${SAVED_OSTSIZE}
4222 OSTCOUNT=${SAVED_OSTCOUNT}
4224 # finally, clean up the system