# Suite setup: exclusion lists, test-framework loading, and version/backend
# gating of individual tests.
# NOTE(review): this is a line-numbered extract with elided lines; a statement
# ending in '&&' may continue on a line that is not shown here.
3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# LUSTRE defaults to the parent of the directory that contains this script.
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
# CONFIG defaults to $LUSTRE/tests/cfg/$NAME.sh and is sourced either way.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with status 0 (not a failure) when require_dsh_mds does not succeed.
20 require_dsh_mds || exit 0
# Remember the configured sizes and OST count before they are adjusted below.
24 SAVED_MDSSIZE=${MDSSIZE}
25 SAVED_OSTSIZE=${OSTSIZE}
26 SAVED_OSTCOUNT=${OSTCOUNT}
27 # use small MDS + OST sizes to speed up formatting
28 # do not make MDSSIZE/OSTSIZE too small, since they affect the default journal size
31 # no need for too many OSTs; cap the count to reduce the format/start/stop overhead
32 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
34 # build up a clean test environment.
# Version gating: the first check skips when the MDS is older than 2.3.60; the
# following checks append test ids to ALWAYS_EXCEPT when the MDS (or ost1) is
# not new enough for them.
38 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
39 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
42 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
43 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
45 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
46 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
48 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
49 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27 28 29 30 31"
51 # DNE does not support striped directory on zfs-based backend yet.
52 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
53 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 31"
# Device names and command strings shared by the tests below. The START_*,
# STOP_* and SHOW_* variables hold whole command lines; tests expand them
# unquoted (e.g. `$START_NAMESPACE -r`) so options can be appended at the
# call site.
57 MDT_DEV="${FSNAME}-MDT0000"
58 OST_DEV="${FSNAME}-OST0000"
# mdsdevname receives SINGLEMDS with every "mds" substring removed.
59 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# Start commands: namespace and layout LFSCK on the MDT, layout LFSCK on ost1.
60 START_NAMESPACE="do_facet $SINGLEMDS \
61 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
62 START_LAYOUT="do_facet $SINGLEMDS \
63 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
64 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
65 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Status queries: read the lfsck_namespace / lfsck_layout parameters with
# `get_param -n`.
66 SHOW_NAMESPACE="do_facet $SINGLEMDS \
67 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
68 SHOW_LAYOUT="do_facet $SINGLEMDS \
69 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
70 SHOW_LAYOUT_ON_OST="do_facet ost1 \
71 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# MDT mount options used when restarting the MDS; the NOSCRUB variant adds
# "noscrub".
72 MOUNT_OPTS_SCRUB="-o user_xattr"
73 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
# Data-preparation steps: populate $DIR/$tdir with ndirs directories (d*, e*)
# and files (f*), plus nfiles files inside each d* directory.
# NOTE(review): the enclosing function header, the assignments of nfiles,
# ndirs and igif, and the closing fi/done lines are elided from this extract.
82 echo "preparing... $nfiles * $ndirs files will be created $(date)."
# When igif is non-empty, fail_loc 0x1504 is set on the MDS for the creation
# phase and cleared again further below.
83 if [ ! -z $igif ]; then
84 #define OBD_FAIL_FID_IGIF 0x1504
85 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
# Seed the test directory with copies of the test suite's own *.sh scripts.
88 cp $LUSTRE/tests/*.sh $DIR/$tdir/
89 if [ $ndirs -gt 0 ]; then
90 createmany -d $DIR/$tdir/d $ndirs
91 createmany -m $DIR/$tdir/f $ndirs
92 if [ $nfiles -gt 0 ]; then
# Fill each directory d0..d(ndirs-1) with nfiles entries; output is discarded.
93 for ((i = 0; i < $ndirs; i++)); do
94 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
95 /dev/null || error "createmany $nfiles"
98 createmany -d $DIR/$tdir/e $ndirs
# With igif set, create one more file ("dummy") before clearing fail_loc.
101 if [ ! -z $igif ]; then
102 touch $DIR/$tdir/dummy
103 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
106 echo "prepared $(date)."
# Test 0: start, stop and restart namespace LFSCK by hand and check the
# reported status after each step.
# NOTE(review): the function header, closing braces and some statements are
# elided from this extract.
# Delay the scan (fail_loc 0x1600, fail_val=3) so the status can be observed
# while it is still in phase 1.
112 #define OBD_FAIL_LFSCK_DELAY1 0x1600
113 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
114 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
116 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
# The second field of the "status" line of lfsck_namespace is the state name.
118 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
119 [ "$STATUS" == "scanning-phase1" ] ||
120 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the scan and expect "stopped".
122 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
124 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
125 [ "$STATUS" == "stopped" ] ||
126 error "(6) Expect 'stopped', but got '$STATUS'"
# Start again, this time without -r, and expect phase 1 again.
128 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
130 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
131 [ "$STATUS" == "scanning-phase1" ] ||
132 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fail injection and wait for the status to become "completed".
134 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
135 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
136 mdd.${MDT_DEV}.lfsck_namespace |
137 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
139 error "(9) unexpected status"
# Nothing was corrupted in this test, so updated_phase1 must be 0.
142 local repaired=$($SHOW_NAMESPACE |
143 awk '/^updated_phase1/ { print $2 }')
144 [ $repaired -eq 0 ] ||
145 error "(10) Expect nothing to be repaired, but got: $repaired"
# A further run started with -r must raise success_count by exactly one.
147 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
148 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
149 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
150 mdd.${MDT_DEV}.lfsck_namespace |
151 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
153 error "(12) unexpected status"
156 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
157 [ $((scanned1 + 1)) -eq $scanned2 ] ||
158 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
160 echo "stopall, should NOT crash LU-3649"
161 stopall || error "(14) Fail to stopall"
163 run_test 0 "Control LFSCK manually"
# Test 1a: create a file while fail_loc 0x1501 is set, run namespace LFSCK,
# and expect exactly one repair to be reported.
# NOTE(review): the function header and some statements are elided from this
# extract.
# Only runs on an ldiskfs MDT; other backends are skipped.
166 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
167 skip "OI Scrub not implemented for ZFS" && return
171 #define OBD_FAIL_FID_INDIR 0x1501
172 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
173 touch $DIR/$tdir/dummy
175 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a namespace scan started with -r and wait for "completed".
177 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
178 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
179 mdd.${MDT_DEV}.lfsck_namespace |
180 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
182 error "(4) unexpected status"
# Prefer the dirent_repaired counter; fall back to updated_phase1 when the
# former is absent from the output.
185 local repaired=$($SHOW_NAMESPACE |
186 awk '/^dirent_repaired/ { print $2 }')
187 # for interop with old server
188 [ -z "$repaired" ] &&
189 repaired=$($SHOW_NAMESPACE |
190 awk '/^updated_phase1/ { print $2 }')
192 [ $repaired -eq 1 ] ||
193 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
195 mount_client $MOUNT || error "(6) Fail to start client!"
# List the directory with fail_loc 0x1505 set; failure means the repair did
# not take effect.
197 #define OBD_FAIL_FID_LOOKUP 0x1505
198 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
199 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
201 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
203 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
# Test 1b: create a file while fail_loc 0x1502 is set, run namespace LFSCK
# with fail_loc 0x1506 active, and expect exactly one repair.
# NOTE(review): the function header and some statements are elided from this
# extract.
# Only runs on an ldiskfs MDT; other backends are skipped.
207 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
208 skip "OI Scrub not implemented for ZFS" && return
212 #define OBD_FAIL_FID_INLMA 0x1502
213 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
214 touch $DIR/$tdir/dummy
216 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# fail_loc 0x1506 stays set for the whole scan and is cleared only after the
# repair count has been checked.
218 #define OBD_FAIL_FID_NOLMA 0x1506
219 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
220 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
221 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
222 mdd.${MDT_DEV}.lfsck_namespace |
223 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
225 error "(4) unexpected status"
# Prefer the dirent_repaired counter; fall back to updated_phase1 when the
# former is absent from the output.
228 local repaired=$($SHOW_NAMESPACE |
229 awk '/^dirent_repaired/ { print $2 }')
230 # for interop with old server
231 [ -z "$repaired" ] &&
232 repaired=$($SHOW_NAMESPACE |
233 awk '/^updated_phase1/ { print $2 }')
235 [ $repaired -eq 1 ] ||
236 error "(5) Fail to repair the missing FID-in-LMA: $repaired"
238 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
239 mount_client $MOUNT || error "(6) Fail to start client!"
# stat the file with fail_loc 0x1505 set; failure means the repair did not
# take effect.
241 #define OBD_FAIL_FID_LOOKUP 0x1505
242 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
243 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
245 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
247 run_test 1b "LFSCK can find out and repair the missing FID-in-LMA"
# Test 2a: create a file while fail_loc 0x1603 is set, run namespace LFSCK,
# and expect one linkEA repair plus a working fid2path round trip.
# NOTE(review): the function header and some statements are elided from this
# extract.
252 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
253 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
254 touch $DIR/$tdir/dummy
256 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
258 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
259 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
260 mdd.${MDT_DEV}.lfsck_namespace |
261 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
263 error "(4) unexpected status"
# Prefer the linkea_repaired counter; fall back to updated_phase2 when the
# former is absent from the output.
266 local repaired=$($SHOW_NAMESPACE |
267 awk '/^linkea_repaired/ { print $2 }')
268 # for interop with old server
269 [ -z "$repaired" ] &&
270 repaired=$($SHOW_NAMESPACE |
271 awk '/^updated_phase2/ { print $2 }')
273 [ $repaired -eq 1 ] ||
274 error "(5) Fail to repair crashed linkEA: $repaired"
276 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a link count of 1 ...
278 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
279 error "(7) Fail to stat $DIR/$tdir/dummy"
# ... and path -> FID -> path must return the original path.
281 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
282 local dummyname=$($LFS fid2path $DIR $dummyfid)
283 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
284 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
286 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
# Test 2b: create a file while fail_loc 0x1604 is set, run namespace LFSCK,
# and expect updated_phase2 to report one repair plus a working fid2path
# round trip.
# NOTE(review): the function header and some statements are elided from this
# extract.
292 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
293 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
294 touch $DIR/$tdir/dummy
296 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
298 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
299 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
300 mdd.${MDT_DEV}.lfsck_namespace |
301 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
303 error "(4) unexpected status"
# Unlike test 2a, only the updated_phase2 counter is consulted here.
306 local repaired=$($SHOW_NAMESPACE |
307 awk '/^updated_phase2/ { print $2 }')
308 [ $repaired -eq 1 ] ||
309 error "(5) Fail to repair crashed linkEA: $repaired"
311 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a link count of 1 ...
313 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
314 error "(7) Fail to stat $DIR/$tdir/dummy"
# ... and path -> FID -> path must return the original path.
316 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
317 local dummyname=$($LFS fid2path $DIR $dummyfid)
318 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
319 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
321 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
# Test 2c: create a file while fail_loc 0x1605 is set, run namespace LFSCK,
# and expect updated_phase2 to report one repair plus a working fid2path
# round trip.
# NOTE(review): the function header and some statements are elided from this
# extract.
327 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
328 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
329 touch $DIR/$tdir/dummy
331 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
333 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
334 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
335 mdd.${MDT_DEV}.lfsck_namespace |
336 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
338 error "(4) unexpected status"
# Exactly one phase-2 update is expected.
341 local repaired=$($SHOW_NAMESPACE |
342 awk '/^updated_phase2/ { print $2 }')
343 [ $repaired -eq 1 ] ||
344 error "(5) Fail to repair crashed linkEA: $repaired"
346 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a link count of 1 ...
348 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
349 error "(7) Fail to stat $DIR/$tdir/dummy"
# ... and path -> FID -> path must return the original path.
351 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
352 local dummyname=$($LFS fid2path $DIR $dummyfid)
353 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
354 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
356 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
# Test 2d: create a file while fail_loc 0x161d is set, run namespace LFSCK,
# and expect linkea_repaired to report one repair plus a working fid2path
# round trip.
# NOTE(review): the function header and some statements are elided from this
# extract.
362 #define OBD_FAIL_LFSCK_NO_LINKEA 0x161d
363 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
364 touch $DIR/$tdir/dummy
366 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
368 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
369 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
370 mdd.${MDT_DEV}.lfsck_namespace |
371 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
373 error "(4) unexpected status"
# Only linkea_repaired is consulted here; there is no fallback counter.
376 local repaired=$($SHOW_NAMESPACE |
377 awk '/^linkea_repaired/ { print $2 }')
378 [ $repaired -eq 1 ] ||
379 error "(5) Fail to repair crashed linkEA: $repaired"
381 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a link count of 1 ...
383 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
384 error "(7) Fail to stat $DIR/$tdir/dummy"
# ... and path -> FID -> path must return the original path.
386 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
387 local dummyname=$($LFS fid2path $DIR $dummyfid)
388 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
389 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
391 run_test 2d "LFSCK can recover the missing linkEA entry"
# Test 2e: with at least two MDSes, create d0 with `-i 1` and d0/d1 with
# `-i 0` while fail_loc 0x1603 is set, then run namespace LFSCK with -A and
# expect one linkEA repair.
# NOTE(review): the function header and some statements are elided from this
# extract.
395 [ $MDSCOUNT -lt 2 ] &&
396 skip "We need at least 2 MDSes for this test" && return
400 $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT1"
# The fail injection covers only the creation of d1.
402 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
403 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
404 $LFS mkdir -i 0 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT0"
405 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
407 $START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace!"
408 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
409 mdd.${MDT_DEV}.lfsck_namespace |
410 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
412 error "(4) unexpected status"
415 local repaired=$($SHOW_NAMESPACE |
416 awk '/^linkea_repaired/ { print $2 }')
417 [ $repaired -eq 1 ] ||
418 error "(5) Fail to repair crashed linkEA: $repaired"
# path -> FID -> path for d0/d1 must return the original path.
420 local fid=$($LFS path2fid $DIR/$tdir/d0/d1)
421 local name=$($LFS fid2path $DIR $fid)
422 [ "$name" == "$DIR/$tdir/d0/d1" ] ||
423 error "(6) Fail to repair linkEA: $fid $name"
425 run_test 2e "namespace LFSCK can verify remote object linkEA"
# Test 3: create hard links, two of them while fail injections are active,
# then run namespace LFSCK and check the multiple-link counters.
# NOTE(review): the function header and some statements (including whatever
# creates d0/f0 and d0/f1) are elided from this extract.
# Hard links created without any fail injection.
431 mkdir $DIR/$tdir/dummy || error "(1) Fail to mkdir"
432 ln $DIR/$tdir/d0/f0 $DIR/$tdir/dummy/f0 || error "(2) Fail to hardlink"
433 ln $DIR/$tdir/d0/f1 $DIR/$tdir/dummy/f1 || error "(3) Fail to hardlink"
435 $LFS mkdir -i 0 $DIR/$tdir/edir || error "(4) Fail to mkdir"
436 touch $DIR/$tdir/edir/f0 || error "(5) Fail to touch"
437 touch $DIR/$tdir/edir/f1 || error "(6) Fail to touch"
# Link w0 is created under fail_loc 0x1603 and w1 under fail_loc 0x1604.
439 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
440 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
441 ln $DIR/$tdir/edir/f0 $DIR/$tdir/edir/w0 || error "(7) Fail to hardlink"
443 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
444 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
445 ln $DIR/$tdir/edir/f1 $DIR/$tdir/edir/w1 || error "(8) Fail to hardlink"
447 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
449 $START_NAMESPACE -r || error "(9) Fail to start LFSCK for namespace!"
450 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
451 mdd.${MDT_DEV}.lfsck_namespace |
452 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
454 error "(10) unexpected status"
# At least 4 objects must have been checked in phase 2 ...
457 local checked=$($SHOW_NAMESPACE |
458 awk '/^checked_phase2/ { print $2 }')
459 [ $checked -ge 4 ] ||
460 error "(11) Fail to check multiple-linked object: $checked"
# ... and at least 2 multiple-linked objects must have been repaired.
462 local repaired=$($SHOW_NAMESPACE |
463 awk '/^multiple_linked_repaired/ { print $2 }')
464 [ $repaired -ge 2 ] ||
465 error "(12) Fail to repair multiple-linked object: $repaired"
467 run_test 3 "LFSCK can verify multiple-linked objects"
# Test 4: back up and restore the MDT, restart it with the "noscrub" mount
# option, and check that namespace LFSCK reports at least 9 repairs.
# NOTE(review): the function header and some statements are elided from this
# extract.
# Only runs on an ldiskfs MDT; other backends are skipped.
471 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
472 skip "OI Scrub not implemented for ZFS" && return
# Take the client and MDS down, then run the backup/restore helper.
475 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
476 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
478 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
479 echo "start $SINGLEMDS with disabling OI scrub"
480 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
481 error "(2) Fail to start MDS!"
# Delay the scan (fail_loc 0x1601, fail_val=1) and wait until the flags field
# reads "inconsistent".
483 #define OBD_FAIL_LFSCK_DELAY2 0x1601
484 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
485 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
486 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
487 mdd.${MDT_DEV}.lfsck_namespace |
488 awk '/^flags/ { print \\\$2 }'" "inconsistent" 32 || {
490 error "(5) unexpected status"
493 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
494 [ "$STATUS" == "scanning-phase1" ] ||
495 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fail injection and wait for the status to become "completed".
497 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
498 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
499 mdd.${MDT_DEV}.lfsck_namespace |
500 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
502 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): FLAGS is assigned without `local` here.
505 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
506 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Prefer the dirent_repaired counter; fall back to updated_phase1 when the
# former is absent from the output.
508 local repaired=$($SHOW_NAMESPACE |
509 awk '/^dirent_repaired/ { print $2 }')
510 # for interop with old server
511 [ -z "$repaired" ] &&
512 repaired=$($SHOW_NAMESPACE |
513 awk '/^updated_phase1/ { print $2 }')
515 [ $repaired -ge 9 ] ||
516 error "(9) Fail to re-generate FID-in-dirent: $repaired"
518 mount_client $MOUNT || error "(10) Fail to start client!"
# List the directory with fail_loc 0x1505 set.
520 #define OBD_FAIL_FID_LOOKUP 0x1505
521 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
522 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
523 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
525 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
# Test 5: same flow as test 4, but mds_backup_restore gets an extra argument
# "1", the awaited flags value is "inconsistent,upgrade", and at least 2
# repairs are expected.
# NOTE(review): the function header and some statements are elided from this
# extract.
# Only runs on an ldiskfs MDT; other backends are skipped.
529 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
530 skip "OI Scrub not implemented for ZFS" && return
# Take the client and MDS down, then run the backup/restore helper.
533 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
534 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
536 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
537 echo "start $SINGLEMDS with disabling OI scrub"
538 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
539 error "(2) Fail to start MDS!"
# Delay the scan (fail_loc 0x1601, fail_val=1) and wait until the flags field
# reads "inconsistent,upgrade".
541 #define OBD_FAIL_LFSCK_DELAY2 0x1601
542 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
543 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
544 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
545 mdd.${MDT_DEV}.lfsck_namespace |
546 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 32 || {
548 error "(5) unexpected status"
551 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
552 [ "$STATUS" == "scanning-phase1" ] ||
553 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fail injection and wait for the status to become "completed".
555 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
556 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
557 mdd.${MDT_DEV}.lfsck_namespace |
558 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
560 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): FLAGS is assigned without `local` here.
563 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
564 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Prefer the dirent_repaired counter; fall back to updated_phase1 when the
# former is absent from the output.
566 local repaired=$($SHOW_NAMESPACE |
567 awk '/^dirent_repaired/ { print $2 }')
568 # for interop with old server
569 [ -z "$repaired" ] &&
570 repaired=$($SHOW_NAMESPACE |
571 awk '/^updated_phase1/ { print $2 }')
573 [ $repaired -ge 2 ] ||
574 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
576 mount_client $MOUNT || error "(10) Fail to start client!"
# stat and list with fail_loc 0x1505 set, then check the fid2path round trip
# once the injection is cleared.
578 #define OBD_FAIL_FID_LOOKUP 0x1505
579 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
580 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
582 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
584 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
585 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
586 local dummyname=$($LFS fid2path $DIR $dummyfid)
587 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
588 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
590 run_test 5 "LFSCK can handle IGIF object upgrading"
# Test 6a: make a delayed namespace scan fail, record
# last_checkpoint_position, restart without -r, and require that
# latest_start_position is larger than the recorded checkpoint.
# NOTE(review): the function header, the sleep referred to below, and the tail
# of the two position pipelines are elided from this extract.
595 #define OBD_FAIL_LFSCK_DELAY1 0x1600
596 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
597 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
599 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
600 [ "$STATUS" == "scanning-phase1" ] ||
601 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
603 # Sleep 3 sec to guarantee at least one object processed by LFSCK
605 # Fail the LFSCK to guarantee there is at least one checkpoint
606 #define OBD_FAIL_LFSCK_FATAL1 0x1608
# The value written is 0x1608 with the high bit (0x80000000) set.
607 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
608 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
609 mdd.${MDT_DEV}.lfsck_namespace |
610 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
612 error "(4) unexpected status"
# POS0: second field of last_checkpoint_position after the failed run.
615 local POS0=$($SHOW_NAMESPACE |
616 awk '/^last_checkpoint_position/ { print $2 }' |
619 #define OBD_FAIL_LFSCK_DELAY1 0x1600
620 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
621 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
623 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
624 [ "$STATUS" == "scanning-phase1" ] ||
625 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# POS1: second field of latest_start_position of the resumed run; it must be
# strictly greater than POS0.
627 local POS1=$($SHOW_NAMESPACE |
628 awk '/^latest_start_position/ { print $2 }' |
630 [[ $POS0 -lt $POS1 ]] ||
631 error "(7) Expect larger than: $POS0, but got $POS1"
633 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
634 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
635 mdd.${MDT_DEV}.lfsck_namespace |
636 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
638 error "(8) unexpected status"
641 run_test 6a "LFSCK resumes from last checkpoint (1)"
# Test 6b: like test 6a, but uses fail_loc 0x1601 / 0x80001609 and compares
# both the second and the fourth field of the position lines.
# NOTE(review): the function header, the sleep referred to below, the tail of
# the O_POS pipelines and the else/fi lines of the comparison are elided from
# this extract.
646 #define OBD_FAIL_LFSCK_DELAY2 0x1601
647 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
648 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
650 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
651 [ "$STATUS" == "scanning-phase1" ] ||
652 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
654 # Sleep 5 sec to guarantee that we are in the directory scanning
656 # Fail the LFSCK to guarantee there is at least one checkpoint
657 #define OBD_FAIL_LFSCK_FATAL2 0x1609
# The value written is 0x1609 with the high bit (0x80000000) set.
658 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
659 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
660 mdd.${MDT_DEV}.lfsck_namespace |
661 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
663 error "(4) unexpected status"
# O_POS0 / D_POS0: second and fourth fields of last_checkpoint_position.
666 local O_POS0=$($SHOW_NAMESPACE |
667 awk '/^last_checkpoint_position/ { print $2 }' |
670 local D_POS0=$($SHOW_NAMESPACE |
671 awk '/^last_checkpoint_position/ { print $4 }')
673 #define OBD_FAIL_LFSCK_DELAY2 0x1601
674 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
675 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
677 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
678 [ "$STATUS" == "scanning-phase1" ] ||
679 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# O_POS1 / D_POS1: second and fourth fields of latest_start_position.
681 local O_POS1=$($SHOW_NAMESPACE |
682 awk '/^latest_start_position/ { print $2 }' |
684 local D_POS1=$($SHOW_NAMESPACE |
685 awk '/^latest_start_position/ { print $4 }')
# If either D_POS value is "N/A", the O_POS values are compared.
# NOTE(review): the D_POS comparison presumably sits in an elided else
# branch — confirm against the full file.
687 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
688 [[ $O_POS0 -lt $O_POS1 ]] ||
689 error "(7.1) $O_POS1 is not larger than $O_POS0"
691 [[ $D_POS0 -lt $D_POS1 ]] ||
692 error "(7.2) $D_POS1 is not larger than $D_POS0"
695 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
696 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
697 mdd.${MDT_DEV}.lfsck_namespace |
698 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
700 error "(8) unexpected status"
703 run_test 6b "LFSCK resumes from last checkpoint (2)"
# Test 7a: stop the MDS while a delayed namespace scan is in phase 1, start
# the MDS again, and expect the status to reach "completed" without an
# explicit lfsck_start.
# NOTE(review): the function header and the sleep referred to below are elided
# from this extract.
710 #define OBD_FAIL_LFSCK_DELAY2 0x1601
711 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
712 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
714 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
715 [ "$STATUS" == "scanning-phase1" ] ||
716 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
718 # Sleep 3 sec to guarantee at least one object processed by LFSCK
720 echo "stop $SINGLEMDS"
721 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
# The fail injection is cleared before the MDS is started again.
723 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
724 echo "start $SINGLEMDS"
725 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
726 error "(5) Fail to start MDS!"
# No lfsck_start is issued here; the wait uses a limit of 30 rather than 32.
728 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
729 mdd.${MDT_DEV}.lfsck_namespace |
730 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
732 error "(6) unexpected status"
735 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
# Test 7b: like test 7a, but the MDS is stopped after the scan has reached
# "scanning-phase2".
# NOTE(review): the function header and the loop's closing `done` are elided
# from this extract.
# Create 20 files while fail_loc 0x1604 is set.
741 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
742 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
743 for ((i = 0; i < 20; i++)); do
744 touch $DIR/$tdir/dummy${i}
# Delay the scan (fail_loc 0x1602, fail_val=1) and wait for phase 2.
747 #define OBD_FAIL_LFSCK_DELAY3 0x1602
748 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
749 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
750 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
751 mdd.${MDT_DEV}.lfsck_namespace |
752 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
754 error "(4) unexpected status"
758 echo "stop $SINGLEMDS"
759 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
# The fail injection is cleared before the MDS is started again.
761 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
762 echo "start $SINGLEMDS"
763 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
764 error "(6) Fail to start MDS!"
# No lfsck_start is issued here; the status is expected to reach "completed".
766 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
767 mdd.${MDT_DEV}.lfsck_namespace |
768 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
770 error "(7) unexpected status"
773 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
# Test 8: walk the namespace LFSCK status through init, scanning-phase1,
# stopped, failed, crashed, paused, scanning-phase2 and completed, checking
# the reported status after each transition.
# NOTE(review): the function header, the setup between formatall and the first
# status check, the `local timer` initialisation and the bodies/ends of the
# loops are elided from this extract.
778 formatall > /dev/null
# On the freshly formatted filesystem the status must be "init".
784 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
785 [ "$STATUS" == "init" ] ||
786 error "(2) Expect 'init', but got '$STATUS'"
# Create one directory under fail_loc 0x1603 and five files under 0x1604.
788 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
789 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
790 mkdir $DIR/$tdir/crashed
792 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
793 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
794 for ((i = 0; i < 5; i++)); do
795 touch $DIR/$tdir/dummy${i}
798 umount_client $MOUNT || error "(3) Fail to stop client!"
# init -> scanning-phase1, with the scan delayed by fail_loc 0x1601.
800 #define OBD_FAIL_LFSCK_DELAY2 0x1601
801 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
802 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
804 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
805 [ "$STATUS" == "scanning-phase1" ] ||
806 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
# scanning-phase1 -> stopped.
808 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
810 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
811 [ "$STATUS" == "stopped" ] ||
812 error "(7) Expect 'stopped', but got '$STATUS'"
# stopped -> scanning-phase1.
814 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
816 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
817 [ "$STATUS" == "scanning-phase1" ] ||
818 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
# scanning-phase1 -> failed, via fail_loc 0x1609 with the high bit set.
820 #define OBD_FAIL_LFSCK_FATAL2 0x1609
821 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
822 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
823 mdd.${MDT_DEV}.lfsck_namespace |
824 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
826 error "(10) unexpected status"
# failed -> scanning-phase1.
829 #define OBD_FAIL_LFSCK_DELAY1 0x1600
830 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
831 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
833 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
834 [ "$STATUS" == "scanning-phase1" ] ||
835 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the MDS with fail_loc 0x160a set, then start it with fail_loc 0x160b
# set; "crashed" is expected afterwards.
837 #define OBD_FAIL_LFSCK_CRASH 0x160a
838 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
841 echo "stop $SINGLEMDS"
842 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
844 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
845 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
847 echo "start $SINGLEMDS"
848 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
849 error "(14) Fail to start MDS!"
# Poll recovery_status until it is no longer "RECOVERING" or timer reaches the
# value returned by max_recovery_time.
851 local timeout=$(max_recovery_time)
854 while [ $timer -lt $timeout ]; do
855 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
856 mdt.${MDT_DEV}.recovery_status |
857 awk '/^status/ { print \\\$2 }'")
858 [ "$STATUS" != "RECOVERING" ] && break;
# Reaching the timeout counts as a failure.
863 [ $timer != $timeout ] ||
864 error "(14.1) recovery timeout"
866 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
867 [ "$STATUS" == "crashed" ] ||
868 error "(15) Expect 'crashed', but got '$STATUS'"
# crashed -> scanning-phase1.
870 #define OBD_FAIL_LFSCK_DELAY2 0x1601
871 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
872 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
874 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
875 [ "$STATUS" == "scanning-phase1" ] ||
876 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the MDS (no 0x160a this time) and start it with fail_loc 0x160b set;
# "paused" is expected afterwards.
878 echo "stop $SINGLEMDS"
879 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
881 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
882 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
884 echo "start $SINGLEMDS"
885 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
886 error "(19) Fail to start MDS!"
# Same recovery poll as above.
889 while [ $timer -lt $timeout ]; do
890 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
891 mdt.${MDT_DEV}.recovery_status |
892 awk '/^status/ { print \\\$2 }'")
893 [ "$STATUS" != "RECOVERING" ] && break;
898 [ $timer != $timeout ] ||
899 error "(19.1) recovery timeout"
901 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
902 [ "$STATUS" == "paused" ] ||
903 error "(20) Expect 'paused', but got '$STATUS'"
# paused -> scanning-phase2, with the scan delayed by fail_loc 0x1602.
905 #define OBD_FAIL_LFSCK_DELAY3 0x1602
906 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
908 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
909 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
910 mdd.${MDT_DEV}.lfsck_namespace |
911 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
913 error "(22) unexpected status"
# In phase 2 the flags field must read "scanned-once,inconsistent".
916 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
917 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
918 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
# scanning-phase2 -> completed, after which the flags field must be empty.
920 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
921 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
922 mdd.${MDT_DEV}.lfsck_namespace |
923 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
925 error "(24) unexpected status"
928 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
929 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
931 run_test 8 "LFSCK state machine"
# Test 9a: run namespace LFSCK with a speed limit (-s), then change
# lfsck_speed_limit during the scan, and check average_speed_phase1 against
# computed bounds.
# NOTE(review): the function header, the assignments of RUN_TIME1, RUN_TIME2
# and TIME_DIFF, the sleeps between measurements and several fi/else/} lines
# are elided from this extract.
# Skip when /proc/cpuinfo has no line matching "processor.*: 1"; the skip
# message calls this a UP system.
934 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
935 skip "Testing on UP system, the speed may be inaccurate."
# First measurement, with the limit passed via -s at start.
941 local BASE_SPEED1=100
943 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
946 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
947 [ "$STATUS" == "scanning-phase1" ] ||
948 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
950 local SPEED=$($SHOW_NAMESPACE |
951 awk '/^average_speed_phase1/ { print $2 }')
953 # There may be timing error; normally it should be less than 2 seconds.
954 # We allow another 20% scheduling error.
956 # MAX_MARGIN = 1.2 = 12 / 10
957 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
958 RUN_TIME1 * 12 / 10))
959 [ $SPEED -lt $MAX_SPEED ] ||
960 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
# Second measurement: the limit is changed through the lfsck_speed_limit
# parameter while the scan is running.
963 local BASE_SPEED2=300
965 do_facet $SINGLEMDS \
966 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
969 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
970 # MIN_MARGIN = 0.8 = 8 / 10
971 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
972 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
973 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# Falling below MIN_SPEED goes through error_ignore LU-5624 on non-ldiskfs
# backends.
974 [ $SPEED -gt $MIN_SPEED ] || {
975 if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
976 error_ignore LU-5624 \
977 "(5.1) Got speed $SPEED, expected more than $MIN_SPEED"
980 "(5.2) Got speed $SPEED, expected more than $MIN_SPEED"
984 # MAX_MARGIN = 1.2 = 12 / 10
985 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
986 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
987 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
988 [ $SPEED -lt $MAX_SPEED ] ||
989 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the limit to 0 and wait for the scan to complete.
991 do_facet $SINGLEMDS \
992 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
994 wait_update_facet $SINGLEMDS \
995 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
996 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
997 error "(7) Failed to get expected 'completed'"
999 run_test 9a "LFSCK speed control (1)"
# Test 9b: same speed checks as test 9a, but measured on average_speed_phase2
# after a first run that ends in "stopped" with fail_loc 0x160c set.
# NOTE(review): the function header, the assignments of RUN_TIME1, RUN_TIME2
# and TIME_DIFF, the sleeps between measurements and several done/fi/else/}
# lines are elided from this extract.
# Skip when /proc/cpuinfo has no line matching "processor.*: 1"; the skip
# message calls this a UP system.
1002 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
1003 skip "Testing on UP system, the speed may be inaccurate."
# Create 50 directories and 50 * 50 files while fail_loc 0x1604 is set.
1009 echo "Preparing another 50 * 50 files (with error) at $(date)."
1010 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1011 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
1012 createmany -d $DIR/$tdir/d 50
1013 createmany -m $DIR/$tdir/f 50
1014 for ((i = 0; i < 50; i++)); do
1015 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
# With fail_loc 0x160c set, the first run is expected to end in "stopped".
1018 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
1019 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
1020 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
1021 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1022 mdd.${MDT_DEV}.lfsck_namespace |
1023 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
1025 error "(5) unexpected status"
1028 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1029 echo "Prepared at $(date)."
# Restart without -r and with a speed limit; the status is expected to be
# "scanning-phase2".
1031 local BASE_SPEED1=50
1033 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
1036 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1037 [ "$STATUS" == "scanning-phase2" ] ||
1038 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
1040 local SPEED=$($SHOW_NAMESPACE |
1041 awk '/^average_speed_phase2/ { print $2 }')
1042 # There may be timing error; normally it should be less than 2 seconds.
1043 # We allow another 20% scheduling error.
1045 # MAX_MARGIN = 1.2 = 12 / 10
1046 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
1047 RUN_TIME1 * 12 / 10))
1048 [ $SPEED -lt $MAX_SPEED ] ||
1049 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
1051 # adjust speed limit
1052 local BASE_SPEED2=150
1054 do_facet $SINGLEMDS \
1055 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
1058 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
1059 # MIN_MARGIN = 0.8 = 8 / 10
1060 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
1061 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
1062 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# Falling below MIN_SPEED goes through error_ignore LU-5624 on non-ldiskfs
# backends.
1063 [ $SPEED -gt $MIN_SPEED ] || {
1064 if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
1065 error_ignore LU-5624 \
1066 "(9.1) Got speed $SPEED, expected more than $MIN_SPEED"
1069 "(9.2) Got speed $SPEED, expected more than $MIN_SPEED"
1073 # MAX_MARGIN = 1.2 = 12 / 10
1074 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
1075 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
1076 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
1077 [ $SPEED -lt $MAX_SPEED ] ||
1078 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the limit to 0 and wait for the scan to complete.
1080 do_facet $SINGLEMDS \
1081 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1082 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1083 mdd.${MDT_DEV}.lfsck_namespace |
1084 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1086 error "(11) unexpected status"
1089 run_test 9b "LFSCK speed control (2)"
# Test 10 "System is available during LFSCK scanning": runs ordinary client
# operations while namespace LFSCK is in scanning-phase1.
# Flow: skip unless the MDS backend is ldiskfs; create d<i>/f<i> entries for
# even i under fail_loc 0x1603 and for odd i under fail_loc 0x1604; remount
# the client; start LFSCK with "-r -s 100"; run ls/touch/mkdir/unlink/rm/
# mv/ln while the status must still be scanning-phase1; finally set the
# speed limit to 0 and wait for "completed".
1093 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
1094 skip "lookup(..)/linkea on ZFS issue" && return
1098 echo "Preparing more files with error at $(date)."
1099 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
1100 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
# Even-numbered entries (0, 2, ..., 998).
1102 for ((i = 0; i < 1000; i = $((i+2)))); do
1103 mkdir -p $DIR/$tdir/d${i}
1104 touch $DIR/$tdir/f${i}
1105 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
1108 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1109 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
# Odd-numbered entries (1, 3, ..., 999).
1111 for ((i = 1; i < 1000; i = $((i+2)))); do
1112 mkdir -p $DIR/$tdir/d${i}
1113 touch $DIR/$tdir/f${i}
1114 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
1117 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1118 echo "Prepared at $(date)."
1120 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
1122 umount_client $MOUNT
1123 mount_client $MOUNT || error "(3) Fail to start client!"
1125 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
1128 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1129 [ "$STATUS" == "scanning-phase1" ] ||
1130 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Client operations issued between the two scanning-phase1 status checks.
1132 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
1134 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
1136 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
1138 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
1140 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
1142 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
1144 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
1146 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
1147 error "(14) Fail to softlink!"
1149 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1150 [ "$STATUS" == "scanning-phase1" ] ||
1151 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# Set the speed limit to 0 (presumably "no limit" - confirm) so the scan can
# reach "completed".
1153 do_facet $SINGLEMDS \
1154 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1155 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1156 mdd.${MDT_DEV}.lfsck_namespace |
1157 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1159 error "(16) unexpected status"
1162 run_test 10 "System is available during LFSCK scanning"
# Remove the LAST_ID file and the d0/0 entry under O/<idx> on the backing
# filesystem of ost<ost>, mounting it locally for the duration.
# NOTE(review): the assignments of `ost` and `idx` are not visible in this
# block; the visible call site passes two positional arguments ("1 0") -
# confirm they map to $1 and $2.
# Returns 1 if mount_fstype fails and 2 if unmount_fstype fails.
1165 ost_remove_lastid() {
1168 local rcmd="do_facet ost${ost}"
1170 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1172 # step 1: local mount
1173 mount_fstype ost${ost} || return 1
1174 # step 2: remove the specified LAST_ID
# The brace expansion yields two paths: .../O/${idx}/LAST_ID and
# .../O/${idx}/d0/0. The exit status of rm is not checked.
1175 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
1177 unmount_fstype ost${ost} || return 2
# Test 11a "LFSCK can rebuild lost last_id".
# Flow: create 64 files in a directory striped with "-c 1 -i 0"; remove the
# LAST_ID via ost_remove_lastid 1 0; start ost1 with $MOUNT_OPTS_NOSCRUB;
# start layout LFSCK on the OST while fail_loc 0x160e (fail_val=3) is set and
# wait for the "crashed_lastid" flag; clear the fail_loc, wait for
# "completed" and verify that the flags field is empty again.
1181 check_mount_and_prep
1182 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1183 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
1188 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1190 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1191 error "(2) Fail to start ost1"
1193 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1194 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1196 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1197 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
# The flag is polled while the fail_loc above is still set.
1199 wait_update_facet ost1 "$LCTL get_param -n \
1200 obdfilter.${OST_DEV}.lfsck_layout |
1201 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1203 error "(5) unexpected status"
1206 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1208 wait_update_facet ost1 "$LCTL get_param -n \
1209 obdfilter.${OST_DEV}.lfsck_layout |
1210 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1212 error "(6) unexpected status"
1215 echo "the LAST_ID(s) should have been rebuilt"
1216 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1217 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1219 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b "LFSCK can rebuild crashed last_id".
# Flow: with fail_loc 0x160d set on ost1, create (precreated count + 32)
# files and record the in-memory last_id (lastid1) for the MDT's current
# prealloc sequence; restart ost1 with fail_loc 0x215 set and read the value
# now reported (lastid2), which is expected to be smaller; run layout LFSCK
# on the OST; restart ost1 again and wait until last_id equals lastid1.
1222 check_mount_and_prep
1223 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1225 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1226 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1227 do_facet ost1 $LCTL set_param fail_loc=0x160d
1229 local count=$(precreated_ost_obj_count 0 0)
1231 createmany -o $DIR/$tdir/f $((count + 32))
1233 local proc_path="${FSNAME}-OST0000-osc-MDT0000"
1234 local seq=$(do_facet mds1 $LCTL get_param -n \
1235 osp.${proc_path}.prealloc_last_seq)
# last_id output is filtered by $seq and split on ':'; field 2 is the id.
1236 local lastid1=$(do_facet ost1 "lctl get_param -n \
1237 obdfilter.${ost1_svc}.last_id" | grep $seq |
1238 awk -F: '{ print $2 }')
1240 umount_client $MOUNT
1241 stop ost1 || error "(1) Fail to stop ost1"
1243 #define OBD_FAIL_OST_ENOSPC 0x215
1244 do_facet ost1 $LCTL set_param fail_loc=0x215
1246 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1247 error "(2) Fail to start ost1"
# Poll (at most 60 iterations) until last_id reports a value for $seq.
# NOTE(review): $lastid2 is unquoted in the `[ ! -z ... ]` test and both ids
# are unquoted in the `-gt` comparison below; an empty or multi-word value
# would make `[` fail with a syntax error instead of a clean test failure.
1249 for ((i = 0; i < 60; i++)); do
1250 lastid2=$(do_facet ost1 "lctl get_param -n \
1251 obdfilter.${ost1_svc}.last_id" | grep $seq |
1252 awk -F: '{ print $2 }')
1253 [ ! -z $lastid2 ] && break;
1257 echo "the on-disk LAST_ID should be smaller than the expected one"
1258 [ $lastid1 -gt $lastid2 ] ||
1259 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1261 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1262 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1264 wait_update_facet ost1 "$LCTL get_param -n \
1265 obdfilter.${OST_DEV}.lfsck_layout |
1266 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1268 error "(6) unexpected status"
# Restart ost1 so that last_id is read again after the LFSCK run.
1271 stop ost1 || error "(7) Fail to stop ost1"
1273 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1274 error "(8) Fail to start ost1"
1276 echo "the on-disk LAST_ID should have been rebuilt"
1277 wait_update_facet ost1 "$LCTL get_param -n \
1278 obdfilter.${ost1_svc}.last_id | grep $seq |
1279 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
1280 do_facet ost1 $LCTL get_param -n \
1281 obdfilter.${ost1_svc}.last_id
1282 error "(9) expect lastid1 $seq:$lastid1"
1285 do_facet ost1 $LCTL set_param fail_loc=0
1286 stopall || error "(10) Fail to stopall"
1288 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12 "single command to trigger LFSCK on all devices".
# Requires at least 2 MDSes. For namespace LFSCK and then for layout LFSCK
# the same sequence is run from mds1 with the -A option:
#   start with "-s 1 -r"  -> every MDT must report scanning-phase1
#   lfsck_stop -A         -> every MDT (and, for layout, every OST) must
#                            report stopped
#   start with "-s 0 -r"  -> every MDT must reach completed
1291 [ $MDSCOUNT -lt 2 ] &&
1292 skip "We need at least 2 MDSes for test_12" && return
1294 check_mount_and_prep
# One directory per MDT (index k - 1), each populated with 100 files.
1295 for k in $(seq $MDSCOUNT); do
1296 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1297 createmany -o $DIR/$tdir/${k}/f 100 ||
1298 error "(0) Fail to create 100 files."
1301 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1302 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1303 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1305 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1306 for k in $(seq $MDSCOUNT); do
1307 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1308 mdd.$(facet_svc mds${k}).lfsck_namespace |
1309 awk '/^status/ { print $2 }')
1310 [ "$STATUS" == "scanning-phase1" ] ||
1311 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1314 echo "Stop namespace LFSCK on all targets by single lctl command."
1315 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1316 error "(4) Fail to stop LFSCK on all devices!"
1318 echo "All the LFSCK targets should be in 'stopped' status."
1319 for k in $(seq $MDSCOUNT); do
1320 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1321 mdd.$(facet_svc mds${k}).lfsck_namespace |
1322 awk '/^status/ { print $2 }')
1323 [ "$STATUS" == "stopped" ] ||
1324 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
1327 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1328 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1329 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1331 echo "All the LFSCK targets should be in 'completed' status."
1332 for k in $(seq $MDSCOUNT); do
1333 wait_update_facet mds${k} "$LCTL get_param -n \
1334 mdd.$(facet_svc mds${k}).lfsck_namespace |
1335 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1336 error "(7) MDS${k} is not the expected 'completed'"
# Layout LFSCK part; it runs between start_full_debug_logging and
# stop_full_debug_logging.
1339 start_full_debug_logging
1341 echo "Start layout LFSCK on all targets by single command (-s 1)."
1342 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1343 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1345 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1346 for k in $(seq $MDSCOUNT); do
1347 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1348 mdd.$(facet_svc mds${k}).lfsck_layout |
1349 awk '/^status/ { print $2 }')
1350 [ "$STATUS" == "scanning-phase1" ] ||
1351 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1354 echo "Stop layout LFSCK on all targets by single lctl command."
1355 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1356 error "(10) Fail to stop LFSCK on all devices!"
1358 echo "All the LFSCK targets should be in 'stopped' status."
1359 for k in $(seq $MDSCOUNT); do
1360 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1361 mdd.$(facet_svc mds${k}).lfsck_layout |
1362 awk '/^status/ { print $2 }')
1363 [ "$STATUS" == "stopped" ] ||
1364 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# For layout LFSCK the OSTs are checked as well, via obdfilter.*.lfsck_layout.
1367 for k in $(seq $OSTCOUNT); do
1368 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1369 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1370 awk '/^status/ { print $2 }')
1371 [ "$STATUS" == "stopped" ] ||
1372 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1375 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1376 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1377 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1379 echo "All the LFSCK targets should be in 'completed' status."
1380 for k in $(seq $MDSCOUNT); do
1381 # The LFSCK status query interval is 30 seconds. In case some
1382 # LFSCK_NOTIFY RPCs fail or are lost, we wait long enough
1383 # to guarantee that the status is synced up.
1384 wait_update_facet mds${k} "$LCTL get_param -n \
1385 mdd.$(facet_svc mds${k}).lfsck_layout |
1386 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1387 error "(14) MDS${k} is not the expected 'completed'"
1390 stop_full_debug_logging
1392 run_test 12 "single command to trigger LFSCK on all devices"
# Test 13 "LFSCK can repair crashed lmm_oi".
# Flow: create 32 files while the MDS fail_loc is 0x160f, run layout LFSCK
# with -r, wait for "completed", then require repaired_others to be 32.
1396 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1397 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1398 echo "MDT-object FID."
1401 check_mount_and_prep
1403 echo "Inject failure stub to simulate bad lmm_oi"
1404 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1405 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1406 createmany -o $DIR/$tdir/f 32
1407 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1409 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1410 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1412 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1413 mdd.${MDT_DEV}.lfsck_layout |
1414 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1416 error "(2) unexpected status"
# One repair is expected for each of the 32 files created above.
1419 local repaired=$($SHOW_LAYOUT |
1420 awk '/^repaired_others/ { print $2 }')
1421 [ $repaired -eq 32 ] ||
1422 error "(3) Fail to repair crashed lmm_oi: $repaired"
1424 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14 "LFSCK can repair MDT-object with dangling reference".
# Flow: with fail_loc 0x1610 set on ost1, create (precreated count + 31)
# files plus "guard"; create another batch of files with the fail_loc
# cleared; check that ls fails; run layout LFSCK with -r and require
# repaired_dangling >= 32 while stat on guard still fails; run LFSCK again
# with "-r -c", then wait until stat on guard reports size 0 and require
# repaired_dangling >= 32 again.
1428 echo "The OST-object referenced by the MDT-object should be there;"
1429 echo "otherwise, the LFSCK should re-create the missing OST-object."
1432 check_mount_and_prep
1433 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1435 echo "Inject failure stub to simulate dangling referenced MDT-object"
1436 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1437 do_facet ost1 $LCTL set_param fail_loc=0x1610
1438 local count=$(precreated_ost_obj_count 0 0)
1440 createmany -o $DIR/$tdir/f $((count + 31))
1441 touch $DIR/$tdir/guard
1442 do_facet ost1 $LCTL set_param fail_loc=0
1444 start_full_debug_logging
1446 # exhaust other pre-created dangling cases
1447 count=$(precreated_ost_obj_count 0 0)
1448 createmany -o $DIR/$tdir/a $count ||
1449 error "(0) Fail to create $count files."
1451 echo "'ls' should fail because of dangling referenced MDT-object"
1452 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
1454 echo "Trigger layout LFSCK to find out dangling reference"
1455 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1457 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1458 mdd.${MDT_DEV}.lfsck_layout |
1459 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1461 error "(3) unexpected status"
# 31 files beyond the precreated count plus "guard" give the 32 expected here.
1464 local repaired=$($SHOW_LAYOUT |
1465 awk '/^repaired_dangling/ { print $2 }')
1466 [ $repaired -ge 32 ] ||
1467 error "(4) Fail to repair dangling reference: $repaired"
1469 echo "'stat' should fail because of not repair dangling by default"
1470 stat $DIR/$tdir/guard > /dev/null 2>&1 && error "(5) stat should fail"
# Second run adds -c; per the messages here, this is the run that actually
# repairs the dangling references.
1472 echo "Trigger layout LFSCK to repair dangling reference"
1473 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1475 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1476 mdd.${MDT_DEV}.lfsck_layout |
1477 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1479 error "(7) unexpected status"
1482 # There may be some async LFSCK updates in processing, wait for
1483 # a while until the target reparation has been done. LU-4970.
1485 echo "'stat' should success after layout LFSCK repairing"
1486 wait_update_facet client "stat $DIR/$tdir/guard |
1487 awk '/Size/ { print \\\$2 }'" "0" 32 || {
1488 stat $DIR/$tdir/guard
1490 error "(8) unexpected size"
1493 repaired=$($SHOW_LAYOUT |
1494 awk '/^repaired_dangling/ { print $2 }')
1495 [ $repaired -ge 32 ] ||
1496 error "(9) Fail to repair dangling reference: $repaired"
1498 stop_full_debug_logging
1500 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)".
# Flow: write 1 MiB to f0 while fail_loc 0x1611 is set on ost1, run layout
# LFSCK with -r, wait for "completed", then require repaired_unmatched_pair
# to be exactly 1.
1504 echo "If the OST-object referenced by the MDT-object back points"
1505 echo "to some non-exist MDT-object, then the LFSCK should repair"
1506 echo "the OST-object to back point to the right MDT-object."
1509 check_mount_and_prep
1510 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1512 echo "Inject failure stub to make the OST-object to back point to"
1513 echo "non-exist MDT-object."
1514 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
1516 do_facet ost1 $LCTL set_param fail_loc=0x1611
1517 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
# osc locks are cancelled before the fail_loc is cleared.
1518 cancel_lru_locks osc
1519 do_facet ost1 $LCTL set_param fail_loc=0
1521 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1522 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1524 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1525 mdd.${MDT_DEV}.lfsck_layout |
1526 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1528 error "(2) unexpected status"
1531 local repaired=$($SHOW_LAYOUT |
1532 awk '/^repaired_unmatched_pair/ { print $2 }')
1533 [ $repaired -eq 1 ] ||
1534 error "(3) Fail to repair unmatched pair: $repaired"
1536 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)".
# Flow: write "guard" normally; write f0 while fail_loc 0x1612 is set on
# ost1; run layout LFSCK with -r, wait for "completed", then require
# repaired_unmatched_pair to be exactly 1.
1540 echo "If the OST-object referenced by the MDT-object back points"
1541 echo "to other MDT-object that doesn't recognize the OST-object,"
1542 echo "then the LFSCK should repair it to back point to the right"
1543 echo "MDT-object (the first one)."
1546 check_mount_and_prep
1547 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# "guard" is written before the fail_loc is set.
1548 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1549 cancel_lru_locks osc
1551 echo "Inject failure stub to make the OST-object to back point to"
1552 echo "other MDT-object"
1554 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1555 do_facet ost1 $LCTL set_param fail_loc=0x1612
1556 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1557 cancel_lru_locks osc
1558 do_facet ost1 $LCTL set_param fail_loc=0
1560 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1561 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1563 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1564 mdd.${MDT_DEV}.lfsck_layout |
1565 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1567 error "(2) unexpected status"
1570 local repaired=$($SHOW_LAYOUT |
1571 awk '/^repaired_unmatched_pair/ { print $2 }')
1572 [ $repaired -eq 1 ] ||
1573 error "(3) Fail to repair unmatched pair: $repaired"
1575 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner".
# Flow: write f0; chown it to 1.1 while the MDS fail_loc is 0x1613; run
# layout LFSCK with -r, wait for "completed", then require
# repaired_inconsistent_owner to be exactly 1.
1579 echo "If the OST-object's owner information does not match the owner"
1580 echo "information stored in the MDT-object, then the LFSCK trust the"
1581 echo "MDT-object and update the OST-object's owner information."
1584 check_mount_and_prep
1585 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1586 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1587 cancel_lru_locks osc
1589 echo "Inject failure stub to skip OST-object owner changing"
1590 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1591 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
1592 chown 1.1 $DIR/$tdir/f0
1593 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1595 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1598 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1600 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1601 mdd.${MDT_DEV}.lfsck_layout |
1602 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1604 error "(2) unexpected status"
1607 local repaired=$($SHOW_LAYOUT |
1608 awk '/^repaired_inconsistent_owner/ { print $2 }')
1609 [ $repaired -eq 1 ] ||
1610 error "(3) Fail to repair inconsistent owner: $repaired"
1612 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17 "LFSCK can repair multiple references".
# Flow: with the MDS fail_loc set to 0x1614, write 1 MiB to "guard" and
# create f0; f0 is then expected to report guard's size (1048576); run
# layout LFSCK with -r and require repaired_multiple_referenced == 1; write
# 2 MiB to f0 and verify that guard's size is still 1048576.
1616 echo "If more than one MDT-objects reference the same OST-object,"
1617 echo "and the OST-object only recognizes one MDT-object, then the"
1618 echo "LFSCK should create new OST-objects for such non-recognized"
1622 check_mount_and_prep
1623 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1625 echo "Inject failure stub to make two MDT-objects to refernce"
1626 echo "the OST-object"
1628 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1629 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1631 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1632 cancel_lru_locks osc
# createmany with a count of 1 creates the single file f0 used below.
1634 createmany -o $DIR/$tdir/f 1
1636 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1638 cancel_lru_locks mdc
1639 cancel_lru_locks osc
1641 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
# Field 5 of `ls -l` output is the file size.
1642 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1643 [ $size -eq 1048576 ] ||
1644 error "(1) f0 (wrong) size should be 1048576, but got $size"
1646 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1649 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1651 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1652 mdd.${MDT_DEV}.lfsck_layout |
1653 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1655 error "(3) unexpected status"
1658 local repaired=$($SHOW_LAYOUT |
1659 awk '/^repaired_multiple_referenced/ { print $2 }')
1660 [ $repaired -eq 1 ] ||
1661 error "(4) Fail to repair multiple references: $repaired"
# After the repair a write to f0 must no longer change guard's size.
1663 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
1664 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1665 error "(5) Fail to write f0."
1666 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1667 [ $size -eq 1048576 ] ||
1668 error "(6) guard size should be 1048576, but got $size"
1670 run_test 17 "LFSCK can repair multiple references"
# Add "cache" to the local debug setting; the command output is discarded.
1672 $LCTL set_param debug=+cache > /dev/null
# Test 18a "Find out orphan OST-object and repair it (1)".
# Flow: write a1/f1 (and a2/f2 on MDT index 1 when MDSCOUNT >= 2); chown the
# files while fail_loc 0x1615 is set on the owning MDS; check that the
# reported size differs from the saved one; run layout LFSCK with "-r -o";
# wait for "completed" on every MDT and check every OST; require
# repaired_orphan == 1 on mds1 (and == 2 on mds2); check that the sizes
# match the saved one again.
# NOTE(review): LTIME is used as the wait limit below but its assignment is
# not visible in this block - confirm where it is set.
1676 echo "The target MDT-object is there, but related stripe information"
1677 echo "is lost or partly lost. The LFSCK should regenerate the missing"
1678 echo "layout EA entries."
1681 check_mount_and_prep
1682 $LFS mkdir -i 0 $DIR/$tdir/a1
1683 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1684 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With -i, ls prints the inode number first, so the size is field 6.
1686 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1688 $LFS path2fid $DIR/$tdir/a1/f1
1689 $LFS getstripe $DIR/$tdir/a1/f1
1691 if [ $MDSCOUNT -ge 2 ]; then
1692 $LFS mkdir -i 1 $DIR/$tdir/a2
1693 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
1694 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1695 $LFS path2fid $DIR/$tdir/a2/f2
1696 $LFS getstripe $DIR/$tdir/a2/f2
1699 cancel_lru_locks osc
1701 echo "Inject failure, to make the MDT-object lost its layout EA"
1702 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1703 do_facet mds1 $LCTL set_param fail_loc=0x1615
1704 chown 1.1 $DIR/$tdir/a1/f1
1706 if [ $MDSCOUNT -ge 2 ]; then
1707 do_facet mds2 $LCTL set_param fail_loc=0x1615
1708 chown 1.1 $DIR/$tdir/a2/f2
1714 do_facet mds1 $LCTL set_param fail_loc=0
1715 if [ $MDSCOUNT -ge 2 ]; then
1716 do_facet mds2 $LCTL set_param fail_loc=0
1719 cancel_lru_locks mdc
1720 cancel_lru_locks osc
1722 echo "The file size should be incorrect since layout EA is lost"
1723 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1724 [ "$cur_size" != "$saved_size" ] ||
1725 error "(1) Expect incorrect file1 size"
# f2 is compared against the size saved from f1; both files were written
# with the same dd parameters (bs=1M count=2).
1727 if [ $MDSCOUNT -ge 2 ]; then
1728 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1729 [ "$cur_size" != "$saved_size" ] ||
1730 error "(2) Expect incorrect file2 size"
1733 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1734 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
1736 for k in $(seq $MDSCOUNT); do
1737 # The LFSCK status query interval is 30 seconds. In case some
1738 # LFSCK_NOTIFY RPCs fail or are lost, we wait long enough
1739 # to guarantee that the status is synced up.
1740 wait_update_facet mds${k} "$LCTL get_param -n \
1741 mdd.$(facet_svc mds${k}).lfsck_layout |
1742 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1743 error "(4) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
1746 for k in $(seq $OSTCOUNT); do
1747 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1748 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1749 awk '/^status/ { print $2 }')
1750 [ "$cur_status" == "completed" ] ||
1751 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
1754 local repaired=$(do_facet mds1 $LCTL get_param -n \
1755 mdd.$(facet_svc mds1).lfsck_layout |
1756 awk '/^repaired_orphan/ { print $2 }')
1757 [ $repaired -eq 1 ] ||
1758 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
# a2 was striped with -c 2, which matches the 2 repairs expected on mds2.
1760 if [ $MDSCOUNT -ge 2 ]; then
1761 repaired=$(do_facet mds2 $LCTL get_param -n \
1762 mdd.$(facet_svc mds2).lfsck_layout |
1763 awk '/^repaired_orphan/ { print $2 }')
1764 [ $repaired -eq 2 ] ||
1765 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
1768 $LFS path2fid $DIR/$tdir/a1/f1
1769 $LFS getstripe $DIR/$tdir/a1/f1
1771 if [ $MDSCOUNT -ge 2 ]; then
1772 $LFS path2fid $DIR/$tdir/a2/f2
1773 $LFS getstripe $DIR/$tdir/a2/f2
1776 echo "The file size should be correct after layout LFSCK scanning"
1777 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1778 [ "$cur_size" == "$saved_size" ] ||
1779 error "(7) Expect file1 size $saved_size, but got $cur_size"
1781 if [ $MDSCOUNT -ge 2 ]; then
1782 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1783 [ "$cur_size" == "$saved_size" ] ||
1784 error "(8) Expect file2 size $saved_size, but got $cur_size"
1787 run_test 18a "Find out orphan OST-object and repair it (1)"
# Test 18b "Find out orphan OST-object and repair it (2)".
# Flow: write a1/f1 (and a2/f2 on MDT index 1 when MDSCOUNT >= 2) and record
# their FIDs; rm the files while fail_loc 0x1616 is set on the owning MDS;
# run layout LFSCK with "-r -o"; wait for "completed" on every MDT and check
# every OST; require repaired_orphan == 1 on mds1 (and == 2 on mds2); move
# <fid>-R-0 from .lustre/lost+found/MDTxxxx back to the original path and
# check that the size matches the saved one.
# NOTE(review): LTIME is used as the wait limit below but its assignment is
# not visible in this block - confirm where it is set.
1791 echo "The target MDT-object is lost. The LFSCK should re-create the"
1792 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1793 echo "can move it back to normal namespace manually."
1796 check_mount_and_prep
1797 $LFS mkdir -i 0 $DIR/$tdir/a1
1798 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1799 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With -i, ls prints the inode number first, so the size is field 6.
1800 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1801 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1803 $LFS getstripe $DIR/$tdir/a1/f1
1805 if [ $MDSCOUNT -ge 2 ]; then
1806 $LFS mkdir -i 1 $DIR/$tdir/a2
1807 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
1808 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1809 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1811 $LFS getstripe $DIR/$tdir/a2/f2
1814 cancel_lru_locks osc
1816 echo "Inject failure, to simulate the case of missing the MDT-object"
1817 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1818 do_facet mds1 $LCTL set_param fail_loc=0x1616
1819 rm -f $DIR/$tdir/a1/f1
1821 if [ $MDSCOUNT -ge 2 ]; then
1822 do_facet mds2 $LCTL set_param fail_loc=0x1616
1823 rm -f $DIR/$tdir/a2/f2
1829 do_facet mds1 $LCTL set_param fail_loc=0
1830 if [ $MDSCOUNT -ge 2 ]; then
1831 do_facet mds2 $LCTL set_param fail_loc=0
1834 cancel_lru_locks mdc
1835 cancel_lru_locks osc
1837 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1838 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1840 for k in $(seq $MDSCOUNT); do
1841 # The LFSCK status query interval is 30 seconds. In case some
1842 # LFSCK_NOTIFY RPCs fail or are lost, we wait long enough
1843 # to guarantee that the status is synced up.
1844 wait_update_facet mds${k} "$LCTL get_param -n \
1845 mdd.$(facet_svc mds${k}).lfsck_layout |
1846 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1847 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
1850 for k in $(seq $OSTCOUNT); do
1851 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1852 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1853 awk '/^status/ { print $2 }')
1854 [ "$cur_status" == "completed" ] ||
1855 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
1858 local repaired=$(do_facet mds1 $LCTL get_param -n \
1859 mdd.$(facet_svc mds1).lfsck_layout |
1860 awk '/^repaired_orphan/ { print $2 }')
1861 [ $repaired -eq 1 ] ||
1862 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
# a2 was striped with -c 2, which matches the 2 repairs expected on mds2.
1864 if [ $MDSCOUNT -ge 2 ]; then
1865 repaired=$(do_facet mds2 $LCTL get_param -n \
1866 mdd.$(facet_svc mds2).lfsck_layout |
1867 awk '/^repaired_orphan/ { print $2 }')
1868 [ $repaired -eq 2 ] ||
1869 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
# The recovered objects are looked up by name "<original fid>-R-0".
1872 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
1873 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
1874 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
1876 if [ $MDSCOUNT -ge 2 ]; then
1877 local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0
1878 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
1881 $LFS path2fid $DIR/$tdir/a1/f1
1882 $LFS getstripe $DIR/$tdir/a1/f1
1884 if [ $MDSCOUNT -ge 2 ]; then
1885 $LFS path2fid $DIR/$tdir/a2/f2
1886 $LFS getstripe $DIR/$tdir/a2/f2
1889 echo "The file size should be correct after layout LFSCK scanning"
1890 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1891 [ "$cur_size" == "$saved_size" ] ||
1892 error "(7) Expect file1 size $saved_size, but got $cur_size"
1894 if [ $MDSCOUNT -ge 2 ]; then
1895 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1896 [ "$cur_size" == "$saved_size" ] ||
1897 error "(8) Expect file2 size $saved_size, but got $cur_size"
1900 run_test 18b "Find out orphan OST-object and repair it (2)"
# Test 18c "Find out orphan OST-object and repair it (3)".
# Flow: with fail_loc 0x1617 set on ost1, write a1/f1 (and a2/f2 when
# MDSCOUNT >= 2; a2 lives on MDT index 1 but is striped with "-i 0"); rm the
# files while fail_loc 0x1616 is set on the owning MDS; run layout LFSCK
# with "-r -o"; wait for "completed" on every MDT and check every OST;
# require repaired_orphan == $expected on mds1 and == 0 on mds2; look for
# "*-N-*" stubs under .lustre/lost+found/MDT0001 and MDT0000.
# NOTE(review): the assignments of `expected` and LTIME are not visible in
# this block - confirm where they are set.
1904 echo "The target MDT-object is lost, and the OST-object FID is missing."
1905 echo "The LFSCK should re-create the MDT-object with new FID under the "
1906 echo "directory .lustre/lost+found/MDTxxxx."
1909 check_mount_and_prep
1910 $LFS mkdir -i 0 $DIR/$tdir/a1
1911 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1913 echo "Inject failure, to simulate the case of missing parent FID"
1914 #define OBD_FAIL_LFSCK_NOPFID 0x1617
1915 do_facet ost1 $LCTL set_param fail_loc=0x1617
1917 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1918 $LFS getstripe $DIR/$tdir/a1/f1
1920 if [ $MDSCOUNT -ge 2 ]; then
1921 $LFS mkdir -i 1 $DIR/$tdir/a2
1922 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a2
1923 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1924 $LFS getstripe $DIR/$tdir/a2/f2
1927 cancel_lru_locks osc
1929 echo "Inject failure, to simulate the case of missing the MDT-object"
1930 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1931 do_facet mds1 $LCTL set_param fail_loc=0x1616
1932 rm -f $DIR/$tdir/a1/f1
1934 if [ $MDSCOUNT -ge 2 ]; then
1935 do_facet mds2 $LCTL set_param fail_loc=0x1616
1936 rm -f $DIR/$tdir/a2/f2
1942 do_facet mds1 $LCTL set_param fail_loc=0
1943 if [ $MDSCOUNT -ge 2 ]; then
1944 do_facet mds2 $LCTL set_param fail_loc=0
1947 cancel_lru_locks mdc
1948 cancel_lru_locks osc
1950 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1951 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1953 for k in $(seq $MDSCOUNT); do
1954 # The LFSCK status query interval is 30 seconds. In case some
1955 # LFSCK_NOTIFY RPCs fail or are lost, we wait long enough
1956 # to guarantee that the status is synced up.
1957 wait_update_facet mds${k} "$LCTL get_param -n \
1958 mdd.$(facet_svc mds${k}).lfsck_layout |
1959 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1960 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
1963 for k in $(seq $OSTCOUNT); do
1964 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1965 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1966 awk '/^status/ { print $2 }')
1967 [ "$cur_status" == "completed" ] ||
1968 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
1971 if [ $MDSCOUNT -ge 2 ]; then
1977 local repaired=$(do_facet mds1 $LCTL get_param -n \
1978 mdd.$(facet_svc mds1).lfsck_layout |
1979 awk '/^repaired_orphan/ { print $2 }')
1980 [ $repaired -eq $expected ] ||
1981 error "(4) Expect $expected fixed on mds1, but got: $repaired"
# All repairs are expected on mds1; mds2 must report none.
1983 if [ $MDSCOUNT -ge 2 ]; then
1984 repaired=$(do_facet mds2 $LCTL get_param -n \
1985 mdd.$(facet_svc mds2).lfsck_layout |
1986 awk '/^repaired_orphan/ { print $2 }')
1987 [ $repaired -eq 0 ] ||
1988 error "(5) Expect 0 fixed on mds2, but got: $repaired"
1991 ls -ail $MOUNT/.lustre/lost+found/
# NOTE(review): the -name pattern *-N-* is unquoted in both find calls below,
# so the shell may expand it against the current directory before find sees
# it; quoting it ('*-N-*') would avoid that.
1993 echo "There should NOT be some stub under .lustre/lost+found/MDT0001/"
1994 if [ -d $MOUNT/.lustre/lost+found/MDT0001 ]; then
1995 cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name *-N-*)
1997 error "(6) .lustre/lost+found/MDT0001/ should be empty"
2000 echo "There should be some stub under .lustre/lost+found/MDT0000/"
2001 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2002 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
2004 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-N-*)
2005 [ ! -z "$cname" ] ||
2006 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
2008 run_test 18c "Find out orphan OST-object and repair it (3)"
# Test 18d: f2 is made to reference f1's OST-object, f1 is removed, and a
# layout LFSCK run is then expected to report exactly one repaired orphan and
# to bring f2 back to its originally recorded size.
2012 echo "The target MDT-object layout EA slot is occpuied by some new"
2013 echo "created OST-object when repair dangling reference case. Such"
2014 echo "conflict OST-object has never been modified. Then when found"
2015 echo "the orphan OST-object, LFSCK will replace it with the orphan"
2019 check_mount_and_prep
# Single-stripe layout on OST index 0 with a 1M stripe size for a1; f1 is the
# "guard" file and f2 is the file whose layout gets corrupted below.
2021 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2022 echo "guard" > $DIR/$tdir/a1/f1
2023 echo "foo" > $DIR/$tdir/a1/f2
# Field 6 of `ls -il` is the size column (-i prepends the inode number).
2024 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
# Print FIDs and layouts before the corruption, for the test log.
2025 $LFS path2fid $DIR/$tdir/a1/f1
2026 $LFS getstripe $DIR/$tdir/a1/f1
2027 $LFS path2fid $DIR/$tdir/a1/f2
2028 $LFS getstripe $DIR/$tdir/a1/f2
2029 cancel_lru_locks osc
2031 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2032 echo "to reference the same OST-object (which is f1's OST-obejct)."
2033 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2034 echo "dangling reference case, but f2's old OST-object is there."
# The fail_loc is armed on the MDS only around the chown/rm pair and cleared
# right after.
2037 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
2038 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
# NOTE(review): "1.1" uses the legacy OWNER.GROUP separator; "1:1" is the
# documented chown form -- confirm before changing.
2039 chown 1.1 $DIR/$tdir/a1/f2
2040 rm -f $DIR/$tdir/a1/f1
2043 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2045 echo "stopall to cleanup object cache"
2048 setupall > /dev/null
2050 echo "The file size should be incorrect since dangling referenced"
2051 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2052 [ "$cur_size" != "$saved_size" ] ||
2053 error "(1) Expect incorrect file2 size"
# Arm the delay fail_loc (fail_val=5) before starting LFSCK; it is cleared
# again once mds1 reports "scanning-phase2".
2055 #define OBD_FAIL_LFSCK_DELAY3 0x1602
2056 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
2058 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2059 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
# Poll the "status" field of mds1's lfsck_layout until it reads
# "scanning-phase2". The \\\$2 escaping presumably keeps $2 literal until awk
# runs on the facet -- TODO confirm against wait_update_facet.
2061 wait_update_facet mds1 "$LCTL get_param -n \
2062 mdd.$(facet_svc mds1).lfsck_layout |
2063 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2064 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
2066 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
2068 for k in $(seq $MDSCOUNT); do
2069 # The LFSCK status query interval is 30 seconds. In case some
2070 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
2071 # guarantee that the status has been synced up.
2072 wait_update_facet mds${k} "$LCTL get_param -n \
2073 mdd.$(facet_svc mds${k}).lfsck_layout |
2074 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2075 error "(3) MDS${k} is not the expected 'completed'"
# Each OST is sampled once (no polling) after all MDTs reported "completed".
2078 for k in $(seq $OSTCOUNT); do
2079 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2080 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2081 awk '/^status/ { print $2 }')
2082 [ "$cur_status" == "completed" ] ||
2083 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
# Exactly one orphan must have been repaired according to the MDS counter.
2086 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2087 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2088 awk '/^repaired_orphan/ { print $2 }')
2089 [ $repaired -eq 1 ] ||
2090 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
2092 echo "The file size should be correct after layout LFSCK scanning"
2093 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2094 [ "$cur_size" == "$saved_size" ] ||
2095 error "(6) Expect file2 size $saved_size, but got $cur_size"
# Dump f2's content, FID and layout for the log; their output is not checked.
2097 echo "The LFSCK should find back the original data."
2098 cat $DIR/$tdir/a1/f2
2099 $LFS path2fid $DIR/$tdir/a1/f2
2100 $LFS getstripe $DIR/$tdir/a1/f2
2102 run_test 18d "Find out orphan OST-object and repair it (4)"
# Test 18e: same corruption as the previous test, but f2 is appended to while
# LFSCK is in "scanning-phase2". The test then expects one repaired orphan and
# a "*-C-*" stub under .lustre/lost+found/MDT0000/ whose size equals f2's
# originally recorded size.
2106 echo "The target MDT-object layout EA slot is occpuied by some new"
2107 echo "created OST-object when repair dangling reference case. Such"
2108 echo "conflict OST-object has been modified by others. To keep the"
2109 echo "new data, the LFSCK will create a new file to refernece this"
2110 echo "old orphan OST-object."
2113 check_mount_and_prep
# Single-stripe layout on OST index 0 with a 1M stripe size for a1.
2115 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2116 echo "guard" > $DIR/$tdir/a1/f1
2117 echo "foo" > $DIR/$tdir/a1/f2
# Field 6 of `ls -il` is the size column (-i prepends the inode number).
2118 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
# Print FIDs and layouts before the corruption, for the test log.
2119 $LFS path2fid $DIR/$tdir/a1/f1
2120 $LFS getstripe $DIR/$tdir/a1/f1
2121 $LFS path2fid $DIR/$tdir/a1/f2
2122 $LFS getstripe $DIR/$tdir/a1/f2
2123 cancel_lru_locks osc
2125 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2126 echo "to reference the same OST-object (which is f1's OST-obejct)."
2127 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2128 echo "dangling reference case, but f2's old OST-object is there."
# The fail_loc is armed on the MDS only around the chown/rm pair.
2131 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
2132 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
# NOTE(review): "1.1" uses the legacy OWNER.GROUP separator; "1:1" is the
# documented chown form -- confirm before changing.
2133 chown 1.1 $DIR/$tdir/a1/f2
2134 rm -f $DIR/$tdir/a1/f1
2137 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2139 echo "stopall to cleanup object cache"
2142 setupall > /dev/null
2144 echo "The file size should be incorrect since dangling referenced"
2145 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2146 [ "$cur_size" != "$saved_size" ] ||
2147 error "(1) Expect incorrect file2 size"
# Delay fail_loc with fail_val=10, kept armed until after the write to f2
# below.
2149 #define OBD_FAIL_LFSCK_DELAY3 0x1602
2150 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
2152 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2153 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
# Poll the "status" field of mds1's lfsck_layout until it reads
# "scanning-phase2".
2155 wait_update_facet mds1 "$LCTL get_param -n \
2156 mdd.$(facet_svc mds1).lfsck_layout |
2157 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2158 error "(3) MDS1 is not the expected 'scanning-phase2'"
2160 # to guarantee all updates are synced.
# The append below happens while the delay fail_loc is still set; it is what
# makes the conflicting OST-object "modified" in this scenario.
2164 echo "Write new data to f2 to modify the new created OST-object."
2165 echo "dummy" >> $DIR/$tdir/a1/f2
2167 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
2169 for k in $(seq $MDSCOUNT); do
2170 # The LFSCK status query interval is 30 seconds. In case some
2171 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
2172 # guarantee that the status has been synced up.
2173 wait_update_facet mds${k} "$LCTL get_param -n \
2174 mdd.$(facet_svc mds${k}).lfsck_layout |
2175 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2176 error "(4) MDS${k} is not the expected 'completed'"
# Each OST is sampled once (no polling) after all MDTs reported "completed".
2179 for k in $(seq $OSTCOUNT); do
2180 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2181 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2182 awk '/^status/ { print $2 }')
2183 [ "$cur_status" == "completed" ] ||
2184 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# Exactly one orphan must have been repaired according to the MDS counter.
2187 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2188 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2189 awk '/^repaired_orphan/ { print $2 }')
2190 [ $repaired -eq 1 ] ||
2191 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2193 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2194 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2195 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
# NOTE(review): the -name pattern is unquoted, so the shell may expand *-C-*
# against the current directory before find sees it; quoting it ('*-C-*')
# would avoid that. cname is also assigned without a visible `local`.
2197 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-C-*)
2198 [ ! -z "$cname" ] ||
2199 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
2201 echo "The stub file should keep the original f2 data"
2202 cur_size=$(ls -il $cname | awk '{ print $6 }')
2203 [ "$cur_size" == "$saved_size" ] ||
2204 error "(9) Expect file2 size $saved_size, but got $cur_size"
# Dump the stub's and f2's FID/layout/content for the log; not checked.
2207 $LFS path2fid $cname
2208 $LFS getstripe $cname
2210 echo "The f2 should contains new data."
2211 cat $DIR/$tdir/a1/f2
2212 $LFS path2fid $DIR/$tdir/a1/f2
2213 $LFS getstripe $DIR/$tdir/a1/f2
2215 run_test 18e "Find out orphan OST-object and repair it (5)"
# Test 18f: files on MDT0 (and MDT1 when MDSCOUNT >= 2) lose their
# MDT-objects. The first LFSCK run is made with a fail_loc set on mds1 and
# must end "partial" on every MDS and on ost1 but "completed" on ost2, with
# repaired_orphan == 1. A second run without the fail_loc must end
# "completed" everywhere, with repaired_orphan == 2.
2218 [ $OSTCOUNT -lt 2 ] &&
2219 skip "The test needs at least 2 OSTs" && return
2222 echo "The target MDT-object is lost. The LFSCK should re-create the"
2223 echo "MDT-object under .lustre/lost+found/MDTxxxx. If some OST fail"
2224 echo "to verify some OST-object(s) during the first stage-scanning,"
2225 echo "the LFSCK should skip orphan OST-objects for such OST. Others"
2226 echo "should not be affected."
2229 check_mount_and_prep
# a1: one stripe starting at OST index 0; a2: two stripes starting at index 0.
# Both directories are created on MDT index 0.
2230 $LFS mkdir -i 0 $DIR/$tdir/a1
2231 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2232 dd if=/dev/zero of=$DIR/$tdir/a1/guard bs=1M count=2
2233 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
2234 $LFS mkdir -i 0 $DIR/$tdir/a2
2235 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a2
2236 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
2237 $LFS getstripe $DIR/$tdir/a1/f1
2238 $LFS getstripe $DIR/$tdir/a2/f2
# Mirror of the setup above on MDT index 1 (a3/a4) when a second MDT exists.
2240 if [ $MDSCOUNT -ge 2 ]; then
2241 $LFS mkdir -i 1 $DIR/$tdir/a3
2242 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a3
2243 dd if=/dev/zero of=$DIR/$tdir/a3/guard bs=1M count=2
2244 dd if=/dev/zero of=$DIR/$tdir/a3/f3 bs=1M count=2
2245 $LFS mkdir -i 1 $DIR/$tdir/a4
2246 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a4
2247 dd if=/dev/zero of=$DIR/$tdir/a4/f4 bs=1M count=2
2248 $LFS getstripe $DIR/$tdir/a3/f3
2249 $LFS getstripe $DIR/$tdir/a4/f4
2252 cancel_lru_locks osc
2254 echo "Inject failure, to simulate the case of missing the MDT-object"
# The rm calls below run with fail_loc 0x1616 set on the MDS serving each
# directory.
2255 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2256 do_facet mds1 $LCTL set_param fail_loc=0x1616
2257 rm -f $DIR/$tdir/a1/f1
2258 rm -f $DIR/$tdir/a2/f2
2260 if [ $MDSCOUNT -ge 2 ]; then
2261 do_facet mds2 $LCTL set_param fail_loc=0x1616
2262 rm -f $DIR/$tdir/a3/f3
2263 rm -f $DIR/$tdir/a4/f4
# Clear the fail_loc on every MDS it was set on.
2269 do_facet mds1 $LCTL set_param fail_loc=0
2270 if [ $MDSCOUNT -ge 2 ]; then
2271 do_facet mds2 $LCTL set_param fail_loc=0
2274 cancel_lru_locks mdc
2275 cancel_lru_locks osc
2277 echo "Inject failure, to simulate the OST0 fail to handle"
2278 echo "MDT0 LFSCK request during the first-stage scanning."
# fail_val=0 presumably selects OST index 0 as the failing target, matching
# the echo above -- TODO confirm against the fail_loc implementation.
2279 #define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c
2280 do_facet mds1 $LCTL set_param fail_loc=0x161c fail_val=0
2282 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2283 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2285 for k in $(seq $MDSCOUNT); do
2286 # The LFSCK status query interval is 30 seconds. In case some
2287 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
2288 # guarantee that the status has been synced up.
2289 wait_update_facet mds${k} "$LCTL get_param -n \
2290 mdd.$(facet_svc mds${k}).lfsck_layout |
2291 awk '/^status/ { print \\\$2 }'" "partial" $LTIME ||
2292 error "(2) MDS${k} is not the expected 'partial'"
# ost1 is expected to end "partial"; ost2 must be unaffected ("completed").
2295 wait_update_facet ost1 "$LCTL get_param -n \
2296 obdfilter.$(facet_svc ost1).lfsck_layout |
2297 awk '/^status/ { print \\\$2 }'" "partial" $LTIME || {
2298 error "(3) OST1 is not the expected 'partial'"
2301 wait_update_facet ost2 "$LCTL get_param -n \
2302 obdfilter.$(facet_svc ost2).lfsck_layout |
2303 awk '/^status/ { print \\\$2 }'" "completed" $LTIME || {
2304 error "(4) OST2 is not the expected 'completed'"
2307 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
# After the partial run each MDS must report one repaired orphan.
# NOTE(review): the messages print the literal text "mds{1}" / "mds{2}";
# presumably "mds1" / "mds2" was intended.
2309 local repaired=$(do_facet mds1 $LCTL get_param -n \
2310 mdd.$(facet_svc mds1).lfsck_layout |
2311 awk '/^repaired_orphan/ { print $2 }')
2312 [ $repaired -eq 1 ] ||
2313 error "(5) Expect 1 fixed on mds{1}, but got: $repaired"
2315 if [ $MDSCOUNT -ge 2 ]; then
2316 repaired=$(do_facet mds2 $LCTL get_param -n \
2317 mdd.$(facet_svc mds2).lfsck_layout |
2318 awk '/^repaired_orphan/ { print $2 }')
2319 [ $repaired -eq 1 ] ||
2320 error "(6) Expect 1 fixed on mds{2}, but got: $repaired"
# Second pass, this time with no fail_loc armed.
2323 echo "Trigger layout LFSCK on all devices again to cleanup"
2324 $START_LAYOUT -r -o || error "(7) Fail to start LFSCK for layout!"
2326 for k in $(seq $MDSCOUNT); do
2327 # The LFSCK status query interval is 30 seconds. In case some
2328 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
2329 # guarantee that the status has been synced up.
2330 wait_update_facet mds${k} "$LCTL get_param -n \
2331 mdd.$(facet_svc mds${k}).lfsck_layout |
2332 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2333 error "(8) MDS${k} is not the expected 'completed'"
# NOTE(review): cur_status is assigned here without a visible `local`, unlike
# the same loop in the neighbouring tests.
2336 for k in $(seq $OSTCOUNT); do
2337 cur_status=$(do_facet ost${k} $LCTL get_param -n \
2338 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2339 awk '/^status/ { print $2 }')
2340 [ "$cur_status" == "completed" ] ||
2341 error "(9) OST${k} Expect 'completed', but got '$cur_status'"
# After the complete run each MDS must report two repaired orphans.
# `repaired` is declared `local` a second time here; harmless in bash.
2345 local repaired=$(do_facet mds1 $LCTL get_param -n \
2346 mdd.$(facet_svc mds1).lfsck_layout |
2347 awk '/^repaired_orphan/ { print $2 }')
2348 [ $repaired -eq 2 ] ||
2349 error "(10) Expect 2 fixed on mds{1}, but got: $repaired"
2351 if [ $MDSCOUNT -ge 2 ]; then
2352 repaired=$(do_facet mds2 $LCTL get_param -n \
2353 mdd.$(facet_svc mds2).lfsck_layout |
2354 awk '/^repaired_orphan/ { print $2 }')
2355 [ $repaired -eq 2 ] ||
2356 error "(11) Expect 2 fixed on mds{2}, but got: $repaired"
2359 run_test 18f "Skip the failed OST(s) when handle orphan OST-objects"
# Runs on the local node (no do_facet) with output discarded. Presumably
# removes the "cache" flag from the debug mask -- TODO confirm lctl semantics.
2361 $LCTL set_param debug=-cache > /dev/null
# Test 19a: with lfsck_verify_pfid enabled on OST0000 and fail_loc 0x1619 set
# on the local node, reading a0 must fail.
2364 check_mount_and_prep
2365 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2367 echo "foo" > $DIR/$tdir/a0
2368 echo "guard" > $DIR/$tdir/a1
2369 cancel_lru_locks osc
2371 echo "Inject failure, then client will offer wrong parent FID when read"
# Enable parent-FID verification on the OST side first.
2372 do_facet ost1 $LCTL set_param -n \
2373 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
# This fail_loc is set with a plain $LCTL, i.e. on the node running the test,
# not on a server facet.
2374 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
2375 $LCTL set_param fail_loc=0x1619
2377 echo "Read RPC with wrong parent FID should be denied"
# A successful cat is the failure case here.
2378 cat $DIR/$tdir/a0 && error "(3) Read should be denied!"
2379 $LCTL set_param fail_loc=0
2381 run_test 19a "OST-object inconsistency self detect"
# Test 19b: f0 is written while fail_loc 0x1611 is set on ost1. The
# "repaired" counter of lfsck_verify_pfid must be 0 before verification is
# enabled, and 1 after it is enabled and f0 has been read.
2384 check_mount_and_prep
2385 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2387 echo "Inject failure stub to make the OST-object to back point to"
2388 echo "non-exist MDT-object"
# The fail_loc is armed on ost1 only around the write of f0.
2390 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
2391 do_facet ost1 $LCTL set_param fail_loc=0x1611
2392 echo "foo" > $DIR/$tdir/f0
2393 cancel_lru_locks osc
2394 do_facet ost1 $LCTL set_param fail_loc=0
2396 echo "Nothing should be fixed since self detect and repair is disabled"
# Read the "repaired" line from the lfsck_verify_pfid parameter on OST0000.
2397 local repaired=$(do_facet ost1 $LCTL get_param -n \
2398 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2399 awk '/^repaired/ { print $2 }')
2400 [ $repaired -eq 0 ] ||
2401 error "(1) Expected 0 repaired, but got $repaired"
2403 echo "Read RPC with right parent FID should be accepted,"
2404 echo "and cause parent FID on OST to be fixed"
# Enable verification, then read f0; the read itself must succeed.
2406 do_facet ost1 $LCTL set_param -n \
2407 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2408 cat $DIR/$tdir/f0 || error "(2) Read should not be denied!"
2410 repaired=$(do_facet ost1 $LCTL get_param -n \
2411 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2412 awk '/^repaired/ { print $2 }')
2413 [ $repaired -eq 1 ] ||
2414 error "(3) Expected 1 repaired, but got $repaired"
2416 run_test 19b "OST-object inconsistency self repair"
# Test 20: files f0..f2 (plus f3 when OSTCOUNT > 2) lose their MDT-object
# and, for f1..f3, one OST-object each. After a layout LFSCK run the test
# inspects the re-created "<fid>-R-0" files under .lustre/lost+found/MDT0000/.
# It checks the hole flag in the LOV pattern, stripe count and size, and
# which reads and writes succeed or fail.
2419 [ $OSTCOUNT -lt 2 ] &&
2420 skip "The test needs at least 2 OSTs" && return
2423 echo "The target MDT-object and some of its OST-object are lost."
2424 echo "The LFSCK should find out the left OST-objects and re-create"
2425 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2426 echo "with the partial OST-objects (LOV EA hole)."
2428 echo "New client can access the file with LOV EA hole via normal"
2429 echo "system tools or commands without crash the system."
2431 echo "For old client, even though it cannot access the file with"
2432 echo "LOV EA hole, it should not cause the system crash."
2435 check_mount_and_prep
2436 $LFS mkdir -i 0 $DIR/$tdir/a1
# Three stripes when more than two OSTs are available, otherwise two.
2437 if [ $OSTCOUNT -gt 2 ]; then
2438 $LFS setstripe -c 3 -i 0 -S 1M $DIR/$tdir/a1
2441 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a1
2445 # 256 blocks on the stripe0.
2446 # 1 block on the stripe1 for 2 OSTs case.
2447 # 256 blocks on the stripe1 for other cases.
2448 # 1 block on the stripe2 if OSTs > 2
# NOTE(review): $bcount must already hold the 4096-byte block count here.
2449 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2450 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2451 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# The FIDs are recorded now because the lost+found names checked later are
# built from them ("${fidN}-R-0").
2453 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2454 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2455 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
2458 $LFS getstripe $DIR/$tdir/a1/f0
2460 $LFS getstripe $DIR/$tdir/a1/f1
2462 $LFS getstripe $DIR/$tdir/a1/f2
# NOTE(review): fid3 is assigned without a visible `local`.
2464 if [ $OSTCOUNT -gt 2 ]; then
2465 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
2466 fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2468 $LFS getstripe $DIR/$tdir/a1/f3
2471 cancel_lru_locks osc
2473 echo "Inject failure..."
2474 echo "To simulate f0 lost MDT-object"
2475 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2476 do_facet mds1 $LCTL set_param fail_loc=0x1616
2477 rm -f $DIR/$tdir/a1/f0
# For f1..f3 the same fail_loc 0x161a stays set; only fail_val changes.
# Going by the echo messages, fail_val is the index of the OST-object that is
# lost together with the MDT-object (unset for f1, then 1, then 2).
2479 echo "To simulate f1 lost MDT-object and OST-object0"
2480 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
2481 do_facet mds1 $LCTL set_param fail_loc=0x161a
2482 rm -f $DIR/$tdir/a1/f1
2484 echo "To simulate f2 lost MDT-object and OST-object1"
2485 do_facet mds1 $LCTL set_param fail_val=1
2486 rm -f $DIR/$tdir/a1/f2
2488 if [ $OSTCOUNT -gt 2 ]; then
2489 echo "To simulate f3 lost MDT-object and OST-object2"
2490 do_facet mds1 $LCTL set_param fail_val=2
2491 rm -f $DIR/$tdir/a1/f3
# The client is unmounted for the LFSCK run and mounted again before the
# lost+found checks (see step 5.0).
2494 umount_client $MOUNT
2497 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2499 echo "Inject failure to slow down the LFSCK on OST0"
2500 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2501 do_facet ost1 $LCTL set_param fail_loc=0x161b
2503 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2504 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2507 do_facet ost1 $LCTL set_param fail_loc=0
2509 for k in $(seq $MDSCOUNT); do
2510 # The LFSCK status query interval is 30 seconds. In case some
2511 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
2512 # guarantee that the status has been synced up.
2513 wait_update_facet mds${k} "$LCTL get_param -n \
2514 mdd.$(facet_svc mds${k}).lfsck_layout |
2515 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2516 error "(2) MDS${k} is not the expected 'completed'"
# Each OST is sampled once (no polling) after all MDTs reported "completed".
2519 for k in $(seq $OSTCOUNT); do
2520 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2521 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2522 awk '/^status/ { print $2 }')
2523 [ "$cur_status" == "completed" ] ||
2524 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan count: 9 with more than two OSTs, otherwise 4.
2527 local repaired=$(do_facet mds1 $LCTL get_param -n \
2528 mdd.$(facet_svc mds1).lfsck_layout |
2529 awk '/^repaired_orphan/ { print $2 }')
2530 if [ $OSTCOUNT -gt 2 ]; then
2531 [ $repaired -eq 9 ] ||
2532 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2534 [ $repaired -eq 4 ] ||
2535 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
2538 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Bit mask tested against the value printed by `lfs getstripe -L`.
2540 LOV_PATTERN_F_HOLE=0x40000000
2543 # ${fid0}-R-0 is the old f0
2545 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
2546 echo "Check $name, which is the old f0"
2548 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
# getstripe -L output is prefixed with "0x" so that $(( )) parses it as hex.
2550 local pattern=0x$($LFS getstripe -L $name)
2551 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2552 error "(5.2) NOT expect pattern flag hole, but got $pattern"
2554 local stripes=$($LFS getstripe -c $name)
2555 if [ $OSTCOUNT -gt 2 ]; then
2556 [ $stripes -eq 3 ] ||
2557 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2559 [ $stripes -eq 2 ] ||
2560 error "(5.3.2) expect the stripe count is 2, but got $stripes"
# Size is taken from the "Size:" line of stat's default output.
2563 local size=$(stat $name | awk '/Size:/ { print $2 }')
2564 [ $size -eq $((4096 * $bcount)) ] ||
2565 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
# f0 lost no OST-object, so every ordinary operation must succeed.
2567 cat $name > /dev/null || error "(5.5) cannot read $name"
2569 echo "dummy" >> $name || error "(5.6) cannot write $name"
2571 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2573 touch $name || error "(5.8) cannot touch $name"
2575 rm -f $name || error "(5.9) cannot unlink $name"
2578 # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2580 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
2581 if [ $OSTCOUNT -gt 2 ]; then
2582 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2584 echo "Check $name, it contains the old f1's stripe1"
2587 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
# f1 lost stripe0, so the hole flag must be set here.
2589 pattern=0x$($LFS getstripe -L $name)
2590 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2591 error "(6.2) expect pattern flag hole, but got $pattern"
2593 stripes=$($LFS getstripe -c $name)
2594 if [ $OSTCOUNT -gt 2 ]; then
2595 [ $stripes -eq 3 ] ||
2596 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2598 [ $stripes -eq 2 ] ||
2599 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2602 size=$(stat $name | awk '/Size:/ { print $2 }')
2603 [ $size -eq $((4096 * $bcount)) ] ||
2604 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
2606 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# Copy with conv=sync,noerror so dd continues past read errors, and count the
# "Input/output error" messages. 256 presumably corresponds to the 4096-byte
# blocks of the missing 1M stripe (1M / 4096 = 256).
2608 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2609 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2612 [ $failures -eq 256 ] ||
2613 error "(6.6) expect 256 IO failures, but get $failures"
2615 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2616 [ $size -eq $((4096 * $bcount)) ] ||
2617 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
# Offset 0 lies in stripe0 (the hole) and must be rejected; block 300
# (300 * 4096 bytes, beyond the first 1M) lies in stripe1 and must be accepted.
2619 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2620 error "(6.8) write to the LOV EA hole should fail"
2622 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2623 error "(6.9) write to normal stripe should NOT fail"
2625 echo "foo" >> $name && error "(6.10) append write $name should fail"
2627 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2629 touch $name || error "(6.12) cannot touch $name"
2631 rm -f $name || error "(6.13) cannot unlink $name"
2634 # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2636 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
2637 if [ $OSTCOUNT -gt 2 ]; then
2638 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2640 echo "Check $name, it contains the old f2's stripe0"
2643 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
2645 pattern=0x$($LFS getstripe -L $name)
2646 stripes=$($LFS getstripe -c $name)
2647 size=$(stat $name | awk '/Size:/ { print $2 }')
# The 7.x.1 checks apply when OSTCOUNT > 2 (a hole at stripe1 is expected),
# and the 7.x.2 checks apply otherwise (a plain single-stripe file is
# expected).
2648 if [ $OSTCOUNT -gt 2 ]; then
2649 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2650 error "(7.2.1) expect pattern flag hole, but got $pattern"
2652 [ $stripes -eq 3 ] ||
2653 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2655 [ $size -eq $((4096 * $bcount)) ] ||
2656 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
2658 cat $name > /dev/null &&
2659 error "(7.5.1) normal read $name should fail"
2661 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2662 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2664 [ $failures -eq 256 ] ||
2665 error "(7.6) expect 256 IO failures, but get $failures"
2667 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2668 [ $size -eq $((4096 * $bcount)) ] ||
2669 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
# Mirror image of the f1 case: block 300 falls into the missing stripe1 and
# must fail, while offset 0 (stripe0) must succeed.
2671 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2672 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
2674 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2675 error "(7.8.1) write to normal stripe should NOT fail"
2677 echo "foo" >> $name &&
2678 error "(7.8.3) append write $name should fail"
2680 chown $RUNAS_ID:$RUNAS_GID $name ||
2681 error "(7.9.1) cannot chown on $name"
2683 touch $name || error "(7.10.1) cannot touch $name"
2685 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2686 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2688 [ $stripes -eq 1 ] ||
2689 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2692 [ $size -eq $((4096 * (256 + 0))) ] ||
2693 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2695 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2697 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2699 chown $RUNAS_ID:$RUNAS_GID $name ||
2700 error "(7.9.2) cannot chown on $name"
2702 touch $name || error "(7.10.2) cannot touch $name"
2705 rm -f $name || error "(7.11) cannot unlink $name"
# The f3 checks below only apply when a third OST exists.
2707 [ $OSTCOUNT -le 2 ] && return
2710 # ${fid3}-R-0 should contain the old f3's stripe0 and stripe1
2712 name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
2713 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2715 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
2717 pattern=0x$($LFS getstripe -L $name)
2718 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2719 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2721 stripes=$($LFS getstripe -c $name)
2722 # LFSCK does not know the old f3 had 3 stripes.
2723 # It only tries to find as much as possible.
2724 # The stripe count depends on the last stripe's offset.
2725 [ $stripes -eq 2 ] ||
2726 error "(8.3) expect the stripe count is 2, but got $stripes"
2728 size=$(stat $name | awk '/Size:/ { print $2 }')
# Expected size: two full 256-block stripes; nothing of the lost stripe2.
2730 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2731 error "(8.4) expect the size $((4096 * 512)), but got $size"
2733 cat $name > /dev/null || error "(8.5) cannot read $name"
2735 echo "dummy" >> $name || error "(8.6) cannot write $name"
2737 chown $RUNAS_ID:$RUNAS_GID $name ||
2738 error "(8.7) cannot chown on $name"
2740 touch $name || error "(8.8) cannot touch $name"
2742 rm -f $name || error "(8.9) cannot unlink $name"
2744 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# Test 21: starts LFSCK on MDT0000 without a -t type argument and checks that
# both the namespace and the layout component report "scanning-phase1", then
# stops LFSCK.
2747 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] &&
2748 skip "ignore the test if MDS is older than 2.5.59" && return
2750 check_mount_and_prep
2751 createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
2753 echo "Start all LFSCK components by default (-s 1)"
# -s 1 presumably limits the scan speed so that the status can still be
# sampled in phase 1 below -- TODO confirm against lctl lfsck_start docs.
2754 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -s 1 -r ||
2755 error "Fail to start LFSCK"
2757 echo "namespace LFSCK should be in 'scanning-phase1' status"
# Both statuses are sampled once each, with no polling.
2758 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
2759 [ "$STATUS" == "scanning-phase1" ] ||
2760 error "Expect namespace 'scanning-phase1', but got '$STATUS'"
2762 echo "layout LFSCK should be in 'scanning-phase1' status"
2763 STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
2764 [ "$STATUS" == "scanning-phase1" ] ||
2765 error "Expect layout 'scanning-phase1', but got '$STATUS'"
2767 echo "Stop all LFSCK components by default"
2768 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 ||
2769 error "Fail to stop LFSCK"
2771 run_test 21 "run all LFSCK components by default"
# Test 22a: "dummy" is created on MDT0 under foo (on MDT1) while fail_loc
# 0x161e is set, then "guard" is removed. Namespace LFSCK must report
# unmatched_pairs_repaired == 1, and listing foo/dummy must work afterwards.
2774 [ $MDSCOUNT -lt 2 ] &&
2775 skip "We need at least 2 MDSes for this test" && return
2778 echo "The parent_A references the child directory via some name entry,"
2779 echo "but the child directory back references another parent_B via its"
# NOTE(review): in the next two echo lines the inner double quotes close and
# reopen the string, so ".." is printed without surrounding quotes.
2780 echo "".." name entry. The parent_B does not exist. Then the namesapce"
2781 echo "LFSCK will repair the child directory's ".." name entry."
2784 check_mount_and_prep
2786 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
2787 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
2789 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
2790 echo "The dummy's dotdot name entry references the guard."
# The fail_loc is armed only around the mkdir of foo/dummy.
2791 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
2792 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
2793 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
2794 error "(3) Fail to mkdir on MDT0"
2795 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Removing guard is what makes the wrongly referenced parent non-existent.
2797 rmdir $DIR/$tdir/guard || error "(4) Fail to rmdir $DIR/$tdir/guard"
2799 echo "Trigger namespace LFSCK to repair unmatched pairs"
2800 $START_NAMESPACE -A -r ||
2801 error "(5) Fail to start LFSCK for namespace"
# Poll the namespace LFSCK status on the MDS until it reads "completed".
2803 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2804 mdd.${MDT_DEV}.lfsck_namespace |
2805 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2807 error "(6) unexpected status"
2810 local repaired=$($SHOW_NAMESPACE |
2811 awk '/^unmatched_pairs_repaired/ { print $2 }')
2812 [ $repaired -eq 1 ] ||
2813 error "(7) Fail to repair unmatched pairs: $repaired"
2815 echo "'ls' should success after namespace LFSCK repairing"
2816 ls -ail $DIR/$tdir/foo/dummy > /dev/null ||
2817 error "(8) ls should success."
2819 run_test 22a "LFSCK can repair unmatched pairs (1)"
# Test 22b: like the previous test, but "guard" is kept. fid2path on dummy's
# FID must NOT resolve to foo/dummy before LFSCK, and must resolve to it once
# namespace LFSCK has reported unmatched_pairs_repaired == 1.
2822 [ $MDSCOUNT -lt 2 ] &&
2823 skip "We need at least 2 MDSes for this test" && return
2826 echo "The parent_A references the child directory via the name entry_B,"
2827 echo "but the child directory back references another parent_C via its"
# NOTE(review): the inner double quotes in the next echo line close and reopen
# the string, so ".." is printed without surrounding quotes.
2828 echo "".." name entry. The parent_C exists, but there is no the name"
2829 echo "entry_B under the parent_C. Then the namesapce LFSCK will repair"
2830 echo "the child directory's ".." name entry and its linkEA."
2833 check_mount_and_prep
2835 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
2836 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
2838 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
2839 echo "and bad linkEA. The dummy's dotdot name entry references the"
2840 echo "guard. The dummy's linkEA references n non-exist name entry."
# The fail_loc is armed only around the mkdir of foo/dummy.
2841 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
2842 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
2843 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
2844 error "(3) Fail to mkdir on MDT0"
2845 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Before repair: FID -> path must not come back as foo/dummy.
2847 local dummyfid=$($LFS path2fid $DIR/$tdir/foo/dummy)
2848 echo "fid2path should NOT work on the dummy's FID $dummyfid"
2849 local dummyname=$($LFS fid2path $DIR $dummyfid)
2850 [ "$dummyname" != "$DIR/$tdir/foo/dummy" ] ||
2851 error "(4) fid2path works unexpectedly."
2853 echo "Trigger namespace LFSCK to repair unmatched pairs"
2854 $START_NAMESPACE -A -r ||
2855 error "(5) Fail to start LFSCK for namespace"
# Poll the namespace LFSCK status on the MDS until it reads "completed".
2857 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2858 mdd.${MDT_DEV}.lfsck_namespace |
2859 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2861 error "(6) unexpected status"
2864 local repaired=$($SHOW_NAMESPACE |
2865 awk '/^unmatched_pairs_repaired/ { print $2 }')
2866 [ $repaired -eq 1 ] ||
2867 error "(7) Fail to repair unmatched pairs: $repaired"
# After repair: the same lookup must return foo/dummy. `dummyname` is declared
# `local` a second time here; harmless in bash.
2869 echo "fid2path should work on the dummy's FID $dummyfid after LFSCK"
2870 local dummyname=$($LFS fid2path $DIR $dummyfid)
2871 [ "$dummyname" == "$DIR/$tdir/foo/dummy" ] ||
2872 error "(8) fid2path does not work"
2874 run_test 22b "LFSCK can repair unmatched pairs (2)"
# Test 23a: d0/d1 (d1 on MDT1) is removed while fail_loc 0x1620 is set on
# mds2, after which `ls` on d0/d1 is expected to fail. A first namespace
# LFSCK run must report dangling_repaired == 1 while `ls` still fails; a
# second run with -C must again report 1 and make `ls` succeed.
2877 [ $MDSCOUNT -lt 2 ] &&
2878 skip "We need at least 2 MDSes for this test" && return
2881 echo "The name entry is there, but the MDT-object for such name "
2882 echo "entry does not exist. The namespace LFSCK should find out "
2883 echo "and repair the inconsistency as required."
2886 check_mount_and_prep
2888 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
2889 $LFS mkdir -i 1 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT1"
2891 echo "Inject failure stub on MDT1 to simulate dangling name entry"
# The fail_loc is armed on mds2 only around the rmdir.
2892 #define OBD_FAIL_LFSCK_DANGLING2 0x1620
2893 do_facet mds2 $LCTL set_param fail_loc=0x1620
2894 rmdir $DIR/$tdir/d0/d1 || error "(3) Fail to rmdir d1"
2895 do_facet mds2 $LCTL set_param fail_loc=0
2897 echo "'ls' should fail because of dangling name entry"
# A successful ls is the failure case here.
2898 ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(4) ls should fail."
2900 echo "Trigger namespace LFSCK to find out dangling name entry"
2901 $START_NAMESPACE -A -r ||
2902 error "(5) Fail to start LFSCK for namespace"
# Poll the namespace LFSCK status on the MDS until it reads "completed".
2904 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2905 mdd.${MDT_DEV}.lfsck_namespace |
2906 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2908 error "(6) unexpected status"
2911 local repaired=$($SHOW_NAMESPACE |
2912 awk '/^dangling_repaired/ { print $2 }')
2913 [ $repaired -eq 1 ] ||
2914 error "(7) Fail to repair dangling name entry: $repaired"
2916 echo "'ls' should fail because not re-create MDT-object by default"
2917 ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(8) ls should fail."
# Second run adds -C; going by the echo messages this is the option that lets
# LFSCK re-create the missing MDT-object -- TODO confirm against lctl docs.
2919 echo "Trigger namespace LFSCK again to repair dangling name entry"
2920 $START_NAMESPACE -A -r -C ||
2921 error "(9) Fail to start LFSCK for namespace"
2923 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2924 mdd.${MDT_DEV}.lfsck_namespace |
2925 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2927 error "(10) unexpected status"
2930 repaired=$($SHOW_NAMESPACE |
2931 awk '/^dangling_repaired/ { print $2 }')
2932 [ $repaired -eq 1 ] ||
2933 error "(11) Fail to repair dangling name entry: $repaired"
2935 echo "'ls' should success after namespace LFSCK repairing"
2936 ls -ail $DIR/$tdir/d0/d1 > /dev/null || error "(12) ls should success."
2938 run_test 23a "LFSCK can repair dangling name entry (1)"
# test_23b: dangling name entry that is one hard link of an existing object.
#
# Writes "dummy" to d0/f0 and "dead" to d0/f1, hard-links f0 as d0/foo
# while fail_loc 0x1621 (OBD_FAIL_LFSCK_DANGLING3) is set on $SINGLEMDS,
# then unlinks f1; 'ls' on d0/foo is expected to fail.  After a namespace
# LFSCK run with "-r -C" the test requires dangling_repaired == 1,
# multiple_linked_repaired == 1, and that d0/foo reads back "dummy".
test_23b() {
	local repaired
	local data

	echo "The objectA has multiple hard links, one of them corresponding"
	echo "to the name entry_B. But there is something wrong for the name"
	echo "entry_B and cause entry_B to references non-exist object_C."
	echo "In the first-stage scanning, the LFSCK will think the entry_B"
	echo "as dangling, and re-create the lost object_C. When the LFSCK"
	echo "comes to the second-stage scanning, it will find that the"
	echo "former re-creating object_C is not proper, and will try to"
	echo "replace the object_C with the real object_A."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
	echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
	echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"

	echo "Inject failure stub on MDT0 to simulate dangling name entry"
	#define OBD_FAIL_LFSCK_DANGLING3 0x1621
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
	ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"

	echo "'ls' should fail because of dangling name entry"
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(6) ls should fail."

	echo "Trigger namespace LFSCK to find out dangling name entry"
	$START_NAMESPACE -r -C ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^dangling_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 1 ] ||
		error "(9) Fail to repair dangling name entry: $repaired"

	repaired=$($SHOW_NAMESPACE |
		   awk '/^multiple_linked_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(10) Fail to drop the former created object: $repaired"

	# foo must expose the content originally written to f0.
	data=$(cat $DIR/$tdir/d0/foo)
	[ "$data" == "dummy" ] ||
		error "(11) The $DIR/$tdir/d0/foo is not recovered: $data"
}
run_test 23b "LFSCK can repair dangling name entry (2)"
# test_23c: dangling name entry whose re-created object is modified
# before LFSCK finishes.
#
# Same setup as the previous dangling-entry case (f0 = "dummy", f1 = "dead",
# foo hard-linked to f0 under fail_loc 0x1621, f1 unlinked).  Before LFSCK
# is started with "-r -C", fail_val=10 / fail_loc=0x1602
# (OBD_FAIL_LFSCK_DELAY3) are set on $SINGLEMDS.  The test then polls
# 'stat' on d0/foo until its Size field reads 0, appends "data" to it,
# clears the fail_loc and waits for LFSCK to complete.  It requires
# dangling_repaired == 1 and that d0/foo does NOT read back "dummy".
test_23c() {
	local repaired
	local data

	echo "The objectA has multiple hard links, one of them corresponding"
	echo "to the name entry_B. But there is something wrong for the name"
	echo "entry_B and cause entry_B to references non-exist object_C."
	echo "In the first-stage scanning, the LFSCK will think the entry_B"
	echo "as dangling, and re-create the lost object_C. And then others"
	echo "modified the re-created object_C. When the LFSCK comes to the"
	echo "second-stage scanning, it will find that the former re-creating"
	echo "object_C maybe wrong and try to replace the object_C with the"
	echo "real object_A. But because object_C has been modified, so the"
	echo "LFSCK cannot replace it."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
	echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
	echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"

	echo "Inject failure stub on MDT0 to simulate dangling name entry"
	#define OBD_FAIL_LFSCK_DANGLING3 0x1621
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
	ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"

	echo "'ls' should fail because of dangling name entry"
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(6) ls should fail."

	#define OBD_FAIL_LFSCK_DELAY3 0x1602
	do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602

	echo "Trigger namespace LFSCK to find out dangling name entry"
	$START_NAMESPACE -r -C ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet client "stat $DIR/$tdir/d0/foo |
		awk '/Size/ { print \\\$2 }'" "0" 32 || {
		# Show the file that was being polled (this test creates no
		# "guard" entry), plus the LFSCK state, before failing.
		stat $DIR/$tdir/d0/foo
		$SHOW_NAMESPACE
		error "(8) unexpected size"
	}

	# Modify the object while LFSCK is still held back by the fail_loc.
	echo "data" >> $DIR/$tdir/d0/foo || error "(9) Fail to write"
	cancel_lru_locks osc

	do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(10) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^dangling_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 1 ] ||
		error "(11) Fail to repair dangling name entry: $repaired"

	data=$(cat $DIR/$tdir/d0/foo)
	[ "$data" != "dummy" ] ||
		error "(12) The $DIR/$tdir/d0/foo should not be recovered"
}
run_test 23c "LFSCK can repair dangling name entry (3)"
# test_24: two MDT-objects whose linkEA point at the same name entry.
#
# Creates d0 on MDT1 with sub-directories guard and dummy and a file
# guard/foo.  With fail_loc 0x1622 (OBD_FAIL_LFSCK_MUL_REF) set on
# $SINGLEMDS it creates and removes the directory dummy/foo on MDT0,
# recording its FID first.  After a namespace LFSCK run ("-A -r") the test
# requires multiple_referenced_repaired == 1 and an entry named
# "<cfid>-<pfid>-D-0" under .lustre/lost+found/MDT0000/, where <cfid> is
# the FID of dummy/foo and <pfid> is the FID of guard on non-ldiskfs
# backends and of dummy on ldiskfs.
#
# Skipped unless MDSCOUNT >= 2.
test_24() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	local pfid
	local cfid
	local repaired
	local cname

	echo "Two MDT-objects back reference the same name entry via their"
	echo "each own linkEA entry, but the name entry only references one"
	echo "MDT-object. The namespace LFSCK will remove the linkEA entry"
	echo "for the MDT-object that is not recognized. If such MDT-object"
	echo "has no other linkEA entry after the removing, then the LFSCK"
	echo "will add it as orphan under the .lustre/lost+found/MDTxxxx/."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"

	mkdir $DIR/$tdir/d0/guard || error "(1) Fail to mkdir guard"
	$LFS path2fid $DIR/$tdir/d0/guard

	mkdir $DIR/$tdir/d0/dummy || error "(2) Fail to mkdir dummy"
	$LFS path2fid $DIR/$tdir/d0/dummy

	# The parent FID used in the expected orphan name depends on the
	# backend type of $SINGLEMDS.
	if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
		pfid=$($LFS path2fid $DIR/$tdir/d0/guard)
	else
		pfid=$($LFS path2fid $DIR/$tdir/d0/dummy)
	fi

	touch $DIR/$tdir/d0/guard/foo ||
		error "(3) Fail to touch $DIR/$tdir/d0/guard/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "the $DIR/$tdir/d0/dummy/foo has the 'bad' linkEA entry"
	echo "that references $DIR/$tdir/d0/guard/foo."
	echo "Then remove the name entry $DIR/$tdir/d0/dummy/foo."
	echo "So the MDT-object $DIR/$tdir/d0/dummy/foo will be left"
	echo "there with the same linkEA entry as another MDT-object"
	echo "$DIR/$tdir/d0/guard/foo has"

	#define OBD_FAIL_LFSCK_MUL_REF 0x1622
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1622
	$LFS mkdir -i 0 $DIR/$tdir/d0/dummy/foo ||
		error "(4) Fail to mkdir $DIR/$tdir/d0/dummy/foo"
	$LFS path2fid $DIR/$tdir/d0/dummy/foo
	cfid=$($LFS path2fid $DIR/$tdir/d0/dummy/foo)
	rmdir $DIR/$tdir/d0/dummy/foo ||
		error "(5) Fail to remove $DIR/$tdir/d0/dummy/foo name entry"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	echo "stat $DIR/$tdir/d0/dummy/foo should fail"
	stat $DIR/$tdir/d0/dummy/foo > /dev/null 2>&1 &&
		error "(6) stat successfully unexpectedly"

	echo "Trigger namespace LFSCK to repair multiple-referenced name entry"
	$START_NAMESPACE -A -r ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^multiple_referenced_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 1 ] ||
	error "(9) Fail to repair multiple referenced name entry: $repaired"

	echo "There should be an orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(10) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	cname="$cfid-$pfid-D-0"
	# Quoted so the FID-derived name is taken literally and never
	# subjected to word splitting or pathname expansion.
	ls -ail "$MOUNT/.lustre/lost+found/MDT0000/$cname" ||
		error "(11) .lustre/lost+found/MDT0000/ should not be empty"
}
run_test 24 "LFSCK can repair multiple-referenced name entry"
# test_25: name entry that records the wrong file type.
#
# Creates d0 on MDT0 and touches d0/foo while fail_loc 0x1623
# (OBD_FAIL_LFSCK_BAD_TYPE) is set on $SINGLEMDS.  After a namespace LFSCK
# run ("-r") the test requires bad_file_type_repaired == 1 and that
# 'ls' on d0 succeeds.
#
# Skipped when the backend of $SINGLEMDS is not ldiskfs.
test_25() {
	[ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
		skip "Only support to inject failure on ldiskfs" && return

	local repaired

	echo "The file type in the name entry does not match the file type"
	echo "claimed by the referenced object. Then the LFSCK will update"
	echo "the file type in the name entry."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "the file type stored in the name entry is wrong."

	#define OBD_FAIL_LFSCK_BAD_TYPE 0x1623
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1623
	touch $DIR/$tdir/d0/foo || error "(2) Fail to touch $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	echo "Trigger namespace LFSCK to repair bad file type in the name entry"
	$START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(4) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^bad_file_type_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 1 ] ||
	error "(5) Fail to repair bad file type in name entry: $repaired"

	ls -ail $DIR/$tdir/d0 || error "(6) Fail to 'ls' the $DIR/$tdir/d0"
}
run_test 25 "LFSCK can repair bad file type in the name entry"
# test_26a: lost local name entry for an object that still has linkEA.
#
# Creates d0/foo on MDT0, records its FID and adds a hard link d0/dummy.
# foo is then unlinked while fail_loc 0x1624 (OBD_FAIL_LFSCK_NO_NAMEENTRY)
# is set on $SINGLEMDS, after which 'ls' on d0/foo must fail.  After a
# namespace LFSCK run ("-r -A") the test requires lost_dirent_repaired == 1,
# that 'ls' on d0/foo succeeds, and that foo's FID is unchanged.
test_26a() {
	local foofid
	local foofid2
	local repaired

	echo "The local name entry back referenced by the MDT-object is lost."
	echo "The namespace LFSCK will add the missing local name entry back"
	echo "to the normal namespace."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	foofid=$($LFS path2fid $DIR/$tdir/d0/foo)

	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rm -f $DIR/$tdir/d0/foo || error "(4) Fail to unlink $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# The message must be passed to error(); a bare string here would
	# only be run as a (non-existent) command and never fail the test.
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(5) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing local name entry"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	ls -ail $DIR/$tdir/d0/foo ||
		error "(9) Fail to 'ls' $DIR/$tdir/d0/foo"

	foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
	[ "$foofid" == "$foofid2" ] ||
		error "(10) foo's FID changed: $foofid, $foofid2"
}
run_test 26a "LFSCK can add the missing local name entry back to the namespace"
# test_26b: lost remote name entry for an object that still has linkEA.
#
# Creates d0 on MDT1 and the directory d0/foo on MDT0, records foo's FID,
# then removes foo while fail_loc 0x1624 (OBD_FAIL_LFSCK_NO_NAMEENTRY) is
# set on $SINGLEMDS, after which 'ls' on d0/foo must fail.  After a
# namespace LFSCK run ("-r -A") the test requires lost_dirent_repaired == 1,
# that 'ls' on d0/foo succeeds, and that foo's FID is unchanged.
#
# Skipped unless MDSCOUNT >= 2.
test_26b() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	local foofid
	local foofid2
	local repaired

	echo "The remote name entry back referenced by the MDT-object is lost."
	echo "The namespace LFSCK will add the missing remote name entry back"
	echo "to the normal namespace."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	$LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"
	foofid=$($LFS path2fid $DIR/$tdir/d0/foo)

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# The message must be passed to error(); a bare string here would
	# only be run as a (non-existent) command and never fail the test.
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(4) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 1 ] ||
		error "(7) Fail to repair lost dirent: $repaired"

	ls -ail $DIR/$tdir/d0/foo ||
		error "(8) Fail to 'ls' $DIR/$tdir/d0/foo"

	foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
	[ "$foofid" == "$foofid2" ] ||
		error "(9) foo's FID changed: $foofid, $foofid2"
}
run_test 26b "LFSCK can add the missing remote name entry back to the namespace"
# test_27a: object whose linkEA references a local parent that is gone.
#
# Creates d0/foo on MDT0 with a hard link d0/dummy, unlinks both names
# while fail_loc 0x1624 (OBD_FAIL_LFSCK_NO_NAMEENTRY) is set on
# $SINGLEMDS, then removes d0 itself; 'ls' on d0 must fail afterwards.
# After a namespace LFSCK run ("-r -A") the test requires
# lost_dirent_repaired == 1, that .lustre/lost+found/MDT0000 is a
# directory, and that it contains an entry whose name matches "*-P-*".
test_27a() {
	local repaired
	local cname

	echo "The local parent referenced by the MDT-object linkEA is lost."
	echo "The namespace LFSCK will re-create the lost parent as orphan."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system. And then remove"
	echo "another hard link and the parent directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rm -f $DIR/$tdir/d0/foo ||
		error "(4) Fail to unlink $DIR/$tdir/d0/foo"
	rm -f $DIR/$tdir/d0/dummy ||
		error "(5) Fail to unlink $DIR/$tdir/d0/dummy"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -rf $DIR/$tdir/d0 || error "(5) Fail to unlink the dir d0"
	# The message must be passed to error(); a bare string here would
	# only be run as a (non-existent) command and never fail the test.
	ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 &&
		error "(6) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the lost parent"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	echo "There should be an orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(9) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0000/

	# The pattern is quoted so that find, not the shell, interprets it;
	# unquoted it could expand against files in the current directory.
	cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-P-*")
	[ -n "$cname" ] ||
		error "(10) .lustre/lost+found/MDT0000/ should not be empty"
}
run_test 27a "LFSCK can recreate the lost local parent directory as orphan"
# test_27b: object whose linkEA references a remote parent that is gone.
#
# Creates d0 on MDT1 and the directory d0/foo on MDT0, removes foo while
# fail_loc 0x1624 (OBD_FAIL_LFSCK_NO_NAMEENTRY) is set on $SINGLEMDS, then
# removes d0; 'ls' on d0 must fail afterwards.  After a namespace LFSCK
# run ("-r -A") the test requires lost_dirent_repaired == 1, that
# .lustre/lost+found/MDT0001 is a directory, and that it contains an
# entry whose name matches "*-P-*".
#
# Skipped unless MDSCOUNT >= 2.
test_27b() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	local repaired
	local cname

	echo "The remote parent referenced by the MDT-object linkEA is lost."
	echo "The namespace LFSCK will re-create the lost parent as orphan."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	$LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"

	$LFS path2fid $DIR/$tdir/d0

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system. And then remove"
	echo "the parent directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rmdir $DIR/$tdir/d0 || error "(4) Fail to unlink the dir d0"
	# The message must be passed to error(); a bare string here would
	# only be run as a (non-existent) command and never fail the test.
	ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 &&
		error "(5) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the lost parent"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	ls -ail $MOUNT/.lustre/lost+found/

	echo "There should be an orphan under .lustre/lost+found/MDT0001/"
	[ -d $MOUNT/.lustre/lost+found/MDT0001 ] ||
		error "(9) $MOUNT/.lustre/lost+found/MDT0001/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0001/

	# The pattern is quoted so that find, not the shell, interprets it;
	# unquoted it could expand against files in the current directory.
	cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name "*-P-*")
	[ -n "$cname" ] ||
		error "(10) .lustre/lost+found/MDT0001/ should not be empty"
}
run_test 27b "LFSCK can recreate the lost remote parent directory as orphan"
# test_28: orphan handling is skipped on an MDT that failed during the
# first-stage scan, but not on the others.
#
# Layout: d1 on MDT0 with a1, a2 on MDT1; d2 on MDT1 with a1, a2 on MDT0.
# d1/a1 and d2/a2 are removed while fail_loc 0x1624
# (OBD_FAIL_LFSCK_NO_NAMEENTRY) is set on both mds1 and mds2.
#
# First LFSCK run ("-r -A") with fail_loc 0x161c
# (OBD_FAIL_LFSCK_BAD_NETWORK) set on mds2:
#   - mds1 status must become "partial", mds2 "completed";
#   - lost_dirent_repaired must be 0 on mds1 and 1 on mds2.
# Second run with the fail_loc cleared:
#   - every MDS must reach "completed";
#   - lost_dirent_repaired must be 1 on mds1 and 0 on mds2.
#
# Skipped unless MDSCOUNT >= 2.
test_28() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	local repaired
	local k

	echo "The target name entry is lost. The LFSCK should insert the"
	echo "orphan MDT-object under .lustre/lost+found/MDTxxxx. But if"
	echo "the MDT (on which the orphan MDT-object resides) has ever"
	echo "failed to respond some name entry verification during the"
	echo "first stage-scanning, then the LFSCK should skip to handle"
	echo "orphan MDT-object on this MDT. But other MDTs should not"
	echo "be affected."

	check_mount_and_prep
	$LFS mkdir -i 0 $DIR/$tdir/d1
	$LFS mkdir -i 1 $DIR/$tdir/d1/a1
	$LFS mkdir -i 1 $DIR/$tdir/d1/a2

	$LFS mkdir -i 1 $DIR/$tdir/d2
	$LFS mkdir -i 0 $DIR/$tdir/d2/a1
	$LFS mkdir -i 0 $DIR/$tdir/d2/a2

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "d1/a1's name entry will be removed, but the d1/a1's object"
	echo "and its linkEA are kept in the system. And the case that"
	echo "d2/a2's name entry will be removed, but the d2/a2's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet mds1 $LCTL set_param fail_loc=0x1624
	do_facet mds2 $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d1/a1 || error "(1) Fail to rmdir $DIR/$tdir/d1/a1"
	rmdir $DIR/$tdir/d2/a2 || error "(2) Fail to rmdir $DIR/$tdir/d2/a2"
	do_facet mds1 $LCTL set_param fail_loc=0
	do_facet mds2 $LCTL set_param fail_loc=0

	cancel_lru_locks mdc
	cancel_lru_locks osc

	echo "Inject failure, to simulate the MDT0 fail to handle"
	echo "MDT1 LFSCK request during the first-stage scanning."
	#define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c
	do_facet mds2 $LCTL set_param fail_loc=0x161c fail_val=0

	echo "Trigger namespace LFSCK on all devices to find out orphan object"
	$START_NAMESPACE -r -A ||
		error "(3) Fail to start LFSCK for namespace"

	wait_update_facet mds1 "$LCTL get_param -n \
		mdd.$(facet_svc mds1).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "partial" 32 || {
		error "(4) mds1 is not the expected 'partial'"
	}

	wait_update_facet mds2 "$LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(5) mds2 is not the expected 'completed'"
	}

	do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0

	repaired=$(do_facet mds1 $LCTL get_param -n \
		   mdd.$(facet_svc mds1).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -eq 0 ] ||
		error "(6) Expect 0 fixed on mds1, but got: $repaired"

	repaired=$(do_facet mds2 $LCTL get_param -n \
		   mdd.$(facet_svc mds2).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(7) Expect 1 fixed on mds2, but got: $repaired"

	echo "Trigger namespace LFSCK on all devices again to cleanup"
	$START_NAMESPACE -r -A ||
		error "(8) Fail to start LFSCK for namespace"

	for k in $(seq $MDSCOUNT); do
		# The LFSCK status query internal is 30 seconds. For the case
		# of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
		# time to guarantee the status sync up.
		wait_update_facet mds${k} "$LCTL get_param -n \
			mdd.$(facet_svc mds${k}).lfsck_namespace |
			awk '/^status/ { print \\\$2 }'" "completed" 32 ||
			error "(9) MDS${k} is not the expected 'completed'"
	done

	repaired=$(do_facet mds1 $LCTL get_param -n \
		   mdd.$(facet_svc mds1).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(10) Expect 1 fixed on mds1, but got: $repaired"

	repaired=$(do_facet mds2 $LCTL get_param -n \
		   mdd.$(facet_svc mds2).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ "$repaired" -eq 0 ] ||
		error "(11) Expect 0 fixed on mds2, but got: $repaired"
}
run_test 28 "Skip the failed MDT(s) when handle orphan MDT-objects"
# test_29a: nlink attribute larger than the number of name entries.
#
# Creates d0/foo on MDT0 and hard-links it as d0/h1 while fail_loc 0x1625
# (OBD_FAIL_LFSCK_MORE_NLINK) is set on $SINGLEMDS; the test then expects
# 'stat' to report 3 links.  After a namespace LFSCK run ("-r -A") it
# requires nlinks_repaired == 1 and a link count of 2.
test_29a() {
	local count
	local repaired

	echo "The object's nlink attribute is larger than the object's known"
	echo "name entries count. The LFSCK will repair the object's nlink"
	echo "attribute to match the known name entries count"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"

	echo "Inject failure stub on MDT0 to simulate the case that foo's"
	echo "nlink attribute is larger than its name entries count."

	#define OBD_FAIL_LFSCK_MORE_NLINK 0x1625
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1625
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# Cancel the client's mdc locks before reading the link count.
	cancel_lru_locks mdc
	count=$(stat --format=%h $DIR/$tdir/d0/foo)
	# Quoted so that an empty value fails the check cleanly.
	[ "$count" -eq 3 ] || error "(4) Cannot inject error: $count"

	echo "Trigger namespace LFSCK to repair the nlink count"
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^nlinks_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(7) Fail to repair nlink count: $repaired"

	cancel_lru_locks mdc
	count=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count" -eq 2 ] || error "(8) Fail to repair nlink count: $count"
}
run_test 29a "LFSCK can repair bad nlink count (1)"
# test_29b: nlink attribute smaller than the number of name entries.
#
# Creates d0/foo on MDT0 and hard-links it as d0/h1 while fail_loc 0x1626
# (OBD_FAIL_LFSCK_LESS_NLINK) is set on $SINGLEMDS; the test then expects
# 'stat' to report 1 link.  After a namespace LFSCK run ("-r -A") it
# requires nlinks_repaired == 1 and a link count of 2.
test_29b() {
	local count
	local repaired

	echo "The object's nlink attribute is smaller than the object's known"
	echo "name entries count. The LFSCK will repair the object's nlink"
	echo "attribute to match the known name entries count"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"

	echo "Inject failure stub on MDT0 to simulate the case that foo's"
	echo "nlink attribute is smaller than its name entries count."

	#define OBD_FAIL_LFSCK_LESS_NLINK 0x1626
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1626
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# Cancel the client's mdc locks before reading the link count.
	cancel_lru_locks mdc
	count=$(stat --format=%h $DIR/$tdir/d0/foo)
	# Quoted so that an empty value fails the check cleanly.
	[ "$count" -eq 1 ] || error "(4) Cannot inject error: $count"

	echo "Trigger namespace LFSCK to repair the nlink count"
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^nlinks_repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(7) Fail to repair nlink count: $repaired"

	cancel_lru_locks mdc
	count=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count" -eq 2 ] || error "(8) Fail to repair nlink count: $count"
}
run_test 29b "LFSCK can repair bad nlink count (2)"
# test_29c: nlink verification is skipped when linkEA has overflowed.
#
# Creates d0/foo on MDT0 with a hard link h1, then adds a second hard link
# h2 while fail_loc 0x1627 (OBD_FAIL_LFSCK_LINKEA_OVERFLOW) is set on
# $SINGLEMDS.  Before LFSCK the test expects 'stat' to report 3 links and
# 'lfs fid2path' to print 2 paths.  The fail_loc stays set while a
# namespace LFSCK run ("-r -A") completes and is cleared only afterwards.
# The test then requires nlinks_repaired == 0 and the same 3 / 2 counts.
test_29c() {
	local count1
	local count2
	local foofid
	local repaired

	echo "There are too many hard links to the object, and exceeds the"
	echo "object's linkEA limitation, as to NOT all the known name entries"
	echo "will be recorded in the linkEA. Under such case, LFSCK should"
	echo "skip the nlink verification for this object."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's hard links exceed the object's linkEA limitation."

	#define OBD_FAIL_LFSCK_LINKEA_OVERFLOW 0x1627
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1627
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h2 ||
		error "(4) Fail to hard link to $DIR/$tdir/d0/foo"

	cancel_lru_locks mdc

	count1=$(stat --format=%h $DIR/$tdir/d0/foo)
	# Quoted so that an empty value fails the check cleanly.
	[ "$count1" -eq 3 ] || error "(5) Stat failure: $count1"

	foofid=$($LFS path2fid $DIR/$tdir/d0/foo)
	$LFS fid2path $DIR $foofid
	count2=$($LFS fid2path $DIR $foofid | wc -l)
	# The message must be passed to error(); a bare string here would
	# only be run as a (non-existent) command and never fail the test.
	[ "$count2" -eq 2 ] || error "(6) Fail to inject error: $count2"

	echo "Trigger namespace LFSCK to repair the nlink count"
	$START_NAMESPACE -r -A ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	}

	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
	repaired=$($SHOW_NAMESPACE |
		   awk '/^nlinks_repaired/ { print $2 }')
	[ "$repaired" -eq 0 ] ||
		error "(9) Repair nlink count unexpectedly: $repaired"

	cancel_lru_locks mdc

	count1=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ "$count1" -eq 3 ] || error "(10) Stat failure: $count1"

	count2=$($LFS fid2path $DIR $foofid | wc -l)
	[ "$count2" -eq 2 ] ||
		error "(11) Repaired something unexpectedly: $count2"
}
run_test 29c "Not verify nlink attr if hark links exceed linkEA limitation"
# test_30: orphans placed in the backend /lost+found are moved back into
# the client-visible namespace.
#
# Steps, as performed below:
#   1. Create foo on MDT0 and foo/f0; create foo/d0 while fail_loc 0x161d
#      (OBD_FAIL_LFSCK_NO_LINKEA) is set; create d0/f1 and d0/d1.
#   2. With fail_loc 0x1624 (OBD_FAIL_LFSCK_NO_NAMEENTRY) set, remove
#      d0/d1, d0/f1, d0 and f0.
#   3. Unmount the client, stop $SINGLEMDS, run e2fsck -y on the MDT
#      device, restart the MDT with $MOUNT_OPTS_NOSCRUB.
#   4. Run namespace LFSCK ("-r -A") and require
#      local_lost_found_moved >= 4.
#   5. Remount the client; foo/f0 must be stat-able, and an entry matching
#      "*-*-D-*" under .lustre/lost+found/MDT0000/ must contain d1 and f1.
#
# Skipped when the backend of $SINGLEMDS is not ldiskfs.
test_30() {
	[ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
		skip "Only support backend /lost+found for ldiskfs" && return

	local repaired
	local cname

	echo "The namespace LFSCK will move the orphans from backend"
	echo "/lost+found directory to normal client visible namespace"
	echo "or to global visible .lustre/lost+found/MDTxxxx/ directory"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/foo || error "(1) Fail to mkdir foo"
	touch $DIR/$tdir/foo/f0 || error "(2) Fail to touch f0"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "directory d0 has no linkEA entry, then the LFSCK will"
	echo "move it into .lustre/lost+found/MDTxxxx/ later."

	#define OBD_FAIL_LFSCK_NO_LINKEA 0x161d
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
	mkdir $DIR/$tdir/foo/d0 || error "(3) Fail to mkdir d0"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	touch $DIR/$tdir/foo/d0/f1 || error "(4) Fail to touch f1"
	mkdir $DIR/$tdir/foo/d0/d1 || error "(5) Fail to mkdir d1"

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "object's name entry will be removed, but not destroy the"
	echo "object. Then backend e2fsck will handle it as orphan and"
	echo "add them into the backend /lost+found directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/foo/d0/d1 || error "(6) Fail to rmdir d1"
	rm -f $DIR/$tdir/foo/d0/f1 || error "(7) Fail to unlink f1"
	rmdir $DIR/$tdir/foo/d0 || error "(8) Fail to rmdir d0"
	rm -f $DIR/$tdir/foo/f0 || error "(9) Fail to unlink f0"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	umount_client $MOUNT || error "(10) Fail to stop client!"

	stop $SINGLEMDS || error "(11) Fail to stop MDT0"

	run_e2fsck $(facet_host $SINGLEMDS) $MDT_DEVNAME "-y" ||
		error "(12) Fail to run e2fsck"

	start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
		error "(13) Fail to start MDT0"

	echo "Trigger namespace LFSCK to recover backend orphans"
	$START_NAMESPACE -r -A ||
		error "(14) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(15) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^local_lost_found_moved/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -ge 4 ] ||
		error "(16) Fail to recover backend orphans: $repaired"

	mount_client $MOUNT || error "(17) Fail to start client!"

	# The message must be passed to error(); a bare string here would
	# only be run as a (non-existent) command and never fail the test.
	stat $DIR/$tdir/foo/f0 || error "(18) f0 is not recovered"

	ls -ail $MOUNT/.lustre/lost+found/

	echo "d0 should become orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(19) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0000/

	# The pattern is quoted so that find, not the shell, interprets it;
	# unquoted it could expand against files in the current directory.
	cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-*-D-*")
	[ -n "$cname" ] || error "(20) d0 is not recovered"

	stat ${cname}/d1 || error "(21) d1 is not recovered"
	stat ${cname}/f1 || error "(22) f1 is not recovered"
}
run_test 30 "LFSCK can recover the orphans from backend /lost+found"
# test_31a: name entries stored in the wrong shard of a striped directory
# (fail_val=0 variant).
#
# Creates a striped directory across $MDSCOUNT MDTs starting at index 0,
# then creates $MDSCOUNT sub-directories d0..d<N-1> in it while the client
# has fail_loc=0x1628 (OBD_FAIL_LFSCK_BAD_NAME_HASH) and fail_val=0 set.
# After a namespace LFSCK run ("-r -A") the test requires
# name_hash_repaired >= 1 on $SINGLEMDS, remounts the client, and checks
# that every d<i> can be stat-ed and removed, followed by the striped
# directory itself.
#
# Skipped unless MDSCOUNT >= 2.
test_31a() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	local repaired
	local i

	echo "For the name entry under a striped directory, if the name"
	echo "hash does not match the shard, then the LFSCK will repair"
	echo "the bad name entry"

	check_mount_and_prep

	$LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
		error "(1) Fail to create striped directory"

	echo "Inject failure stub on client to simulate the case that"
	echo "some name entry should be inserted into other non-first"
	echo "shard, but inserted into the first shard by wrong"

	#define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
	$LCTL set_param fail_loc=0x1628 fail_val=0
	createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT ||
		error "(2) Fail to create file under striped directory"
	$LCTL set_param fail_loc=0 fail_val=0

	echo "Trigger namespace LFSCK to repair bad name hash"
	$START_NAMESPACE -r -A ||
		error "(3) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK namespace state to the log before failing.
		$SHOW_NAMESPACE
		error "(4) unexpected status"
	}

	repaired=$($SHOW_NAMESPACE |
		   awk '/^name_hash_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -ge 1 ] ||
		error "(5) Fail to repair bad name hash: $repaired"

	# Remount the client before verifying the repaired entries.
	umount_client $MOUNT || error "(6) umount failed"
	mount_client $MOUNT || error "(7) mount failed"

	for ((i = 0; i < $MDSCOUNT; i++)); do
		stat $DIR/$tdir/striped_dir/d$i ||
			error "(8) Fail to stat d$i after LFSCK"
		rmdir $DIR/$tdir/striped_dir/d$i ||
			error "(9) Fail to unlink d$i after LFSCK"
	done

	rmdir $DIR/$tdir/striped_dir ||
		error "(10) Fail to remove the striped directory after LFSCK"
}
run_test 31a "The LFSCK can find/repair the name entry with bad name hash (1)"
# test_31b: name entries stored in the wrong shard of a striped directory
# (fail_val=1 variant).
#
# Creates a striped directory across $MDSCOUNT MDTs starting at index 0,
# then creates $MDSCOUNT sub-directories d0..d<N-1> in it while the client
# has fail_loc=0x1628 (OBD_FAIL_LFSCK_BAD_NAME_HASH) and fail_val=1 set.
# After a namespace LFSCK run ("-r -A") the test waits for mds2 to reach
# "completed", requires name_hash_repaired >= 1 on mds2, remounts the
# client, and checks that every d<i> can be stat-ed and removed, followed
# by the striped directory itself.
#
# Skipped unless MDSCOUNT >= 2.
test_31b() {
	[ "$MDSCOUNT" -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	local repaired
	local i

	echo "For the name entry under a striped directory, if the name"
	echo "hash does not match the shard, then the LFSCK will repair"
	echo "the bad name entry"

	check_mount_and_prep

	$LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
		error "(1) Fail to create striped directory"

	echo "Inject failure stub on client to simulate the case that"
	echo "some name entry should be inserted into other non-second"
	echo "shard, but inserted into the second shard by wrong"

	#define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
	$LCTL set_param fail_loc=0x1628 fail_val=1
	createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT ||
		error "(2) Fail to create file under striped directory"
	$LCTL set_param fail_loc=0 fail_val=0

	echo "Trigger namespace LFSCK to repair bad name hash"
	$START_NAMESPACE -r -A ||
		error "(3) Fail to start LFSCK for namespace"

	# Status and counters are read from mds2 in this variant.
	wait_update_facet mds2 "$LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 ||
		error "(4) unexpected status"

	repaired=$(do_facet mds2 $LCTL get_param -n \
		   mdd.$(facet_svc mds2).lfsck_namespace |
		   awk '/^name_hash_repaired/ { print $2 }')
	# Quoted so that an empty counter fails the check cleanly.
	[ "$repaired" -ge 1 ] ||
		error "(5) Fail to repair bad name hash: $repaired"

	# Remount the client before verifying the repaired entries.
	umount_client $MOUNT || error "(6) umount failed"
	mount_client $MOUNT || error "(7) mount failed"

	for ((i = 0; i < $MDSCOUNT; i++)); do
		stat $DIR/$tdir/striped_dir/d$i ||
			error "(8) Fail to stat d$i after LFSCK"
		rmdir $DIR/$tdir/striped_dir/d$i ||
			error "(9) Fail to unlink d$i after LFSCK"
	done

	rmdir $DIR/$tdir/striped_dir ||
		error "(10) Fail to remove the striped directory after LFSCK"
}
run_test 31b "The LFSCK can find/repair the name entry with bad name hash (2)"
# Body of LFSCK test 31c, registered by the run_test call that ends this block.
# Scenario: create a striped directory while fail_loc 0x1629
# (OBD_FAIL_LFSCK_LOST_MASTER_LMV) is set on $SINGLEMDS, leave the directory
# untouched, run the namespace LFSCK and require striped_dirs_repaired == 1.
# The test is skipped unless at least two MDTs are configured.
3864 [ $MDSCOUNT -lt 2 ] &&
3865 skip "The test needs at least 2 MDTs" && return
# Describe the scenario in the test log.
3868 echo "For some reason, the master MDT-object of the striped directory"
3869 echo "may lost its master LMV EA. If nobody created files under the"
3870 echo "master directly after the master LMV EA lost, then the LFSCK"
3871 echo "should re-generate the master LMV EA."
3874 check_mount_and_prep
3876 echo "Inject failure stub on MDT0 to simulate the case that the"
3877 echo "master MDT-object of the striped directory lost the LMV EA."
# The fail_loc is active on $SINGLEMDS only while the striped directory is
# being created, and is cleared immediately afterwards.
3879 #define OBD_FAIL_LFSCK_LOST_MASTER_LMV 0x1629
3880 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
3881 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3882 error "(1) Fail to create striped directory"
3883 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
3885 echo "Trigger namespace LFSCK to re-generate master LMV EA"
# -r/-A are presumably "reset" and "all MDTs" -- TODO confirm against the
# lctl lfsck_start documentation.
3886 $START_NAMESPACE -r -A ||
3887 error "(2) Fail to start LFSCK for namespace"
# Poll the lfsck_namespace status on $SINGLEMDS until it reads "completed"
# (32 is presumably the maximum wait in seconds).
3889 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3890 mdd.${MDT_DEV}.lfsck_namespace |
3891 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3893 error "(3) unexpected status"
# Exactly one striped directory must have been repaired.
3896 local repaired=$($SHOW_NAMESPACE |
3897 awk '/^striped_dirs_repaired/ { print $2 }')
3898 [ $repaired -eq 1 ] ||
3899 error "(4) Fail to re-generate master LMV EA: $repaired"
# Remount the client and look at the directory again.
3901 umount_client $MOUNT || error "(5) umount failed"
3902 mount_client $MOUNT || error "(6) mount failed"
# The directory listing must be empty; any output from ls is treated as a
# sign that the master LMV EA was not repaired.
3904 local empty=$(ls $DIR/$tdir/striped_dir/)
3905 [ -z "$empty" ] || error "(7) The master LMV EA is not repaired: $empty"
3907 rmdir $DIR/$tdir/striped_dir ||
3908 error "(8) Fail to remove the striped directory after LFSCK"
3910 run_test 31c "Re-generate the lost master LMV EA for striped directory"
# Body of LFSCK test 31d, registered by the run_test call that ends this block.
# Scenario: create a striped directory while fail_loc 0x1629
# (OBD_FAIL_LFSCK_LOST_MASTER_LMV) is set on $SINGLEMDS, then create a file
# under it before running the namespace LFSCK. The test requires
# striped_dirs_repaired == 0, that the existing file is still visible, and
# that creating another file fails afterwards.
# The test is skipped unless at least two MDTs are configured.
3913 [ $MDSCOUNT -lt 2 ] &&
3914 skip "The test needs at least 2 MDTs" && return
# Describe the scenario in the test log.
3917 echo "For some reason, the master MDT-object of the striped directory"
3918 echo "may lost its master LMV EA. If somebody created files under the"
3919 echo "master directly after the master LMV EA lost, then the LFSCK"
3920 echo "should NOT re-generate the master LMV EA, instead, it should"
3921 echo "change the broken striped dirctory as read-only to prevent"
3922 echo "further damage"
3925 check_mount_and_prep
3927 echo "Inject failure stub on MDT0 to simulate the case that the"
3928 echo "master MDT-object of the striped directory lost the LMV EA."
# The fail_loc is active on $SINGLEMDS only while the striped directory is
# being created, and is cleared immediately afterwards.
3930 #define OBD_FAIL_LFSCK_LOST_MASTER_LMV 0x1629
3931 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
3932 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3933 error "(1) Fail to create striped directory"
3934 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0
# Remount the client before modifying the broken directory.
3936 umount_client $MOUNT || error "(2) umount failed"
3937 mount_client $MOUNT || error "(3) mount failed"
# This modification after the breakage is what distinguishes this case from
# the one where LFSCK is expected to re-generate the master LMV EA.
3939 touch $DIR/$tdir/striped_dir/dummy ||
3940 error "(4) Fail to touch under broken striped directory"
3942 echo "Trigger namespace LFSCK to find out the inconsistency"
# -r/-A are presumably "reset" and "all MDTs" -- TODO confirm against the
# lctl lfsck_start documentation.
3943 $START_NAMESPACE -r -A ||
3944 error "(5) Fail to start LFSCK for namespace"
# Poll the lfsck_namespace status on $SINGLEMDS until it reads "completed"
# (32 is presumably the maximum wait in seconds).
3946 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3947 mdd.${MDT_DEV}.lfsck_namespace |
3948 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3950 error "(6) unexpected status"
# No striped directory may have been repaired in this case.
3953 local repaired=$($SHOW_NAMESPACE |
3954 awk '/^striped_dirs_repaired/ { print $2 }')
3955 [ $repaired -eq 0 ] ||
3956 error "(7) Re-generate master LMV EA unexpected: $repaired"
# The file created before the LFSCK run must still be reachable.
3958 stat $DIR/$tdir/striped_dir/dummy ||
3959 error "(8) Fail to stat $DIR/$tdir/striped_dir/dummy"
# Inverted check: a *successful* touch is the failure condition here.
3961 touch $DIR/$tdir/striped_dir/foo &&
3962 error "(9) The broken striped directory should be read-only"
# Clear the immutable attribute (chattr -i) so the directory can be removed.
3964 chattr -i $DIR/$tdir/striped_dir ||
3965 error "(10) Fail to chattr on the broken striped directory"
# NOTE(review): "dummy" is not removed in the visible lines before this
# rmdir -- confirm that rmdir is expected to succeed on this directory.
3967 rmdir $DIR/$tdir/striped_dir ||
3968 error "(11) Fail to remove the striped directory after LFSCK"
3970 run_test 31d "Set broken striped directory (modified after broken) as read-only"
# Body of LFSCK test 31e, registered by the run_test call that ends this block.
# Scenario: create a striped directory while fail_loc 0x162a
# (OBD_FAIL_LFSCK_LOST_SLAVE_LMV) is set on $SINGLEMDS with fail_val=0, run
# the namespace LFSCK and require striped_shards_repaired == 1 as reported
# for ${MDT_DEV}.
# The test is skipped unless at least two MDTs are configured.
3973 [ $MDSCOUNT -lt 2 ] &&
3974 skip "The test needs at least 2 MDTs" && return
# Describe the scenario in the test log.
3977 echo "For some reason, the slave MDT-object of the striped directory"
3978 echo "may lost its slave LMV EA. The LFSCK should re-generate the"
3979 echo "slave LMV EA."
3982 check_mount_and_prep
3984 echo "Inject failure stub on MDT0 to simulate the case that the"
3985 echo "slave MDT-object (that resides on the same MDT as the master"
3986 echo "MDT-object resides on) lost the LMV EA."
# The fail_loc/fail_val pair is active only while the striped directory is
# being created, and is reset to 0 immediately afterwards.
3988 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
3989 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=0
3990 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3991 error "(1) Fail to create striped directory"
3992 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
3994 echo "Trigger namespace LFSCK to re-generate slave LMV EA"
# -r/-A are presumably "reset" and "all MDTs" -- TODO confirm against the
# lctl lfsck_start documentation.
3995 $START_NAMESPACE -r -A ||
3996 error "(2) Fail to start LFSCK for namespace"
# Poll the lfsck_namespace status on $SINGLEMDS until it reads "completed"
# (32 is presumably the maximum wait in seconds).
3998 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3999 mdd.${MDT_DEV}.lfsck_namespace |
4000 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4002 error "(3) unexpected status"
# Exactly one shard must have been repaired.
4005 local repaired=$($SHOW_NAMESPACE |
4006 awk '/^striped_shards_repaired/ { print $2 }')
4007 [ $repaired -eq 1 ] ||
4008 error "(4) Fail to re-generate slave LMV EA: $repaired"
4010 rmdir $DIR/$tdir/striped_dir ||
4011 error "(5) Fail to remove the striped directory after LFSCK"
4013 run_test 31e "Re-generate the lost slave LMV EA for striped directory (1)"
# Body of LFSCK test 31f, registered by the run_test call that ends this block.
# Scenario: create a striped directory while fail_loc 0x162a
# (OBD_FAIL_LFSCK_LOST_SLAVE_LMV) is set on $SINGLEMDS with fail_val=1, run
# the namespace LFSCK and require striped_shards_repaired == 1 as reported by
# facet mds2 (status and counter are both read from mds2 here).
# The test is skipped unless at least two MDTs are configured.
4016 [ $MDSCOUNT -lt 2 ] &&
4017 skip "The test needs at least 2 MDTs" && return
# Describe the scenario in the test log.
4020 echo "For some reason, the slave MDT-object of the striped directory"
4021 echo "may lost its slave LMV EA. The LFSCK should re-generate the"
4022 echo "slave LMV EA."
4025 check_mount_and_prep
4027 echo "Inject failure stub on MDT0 to simulate the case that the"
4028 echo "slave MDT-object (that resides on differnt MDT as the master"
4029 echo "MDT-object resides on) lost the LMV EA."
# The fail_loc/fail_val pair is set on $SINGLEMDS only while the striped
# directory is being created, and is reset to 0 immediately afterwards.
# NOTE(review): the message above says MDT0 while the repair is verified on
# mds2 -- presumably fail_val selects the affected shard; confirm.
4031 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
4032 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=1
4033 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4034 error "(1) Fail to create striped directory"
4035 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4037 echo "Trigger namespace LFSCK to re-generate slave LMV EA"
# LFSCK is started through $SINGLEMDS (see START_NAMESPACE); -r/-A are
# presumably "reset" and "all MDTs" -- TODO confirm against lctl docs.
4038 $START_NAMESPACE -r -A ||
4039 error "(2) Fail to start LFSCK for namespace"
# Poll the namespace LFSCK status on mds2 until it reads "completed"
# (32 is presumably the maximum wait in seconds).
4041 wait_update_facet mds2 "$LCTL get_param -n \
4042 mdd.$(facet_svc mds2).lfsck_namespace |
4043 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
4044 error "(3) unexpected status"
# Exactly one shard must have been repaired according to mds2.
4046 local repaired=$(do_facet mds2 $LCTL get_param -n \
4047 mdd.$(facet_svc mds2).lfsck_namespace |
4048 awk '/^striped_shards_repaired/ { print $2 }')
4049 [ $repaired -eq 1 ] ||
4050 error "(4) Fail to re-generate slave LMV EA: $repaired"
4052 rmdir $DIR/$tdir/striped_dir ||
4053 error "(5) Fail to remove the striped directory after LFSCK"
4055 run_test 31f "Re-generate the lost slave LMV EA for striped directory (2)"
# Body of LFSCK test 31g, registered by the run_test call that ends this block.
# Scenario: create a striped directory while fail_loc 0x162b
# (OBD_FAIL_LFSCK_BAD_SLAVE_LMV) is set on $SINGLEMDS with fail_val=0, run
# the namespace LFSCK, require striped_shards_repaired == 1, and then check
# that a file can be created and removed in the directory after a remount.
# The test is skipped unless at least two MDTs are configured.
4058 [ $MDSCOUNT -lt 2 ] &&
4059 skip "The test needs at least 2 MDTs" && return
# Describe the scenario in the test log.
4062 echo "For some reason, the stripe index in the slave LMV EA is"
4063 echo "corrupted. The LFSCK should repair the slave LMV EA."
4066 check_mount_and_prep
4068 echo "Inject failure stub on MDT0 to simulate the case that the"
4069 echo "slave LMV EA on the first shard of the striped directory"
4070 echo "claims the same index as the second shard claims"
# The fail_loc/fail_val pair is active only while the striped directory is
# being created, and is reset to 0 immediately afterwards.
4072 #define OBD_FAIL_LFSCK_BAD_SLAVE_LMV 0x162b
4073 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162b fail_val=0
4074 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4075 error "(1) Fail to create striped directory"
4076 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4078 echo "Trigger namespace LFSCK to repair the slave LMV EA"
# -r/-A are presumably "reset" and "all MDTs" -- TODO confirm against the
# lctl lfsck_start documentation.
4079 $START_NAMESPACE -r -A ||
4080 error "(2) Fail to start LFSCK for namespace"
# Poll the lfsck_namespace status on $SINGLEMDS until it reads "completed"
# (32 is presumably the maximum wait in seconds).
4082 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4083 mdd.${MDT_DEV}.lfsck_namespace |
4084 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4086 error "(3) unexpected status"
# Exactly one shard must have been repaired.
4089 local repaired=$($SHOW_NAMESPACE |
4090 awk '/^striped_shards_repaired/ { print $2 }')
4091 [ $repaired -eq 1 ] ||
4092 error "(4) Fail to repair slave LMV EA: $repaired"
# Remount the client, then verify the directory is usable again.
4094 umount_client $MOUNT || error "(5) umount failed"
4095 mount_client $MOUNT || error "(6) mount failed"
4097 touch $DIR/$tdir/striped_dir/foo ||
4098 error "(7) Fail to touch file after the LFSCK"
4100 rm -f $DIR/$tdir/striped_dir/foo ||
4101 error "(8) Fail to unlink file after the LFSCK"
4103 rmdir $DIR/$tdir/striped_dir ||
4104 error "(9) Fail to remove the striped directory after LFSCK"
4106 run_test 31g "Repair the corrupted slave LMV EA"
# Body of LFSCK test 31h, registered by the run_test call that ends this block.
# Scenario: create a striped directory while fail_loc 0x162c
# (OBD_FAIL_LFSCK_BAD_SLAVE_NAME) is set on $SINGLEMDS with fail_val=0, run
# the namespace LFSCK, require dirent_repaired == 1, and then check that a
# file can be created and removed in the directory after a remount.
# The test is skipped unless at least two MDTs are configured.
4109 [ $MDSCOUNT -lt 2 ] &&
4110 skip "The test needs at least 2 MDTs" && return
# Describe the scenario in the test log.
4113 echo "For some reason, the shard's name entry in the striped"
4114 echo "directory may be corrupted. The LFSCK should repair the"
4115 echo "bad shard's name entry."
4118 check_mount_and_prep
4120 echo "Inject failure stub on MDT0 to simulate the case that the"
4121 echo "first shard's name entry in the striped directory claims"
4122 echo "the same index as the second shard's name entry claims."
# The fail_loc/fail_val pair is active only while the striped directory is
# being created, and is reset to 0 immediately afterwards.
4124 #define OBD_FAIL_LFSCK_BAD_SLAVE_NAME 0x162c
4125 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162c fail_val=0
4126 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4127 error "(1) Fail to create striped directory"
4128 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4130 echo "Trigger namespace LFSCK to repair the shard's name entry"
# -r/-A are presumably "reset" and "all MDTs" -- TODO confirm against the
# lctl lfsck_start documentation.
4131 $START_NAMESPACE -r -A ||
4132 error "(2) Fail to start LFSCK for namespace"
# Poll the lfsck_namespace status on $SINGLEMDS until it reads "completed"
# (32 is presumably the maximum wait in seconds).
4134 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4135 mdd.${MDT_DEV}.lfsck_namespace |
4136 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4138 error "(3) unexpected status"
# Exactly one directory entry must have been repaired.
4141 local repaired=$($SHOW_NAMESPACE |
4142 awk '/^dirent_repaired/ { print $2 }')
4143 [ $repaired -eq 1 ] ||
4144 error "(4) Fail to repair shard's name entry: $repaired"
# Remount the client, then verify the directory is usable again.
4146 umount_client $MOUNT || error "(5) umount failed"
4147 mount_client $MOUNT || error "(6) mount failed"
4149 touch $DIR/$tdir/striped_dir/foo ||
4150 error "(7) Fail to touch file after the LFSCK"
4152 rm -f $DIR/$tdir/striped_dir/foo ||
4153 error "(8) Fail to unlink file after the LFSCK"
4155 rmdir $DIR/$tdir/striped_dir ||
4156 error "(9) Fail to remove the striped directory after LFSCK"
4158 run_test 31h "Repair the corrupted shard's name entry"
# Put back the MDSSIZE/OSTSIZE/OSTCOUNT values that were stashed in the
# SAVED_* variables at the top of the script, before the test run shrank
# them to speed up formatting.
4160 # restore MDS/OST size
4161 MDSSIZE=${SAVED_MDSSIZE}
4162 OSTSIZE=${SAVED_OSTSIZE}
4163 OSTCOUNT=${SAVED_OSTCOUNT}
4165 # finally, clean up the system