# Top-level setup for the LFSCK sanity tests: assemble the list of
# excluded tests, load the test framework and configuration, and define
# the device names and command strings shared by the tests.
3 # Run selected tests by setting ONLY, or by passing them as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# LUSTRE defaults to the parent of the directory that holds this script.
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
# Source the configuration; CONFIG defaults to $LUSTRE/tests/cfg/$NAME.sh.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with status 0 when require_dsh_mds fails.
20 require_dsh_mds || exit 0
# Keep copies of the incoming MDSSIZE, OSTSIZE and OSTCOUNT values.
24 SAVED_MDSSIZE=${MDSSIZE}
25 SAVED_OSTSIZE=${OSTSIZE}
26 SAVED_OSTCOUNT=${OSTCOUNT}
27 # Use a small MDS and OST size to shorten the formatting time.
28 # Do not make MDSSIZE/OSTSIZE too small, because they affect the default journal size.
31 # Many OSTs are not needed; cap the count to reduce the format/start/stop overhead.
32 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
34 # Build up a clean test environment.
# Version gating: each check below appends test names to ALWAYS_EXCEPT
# (or skips the whole script) when the queried server is older than the
# release named in the check. The 2c check uses -le rather than -lt.
38 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
39 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
42 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
43 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
45 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
46 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
48 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
49 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27 28 29 30 31"
51 # DNE does not support striped directories on a zfs-based backend yet.
52 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
53 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 31"
# Device names and reusable command strings. The START_*, STOP_* and
# SHOW_* strings are expanded unquoted by the tests, which lets callers
# append options such as -r or -s after them.
57 MDT_DEV="${FSNAME}-MDT0000"
58 OST_DEV="${FSNAME}-OST0000"
59 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
60 START_NAMESPACE="do_facet $SINGLEMDS \
61 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
62 START_LAYOUT="do_facet $SINGLEMDS \
63 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
64 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
65 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
66 SHOW_NAMESPACE="do_facet $SINGLEMDS \
67 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
68 SHOW_LAYOUT="do_facet $SINGLEMDS \
69 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
70 SHOW_LAYOUT_ON_OST="do_facet ost1 \
71 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# MDT mount options: the second variant adds noscrub to the first.
72 MOUNT_OPTS_SCRUB="-o user_xattr"
73 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
# Populate $DIR/$tdir with the files and directories the tests scan.
# nfiles, ndirs and igif are assigned outside the lines visible here.
82 echo "preparing... $nfiles * $ndirs files will be created $(date)."
# When igif is non-empty, set fail_loc 0x1504 before creating anything.
# NOTE(review): $igif is unquoted inside [ ]; confirm it never holds
# whitespace or more than one word.
83 if [ ! -z $igif ]; then
84 #define OBD_FAIL_FID_IGIF 0x1504
85 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
# Copy the test scripts in as sample content.
88 cp $LUSTRE/tests/*.sh $DIR/$tdir/
# With ndirs > 0: create ndirs directories (prefix d) and ndirs files
# (prefix f); with nfiles > 0, also create nfiles files in each d<i>.
89 if [ $ndirs -gt 0 ]; then
90 createmany -d $DIR/$tdir/d $ndirs
91 createmany -m $DIR/$tdir/f $ndirs
92 if [ $nfiles -gt 0 ]; then
93 for ((i = 0; i < $ndirs; i++)); do
94 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
95 /dev/null || error "createmany $nfiles"
# Create ndirs more directories with prefix e.
98 createmany -d $DIR/$tdir/e $ndirs
# With igif set, create one extra file and then clear fail_loc.
101 if [ ! -z $igif ]; then
102 touch $DIR/$tdir/dummy
103 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
106 echo "prepared $(date)."
# Test 0: drive namespace LFSCK by hand (start with reset, stop, start
# again, let it finish, reset again) and check the reported status after
# each step.
# fail_val=3 with OBD_FAIL_LFSCK_DELAY1 is set before starting so that
# the status can be sampled while still in phase 1 (presumably a
# per-object delay; confirm against the server code).
112 #define OBD_FAIL_LFSCK_DELAY1 0x1600
113 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
114 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
116 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
# The status value is the second field of the line starting with status.
118 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
119 [ "$STATUS" == "scanning-phase1" ] ||
120 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the scan and confirm it reports stopped.
122 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
124 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
125 [ "$STATUS" == "stopped" ] ||
126 error "(6) Expect 'stopped', but got '$STATUS'"
# Start again without -r and confirm it is scanning in phase 1.
128 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
130 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
131 [ "$STATUS" == "scanning-phase1" ] ||
132 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection and wait for the status to become completed
# (32 is the limit passed to wait_update_facet; presumably seconds).
134 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
135 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
136 mdd.${MDT_DEV}.lfsck_namespace |
137 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
139 error "(9) unexpected status"
# A clean filesystem must report zero phase-1 updates.
142 local repaired=$($SHOW_NAMESPACE |
143 awk '/^updated_phase1/ { print $2 }')
144 [ $repaired -eq 0 ] ||
145 error "(10) Expect nothing to be repaired, but got: $repaired"
# success_count must grow by exactly one after a second full run.
147 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
148 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
149 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
150 mdd.${MDT_DEV}.lfsck_namespace |
151 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
153 error "(12) unexpected status"
156 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
157 [ $((scanned1 + 1)) -eq $scanned2 ] ||
158 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
# Regression check for LU-3649: stopping everything must succeed.
160 echo "stopall, should NOT crash LU-3649"
161 stopall || error "(14) Fail to stopall"
163 run_test 0 "Control LFSCK manually"
# Test 1a: create a file while OBD_FAIL_FID_INDIR is injected, run
# namespace LFSCK, and expect exactly one repaired entry.
# Only runs when the MDS backend is ldiskfs.
166 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
167 skip "OI Scrub not implemented for ZFS" && return
171 #define OBD_FAIL_FID_INDIR 0x1501
172 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
173 touch $DIR/$tdir/dummy
175 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a full namespace scan from the beginning (-r) and wait for completed.
177 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
178 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
179 mdd.${MDT_DEV}.lfsck_namespace |
180 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
182 error "(4) unexpected status"
# Read dirent_repaired; when that field is absent (empty result), fall
# back to updated_phase1.
185 local repaired=$($SHOW_NAMESPACE |
186 awk '/^dirent_repaired/ { print $2 }')
187 # for interop with old server
188 [ -z "$repaired" ] &&
189 repaired=$($SHOW_NAMESPACE |
190 awk '/^updated_phase1/ { print $2 }')
192 [ $repaired -eq 1 ] ||
193 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
195 mount_client $MOUNT || error "(6) Fail to start client!"
# Listing the directory must succeed while OBD_FAIL_FID_LOOKUP is set.
197 #define OBD_FAIL_FID_LOOKUP 0x1505
198 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
199 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
201 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
203 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
# Test 1b: create a file while OBD_FAIL_FID_INLMA is injected, run
# namespace LFSCK with OBD_FAIL_FID_NOLMA set, and expect exactly one
# repaired entry. Only runs when the MDS backend is ldiskfs.
207 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
208 skip "OI Scrub not implemented for ZFS" && return
212 #define OBD_FAIL_FID_INLMA 0x1502
213 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
214 touch $DIR/$tdir/dummy
216 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# This fail_loc stays set for the whole scan and is cleared afterwards.
218 #define OBD_FAIL_FID_NOLMA 0x1506
219 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
220 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
221 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
222 mdd.${MDT_DEV}.lfsck_namespace |
223 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
225 error "(4) unexpected status"
# Read dirent_repaired; when that field is absent (empty result), fall
# back to updated_phase1.
228 local repaired=$($SHOW_NAMESPACE |
229 awk '/^dirent_repaired/ { print $2 }')
230 # for interop with old server
231 [ -z "$repaired" ] &&
232 repaired=$($SHOW_NAMESPACE |
233 awk '/^updated_phase1/ { print $2 }')
235 [ $repaired -eq 1 ] ||
236 error "(5) Fail to repair the missing FID-in-LMA: $repaired"
238 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
239 mount_client $MOUNT || error "(6) Fail to start client!"
# stat of the file must succeed while OBD_FAIL_FID_LOOKUP is set.
241 #define OBD_FAIL_FID_LOOKUP 0x1505
242 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
243 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
245 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
247 run_test 1b "LFSCK can find out and repair the missing FID-in-LMA"
# Test 2a: create a file while OBD_FAIL_LFSCK_LINKEA_CRASH is injected,
# run namespace LFSCK, expect one repair, then verify the link count and
# the fid-to-path round trip from a client.
252 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
253 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
254 touch $DIR/$tdir/dummy
256 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
258 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
259 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
260 mdd.${MDT_DEV}.lfsck_namespace |
261 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
263 error "(4) unexpected status"
# Read linkea_repaired; when that field is absent (empty result), fall
# back to updated_phase2.
266 local repaired=$($SHOW_NAMESPACE |
267 awk '/^linkea_repaired/ { print $2 }')
268 # for interop with old server
269 [ -z "$repaired" ] &&
270 repaired=$($SHOW_NAMESPACE |
271 awk '/^updated_phase2/ { print $2 }')
273 [ $repaired -eq 1 ] ||
274 error "(5) Fail to repair crashed linkEA: $repaired"
276 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a single link.
278 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
279 error "(7) Fail to stat $DIR/$tdir/dummy"
# path2fid followed by fid2path must return the original path.
281 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
282 local dummyname=$($LFS fid2path $DIR $dummyfid)
283 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
284 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
286 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
# Test 2b: create a file while OBD_FAIL_LFSCK_LINKEA_MORE is injected,
# run namespace LFSCK, expect updated_phase2 to be 1, then verify the
# link count and the fid-to-path round trip from a client.
292 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
293 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
294 touch $DIR/$tdir/dummy
296 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
298 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
299 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
300 mdd.${MDT_DEV}.lfsck_namespace |
301 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
303 error "(4) unexpected status"
# NOTE(review): $repaired is unquoted; if the field is missing the
# numeric test itself fails with a syntax complaint. Confirm the field
# is always present on the servers this test runs against.
306 local repaired=$($SHOW_NAMESPACE |
307 awk '/^updated_phase2/ { print $2 }')
308 [ $repaired -eq 1 ] ||
309 error "(5) Fail to repair crashed linkEA: $repaired"
311 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a single link.
313 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
314 error "(7) Fail to stat $DIR/$tdir/dummy"
# path2fid followed by fid2path must return the original path.
316 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
317 local dummyname=$($LFS fid2path $DIR $dummyfid)
318 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
319 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
321 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
# Test 2c: create a file while OBD_FAIL_LFSCK_LINKEA_MORE2 is injected,
# run namespace LFSCK, expect updated_phase2 to be 1, then verify the
# link count and the fid-to-path round trip from a client.
327 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
328 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
329 touch $DIR/$tdir/dummy
331 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
333 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
334 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
335 mdd.${MDT_DEV}.lfsck_namespace |
336 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
338 error "(4) unexpected status"
# Exactly one phase-2 update is expected.
341 local repaired=$($SHOW_NAMESPACE |
342 awk '/^updated_phase2/ { print $2 }')
343 [ $repaired -eq 1 ] ||
344 error "(5) Fail to repair crashed linkEA: $repaired"
346 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a single link.
348 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
349 error "(7) Fail to stat $DIR/$tdir/dummy"
# path2fid followed by fid2path must return the original path.
351 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
352 local dummyname=$($LFS fid2path $DIR $dummyfid)
353 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
354 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
356 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
# Test 2d: create a file while OBD_FAIL_LFSCK_NO_LINKEA is injected,
# run namespace LFSCK, expect linkea_repaired to be 1, then verify the
# link count and the fid-to-path round trip from a client.
362 #define OBD_FAIL_LFSCK_NO_LINKEA 0x161d
363 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
364 touch $DIR/$tdir/dummy
366 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
368 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
369 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
370 mdd.${MDT_DEV}.lfsck_namespace |
371 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
373 error "(4) unexpected status"
# Exactly one linkEA repair is expected. Unlike test 2a there is no
# fallback to updated_phase2 here.
376 local repaired=$($SHOW_NAMESPACE |
377 awk '/^linkea_repaired/ { print $2 }')
378 [ $repaired -eq 1 ] ||
379 error "(5) Fail to repair crashed linkEA: $repaired"
381 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a single link.
383 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
384 error "(7) Fail to stat $DIR/$tdir/dummy"
# path2fid followed by fid2path must return the original path.
386 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
387 local dummyname=$($LFS fid2path $DIR $dummyfid)
388 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
389 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
391 run_test 2d "LFSCK can recover the missing linkEA entry"
# Test 2e: with at least two MDSes, create d0 on MDT index 1 and, while
# OBD_FAIL_LFSCK_LINKEA_CRASH is injected, d0/d1 on MDT index 0. Then
# run namespace LFSCK with -A and expect one linkEA repair.
395 [ $MDSCOUNT -lt 2 ] &&
396 skip "We need at least 2 MDSes for this test" && return
400 $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT1"
402 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
403 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
404 $LFS mkdir -i 0 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT0"
405 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# -A is passed in addition to -r (presumably start on all targets;
# confirm against the lctl lfsck_start documentation).
407 $START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace!"
408 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
409 mdd.${MDT_DEV}.lfsck_namespace |
410 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
412 error "(4) unexpected status"
# Exactly one linkEA repair is expected.
415 local repaired=$($SHOW_NAMESPACE |
416 awk '/^linkea_repaired/ { print $2 }')
417 [ $repaired -eq 1 ] ||
418 error "(5) Fail to repair crashed linkEA: $repaired"
# path2fid followed by fid2path must return the original path.
420 local fid=$($LFS path2fid $DIR/$tdir/d0/d1)
421 local name=$($LFS fid2path $DIR $fid)
422 [ "$name" == "$DIR/$tdir/d0/d1" ] ||
423 error "(6) Fail to repair linkEA: $fid $name"
425 run_test 2e "namespace LFSCK can verify remote object linkEA"
# Test 3: build hard links, two of them under fault injection, run
# namespace LFSCK, and check the phase-2 and multiple-linked counters.
# Two hard links created with no fault injection.
431 mkdir $DIR/$tdir/dummy || error "(1) Fail to mkdir"
432 ln $DIR/$tdir/d0/f0 $DIR/$tdir/dummy/f0 || error "(2) Fail to hardlink"
433 ln $DIR/$tdir/d0/f1 $DIR/$tdir/dummy/f1 || error "(3) Fail to hardlink"
# A directory on MDT index 0 with two regular files.
435 $LFS mkdir -i 0 $DIR/$tdir/edir || error "(4) Fail to mkdir"
436 touch $DIR/$tdir/edir/f0 || error "(5) Fail to touch"
437 touch $DIR/$tdir/edir/f1 || error "(6) Fail to touch"
# One hard link created under each of two different fault injections.
439 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
440 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
441 ln $DIR/$tdir/edir/f0 $DIR/$tdir/edir/w0 || error "(7) Fail to hardlink"
443 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
444 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
445 ln $DIR/$tdir/edir/f1 $DIR/$tdir/edir/w1 || error "(8) Fail to hardlink"
447 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
449 $START_NAMESPACE -r || error "(9) Fail to start LFSCK for namespace!"
450 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
451 mdd.${MDT_DEV}.lfsck_namespace |
452 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
454 error "(10) unexpected status"
# At least four objects must have been checked in phase 2.
457 local checked=$($SHOW_NAMESPACE |
458 awk '/^checked_phase2/ { print $2 }')
459 [ $checked -ge 4 ] ||
460 error "(11) Fail to check multiple-linked object: $checked"
# At least two multiple-linked objects must have been repaired.
462 local repaired=$($SHOW_NAMESPACE |
463 awk '/^multiple_linked_repaired/ { print $2 }')
464 [ $repaired -ge 2 ] ||
465 error "(12) Fail to repair multiple-linked object: $repaired"
467 run_test 3 "LFSCK can verify multiple-linked objects"
# Test 4: back up and restore the MDT, restart it with noscrub, run
# namespace LFSCK, and expect at least nine repaired entries.
# Only runs when the MDS backend is ldiskfs.
471 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
472 skip "OI Scrub not implemented for ZFS" && return
# Unmount the client and stop the MDS before the backup/restore.
475 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
476 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
478 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
479 echo "start $SINGLEMDS with disabling OI scrub"
480 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
481 error "(2) Fail to start MDS!"
# Start the scan with OBD_FAIL_LFSCK_DELAY2 set and wait until the flags
# field reads inconsistent.
483 #define OBD_FAIL_LFSCK_DELAY2 0x1601
484 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
485 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
486 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
487 mdd.${MDT_DEV}.lfsck_namespace |
488 awk '/^flags/ { print \\\$2 }'" "inconsistent" 32 || {
490 error "(5) unexpected status"
493 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
494 [ "$STATUS" == "scanning-phase1" ] ||
495 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection and wait for the scan to complete.
497 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
498 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
499 mdd.${MDT_DEV}.lfsck_namespace |
500 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
502 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): FLAGS is assigned without local in the lines visible
# here; confirm whether that is intended.
505 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
506 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Read dirent_repaired; when that field is absent (empty result), fall
# back to updated_phase1.
508 local repaired=$($SHOW_NAMESPACE |
509 awk '/^dirent_repaired/ { print $2 }')
510 # for interop with old server
511 [ -z "$repaired" ] &&
512 repaired=$($SHOW_NAMESPACE |
513 awk '/^updated_phase1/ { print $2 }')
515 [ $repaired -ge 9 ] ||
516 error "(9) Fail to re-generate FID-in-dirent: $repaired"
518 mount_client $MOUNT || error "(10) Fail to start client!"
# Listing the directory must succeed while OBD_FAIL_FID_LOOKUP is set.
520 #define OBD_FAIL_FID_LOOKUP 0x1505
521 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
522 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
523 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
525 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
# Test 5: back up and restore the MDT (mds_backup_restore is called with
# an extra argument 1), restart it with noscrub, run namespace LFSCK,
# and expect at least two repaired entries plus a working fid-to-path
# round trip. Only runs when the MDS backend is ldiskfs.
529 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
530 skip "OI Scrub not implemented for ZFS" && return
# Unmount the client and stop the MDS before the backup/restore.
533 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
534 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
536 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
537 echo "start $SINGLEMDS with disabling OI scrub"
538 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
539 error "(2) Fail to start MDS!"
# Start the scan with OBD_FAIL_LFSCK_DELAY2 set and wait until the flags
# field reads inconsistent,upgrade.
541 #define OBD_FAIL_LFSCK_DELAY2 0x1601
542 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
543 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
544 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
545 mdd.${MDT_DEV}.lfsck_namespace |
546 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 32 || {
548 error "(5) unexpected status"
551 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
552 [ "$STATUS" == "scanning-phase1" ] ||
553 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection and wait for the scan to complete.
555 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
556 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
557 mdd.${MDT_DEV}.lfsck_namespace |
558 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
560 error "(7) unexpected status"
# After completion the flags field must be empty.
563 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
564 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Read dirent_repaired; when that field is absent (empty result), fall
# back to updated_phase1.
566 local repaired=$($SHOW_NAMESPACE |
567 awk '/^dirent_repaired/ { print $2 }')
568 # for interop with old server
569 [ -z "$repaired" ] &&
570 repaired=$($SHOW_NAMESPACE |
571 awk '/^updated_phase1/ { print $2 }')
573 [ $repaired -ge 2 ] ||
574 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
576 mount_client $MOUNT || error "(10) Fail to start client!"
# Both stat and ls must succeed while OBD_FAIL_FID_LOOKUP is set.
578 #define OBD_FAIL_FID_LOOKUP 0x1505
579 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
580 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
582 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
# With injection cleared, path2fid followed by fid2path must return the
# original path.
584 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
585 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
586 local dummyname=$($LFS fid2path $DIR $dummyfid)
587 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
588 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
590 run_test 5 "LFSCK can handle IGIF object upgrading"
# Test 6a: make a slowed-down scan fail, restart it without -r, and
# check that latest_start_position is beyond last_checkpoint_position.
595 #define OBD_FAIL_LFSCK_DELAY1 0x1600
596 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
597 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
599 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
600 [ "$STATUS" == "scanning-phase1" ] ||
601 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
603 # Sleep 3 sec to guarantee at least one object processed by LFSCK
# NOTE(review): the sleep command this comment refers to is not among
# the lines visible here.
605 # Fail the LFSCK to guarantee there is at least one checkpoint
# fail_loc is 0x1608 with the 0x80000000 bit set (presumably a one-shot
# flag; confirm against the fail_loc definitions).
606 #define OBD_FAIL_LFSCK_FATAL1 0x1608
607 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
608 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
609 mdd.${MDT_DEV}.lfsck_namespace |
610 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
612 error "(4) unexpected status"
# POS0 is taken from last_checkpoint_position; the tail of this pipeline
# is not visible here.
615 local POS0=$($SHOW_NAMESPACE |
616 awk '/^last_checkpoint_position/ { print $2 }' |
619 #define OBD_FAIL_LFSCK_DELAY1 0x1600
620 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
621 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
623 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
624 [ "$STATUS" == "scanning-phase1" ] ||
625 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# POS1 is taken from latest_start_position and must be greater than POS0.
627 local POS1=$($SHOW_NAMESPACE |
628 awk '/^latest_start_position/ { print $2 }' |
630 [[ $POS0 -lt $POS1 ]] ||
631 error "(7) Expect larger than: $POS0, but got $POS1"
# Clear the fault injection and wait for the scan to complete.
633 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
634 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
635 mdd.${MDT_DEV}.lfsck_namespace |
636 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
638 error "(8) unexpected status"
641 run_test 6a "LFSCK resumes from last checkpoint (1)"
# Test 6b: like test 6a but with OBD_FAIL_LFSCK_DELAY2 and
# OBD_FAIL_LFSCK_FATAL2, and comparing two position fields (field 2 as
# O_POS, field 4 as D_POS) between the checkpoint and the restart.
646 #define OBD_FAIL_LFSCK_DELAY2 0x1601
647 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
648 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
650 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
651 [ "$STATUS" == "scanning-phase1" ] ||
652 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
654 # Sleep 5 sec to guarantee that we are in the directory scanning
# NOTE(review): the sleep command this comment refers to is not among
# the lines visible here.
656 # Fail the LFSCK to guarantee there is at least one checkpoint
657 #define OBD_FAIL_LFSCK_FATAL2 0x1609
658 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
659 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
660 mdd.${MDT_DEV}.lfsck_namespace |
661 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
663 error "(4) unexpected status"
# Positions recorded at the last checkpoint; the tail of the O_POS0
# pipeline is not visible here.
666 local O_POS0=$($SHOW_NAMESPACE |
667 awk '/^last_checkpoint_position/ { print $2 }' |
670 local D_POS0=$($SHOW_NAMESPACE |
671 awk '/^last_checkpoint_position/ { print $4 }')
673 #define OBD_FAIL_LFSCK_DELAY2 0x1601
674 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
675 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
677 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
678 [ "$STATUS" == "scanning-phase1" ] ||
679 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Positions reported for the restarted scan.
681 local O_POS1=$($SHOW_NAMESPACE |
682 awk '/^latest_start_position/ { print $2 }' |
684 local D_POS1=$($SHOW_NAMESPACE |
685 awk '/^latest_start_position/ { print $4 }')
# When either D_POS value is N/A, compare the O_POS values; the D_POS
# comparison below is presumably the else branch (the else/fi lines are
# not visible here).
687 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
688 [[ $O_POS0 -lt $O_POS1 ]] ||
689 error "(7.1) $O_POS1 is not larger than $O_POS0"
691 [[ $D_POS0 -lt $D_POS1 ]] ||
692 error "(7.2) $D_POS1 is not larger than $D_POS0"
# Clear the fault injection and wait for the scan to complete.
695 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
696 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
697 mdd.${MDT_DEV}.lfsck_namespace |
698 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
700 error "(8) unexpected status"
703 run_test 6b "LFSCK resumes from last checkpoint (2)"
# Test 7a: start a slowed-down scan, stop and restart the MDS while it
# is in phase 1, and expect the scan to reach completed without another
# explicit start command.
710 #define OBD_FAIL_LFSCK_DELAY2 0x1601
711 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
712 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
714 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
715 [ "$STATUS" == "scanning-phase1" ] ||
716 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
718 # Sleep 3 sec to guarantee at least one object processed by LFSCK
# NOTE(review): the sleep command this comment refers to is not among
# the lines visible here.
720 echo "stop $SINGLEMDS"
721 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
723 echo "start $SINGLEMDS"
724 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
725 error "(5) Fail to start MDS!"
# Clear the fault injection; the wait limit here is 30 rather than 32.
727 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
728 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
729 mdd.${MDT_DEV}.lfsck_namespace |
730 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
732 error "(6) unexpected status"
735 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
# Test 7b: like test 7a, but the MDS is stopped and restarted once the
# scan has reached scanning-phase2.
# Create 20 files while OBD_FAIL_LFSCK_LINKEA_MORE is injected.
741 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
742 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
743 for ((i = 0; i < 20; i++)); do
744 touch $DIR/$tdir/dummy${i}
# Start the scan with OBD_FAIL_LFSCK_DELAY3 set and wait for phase 2.
747 #define OBD_FAIL_LFSCK_DELAY3 0x1602
748 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
749 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
750 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
751 mdd.${MDT_DEV}.lfsck_namespace |
752 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
754 error "(4) unexpected status"
757 echo "stop $SINGLEMDS"
758 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
760 echo "start $SINGLEMDS"
761 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
762 error "(6) Fail to start MDS!"
# Clear the fault injection and wait for completed with no explicit
# start command.
764 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
765 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
766 mdd.${MDT_DEV}.lfsck_namespace |
767 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
769 error "(7) unexpected status"
772 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
# Test 8: walk namespace LFSCK through the statuses init,
# scanning-phase1, stopped, failed, crashed, paused, scanning-phase2 and
# completed, checking the reported status or flags at each step.
# Reformat first; the status of a fresh filesystem must be init.
777 formatall > /dev/null
783 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
784 [ "$STATUS" == "init" ] ||
785 error "(2) Expect 'init', but got '$STATUS'"
# Create one directory and five files under two different injections.
787 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
788 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
789 mkdir $DIR/$tdir/crashed
791 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
792 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
793 for ((i = 0; i < 5; i++)); do
794 touch $DIR/$tdir/dummy${i}
797 umount_client $MOUNT || error "(3) Fail to stop client!"
# init -> scanning-phase1 -> stopped -> scanning-phase1.
799 #define OBD_FAIL_LFSCK_DELAY2 0x1601
800 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
801 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
803 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
804 [ "$STATUS" == "scanning-phase1" ] ||
805 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
807 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
809 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
810 [ "$STATUS" == "stopped" ] ||
811 error "(7) Expect 'stopped', but got '$STATUS'"
813 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
815 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
816 [ "$STATUS" == "scanning-phase1" ] ||
817 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
# scanning-phase1 -> failed, via OBD_FAIL_LFSCK_FATAL2.
819 #define OBD_FAIL_LFSCK_FATAL2 0x1609
820 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
821 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
822 mdd.${MDT_DEV}.lfsck_namespace |
823 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
825 error "(10) unexpected status"
# failed -> scanning-phase1 again.
828 #define OBD_FAIL_LFSCK_DELAY1 0x1600
829 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
830 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
832 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
833 [ "$STATUS" == "scanning-phase1" ] ||
834 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the MDS with OBD_FAIL_LFSCK_CRASH set, then restart it with
# OBD_FAIL_LFSCK_NO_AUTO set; the status is expected to be crashed.
836 #define OBD_FAIL_LFSCK_CRASH 0x160a
837 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
840 echo "stop $SINGLEMDS"
841 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
843 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
844 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
846 echo "start $SINGLEMDS"
847 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
848 error "(14) Fail to start MDS!"
# Poll recovery_status until it is no longer RECOVERING or until timer
# reaches timeout. The timer initialisation and increment are not among
# the lines visible here.
850 local timeout=$(max_recovery_time)
853 while [ $timer -lt $timeout ]; do
854 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
855 mdt.${MDT_DEV}.recovery_status |
856 awk '/^status/ { print \\\$2 }'")
857 [ "$STATUS" != "RECOVERING" ] && break;
# NOTE(review): this compares timer and timeout as strings, not numbers.
862 [ $timer != $timeout ] ||
863 error "(14.1) recovery timeout"
865 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
866 [ "$STATUS" == "crashed" ] ||
867 error "(15) Expect 'crashed', but got '$STATUS'"
# crashed -> scanning-phase1, then stop and restart the MDS again with
# OBD_FAIL_LFSCK_NO_AUTO set; the status is expected to be paused.
869 #define OBD_FAIL_LFSCK_DELAY2 0x1601
870 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
871 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
873 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
874 [ "$STATUS" == "scanning-phase1" ] ||
875 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
877 echo "stop $SINGLEMDS"
878 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
880 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
881 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
883 echo "start $SINGLEMDS"
884 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
885 error "(19) Fail to start MDS!"
# Same recovery polling loop as above.
888 while [ $timer -lt $timeout ]; do
889 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
890 mdt.${MDT_DEV}.recovery_status |
891 awk '/^status/ { print \\\$2 }'")
892 [ "$STATUS" != "RECOVERING" ] && break;
897 [ $timer != $timeout ] ||
898 error "(19.1) recovery timeout"
900 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
901 [ "$STATUS" == "paused" ] ||
902 error "(20) Expect 'paused', but got '$STATUS'"
# paused -> scanning-phase2, with OBD_FAIL_LFSCK_DELAY3 set.
904 #define OBD_FAIL_LFSCK_DELAY3 0x1602
905 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
907 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
908 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
909 mdd.${MDT_DEV}.lfsck_namespace |
910 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
912 error "(22) unexpected status"
# During phase 2 the flags field must read scanned-once,inconsistent.
915 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
916 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
917 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
# scanning-phase2 -> completed, after which the flags must be empty.
919 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
920 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
921 mdd.${MDT_DEV}.lfsck_namespace |
922 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
924 error "(24) unexpected status"
927 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
928 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
930 run_test 8 "LFSCK state machine"
# Test 9a: run the phase-1 scan under a speed limit, raise the limit
# mid-run, and check average_speed_phase1 against computed bounds.
# RUN_TIME1, RUN_TIME2 and TIME_DIFF are assigned outside the lines
# visible here.
# Skip when /proc/cpuinfo has no line matching the processor 1 pattern.
933 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
934 skip "Testing on UP system, the speed may be inaccurate."
940 local BASE_SPEED1=100
# Start from the beginning with the speed limit set through -s.
942 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
# NOTE(review): this message reuses the tag (3) from the start command
# above, so the two failures cannot be told apart by tag.
945 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
946 [ "$STATUS" == "scanning-phase1" ] ||
947 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
949 local SPEED=$($SHOW_NAMESPACE |
950 awk '/^average_speed_phase1/ { print $2 }')
952 # There may be a timing error; normally it should be less than 2 seconds.
953 # We allow another 20% for scheduling error.
955 # MAX_MARGIN = 1.2 = 12 / 10
956 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
957 RUN_TIME1 * 12 / 10))
958 [ $SPEED -lt $MAX_SPEED ] ||
959 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
# Raise the limit while the scan is running.
962 local BASE_SPEED2=300
964 do_facet $SINGLEMDS \
965 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
# The average over both intervals must exceed the lower bound. On a
# non-ldiskfs backend a miss is reported through error_ignore (LU-5624).
968 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
969 # MIN_MARGIN = 0.8 = 8 / 10
970 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
971 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
972 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
973 [ $SPEED -gt $MIN_SPEED ] || {
974 if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
975 error_ignore LU-5624 \
976 "(5.1) Got speed $SPEED, expected more than $MIN_SPEED"
979 "(5.2) Got speed $SPEED, expected more than $MIN_SPEED"
983 # MAX_MARGIN = 1.2 = 12 / 10
984 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
985 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
986 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
987 [ $SPEED -lt $MAX_SPEED ] ||
988 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the limit to 0 and wait for the scan to complete.
990 do_facet $SINGLEMDS \
991 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
993 wait_update_facet $SINGLEMDS \
994 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
995 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
996 error "(7) Failed to get expected 'completed'"
998 run_test 9a "LFSCK speed control (1)"
# Test 9b: like test 9a, but the speed checks are made while the scan is
# in phase 2 and use average_speed_phase2.
# RUN_TIME1, RUN_TIME2 and TIME_DIFF are assigned outside the lines
# visible here.
# Skip when /proc/cpuinfo has no line matching the processor 1 pattern.
1001 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
1002 skip "Testing on UP system, the speed may be inaccurate."
# Create 50 directories, 50 files, and 50 files in each directory while
# OBD_FAIL_LFSCK_LINKEA_MORE is injected.
1008 echo "Preparing another 50 * 50 files (with error) at $(date)."
1009 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1010 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
1011 createmany -d $DIR/$tdir/d 50
1012 createmany -m $DIR/$tdir/f 50
1013 for ((i = 0; i < 50; i++)); do
1014 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
# Run a first scan with OBD_FAIL_LFSCK_NO_DOUBLESCAN set; it is expected
# to end in status stopped rather than completed.
1017 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
1018 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
1019 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
1020 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1021 mdd.${MDT_DEV}.lfsck_namespace |
1022 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
1024 error "(5) unexpected status"
1027 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1028 echo "Prepared at $(date)."
# Restart without -r under a speed limit; the scan is expected to be in
# phase 2.
1030 local BASE_SPEED1=50
1032 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
1035 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1036 [ "$STATUS" == "scanning-phase2" ] ||
1037 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
1039 local SPEED=$($SHOW_NAMESPACE |
1040 awk '/^average_speed_phase2/ { print $2 }')
1041 # There may be a timing error; normally it should be less than 2 seconds.
1042 # We allow another 20% for scheduling error.
1044 # MAX_MARGIN = 1.2 = 12 / 10
1045 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
1046 RUN_TIME1 * 12 / 10))
1047 [ $SPEED -lt $MAX_SPEED ] ||
1048 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
1050 # adjust speed limit
1051 local BASE_SPEED2=150
1053 do_facet $SINGLEMDS \
1054 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
# The average over both intervals must exceed the lower bound. On a
# non-ldiskfs backend a miss is reported through error_ignore (LU-5624).
1057 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
1058 # MIN_MARGIN = 0.8 = 8 / 10
1059 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
1060 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
1061 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
1062 [ $SPEED -gt $MIN_SPEED ] || {
1063 if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
1064 error_ignore LU-5624 \
1065 "(9.1) Got speed $SPEED, expected more than $MIN_SPEED"
1068 "(9.2) Got speed $SPEED, expected more than $MIN_SPEED"
1072 # MAX_MARGIN = 1.2 = 12 / 10
1073 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
1074 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
1075 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
1076 [ $SPEED -lt $MAX_SPEED ] ||
1077 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the limit to 0 and wait for the scan to complete.
1079 do_facet $SINGLEMDS \
1080 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1081 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1082 mdd.${MDT_DEV}.lfsck_namespace |
1083 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1085 error "(11) unexpected status"
1088 run_test 9b "LFSCK speed control (2)"
# Test 10 "System is available during LFSCK scanning": starts a
# speed-limited namespace LFSCK and runs ordinary file operations from the
# client while the scan still reports scanning-phase1.
#
# Skipped when the MDS backend is not ldiskfs.
1092 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
1093 skip "lookup(..)/linkea on ZFS issue" && return
1097 echo "Preparing more files with error at $(date)."
# Even-numbered d${i}/f${i} entries are created under fail_loc 0x1603.
1098 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
1099 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
1101 for ((i = 0; i < 1000; i = $((i+2)))); do
1102 mkdir -p $DIR/$tdir/d${i}
1103 touch $DIR/$tdir/f${i}
1104 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
# Odd-numbered d${i}/f${i} entries are created under fail_loc 0x1604.
1107 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1108 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
1110 for ((i = 1; i < 1000; i = $((i+2)))); do
1111 mkdir -p $DIR/$tdir/d${i}
1112 touch $DIR/$tdir/f${i}
1113 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
1116 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1117 echo "Prepared at $(date)."
# Add a second hard link to f200 (f200 is unlinked later, during the scan).
1119 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
# Remount the client before starting the scan.
1121 umount_client $MOUNT
1122 mount_client $MOUNT || error "(3) Fail to start client!"
# Restart the namespace LFSCK with speed limit 100 (-s 100).
1124 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
1127 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1128 [ "$STATUS" == "scanning-phase1" ] ||
1129 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Client-side operations that must succeed during the scan:
# list, create, mkdir, unlink, recursive remove, rename, hard link, symlink.
1131 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
1133 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
1135 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
1137 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
1139 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
1141 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
1143 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
1145 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
1146 error "(14) Fail to softlink!"
# The scan must still be in phase 1 after the operations above.
1148 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1149 [ "$STATUS" == "scanning-phase1" ] ||
1150 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# Set the speed limit to 0 and wait for the scan to report "completed".
1152 do_facet $SINGLEMDS \
1153 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1154 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1155 mdd.${MDT_DEV}.lfsck_namespace |
1156 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1158 error "(16) unexpected status"
1161 run_test 10 "System is available during LFSCK scanning"
# ost_remove_lastid: mounts the backing filesystem of ost${ost}, removes
# O/${idx}/LAST_ID and O/${idx}/d0/0 under its mount point, then unmounts.
# Returns 1 if mount_fstype fails and 2 if unmount_fstype fails.
# NOTE(review): `ost` and `idx` are not assigned in the visible lines; the
# visible caller passes "1 0", so presumably they come from $1/$2 -- confirm.
1164 ost_remove_lastid() {
# Command prefix used to run the removal on the OST facet.
1167 local rcmd="do_facet ost${ost}"
1169 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1171 # step 1: local mount
1172 mount_fstype ost${ost} || return 1
1173 # step 2: remove the specified LAST_ID
# The brace list expands to two paths: .../LAST_ID and .../d0/0.
1174 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
1176 unmount_fstype ost${ost} || return 2
# Test 11a "LFSCK can rebuild lost last_id": removes LAST_ID on ost1 via
# ost_remove_lastid, restarts ost1, runs the layout LFSCK on the OST and
# checks that the "flags" field of lfsck_layout is empty afterwards.
1180 check_mount_and_prep
# Single-stripe layout on OST index 0, then create 64 files.
1181 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1182 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
1187 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1189 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1190 error "(2) Fail to start ost1"
# fail_loc 0x160e with fail_val=3 is set on ost1 before the scan starts.
1192 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1193 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1195 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1196 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
# Wait until the lfsck_layout "flags" field reports crashed_lastid.
1198 wait_update_facet ost1 "$LCTL get_param -n \
1199 obdfilter.${OST_DEV}.lfsck_layout |
1200 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1202 error "(5) unexpected status"
# Clear the fail_loc/fail_val and wait for the scan to complete.
1205 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1207 wait_update_facet ost1 "$LCTL get_param -n \
1208 obdfilter.${OST_DEV}.lfsck_layout |
1209 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1211 error "(6) unexpected status"
1214 echo "the LAST_ID(s) should have been rebuilt"
# NOTE(review): SHOW_LAYOUT_ON_OST is not defined in the visible lines.
1215 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1216 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1218 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b "LFSCK can rebuild crashed last_id": creates objects on ost1
# while fail_loc 0x160d is set, records the in-memory last_id (lastid1),
# restarts ost1 and expects a smaller last_id (lastid2), then runs the
# layout LFSCK on the OST and expects last_id to equal lastid1 again after
# another restart.
1221 check_mount_and_prep
1222 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1224 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1225 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1226 do_facet ost1 $LCTL set_param fail_loc=0x160d
# Create 32 more files than precreated_ost_obj_count reports.
1228 local count=$(precreated_ost_obj_count 0 0)
1230 createmany -o $DIR/$tdir/f $((count + 32))
# seq comes from the MDS-side osp prealloc_last_seq parameter; lastid1 is
# the second ':'-separated field of the last_id line that matches seq.
1232 local proc_path="${FSNAME}-OST0000-osc-MDT0000"
1233 local seq=$(do_facet mds1 $LCTL get_param -n \
1234 osp.${proc_path}.prealloc_last_seq)
1235 local lastid1=$(do_facet ost1 "lctl get_param -n \
1236 obdfilter.${ost1_svc}.last_id" | grep $seq |
1237 awk -F: '{ print $2 }')
1239 umount_client $MOUNT
1240 stop ost1 || error "(1) Fail to stop ost1"
# fail_loc 0x215 is set on ost1 before it is started again.
1242 #define OBD_FAIL_OST_ENOSPC 0x215
1243 do_facet ost1 $LCTL set_param fail_loc=0x215
1245 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1246 error "(2) Fail to start ost1"
# Poll (at most 60 iterations) until a last_id value for seq is reported.
1248 for ((i = 0; i < 60; i++)); do
1249 lastid2=$(do_facet ost1 "lctl get_param -n \
1250 obdfilter.${ost1_svc}.last_id" | grep $seq |
1251 awk -F: '{ print $2 }')
1252 [ ! -z $lastid2 ] && break;
1256 echo "the on-disk LAST_ID should be smaller than the expected one"
1257 [ $lastid1 -gt $lastid2 ] ||
1258 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1260 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1261 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1263 wait_update_facet ost1 "$LCTL get_param -n \
1264 obdfilter.${OST_DEV}.lfsck_layout |
1265 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1267 error "(6) unexpected status"
# Restart ost1 so last_id is read again after the scan.
1270 stop ost1 || error "(7) Fail to stop ost1"
1272 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1273 error "(8) Fail to start ost1"
1275 echo "the on-disk LAST_ID should have been rebuilt"
# Wait until last_id for seq equals lastid1; dump last_id on failure.
1276 wait_update_facet ost1 "$LCTL get_param -n \
1277 obdfilter.${ost1_svc}.last_id | grep $seq |
1278 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
1279 do_facet ost1 $LCTL get_param -n \
1280 obdfilter.${ost1_svc}.last_id
1281 error "(9) expect lastid1 $seq:$lastid1"
1284 do_facet ost1 $LCTL set_param fail_loc=0
1285 stopall || error "(10) Fail to stopall"
1287 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12 "single command to trigger LFSCK on all devices": uses the -A
# option of lfsck_start / lfsck_stop on mds1 and checks the resulting
# status on every MDS (and, for the layout stop, on every OST). The
# sequence start (-s 1) / stop / restart (-s 0) is run first for the
# namespace type and then for the layout type.
1290 [ $MDSCOUNT -lt 2 ] &&
1291 skip "We need at least 2 MDSes for test_12" && return
1293 check_mount_and_prep
# One directory per MDT index, each populated with 100 files.
1294 for k in $(seq $MDSCOUNT); do
1295 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1296 createmany -o $DIR/$tdir/${k}/f 100 ||
1297 error "(0) Fail to create 100 files."
1300 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1301 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1302 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1304 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1305 for k in $(seq $MDSCOUNT); do
1306 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1307 mdd.$(facet_svc mds${k}).lfsck_namespace |
1308 awk '/^status/ { print $2 }')
1309 [ "$STATUS" == "scanning-phase1" ] ||
1310 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1313 echo "Stop namespace LFSCK on all targets by single lctl command."
1314 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1315 error "(4) Fail to stop LFSCK on all devices!"
1317 echo "All the LFSCK targets should be in 'stopped' status."
1318 for k in $(seq $MDSCOUNT); do
1319 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1320 mdd.$(facet_svc mds${k}).lfsck_namespace |
1321 awk '/^status/ { print $2 }')
1322 [ "$STATUS" == "stopped" ] ||
1323 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
1326 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1327 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1328 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1330 echo "All the LFSCK targets should be in 'completed' status."
1331 for k in $(seq $MDSCOUNT); do
1332 wait_update_facet mds${k} "$LCTL get_param -n \
1333 mdd.$(facet_svc mds${k}).lfsck_namespace |
1334 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1335 error "(7) MDS${k} is not the expected 'completed'"
# The layout part of the test runs between start_full_debug_logging and
# stop_full_debug_logging.
1338 start_full_debug_logging
1340 echo "Start layout LFSCK on all targets by single command (-s 1)."
1341 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1342 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1344 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1345 for k in $(seq $MDSCOUNT); do
1346 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1347 mdd.$(facet_svc mds${k}).lfsck_layout |
1348 awk '/^status/ { print $2 }')
1349 [ "$STATUS" == "scanning-phase1" ] ||
1350 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1353 echo "Stop layout LFSCK on all targets by single lctl command."
1354 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1355 error "(10) Fail to stop LFSCK on all devices!"
1357 echo "All the LFSCK targets should be in 'stopped' status."
1358 for k in $(seq $MDSCOUNT); do
1359 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1360 mdd.$(facet_svc mds${k}).lfsck_layout |
1361 awk '/^status/ { print $2 }')
1362 [ "$STATUS" == "stopped" ] ||
1363 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# For the layout type the OSTs are checked as well (obdfilter.*).
1366 for k in $(seq $OSTCOUNT); do
1367 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1368 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1369 awk '/^status/ { print $2 }')
1370 [ "$STATUS" == "stopped" ] ||
1371 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1374 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1375 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1376 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1378 echo "All the LFSCK targets should be in 'completed' status."
1379 for k in $(seq $MDSCOUNT); do
1380 # The LFSCK status query interval is 30 seconds. For the case
1381 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1382 # time to guarantee the status sync up.
1383 wait_update_facet mds${k} "$LCTL get_param -n \
1384 mdd.$(facet_svc mds${k}).lfsck_layout |
1385 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1386 error "(14) MDS${k} is not the expected 'completed'"
1389 stop_full_debug_logging
1391 run_test 12 "single command to trigger LFSCK on all devices"
# Test 13 "LFSCK can repair crashed lmm_oi": creates 32 files while
# fail_loc 0x160f is set on the MDS, runs the layout LFSCK and expects the
# repaired_others counter to be exactly 32.
1395 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1396 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1397 echo "MDT-object FID."
1400 check_mount_and_prep
1402 echo "Inject failure stub to simulate bad lmm_oi"
1403 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1404 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1405 createmany -o $DIR/$tdir/f 32
1406 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1408 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1409 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
# Wait for the layout scan on the MDS to report "completed".
1411 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1412 mdd.${MDT_DEV}.lfsck_layout |
1413 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1415 error "(2) unexpected status"
# NOTE(review): SHOW_LAYOUT is not defined in the visible lines.
1418 local repaired=$($SHOW_LAYOUT |
1419 awk '/^repaired_others/ { print $2 }')
1420 [ $repaired -eq 32 ] ||
1421 error "(3) Fail to repair crashed lmm_oi: $repaired"
1423 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14 "LFSCK can repair MDT-object with dangling reference": creates
# files while fail_loc 0x1610 is set on ost1, checks that a plain layout
# LFSCK counts at least 32 repaired_dangling entries while 'stat' of the
# guard file still fails, then reruns the scan with -c and expects 'stat'
# to report size 0.
1427 echo "The OST-object referenced by the MDT-object should be there;"
1428 echo "otherwise, the LFSCK should re-create the missing OST-object."
1431 check_mount_and_prep
1432 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1434 echo "Inject failure stub to simulate dangling referenced MDT-object"
1435 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1436 do_facet ost1 $LCTL set_param fail_loc=0x1610
1437 local count=$(precreated_ost_obj_count 0 0)
# Create count + 31 files plus "guard" while the fail_loc is active.
1439 createmany -o $DIR/$tdir/f $((count + 31))
1440 touch $DIR/$tdir/guard
1441 do_facet ost1 $LCTL set_param fail_loc=0
1443 start_full_debug_logging
1445 # exhaust other pre-created dangling cases
1446 count=$(precreated_ost_obj_count 0 0)
1447 createmany -o $DIR/$tdir/a $count ||
1448 error "(0) Fail to create $count files."
1450 echo "'ls' should fail because of dangling referenced MDT-object"
# A successful 'ls' is the failure case here.
1451 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
1453 echo "Trigger layout LFSCK to find out dangling reference"
1454 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1456 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1457 mdd.${MDT_DEV}.lfsck_layout |
1458 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1460 error "(3) unexpected status"
1463 local repaired=$($SHOW_LAYOUT |
1464 awk '/^repaired_dangling/ { print $2 }')
1465 [ $repaired -ge 32 ] ||
1466 error "(4) Fail to repair dangling reference: $repaired"
1468 echo "'stat' should fail because of not repair dangling by default"
1469 stat $DIR/$tdir/guard > /dev/null 2>&1 && error "(5) stat should fail"
1471 echo "Trigger layout LFSCK to repair dangling reference"
# Second scan adds the -c option to lfsck_start.
1472 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1474 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1475 mdd.${MDT_DEV}.lfsck_layout |
1476 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1478 error "(7) unexpected status"
1481 # There may be some async LFSCK updates in processing, wait for
1482 # a while until the target reparation has been done. LU-4970.
1484 echo "'stat' should success after layout LFSCK repairing"
# Poll 'stat' on the client until the Size field reads 0.
1485 wait_update_facet client "stat $DIR/$tdir/guard |
1486 awk '/Size/ { print \\\$2 }'" "0" 32 || {
1487 stat $DIR/$tdir/guard
1489 error "(8) unexpected size"
1492 repaired=$($SHOW_LAYOUT |
1493 awk '/^repaired_dangling/ { print $2 }')
1494 [ $repaired -ge 32 ] ||
1495 error "(9) Fail to repair dangling reference: $repaired"
1497 stop_full_debug_logging
1499 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)":
# writes one file while fail_loc 0x1611 is set on ost1, runs the layout
# LFSCK and expects repaired_unmatched_pair to be exactly 1.
1503 echo "If the OST-object referenced by the MDT-object back points"
1504 echo "to some non-exist MDT-object, then the LFSCK should repair"
1505 echo "the OST-object to back point to the right MDT-object."
1508 check_mount_and_prep
1509 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1511 echo "Inject failure stub to make the OST-object to back point to"
1512 echo "non-exist MDT-object."
1513 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
1515 do_facet ost1 $LCTL set_param fail_loc=0x1611
# Write 1 MiB to f0 and drop the osc locks while the fail_loc is active.
1516 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1517 cancel_lru_locks osc
1518 do_facet ost1 $LCTL set_param fail_loc=0
1520 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1521 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1523 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1524 mdd.${MDT_DEV}.lfsck_layout |
1525 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1527 error "(2) unexpected status"
1530 local repaired=$($SHOW_LAYOUT |
1531 awk '/^repaired_unmatched_pair/ { print $2 }')
1532 [ $repaired -eq 1 ] ||
1533 error "(3) Fail to repair unmatched pair: $repaired"
1535 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)":
# writes a "guard" file normally, then writes f0 while fail_loc 0x1612 is
# set on ost1, runs the layout LFSCK and expects repaired_unmatched_pair
# to be exactly 1.
1539 echo "If the OST-object referenced by the MDT-object back points"
1540 echo "to other MDT-object that doesn't recognize the OST-object,"
1541 echo "then the LFSCK should repair it to back point to the right"
1542 echo "MDT-object (the first one)."
1545 check_mount_and_prep
1546 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# "guard" is written before the fail_loc is set.
1547 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1548 cancel_lru_locks osc
1550 echo "Inject failure stub to make the OST-object to back point to"
1551 echo "other MDT-object"
1553 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1554 do_facet ost1 $LCTL set_param fail_loc=0x1612
1555 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1556 cancel_lru_locks osc
1557 do_facet ost1 $LCTL set_param fail_loc=0
1559 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1560 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1562 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1563 mdd.${MDT_DEV}.lfsck_layout |
1564 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1566 error "(2) unexpected status"
1569 local repaired=$($SHOW_LAYOUT |
1570 awk '/^repaired_unmatched_pair/ { print $2 }')
1571 [ $repaired -eq 1 ] ||
1572 error "(3) Fail to repair unmatched pair: $repaired"
1574 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner":
# runs chown on f0 while fail_loc 0x1613 is set on the MDS, then runs the
# layout LFSCK and expects repaired_inconsistent_owner to be exactly 1.
1578 echo "If the OST-object's owner information does not match the owner"
1579 echo "information stored in the MDT-object, then the LFSCK trust the"
1580 echo "MDT-object and update the OST-object's owner information."
1583 check_mount_and_prep
1584 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1585 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1586 cancel_lru_locks osc
1588 echo "Inject failure stub to skip OST-object owner changing"
1589 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1590 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
# Change owner and group to 1 (old-style "user.group" separator).
1591 chown 1.1 $DIR/$tdir/f0
1592 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1594 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1597 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1599 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1600 mdd.${MDT_DEV}.lfsck_layout |
1601 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1603 error "(2) unexpected status"
1606 local repaired=$($SHOW_LAYOUT |
1607 awk '/^repaired_inconsistent_owner/ { print $2 }')
1608 [ $repaired -eq 1 ] ||
1609 error "(3) Fail to repair inconsistent owner: $repaired"
1611 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17 "LFSCK can repair multiple references": with fail_loc 0x1614 set
# on the MDS, writes "guard" and creates f0, checks that f0 shows guard's
# size, runs the layout LFSCK, expects repaired_multiple_referenced to be
# exactly 1, and finally checks that rewriting f0 leaves guard's size
# unchanged.
1615 echo "If more than one MDT-objects reference the same OST-object,"
1616 echo "and the OST-object only recognizes one MDT-object, then the"
1617 echo "LFSCK should create new OST-objects for such non-recognized"
1621 check_mount_and_prep
1622 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1624 echo "Inject failure stub to make two MDT-objects to refernce"
1625 echo "the OST-object"
1627 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1628 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1630 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1631 cancel_lru_locks osc
# Creates a single file; the echo below refers to it as $DIR/$tdir/f0.
1633 createmany -o $DIR/$tdir/f 1
1635 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1637 cancel_lru_locks mdc
1638 cancel_lru_locks osc
1640 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
# Field 5 of 'ls -l' is the file size; 1048576 is the 1 MiB written to guard.
1641 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1642 [ $size -eq 1048576 ] ||
1643 error "(1) f0 (wrong) size should be 1048576, but got $size"
1645 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1648 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1650 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1651 mdd.${MDT_DEV}.lfsck_layout |
1652 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1654 error "(3) unexpected status"
1657 local repaired=$($SHOW_LAYOUT |
1658 awk '/^repaired_multiple_referenced/ { print $2 }')
1659 [ $repaired -eq 1 ] ||
1660 error "(4) Fail to repair multiple references: $repaired"
1662 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
# Writing 2 MiB to f0 must not change guard's 1 MiB size.
1663 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1664 error "(5) Fail to write f0."
1665 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1666 [ $size -eq 1048576 ] ||
1667 error "(6) guard size should be 1048576, but got $size"
1669 run_test 17 "LFSCK can repair multiple references"
# Top-level statement (runs between the test 17 and test 18a registrations):
# passes debug=+cache to $LCTL set_param and discards its stdout.
1671 $LCTL set_param debug=+cache > /dev/null
# Test 18a "Find out orphan OST-object and repair it (1)": writes f1 on
# MDT index 0 (and f2 on MDT index 1 when MDSCOUNT >= 2), runs chown on
# them while fail_loc 0x1615 is set, checks that the reported sizes differ
# from the saved one, runs the layout LFSCK with -o, and then checks the
# repaired_orphan counters and that the sizes match the saved one again.
1675 echo "The target MDT-object is there, but related stripe information"
1676 echo "is lost or partly lost. The LFSCK should regenerate the missing"
1677 echo "layout EA entries."
1680 check_mount_and_prep
1681 $LFS mkdir -i 0 $DIR/$tdir/a1
1682 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1683 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With 'ls -il' the inode number is field 1, so the size is field 6.
1685 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1687 $LFS path2fid $DIR/$tdir/a1/f1
1688 $LFS getstripe $DIR/$tdir/a1/f1
# Second file on MDT index 1 with a 2-stripe layout starting at OST index 1.
1690 if [ $MDSCOUNT -ge 2 ]; then
1691 $LFS mkdir -i 1 $DIR/$tdir/a2
1692 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
1693 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1694 $LFS path2fid $DIR/$tdir/a2/f2
1695 $LFS getstripe $DIR/$tdir/a2/f2
1698 cancel_lru_locks osc
1700 echo "Inject failure, to make the MDT-object lost its layout EA"
1701 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1702 do_facet mds1 $LCTL set_param fail_loc=0x1615
1703 chown 1.1 $DIR/$tdir/a1/f1
1705 if [ $MDSCOUNT -ge 2 ]; then
1706 do_facet mds2 $LCTL set_param fail_loc=0x1615
1707 chown 1.1 $DIR/$tdir/a2/f2
1713 do_facet mds1 $LCTL set_param fail_loc=0
1714 if [ $MDSCOUNT -ge 2 ]; then
1715 do_facet mds2 $LCTL set_param fail_loc=0
1718 cancel_lru_locks mdc
1719 cancel_lru_locks osc
1721 echo "The file size should be incorrect since layout EA is lost"
1722 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1723 [ "$cur_size" != "$saved_size" ] ||
1724 error "(1) Expect incorrect file1 size"
1726 if [ $MDSCOUNT -ge 2 ]; then
1727 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1728 [ "$cur_size" != "$saved_size" ] ||
1729 error "(2) Expect incorrect file2 size"
1732 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1733 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
# NOTE(review): LTIME is not assigned in the visible lines -- confirm.
1735 for k in $(seq $MDSCOUNT); do
1736 # The LFSCK status query interval is 30 seconds. For the case
1737 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1738 # time to guarantee the status sync up.
1739 wait_update_facet mds${k} "$LCTL get_param -n \
1740 mdd.$(facet_svc mds${k}).lfsck_layout |
1741 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1742 error "(4) MDS${k} is not the expected 'completed'"
# Every OST must already report "completed" (checked once, no waiting).
1745 for k in $(seq $OSTCOUNT); do
1746 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1747 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1748 awk '/^status/ { print $2 }')
1749 [ "$cur_status" == "completed" ] ||
1750 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan counts: 1 on mds1, 2 on mds2.
1753 local repaired=$(do_facet mds1 $LCTL get_param -n \
1754 mdd.$(facet_svc mds1).lfsck_layout |
1755 awk '/^repaired_orphan/ { print $2 }')
1756 [ $repaired -eq 1 ] ||
1757 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
1759 if [ $MDSCOUNT -ge 2 ]; then
1760 repaired=$(do_facet mds2 $LCTL get_param -n \
1761 mdd.$(facet_svc mds2).lfsck_layout |
1762 awk '/^repaired_orphan/ { print $2 }')
1763 [ $repaired -eq 2 ] ||
1764 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
1767 $LFS path2fid $DIR/$tdir/a1/f1
1768 $LFS getstripe $DIR/$tdir/a1/f1
1770 if [ $MDSCOUNT -ge 2 ]; then
1771 $LFS path2fid $DIR/$tdir/a2/f2
1772 $LFS getstripe $DIR/$tdir/a2/f2
1775 echo "The file size should be correct after layout LFSCK scanning"
1776 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1777 [ "$cur_size" == "$saved_size" ] ||
1778 error "(7) Expect file1 size $saved_size, but got $cur_size"
1780 if [ $MDSCOUNT -ge 2 ]; then
1781 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1782 [ "$cur_size" == "$saved_size" ] ||
1783 error "(8) Expect file2 size $saved_size, but got $cur_size"
1786 run_test 18a "Find out orphan OST-object and repair it (1)"
# Test 18b "Find out orphan OST-object and repair it (2)": writes f1 (and
# f2 when MDSCOUNT >= 2), records their FIDs, removes them while fail_loc
# 0x1616 is set, runs the layout LFSCK with -o, then moves the
# "<fid>-R-0" entries from .lustre/lost+found/MDTxxxx back to the original
# paths and checks that the sizes match the saved one.
1790 echo "The target MDT-object is lost. The LFSCK should re-create the"
1791 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1792 echo "can move it back to normal namespace manually."
1795 check_mount_and_prep
1796 $LFS mkdir -i 0 $DIR/$tdir/a1
1797 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1798 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With 'ls -il' the inode number is field 1, so the size is field 6.
1799 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
# The FID is saved because it is part of the lost+found entry name below.
1800 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1802 $LFS getstripe $DIR/$tdir/a1/f1
1804 if [ $MDSCOUNT -ge 2 ]; then
1805 $LFS mkdir -i 1 $DIR/$tdir/a2
1806 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
1807 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1808 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1810 $LFS getstripe $DIR/$tdir/a2/f2
1813 cancel_lru_locks osc
1815 echo "Inject failure, to simulate the case of missing the MDT-object"
1816 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1817 do_facet mds1 $LCTL set_param fail_loc=0x1616
1818 rm -f $DIR/$tdir/a1/f1
1820 if [ $MDSCOUNT -ge 2 ]; then
1821 do_facet mds2 $LCTL set_param fail_loc=0x1616
1822 rm -f $DIR/$tdir/a2/f2
1828 do_facet mds1 $LCTL set_param fail_loc=0
1829 if [ $MDSCOUNT -ge 2 ]; then
1830 do_facet mds2 $LCTL set_param fail_loc=0
1833 cancel_lru_locks mdc
1834 cancel_lru_locks osc
1836 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1837 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
# NOTE(review): LTIME is not assigned in the visible lines -- confirm.
1839 for k in $(seq $MDSCOUNT); do
1840 # The LFSCK status query interval is 30 seconds. For the case
1841 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1842 # time to guarantee the status sync up.
1843 wait_update_facet mds${k} "$LCTL get_param -n \
1844 mdd.$(facet_svc mds${k}).lfsck_layout |
1845 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1846 error "(2) MDS${k} is not the expected 'completed'"
# Every OST must already report "completed" (checked once, no waiting).
1849 for k in $(seq $OSTCOUNT); do
1850 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1851 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1852 awk '/^status/ { print $2 }')
1853 [ "$cur_status" == "completed" ] ||
1854 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan counts: 1 on mds1, 2 on mds2.
1857 local repaired=$(do_facet mds1 $LCTL get_param -n \
1858 mdd.$(facet_svc mds1).lfsck_layout |
1859 awk '/^repaired_orphan/ { print $2 }')
1860 [ $repaired -eq 1 ] ||
1861 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
1863 if [ $MDSCOUNT -ge 2 ]; then
1864 repaired=$(do_facet mds2 $LCTL get_param -n \
1865 mdd.$(facet_svc mds2).lfsck_layout |
1866 awk '/^repaired_orphan/ { print $2 }')
1867 [ $repaired -eq 2 ] ||
1868 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
1871 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
# The recovered entries are looked up by name "<fid>-R-0".
1872 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
1873 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
1875 if [ $MDSCOUNT -ge 2 ]; then
1876 local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0
1877 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
1880 $LFS path2fid $DIR/$tdir/a1/f1
1881 $LFS getstripe $DIR/$tdir/a1/f1
1883 if [ $MDSCOUNT -ge 2 ]; then
1884 $LFS path2fid $DIR/$tdir/a2/f2
1885 $LFS getstripe $DIR/$tdir/a2/f2
1888 echo "The file size should be correct after layout LFSCK scanning"
1889 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1890 [ "$cur_size" == "$saved_size" ] ||
1891 error "(7) Expect file1 size $saved_size, but got $cur_size"
1893 if [ $MDSCOUNT -ge 2 ]; then
1894 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1895 [ "$cur_size" == "$saved_size" ] ||
1896 error "(8) Expect file2 size $saved_size, but got $cur_size"
1899 run_test 18b "Find out orphan OST-object and repair it (2)"
# Test 18c "Find out orphan OST-object and repair it (3)": writes f1 (and
# f2 when MDSCOUNT >= 2) while fail_loc 0x1617 is set on ost1, removes them
# while fail_loc 0x1616 is set on the MDS(es), runs the layout LFSCK with
# -o, then checks the repaired_orphan counters and looks for "*-N-*"
# entries under .lustre/lost+found/MDT0000 and MDT0001.
1903 echo "The target MDT-object is lost, and the OST-object FID is missing."
1904 echo "The LFSCK should re-create the MDT-object with new FID under the "
1905 echo "directory .lustre/lost+found/MDTxxxx."
1908 check_mount_and_prep
1909 $LFS mkdir -i 0 $DIR/$tdir/a1
1910 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1912 echo "Inject failure, to simulate the case of missing parent FID"
1913 #define OBD_FAIL_LFSCK_NOPFID 0x1617
1914 do_facet ost1 $LCTL set_param fail_loc=0x1617
1916 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1917 $LFS getstripe $DIR/$tdir/a1/f1
# The second directory lives on MDT index 1 but is striped on OST index 0.
1919 if [ $MDSCOUNT -ge 2 ]; then
1920 $LFS mkdir -i 1 $DIR/$tdir/a2
1921 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a2
1922 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1923 $LFS getstripe $DIR/$tdir/a2/f2
1926 cancel_lru_locks osc
1928 echo "Inject failure, to simulate the case of missing the MDT-object"
1929 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1930 do_facet mds1 $LCTL set_param fail_loc=0x1616
1931 rm -f $DIR/$tdir/a1/f1
1933 if [ $MDSCOUNT -ge 2 ]; then
1934 do_facet mds2 $LCTL set_param fail_loc=0x1616
1935 rm -f $DIR/$tdir/a2/f2
1941 do_facet mds1 $LCTL set_param fail_loc=0
1942 if [ $MDSCOUNT -ge 2 ]; then
1943 do_facet mds2 $LCTL set_param fail_loc=0
1946 cancel_lru_locks mdc
1947 cancel_lru_locks osc
1949 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1950 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
# NOTE(review): LTIME is not assigned in the visible lines -- confirm.
1952 for k in $(seq $MDSCOUNT); do
1953 # The LFSCK status query interval is 30 seconds. For the case
1954 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1955 # time to guarantee the status sync up.
1956 wait_update_facet mds${k} "$LCTL get_param -n \
1957 mdd.$(facet_svc mds${k}).lfsck_layout |
1958 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1959 error "(2) MDS${k} is not the expected 'completed'"
# Every OST must already report "completed" (checked once, no waiting).
1962 for k in $(seq $OSTCOUNT); do
1963 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1964 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1965 awk '/^status/ { print $2 }')
1966 [ "$cur_status" == "completed" ] ||
1967 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): `expected` is compared below but is not assigned in the
# visible lines; presumably it depends on MDSCOUNT -- confirm.
1970 if [ $MDSCOUNT -ge 2 ]; then
1976 local repaired=$(do_facet mds1 $LCTL get_param -n \
1977 mdd.$(facet_svc mds1).lfsck_layout |
1978 awk '/^repaired_orphan/ { print $2 }')
1979 [ $repaired -eq $expected ] ||
1980 error "(4) Expect $expected fixed on mds1, but got: $repaired"
1982 if [ $MDSCOUNT -ge 2 ]; then
1983 repaired=$(do_facet mds2 $LCTL get_param -n \
1984 mdd.$(facet_svc mds2).lfsck_layout |
1985 awk '/^repaired_orphan/ { print $2 }')
1986 [ $repaired -eq 0 ] ||
1987 error "(5) Expect 0 fixed on mds2, but got: $repaired"
1990 ls -ail $MOUNT/.lustre/lost+found/
1992 echo "There should NOT be some stub under .lustre/lost+found/MDT0001/"
# The -name pattern is quoted so that find receives the glob itself; left
# unquoted, the shell could expand it against the current directory first.
1993 if [ -d $MOUNT/.lustre/lost+found/MDT0001 ]; then
1994 cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name "*-N-*")
1996 error "(6) .lustre/lost+found/MDT0001/ should be empty"
1999 echo "There should be some stub under .lustre/lost+found/MDT0000/"
2000 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2001 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
# Same quoting as above: find must do the "*-N-*" matching, not the shell.
2003 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-N-*")
2004 [ ! -z "$cname" ] ||
2005 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
2007 run_test 18c "Find out orphan OST-object and repair it (3)"
# test_18d body: f2 is made to reference f1's OST-object, f1 is then removed,
# and layout LFSCK is expected to hand f2 its original (orphan) OST-object
# back because the conflicting new OST-object was never modified.
# NOTE(review): the function header, closing brace and several statements
# between the lines below are not visible in this excerpt.
2011 echo "The target MDT-object layout EA slot is occpuied by some new"
2012 echo "created OST-object when repair dangling reference case. Such"
2013 echo "conflict OST-object has never been modified. Then when found"
2014 echo "the orphan OST-object, LFSCK will replace it with the orphan"
2018 check_mount_and_prep
# One stripe, starting on OST index 0, 1M stripe size.
2020 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2021 echo "guard" > $DIR/$tdir/a1/f1
2022 echo "foo" > $DIR/$tdir/a1/f2
# Column 6 of `ls -il` is the file size (column 1 is the inode number).
2023 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
# Log the FIDs and layouts of both files before the failure is injected.
2024 $LFS path2fid $DIR/$tdir/a1/f1
2025 $LFS getstripe $DIR/$tdir/a1/f1
2026 $LFS path2fid $DIR/$tdir/a1/f2
2027 $LFS getstripe $DIR/$tdir/a1/f2
2028 cancel_lru_locks osc
2030 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2031 echo "to reference the same OST-object (which is f1's OST-obejct)."
2032 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2033 echo "dangling reference case, but f2's old OST-object is there."
2036 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
2037 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
# The chown runs while the fail_loc is armed; presumably it is the operation
# that triggers the stripe change on the MDS - TODO confirm.
# NOTE(review): "1.1" uses the legacy '.' owner/group separator; "1:1" is the
# documented spelling.
2038 chown 1.1 $DIR/$tdir/a1/f2
2039 rm -f $DIR/$tdir/a1/f1
2042 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2044 echo "stopall to cleanup object cache"
# NOTE(review): the stop step announced above is not visible in this excerpt.
2047 setupall > /dev/null
2049 echo "The file size should be incorrect since dangling referenced"
2050 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2051 [ "$cur_size" != "$saved_size" ] ||
2052 error "(1) Expect incorrect file2 size"
2054 #define OBD_FAIL_LFSCK_DELAY3 0x1602
2055 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
2057 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2058 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
# Wait for MDS1 to report scanning-phase2 before the delay is removed.
# The awk field reference is written as \\\$2 so that a literal $2 survives
# the extra quoting layers - presumably the command string is re-evaluated
# before awk sees it.
2060 wait_update_facet mds1 "$LCTL get_param -n \
2061 mdd.$(facet_svc mds1).lfsck_layout |
2062 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2063 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
2065 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
2067 for k in $(seq $MDSCOUNT); do
2068 # The LFSCK status query interval is 30 seconds. In case
2069 # some LFSCK_NOTIFY RPCs fail or get lost, wait long enough
2070 # to guarantee that the status has synced up.
2071 wait_update_facet mds${k} "$LCTL get_param -n \
2072 mdd.$(facet_svc mds${k}).lfsck_layout |
2073 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2074 error "(3) MDS${k} is not the expected 'completed'"
# OSTs are sampled once (no waiting), after every MDS has reported completed.
2077 for k in $(seq $OSTCOUNT); do
2078 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2079 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2080 awk '/^status/ { print $2 }')
2081 [ "$cur_status" == "completed" ] ||
2082 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
# Exactly one orphan is expected to have been repaired on $SINGLEMDS.
# NOTE(review): $repaired is unquoted; an empty value makes `[` itself fail,
# which still reaches error() but with an empty count in the message.
2085 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2086 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2087 awk '/^repaired_orphan/ { print $2 }')
2088 [ $repaired -eq 1 ] ||
2089 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
2091 echo "The file size should be correct after layout LFSCK scanning"
2092 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2093 [ "$cur_size" == "$saved_size" ] ||
2094 error "(6) Expect file2 size $saved_size, but got $cur_size"
2096 echo "The LFSCK should find back the original data."
# Informational output only; none of the three commands below is checked.
2097 cat $DIR/$tdir/a1/f2
2098 $LFS path2fid $DIR/$tdir/a1/f2
2099 $LFS getstripe $DIR/$tdir/a1/f2
2101 run_test 18d "Find out orphan OST-object and repair it (4)"
# test_18e body: same setup as the preceding test, but f2 is appended to while
# LFSCK is held in scanning-phase2, so the conflicting OST-object counts as
# modified. LFSCK is then expected to keep f2 as it is and expose the old
# orphan OST-object through a "*-C-*" stub under .lustre/lost+found/MDT0000/.
# NOTE(review): the function header, closing brace and several statements
# between the lines below are not visible in this excerpt.
2105 echo "The target MDT-object layout EA slot is occpuied by some new"
2106 echo "created OST-object when repair dangling reference case. Such"
2107 echo "conflict OST-object has been modified by others. To keep the"
2108 echo "new data, the LFSCK will create a new file to refernece this"
2109 echo "old orphan OST-object."
2112 check_mount_and_prep
# One stripe, starting on OST index 0, 1M stripe size.
2114 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2115 echo "guard" > $DIR/$tdir/a1/f1
2116 echo "foo" > $DIR/$tdir/a1/f2
# Column 6 of `ls -il` is the file size (column 1 is the inode number).
2117 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2118 $LFS path2fid $DIR/$tdir/a1/f1
2119 $LFS getstripe $DIR/$tdir/a1/f1
2120 $LFS path2fid $DIR/$tdir/a1/f2
2121 $LFS getstripe $DIR/$tdir/a1/f2
2122 cancel_lru_locks osc
2124 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2125 echo "to reference the same OST-object (which is f1's OST-obejct)."
2126 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2127 echo "dangling reference case, but f2's old OST-object is there."
2130 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
2131 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
# NOTE(review): "1.1" uses the legacy '.' owner/group separator; "1:1" is the
# documented spelling.
2132 chown 1.1 $DIR/$tdir/a1/f2
2133 rm -f $DIR/$tdir/a1/f1
2136 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2138 echo "stopall to cleanup object cache"
# NOTE(review): the stop step announced above is not visible in this excerpt.
2141 setupall > /dev/null
2143 echo "The file size should be incorrect since dangling referenced"
2144 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2145 [ "$cur_size" != "$saved_size" ] ||
2146 error "(1) Expect incorrect file2 size"
2148 #define OBD_FAIL_LFSCK_DELAY3 0x1602
# fail_val is 10 here (the preceding test uses 5).
2149 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
2151 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2152 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
# Wait for MDS1 to report scanning-phase2 while the delay is still armed.
2154 wait_update_facet mds1 "$LCTL get_param -n \
2155 mdd.$(facet_svc mds1).lfsck_layout |
2156 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2157 error "(3) MDS1 is not the expected 'scanning-phase2'"
2159 # to guarantee all updates are synced.
2163 echo "Write new data to f2 to modify the new created OST-object."
# The append happens before fail_loc is cleared, i.e. while LFSCK is delayed.
2164 echo "dummy" >> $DIR/$tdir/a1/f2
2166 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
2168 for k in $(seq $MDSCOUNT); do
2169 # The LFSCK status query interval is 30 seconds. In case
2170 # some LFSCK_NOTIFY RPCs fail or get lost, wait long enough
2171 # to guarantee that the status has synced up.
2172 wait_update_facet mds${k} "$LCTL get_param -n \
2173 mdd.$(facet_svc mds${k}).lfsck_layout |
2174 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2175 error "(4) MDS${k} is not the expected 'completed'"
# OSTs are sampled once (no waiting), after every MDS has reported completed.
2178 for k in $(seq $OSTCOUNT); do
2179 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2180 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2181 awk '/^status/ { print $2 }')
2182 [ "$cur_status" == "completed" ] ||
2183 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
2186 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2187 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2188 awk '/^repaired_orphan/ { print $2 }')
2189 [ $repaired -eq 1 ] ||
2190 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2192 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2193 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2194 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
# NOTE(review): the -name pattern is unquoted, so the shell expands *-C-*
# against the current directory first if anything there matches; '*-C-*'
# would always reach find intact. No `local` for cname is visible here.
2196 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-C-*)
2197 [ ! -z "$cname" ] ||
2198 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
2200 echo "The stub file should keep the original f2 data"
# The stub's size must equal f2's size as recorded before the injection.
2201 cur_size=$(ls -il $cname | awk '{ print $6 }')
2202 [ "$cur_size" == "$saved_size" ] ||
2203 error "(9) Expect file2 size $saved_size, but got $cur_size"
2206 $LFS path2fid $cname
2207 $LFS getstripe $cname
2209 echo "The f2 should contains new data."
# Informational output only; none of the three commands below is checked.
2210 cat $DIR/$tdir/a1/f2
2211 $LFS path2fid $DIR/$tdir/a1/f2
2212 $LFS getstripe $DIR/$tdir/a1/f2
2214 run_test 18e "Find out orphan OST-object and repair it (5)"
# test_18f body: MDT-objects are dropped for files striped over OST0 only and
# over OST0+OST1, then layout LFSCK runs twice. The first run has a failure
# injected on mds1 and is expected to end "partial" on the MDSes and on ost1
# with one orphan repaired per MDS; the second, clean run is expected to
# complete everywhere with two orphans repaired per MDS.
# NOTE(review): the function header, closing brace and the fi/done/} lines of
# the compound statements below are not visible in this excerpt.
2217 [ $OSTCOUNT -lt 2 ] &&
2218 skip "The test needs at least 2 OSTs" && return
2221 echo "The target MDT-object is lost. The LFSCK should re-create the"
2222 echo "MDT-object under .lustre/lost+found/MDTxxxx. If some OST fail"
2223 echo "to verify some OST-object(s) during the first stage-scanning,"
2224 echo "the LFSCK should skip orphan OST-objects for such OST. Others"
2225 echo "should not be affected."
2228 check_mount_and_prep
# a1: directory on MDT index 0, files use 1 stripe starting at OST index 0.
2229 $LFS mkdir -i 0 $DIR/$tdir/a1
2230 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2231 dd if=/dev/zero of=$DIR/$tdir/a1/guard bs=1M count=2
2232 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# a2: directory on MDT index 0, files use 2 stripes starting at OST index 0.
2233 $LFS mkdir -i 0 $DIR/$tdir/a2
2234 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a2
2235 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
2236 $LFS getstripe $DIR/$tdir/a1/f1
2237 $LFS getstripe $DIR/$tdir/a2/f2
# With two or more MDSes, mirror the same layout on MDT index 1 (a3/a4).
2239 if [ $MDSCOUNT -ge 2 ]; then
2240 $LFS mkdir -i 1 $DIR/$tdir/a3
2241 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a3
2242 dd if=/dev/zero of=$DIR/$tdir/a3/guard bs=1M count=2
2243 dd if=/dev/zero of=$DIR/$tdir/a3/f3 bs=1M count=2
2244 $LFS mkdir -i 1 $DIR/$tdir/a4
2245 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a4
2246 dd if=/dev/zero of=$DIR/$tdir/a4/f4 bs=1M count=2
2247 $LFS getstripe $DIR/$tdir/a3/f3
2248 $LFS getstripe $DIR/$tdir/a4/f4
2251 cancel_lru_locks osc
2253 echo "Inject failure, to simulate the case of missing the MDT-object"
2254 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
# The unlinks below run with fail_loc 0x1616 armed on the owning MDS.
2255 do_facet mds1 $LCTL set_param fail_loc=0x1616
2256 rm -f $DIR/$tdir/a1/f1
2257 rm -f $DIR/$tdir/a2/f2
2259 if [ $MDSCOUNT -ge 2 ]; then
2260 do_facet mds2 $LCTL set_param fail_loc=0x1616
2261 rm -f $DIR/$tdir/a3/f3
2262 rm -f $DIR/$tdir/a4/f4
2268 do_facet mds1 $LCTL set_param fail_loc=0
2269 if [ $MDSCOUNT -ge 2 ]; then
2270 do_facet mds2 $LCTL set_param fail_loc=0
2273 cancel_lru_locks mdc
2274 cancel_lru_locks osc
2276 echo "Inject failure, to simulate the OST0 fail to handle"
2277 echo "MDT0 LFSCK request during the first-stage scanning."
2278 #define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c
# fail_val=0 presumably selects OST index 0 as the failing target, matching
# the message above - TODO confirm.
2279 do_facet mds1 $LCTL set_param fail_loc=0x161c fail_val=0
2281 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2282 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
# First run: every MDS is expected to finish with status "partial".
2284 for k in $(seq $MDSCOUNT); do
2285 # The LFSCK status query interval is 30 seconds. In case
2286 # some LFSCK_NOTIFY RPCs fail or get lost, wait long enough
2287 # to guarantee that the status has synced up.
2288 wait_update_facet mds${k} "$LCTL get_param -n \
2289 mdd.$(facet_svc mds${k}).lfsck_layout |
2290 awk '/^status/ { print \\\$2 }'" "partial" $LTIME ||
2291 error "(2) MDS${k} is not the expected 'partial'"
# ost1 is expected to report "partial" while ost2 still completes.
2294 wait_update_facet ost1 "$LCTL get_param -n \
2295 obdfilter.$(facet_svc ost1).lfsck_layout |
2296 awk '/^status/ { print \\\$2 }'" "partial" $LTIME || {
2297 error "(3) OST1 is not the expected 'partial'"
2300 wait_update_facet ost2 "$LCTL get_param -n \
2301 obdfilter.$(facet_svc ost2).lfsck_layout |
2302 awk '/^status/ { print \\\$2 }'" "completed" $LTIME || {
2303 error "(4) OST2 is not the expected 'completed'"
2306 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
# After the partial run exactly one orphan per MDS is expected to be repaired.
# NOTE(review): "mds{1}" / "mds{2}" in the messages below contain no '$', so
# they are printed literally.
2308 local repaired=$(do_facet mds1 $LCTL get_param -n \
2309 mdd.$(facet_svc mds1).lfsck_layout |
2310 awk '/^repaired_orphan/ { print $2 }')
2311 [ $repaired -eq 1 ] ||
2312 error "(5) Expect 1 fixed on mds{1}, but got: $repaired"
2314 if [ $MDSCOUNT -ge 2 ]; then
2315 repaired=$(do_facet mds2 $LCTL get_param -n \
2316 mdd.$(facet_svc mds2).lfsck_layout |
2317 awk '/^repaired_orphan/ { print $2 }')
2318 [ $repaired -eq 1 ] ||
2319 error "(6) Expect 1 fixed on mds{2}, but got: $repaired"
2322 echo "Trigger layout LFSCK on all devices again to cleanup"
2323 $START_LAYOUT -r -o || error "(7) Fail to start LFSCK for layout!"
# Second run, no failure injected: everything is expected to complete.
2325 for k in $(seq $MDSCOUNT); do
2326 # The LFSCK status query interval is 30 seconds. In case
2327 # some LFSCK_NOTIFY RPCs fail or get lost, wait long enough
2328 # to guarantee that the status has synced up.
2329 wait_update_facet mds${k} "$LCTL get_param -n \
2330 mdd.$(facet_svc mds${k}).lfsck_layout |
2331 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2332 error "(8) MDS${k} is not the expected 'completed'"
# NOTE(review): no `local` for cur_status is visible in this excerpt.
2335 for k in $(seq $OSTCOUNT); do
2336 cur_status=$(do_facet ost${k} $LCTL get_param -n \
2337 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2338 awk '/^status/ { print $2 }')
2339 [ "$cur_status" == "completed" ] ||
2340 error "(9) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): `repaired` was already declared local above; this second
# `local` is redundant.
2344 local repaired=$(do_facet mds1 $LCTL get_param -n \
2345 mdd.$(facet_svc mds1).lfsck_layout |
2346 awk '/^repaired_orphan/ { print $2 }')
2347 [ $repaired -eq 2 ] ||
2348 error "(10) Expect 2 fixed on mds{1}, but got: $repaired"
2350 if [ $MDSCOUNT -ge 2 ]; then
2351 repaired=$(do_facet mds2 $LCTL get_param -n \
2352 mdd.$(facet_svc mds2).lfsck_layout |
2353 awk '/^repaired_orphan/ { print $2 }')
2354 [ $repaired -eq 2 ] ||
2355 error "(11) Expect 2 fixed on mds{2}, but got: $repaired"
2358 run_test 18f "Skip the failed OST(s) when handle orphan OST-objects"
# Runs on the local node (no do_facet) with the command's stdout discarded.
# Presumably "-cache" removes the "cache" flag from the Lustre debug mask for
# the tests that follow - TODO confirm.
2360 $LCTL set_param debug=-cache > /dev/null
# test_19a body: with lfsck_verify_pfid enabled on OST0000 and fail_loc 0x1619
# armed on the local node, reading a0 is expected to fail.
# NOTE(review): the function header, closing brace and some statements between
# the lines below are not visible in this excerpt.
2363 check_mount_and_prep
# One stripe, starting on OST index 0.
2364 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2366 echo "foo" > $DIR/$tdir/a0
2367 echo "guard" > $DIR/$tdir/a1
2368 cancel_lru_locks osc
2370 echo "Inject failure, then client will offer wrong parent FID when read"
2371 do_facet ost1 $LCTL set_param -n \
2372 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2373 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
# This fail_loc is set without do_facet, i.e. on the node running the script.
2374 $LCTL set_param fail_loc=0x1619
2376 echo "Read RPC with wrong parent FID should be denied"
# A successful cat is the failure condition here.
2377 cat $DIR/$tdir/a0 && error "(3) Read should be denied!"
2378 $LCTL set_param fail_loc=0
2380 run_test 19a "OST-object inconsistency self detect"
# test_19b body: f0 is created while fail_loc 0x1611 is armed on ost1. The
# "repaired" counter of lfsck_verify_pfid must read 0 before verification is
# switched on, and 1 after f0 has been read with verification enabled.
# NOTE(review): the function header, closing brace and some statements between
# the lines below are not visible in this excerpt.
2383 check_mount_and_prep
# One stripe, starting on OST index 0.
2384 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2386 echo "Inject failure stub to make the OST-object to back point to"
2387 echo "non-exist MDT-object"
2389 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
2390 do_facet ost1 $LCTL set_param fail_loc=0x1611
2391 echo "foo" > $DIR/$tdir/f0
2392 cancel_lru_locks osc
2393 do_facet ost1 $LCTL set_param fail_loc=0
2395 echo "Nothing should be fixed since self detect and repair is disabled"
# The same parameter is used two ways: read back here for its "repaired" line,
# and written with 1 further down to enable the check.
2396 local repaired=$(do_facet ost1 $LCTL get_param -n \
2397 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2398 awk '/^repaired/ { print $2 }')
2399 [ $repaired -eq 0 ] ||
2400 error "(1) Expected 0 repaired, but got $repaired"
2402 echo "Read RPC with right parent FID should be accepted,"
2403 echo "and cause parent FID on OST to be fixed"
2405 do_facet ost1 $LCTL set_param -n \
2406 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
# This read is the one expected to bump the "repaired" counter checked below.
2407 cat $DIR/$tdir/f0 || error "(2) Read should not be denied!"
2409 repaired=$(do_facet ost1 $LCTL get_param -n \
2410 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2411 awk '/^repaired/ { print $2 }')
2412 [ $repaired -eq 1 ] ||
2413 error "(3) Expected 1 repaired, but got $repaired"
2415 run_test 19b "OST-object inconsistency self repair"
# test_20 body: files f0..f2 (plus f3 with more than 2 OSTs) lose their
# MDT-object, and f1..f3 additionally lose one OST-object each. Layout LFSCK
# is expected to re-create them as ${fid}-R-0 under
# .lustre/lost+found/MDT0000/. Each re-created file is then checked for
# pattern flag, stripe count, size and read/write/chown/touch/unlink behaviour.
# NOTE(review): the function header, closing brace, the else/fi lines of the
# conditionals and the assignment of bcount are not visible in this excerpt.
2418 [ $OSTCOUNT -lt 2 ] &&
2419 skip "The test needs at least 2 OSTs" && return
2422 echo "The target MDT-object and some of its OST-object are lost."
2423 echo "The LFSCK should find out the left OST-objects and re-create"
2424 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2425 echo "with the partial OST-objects (LOV EA hole)."
2427 echo "New client can access the file with LOV EA hole via normal"
2428 echo "system tools or commands without crash the system."
2430 echo "For old client, even though it cannot access the file with"
2431 echo "LOV EA hole, it should not cause the system crash."
2434 check_mount_and_prep
2435 $LFS mkdir -i 0 $DIR/$tdir/a1
# 3 stripes when more than 2 OSTs are available, otherwise 2 stripes.
2436 if [ $OSTCOUNT -gt 2 ]; then
2437 $LFS setstripe -c 3 -i 0 -S 1M $DIR/$tdir/a1
2440 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a1
# With bs=4096 and a 1M stripe size, 256 blocks fill exactly one stripe.
# Presumably bcount is 257 for 2 OSTs and 513 otherwise, as the layout below
# implies - TODO confirm against the assignment not shown here.
2444 # 256 blocks on the stripe0.
2445 # 1 block on the stripe1 for 2 OSTs case.
2446 # 256 blocks on the stripe1 for other cases.
2447 # 1 block on the stripe2 if OSTs > 2
2448 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2449 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2450 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# The FIDs are recorded now because they form the lost+found names used later.
2452 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2453 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2454 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
2457 $LFS getstripe $DIR/$tdir/a1/f0
2459 $LFS getstripe $DIR/$tdir/a1/f1
2461 $LFS getstripe $DIR/$tdir/a1/f2
# NOTE(review): fid3 is assigned without `local`, unlike fid0..fid2 (unless it
# is declared on a line not shown here).
2463 if [ $OSTCOUNT -gt 2 ]; then
2464 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
2465 fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2467 $LFS getstripe $DIR/$tdir/a1/f3
2470 cancel_lru_locks osc
2472 echo "Inject failure..."
2473 echo "To simulate f0 lost MDT-object"
2474 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2475 do_facet mds1 $LCTL set_param fail_loc=0x1616
2476 rm -f $DIR/$tdir/a1/f0
2478 echo "To simulate f1 lost MDT-object and OST-object0"
2479 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
# fail_loc 0x161a stays armed for f1..f3; only fail_val changes (unset for f1,
# then 1, then 2), which per the messages selects the stripe whose OST-object
# is dropped.
2480 do_facet mds1 $LCTL set_param fail_loc=0x161a
2481 rm -f $DIR/$tdir/a1/f1
2483 echo "To simulate f2 lost MDT-object and OST-object1"
2484 do_facet mds1 $LCTL set_param fail_val=1
2485 rm -f $DIR/$tdir/a1/f2
2487 if [ $OSTCOUNT -gt 2 ]; then
2488 echo "To simulate f3 lost MDT-object and OST-object2"
2489 do_facet mds1 $LCTL set_param fail_val=2
2490 rm -f $DIR/$tdir/a1/f3
# The client is unmounted here and remounted only after LFSCK has been checked.
2493 umount_client $MOUNT
2496 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2498 echo "Inject failure to slow down the LFSCK on OST0"
2499 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2500 do_facet ost1 $LCTL set_param fail_loc=0x161b
2502 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2503 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2506 do_facet ost1 $LCTL set_param fail_loc=0
2508 for k in $(seq $MDSCOUNT); do
2509 # The LFSCK status query interval is 30 seconds. In case
2510 # some LFSCK_NOTIFY RPCs fail or get lost, wait long enough
2511 # to guarantee that the status has synced up.
2512 wait_update_facet mds${k} "$LCTL get_param -n \
2513 mdd.$(facet_svc mds${k}).lfsck_layout |
2514 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2515 error "(2) MDS${k} is not the expected 'completed'"
2518 for k in $(seq $OSTCOUNT); do
2519 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2520 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2521 awk '/^status/ { print $2 }')
2522 [ "$cur_status" == "completed" ] ||
2523 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan count on mds1: 9 with more than 2 OSTs, else 4.
2526 local repaired=$(do_facet mds1 $LCTL get_param -n \
2527 mdd.$(facet_svc mds1).lfsck_layout |
2528 awk '/^repaired_orphan/ { print $2 }')
2529 if [ $OSTCOUNT -gt 2 ]; then
2530 [ $repaired -eq 9 ] ||
2531 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2533 [ $repaired -eq 4 ] ||
2534 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
2537 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Bit tested against the value printed by `lfs getstripe -L` below.
# NOTE(review): assigned without `local` in the visible lines.
2539 LOV_PATTERN_F_HOLE=0x40000000
2542 # ${fid0}-R-0 is the old f0
2544 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
2545 echo "Check $name, which is the old f0"
2547 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
# The 0x prefix makes the shell arithmetic below parse the value as hex;
# assumes `getstripe -L` prints bare hex digits - TODO confirm.
2549 local pattern=0x$($LFS getstripe -L $name)
2550 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2551 error "(5.2) NOT expect pattern flag hole, but got $pattern"
2553 local stripes=$($LFS getstripe -c $name)
2554 if [ $OSTCOUNT -gt 2 ]; then
2555 [ $stripes -eq 3 ] ||
2556 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2558 [ $stripes -eq 2 ] ||
2559 error "(5.3.2) expect the stripe count is 2, but got $stripes"
2562 local size=$(stat $name | awk '/Size:/ { print $2 }')
2563 [ $size -eq $((4096 * $bcount)) ] ||
2564 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
# f0 lost no OST-object, so every ordinary operation must succeed.
2566 cat $name > /dev/null || error "(5.5) cannot read $name"
2568 echo "dummy" >> $name || error "(5.6) cannot write $name"
2570 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2572 touch $name || error "(5.8) cannot touch $name"
2574 rm -f $name || error "(5.9) cannot unlink $name"
2577 # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2579 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
2580 if [ $OSTCOUNT -gt 2 ]; then
2581 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2583 echo "Check $name, it contains the old f1's stripe1"
2586 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
# f1 lost stripe0, so the hole flag must be set in its pattern.
2588 pattern=0x$($LFS getstripe -L $name)
2589 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2590 error "(6.2) expect pattern flag hole, but got $pattern"
2592 stripes=$($LFS getstripe -c $name)
2593 if [ $OSTCOUNT -gt 2 ]; then
2594 [ $stripes -eq 3 ] ||
2595 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2597 [ $stripes -eq 2 ] ||
2598 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2601 size=$(stat $name | awk '/Size:/ { print $2 }')
2602 [ $size -eq $((4096 * $bcount)) ] ||
2603 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
2605 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# conv=noerror keeps dd going after a read error and conv=sync pads each block
# to the full block size. 256 "Input/output error" lines are expected, i.e.
# one per 4096-byte block of the missing 1M stripe.
2607 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2608 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2611 [ $failures -eq 256 ] ||
2612 error "(6.6) expect 256 IO failures, but get $failures"
2614 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2615 [ $size -eq $((4096 * $bcount)) ] ||
2616 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
# Block 0 lies in the missing stripe0; block 300 (seek=300) lies beyond the
# first 256 blocks, i.e. in stripe1, which still exists.
2618 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2619 error "(6.8) write to the LOV EA hole should fail"
2621 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2622 error "(6.9) write to normal stripe should NOT fail"
2624 echo "foo" >> $name && error "(6.10) append write $name should fail"
2626 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2628 touch $name || error "(6.12) cannot touch $name"
2630 rm -f $name || error "(6.13) cannot unlink $name"
2633 # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2635 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
2636 if [ $OSTCOUNT -gt 2 ]; then
2637 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2639 echo "Check $name, it contains the old f2's stripe0"
2642 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
2644 pattern=0x$($LFS getstripe -L $name)
2645 stripes=$($LFS getstripe -c $name)
2646 size=$(stat $name | awk '/Size:/ { print $2 }')
# More than 2 OSTs: f2 lost the middle stripe, so a hole is expected and the
# file behaves like f1 with the failing/succeeding offsets swapped.
2647 if [ $OSTCOUNT -gt 2 ]; then
2648 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2649 error "(7.2.1) expect pattern flag hole, but got $pattern"
2651 [ $stripes -eq 3 ] ||
2652 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2654 [ $size -eq $((4096 * $bcount)) ] ||
2655 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
2657 cat $name > /dev/null &&
2658 error "(7.5.1) normal read $name should fail"
2660 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2661 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2663 [ $failures -eq 256 ] ||
2664 error "(7.6) expect 256 IO failures, but get $failures"
2666 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2667 [ $size -eq $((4096 * $bcount)) ] ||
2668 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
2670 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2671 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
2673 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2674 error "(7.8.1) write to normal stripe should NOT fail"
2676 echo "foo" >> $name &&
2677 error "(7.8.3) append write $name should fail"
2679 chown $RUNAS_ID:$RUNAS_GID $name ||
2680 error "(7.9.1) cannot chown on $name"
2682 touch $name || error "(7.10.1) cannot touch $name"
# Exactly 2 OSTs (the else branch; its `else` line is not shown): f2 lost its
# last stripe, so a plain 1-stripe file of 256 blocks with no hole is expected.
2684 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2685 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2687 [ $stripes -eq 1 ] ||
2688 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2691 [ $size -eq $((4096 * (256 + 0))) ] ||
2692 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2694 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2696 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2698 chown $RUNAS_ID:$RUNAS_GID $name ||
2699 error "(7.9.2) cannot chown on $name"
2701 touch $name || error "(7.10.2) cannot touch $name"
2704 rm -f $name || error "(7.11) cannot unlink $name"
# f3 only exists when there are more than 2 OSTs.
2706 [ $OSTCOUNT -le 2 ] && return
2709 # ${fid3}-R-0 should contains the old f3's stripe0 and stripe1
2711 name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
2712 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2714 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
2716 pattern=0x$($LFS getstripe -L $name)
2717 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2718 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2720 stripes=$($LFS getstripe -c $name)
2721 # LFSCK does not know the old f3 had 3 stripes.
2722 # It only tries to find as much as possible.
2723 # The stripe count depends on the last stripe's offset.
2724 [ $stripes -eq 2 ] ||
2725 error "(8.3) expect the stripe count is 2, but got $stripes"
2727 size=$(stat $name | awk '/Size:/ { print $2 }')
# Two full stripes of 256 blocks each remain; the lost tail adds nothing.
2729 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2730 error "(8.4) expect the size $((4096 * 512)), but got $size"
2732 cat $name > /dev/null || error "(8.5) cannot read $name"
2734 echo "dummy" >> $name || error "(8.6) cannot write $name"
2736 chown $RUNAS_ID:$RUNAS_GID $name ||
2737 error "(8.7) cannot chown on $name"
2739 touch $name || error "(8.8) cannot touch $name"
2741 rm -f $name || error "(8.9) cannot unlink $name"
2743 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# test_21 body: starts LFSCK on MDT0000 without a -t type option and checks
# that both the namespace and the layout component report scanning-phase1,
# then stops LFSCK again.
# NOTE(review): the function header and closing brace are not visible in this
# excerpt; SHOW_LAYOUT is defined outside the visible text.
2746 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] &&
2747 skip "ignore the test if MDS is older than 2.5.59" && return
2749 check_mount_and_prep
2750 createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
2752 echo "Start all LFSCK components by default (-s 1)"
# Presumably -s 1 limits the scan speed so that phase 1 is still in progress
# when the status is sampled below - TODO confirm.
2753 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -s 1 -r ||
2754 error "Fail to start LFSCK"
2756 echo "namespace LFSCK should be in 'scanning-phase1' status"
# The status is sampled once; there is no waiting or retry here.
2757 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
2758 [ "$STATUS" == "scanning-phase1" ] ||
2759 error "Expect namespace 'scanning-phase1', but got '$STATUS'"
2761 echo "layout LFSCK should be in 'scanning-phase1' status"
2762 STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
2763 [ "$STATUS" == "scanning-phase1" ] ||
2764 error "Expect layout 'scanning-phase1', but got '$STATUS'"
2766 echo "Stop all LFSCK components by default"
2767 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 ||
2768 error "Fail to stop LFSCK"
2770 run_test 21 "run all LFSCK components by default"
# test_22a body: foo/dummy is created on MDT0 with fail_loc 0x161e armed, then
# the "guard" directory is removed. Namespace LFSCK is expected to report
# exactly one repaired unmatched pair, after which `ls` on dummy must work.
# NOTE(review): the function header, closing brace and some statements between
# the lines below are not visible in this excerpt.
2773 [ $MDSCOUNT -lt 2 ] &&
2774 skip "We need at least 2 MDSes for this test" && return
2777 echo "The parent_A references the child directory via some name entry,"
2778 echo "but the child directory back references another parent_B via its"
# NOTE(review): in the next two echo lines the inner double quotes close and
# reopen the string, so the output shows .. without quote characters.
2779 echo "".." name entry. The parent_B does not exist. Then the namesapce"
2780 echo "LFSCK will repair the child directory's ".." name entry."
2783 check_mount_and_prep
2785 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
2786 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
2788 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
2789 echo "The dummy's dotdot name entry references the guard."
2790 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
2791 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
2792 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
2793 error "(3) Fail to mkdir on MDT0"
2794 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Removing guard leaves dummy's ".." pointing at a parent that no longer
# exists, which is the case described in the banner above.
2796 rmdir $DIR/$tdir/guard || error "(4) Fail to rmdir $DIR/$tdir/guard"
2798 echo "Trigger namespace LFSCK to repair unmatched pairs"
2799 $START_NAMESPACE -A -r ||
2800 error "(5) Fail to start LFSCK for namespace"
# The final argument (32) is presumably the wait limit in seconds - TODO
# confirm against wait_update_facet.
2802 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2803 mdd.${MDT_DEV}.lfsck_namespace |
2804 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2806 error "(6) unexpected status"
2809 local repaired=$($SHOW_NAMESPACE |
2810 awk '/^unmatched_pairs_repaired/ { print $2 }')
2811 [ $repaired -eq 1 ] ||
2812 error "(7) Fail to repair unmatched pairs: $repaired"
2814 echo "'ls' should success after namespace LFSCK repairing"
2815 ls -ail $DIR/$tdir/foo/dummy > /dev/null ||
2816 error "(8) ls should success."
2818 run_test 22a "LFSCK can repair unmatched pairs (1)"
# test_22b body: foo/dummy is created on MDT0 with fail_loc 0x161e armed while
# the "guard" directory stays in place. fid2path on dummy's FID must not
# return the real path before namespace LFSCK and must return it afterwards,
# with exactly one unmatched pair reported as repaired.
# NOTE(review): the function header, closing brace and some statements between
# the lines below are not visible in this excerpt.
2821 [ $MDSCOUNT -lt 2 ] &&
2822 skip "We need at least 2 MDSes for this test" && return
2825 echo "The parent_A references the child directory via the name entry_B,"
2826 echo "but the child directory back references another parent_C via its"
# NOTE(review): in the echo lines containing ".." the inner double quotes
# close and reopen the string, so the output shows .. without quote characters.
2827 echo "".." name entry. The parent_C exists, but there is no the name"
2828 echo "entry_B under the parent_C. Then the namesapce LFSCK will repair"
2829 echo "the child directory's ".." name entry and its linkEA."
2832 check_mount_and_prep
2834 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
2835 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
2837 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
2838 echo "and bad linkEA. The dummy's dotdot name entry references the"
2839 echo "guard. The dummy's linkEA references n non-exist name entry."
2840 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
2841 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
2842 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
2843 error "(3) Fail to mkdir on MDT0"
2844 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# The FID is recorded once and reused for the fid2path check after the repair.
2846 local dummyfid=$($LFS path2fid $DIR/$tdir/foo/dummy)
2847 echo "fid2path should NOT work on the dummy's FID $dummyfid"
# Only the printed path is compared; fid2path's exit status is not checked.
2848 local dummyname=$($LFS fid2path $DIR $dummyfid)
2849 [ "$dummyname" != "$DIR/$tdir/foo/dummy" ] ||
2850 error "(4) fid2path works unexpectedly."
2852 echo "Trigger namespace LFSCK to repair unmatched pairs"
2853 $START_NAMESPACE -A -r ||
2854 error "(5) Fail to start LFSCK for namespace"
2856 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2857 mdd.${MDT_DEV}.lfsck_namespace |
2858 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2860 error "(6) unexpected status"
2863 local repaired=$($SHOW_NAMESPACE |
2864 awk '/^unmatched_pairs_repaired/ { print $2 }')
2865 [ $repaired -eq 1 ] ||
2866 error "(7) Fail to repair unmatched pairs: $repaired"
2868 echo "fid2path should work on the dummy's FID $dummyfid after LFSCK"
# NOTE(review): dummyname was already declared local above; this second
# `local` is redundant.
2869 local dummyname=$($LFS fid2path $DIR $dummyfid)
2870 [ "$dummyname" == "$DIR/$tdir/foo/dummy" ] ||
2871 error "(8) fid2path does not work"
2873 run_test 22b "LFSCK can repair unmatched pairs (2)"
# test_23a body: d0/d1 is removed with fail_loc 0x1620 armed on mds2, after
# which `ls` on d0/d1 must fail. Namespace LFSCK is run twice: first without
# -C, after which `ls` must still fail, then with -C, after which `ls` must
# succeed. Both runs must report dangling_repaired equal to 1.
# NOTE(review): the function header, closing brace and some statements between
# the lines below are not visible in this excerpt.
2876 [ $MDSCOUNT -lt 2 ] &&
2877 skip "We need at least 2 MDSes for this test" && return
2880 echo "The name entry is there, but the MDT-object for such name "
2881 echo "entry does not exist. The namespace LFSCK should find out "
2882 echo "and repair the inconsistency as required."
2885 check_mount_and_prep
# The parent d0 lives on MDT index 0 and the child d1 on MDT index 1.
2887 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
2888 $LFS mkdir -i 1 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT1"
2890 echo "Inject failure stub on MDT1 to simulate dangling name entry"
2891 #define OBD_FAIL_LFSCK_DANGLING2 0x1620
2892 do_facet mds2 $LCTL set_param fail_loc=0x1620
2893 rmdir $DIR/$tdir/d0/d1 || error "(3) Fail to rmdir d1"
2894 do_facet mds2 $LCTL set_param fail_loc=0
2896 echo "'ls' should fail because of dangling name entry"
# A successful ls is the failure condition here.
2897 ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(4) ls should fail."
2899 echo "Trigger namespace LFSCK to find out dangling name entry"
2900 $START_NAMESPACE -A -r ||
2901 error "(5) Fail to start LFSCK for namespace"
2903 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2904 mdd.${MDT_DEV}.lfsck_namespace |
2905 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2907 error "(6) unexpected status"
# The counter is expected to be 1 even though, per the next message, the
# MDT-object is not re-created by this first run.
2910 local repaired=$($SHOW_NAMESPACE |
2911 awk '/^dangling_repaired/ { print $2 }')
2912 [ $repaired -eq 1 ] ||
2913 error "(7) Fail to repair dangling name entry: $repaired"
2915 echo "'ls' should fail because not re-create MDT-object by default"
2916 ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(8) ls should fail."
2918 echo "Trigger namespace LFSCK again to repair dangling name entry"
# Same start command as the first run, plus -C.
2919 $START_NAMESPACE -A -r -C ||
2920 error "(9) Fail to start LFSCK for namespace"
2922 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2923 mdd.${MDT_DEV}.lfsck_namespace |
2924 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2926 error "(10) unexpected status"
2929 repaired=$($SHOW_NAMESPACE |
2930 awk '/^dangling_repaired/ { print $2 }')
2931 [ $repaired -eq 1 ] ||
2932 error "(11) Fail to repair dangling name entry: $repaired"
2934 echo "'ls' should success after namespace LFSCK repairing"
2935 ls -ail $DIR/$tdir/d0/d1 > /dev/null || error "(12) ls should success."
2937 run_test 23a "LFSCK can repair dangling name entry (1)"
# test_23b: a hard-link name entry ("foo") is made dangling via fail_loc
# 0x1621. The namespace LFSCK (run with -C) is expected to report one
# dangling repair and one multiple-linked repair, after which reading
# "foo" must yield the content of the real object ("dummy").
test_23b() {
	printf '%s\n' \
		"The objectA has multiple hard links, one of them corresponding" \
		"to the name entry_B. But there is something wrong for the name" \
		"entry_B and cause entry_B to references non-exist object_C." \
		"In the first-stage scanning, the LFSCK will think the entry_B" \
		"as dangling, and re-create the lost object_C. When the LFSCK" \
		"comes to the second-stage scanning, it will find that the" \
		"former re-creating object_C is not proper, and will try to" \
		"replace the object_C with the real object_A."

	check_mount_and_prep

	if ! $LFS mkdir -i 0 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0 on MDT0"
	fi
	if ! echo "dummy" > $DIR/$tdir/d0/f0; then
		error "(2) Fail to touch on MDT0"
	fi
	if ! echo "dead" > $DIR/$tdir/d0/f1; then
		error "(3) Fail to touch on MDT0"
	fi

	echo "Inject failure stub on MDT0 to simulate dangling name entry"
	# fail_loc 0x1621 == OBD_FAIL_LFSCK_DANGLING3
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
	if ! ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo; then
		error "(4) Fail to hard link"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	if ! rm -f $DIR/$tdir/d0/f1; then
		error "(5) Fail to unlink $DIR/$tdir/d0/f1"
	fi

	echo "'ls' should fail because of dangling name entry"
	if ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1; then
		error "(6) ls should fail."
	fi

	echo "Trigger namespace LFSCK to find out dangling name entry"
	if ! $START_NAMESPACE -r -C; then
		error "(7) Fail to start LFSCK for namespace"
	fi

	# Poll the namespace LFSCK status on the MDS until it is "completed".
	if ! wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32; then
		error "(8) unexpected status"
	fi

	local fixed=$($SHOW_NAMESPACE |
		      awk '/^dangling_repaired/ { print $2 }')
	if ! [ $fixed -eq 1 ]; then
		error "(9) Fail to repair dangling name entry: $fixed"
	fi

	fixed=$($SHOW_NAMESPACE |
		awk '/^multiple_linked_repaired/ { print $2 }')
	if ! [ $fixed -eq 1 ]; then
		error "(10) Fail to drop the former created object: $fixed"
	fi

	# After the repair "foo" must expose f0's content again.
	local content=$(cat $DIR/$tdir/d0/foo)
	if ! [ "$content" == "dummy" ]; then
		error "(11) The $DIR/$tdir/d0/foo is not recovered: $content"
	fi
}
run_test 23b "LFSCK can repair dangling name entry (2)"
# test_23c: same dangling hard-link setup as the previous variant, but the
# re-created object is modified by the client while the LFSCK is held back
# (fail_loc 0x1602 with fail_val=10). The test then expects one dangling
# repair and that "foo" does NOT read back as the original "dummy" content.
test_23c() {
	echo "The objectA has multiple hard links, one of them corresponding"
	echo "to the name entry_B. But there is something wrong for the name"
	echo "entry_B and cause entry_B to references non-exist object_C."
	echo "In the first-stage scanning, the LFSCK will think the entry_B"
	echo "as dangling, and re-create the lost object_C. And then others"
	echo "modified the re-created object_C. When the LFSCK comes to the"
	echo "second-stage scanning, it will find that the former re-creating"
	echo "object_C maybe wrong and try to replace the object_C with the"
	echo "real object_A. But because object_C has been modified, so the"
	echo "LFSCK cannot replace it."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
	echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
	echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"

	echo "Inject failure stub on MDT0 to simulate dangling name entry"
	#define OBD_FAIL_LFSCK_DANGLING3	0x1621
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
	ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"

	echo "'ls' should fail because of dangling name entry"
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(6) ls should fail."

	#define OBD_FAIL_LFSCK_DELAY3		0x1602
	do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602

	echo "Trigger namespace LFSCK to find out dangling name entry"
	$START_NAMESPACE -r -C ||
		error "(7) Fail to start LFSCK for namespace"

	# Wait until "foo" is visible from the client with size 0.
	wait_update_facet client "stat $DIR/$tdir/d0/foo |
		awk '/Size/ { print \\\$2 }'" "0" 32 || {
		# Dump the file actually being polled; this test never
		# creates a "guard" entry, so stat-ing it told us nothing.
		stat $DIR/$tdir/d0/foo
		error "(8) unexpected size"
	}

	# Modify the re-created object before the LFSCK is released.
	echo "data" >> $DIR/$tdir/d0/foo || error "(9) Fail to write"
	cancel_lru_locks osc

	do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(10) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^dangling_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(11) Fail to repair dangling name entry: $repaired"

	local data=$(cat $DIR/$tdir/d0/foo)
	[ "$data" != "dummy" ] ||
		error "(12) The $DIR/$tdir/d0/foo should not be recovered"
}
run_test 23c "LFSCK can repair dangling name entry (3)"
# test_24: two MDT-objects back-reference the same name entry through
# their linkEA, while the name entry references only one of them.
# Requires at least two MDTs. After the namespace LFSCK the test expects
# multiple_referenced_repaired == 1 and an orphan named
# "<child fid>-<parent fid>-D-0" under .lustre/lost+found/MDT0000/.
test_24() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "Two MDT-objects back reference the same name entry via their"
	echo "each own linkEA entry, but the name entry only references one"
	echo "MDT-object. The namespace LFSCK will remove the linkEA entry"
	echo "for the MDT-object that is not recognized. If such MDT-object"
	echo "has no other linkEA entry after the removing, then the LFSCK"
	echo "will add it as orphan under the .lustre/lost+found/MDTxxxx/."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"

	mkdir $DIR/$tdir/d0/guard || error "(1) Fail to mkdir guard"
	$LFS path2fid $DIR/$tdir/d0/guard

	mkdir $DIR/$tdir/d0/dummy || error "(2) Fail to mkdir dummy"
	$LFS path2fid $DIR/$tdir/d0/dummy

	# Parent FID used in the expected orphan name. Which directory's
	# FID ends up there depends on the backend type, so pick it here.
	# Kept local so it cannot leak into (or be inherited from) other
	# tests.
	local pfid
	if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
		pfid=$($LFS path2fid $DIR/$tdir/d0/guard)
	else
		pfid=$($LFS path2fid $DIR/$tdir/d0/dummy)
	fi

	touch $DIR/$tdir/d0/guard/foo ||
		error "(3) Fail to touch $DIR/$tdir/d0/guard/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "the $DIR/$tdir/d0/dummy/foo has the 'bad' linkEA entry"
	echo "that references $DIR/$tdir/d0/guard/foo."
	echo "Then remove the name entry $DIR/$tdir/d0/dummy/foo."
	echo "So the MDT-object $DIR/$tdir/d0/dummy/foo will be left"
	echo "there with the same linkEA entry as another MDT-object"
	echo "$DIR/$tdir/d0/guard/foo has"

	#define OBD_FAIL_LFSCK_MUL_REF		0x1622
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1622
	$LFS mkdir -i 0 $DIR/$tdir/d0/dummy/foo ||
		error "(4) Fail to mkdir $DIR/$tdir/d0/dummy/foo"
	$LFS path2fid $DIR/$tdir/d0/dummy/foo
	# Remember the child FID before its name entry is removed.
	local cfid=$($LFS path2fid $DIR/$tdir/d0/dummy/foo)
	rmdir $DIR/$tdir/d0/dummy/foo ||
		error "(5) Fail to remove $DIR/$tdir/d0/dummy/foo name entry"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	echo "stat $DIR/$tdir/d0/dummy/foo should fail"
	stat $DIR/$tdir/d0/dummy/foo > /dev/null 2>&1 &&
		error "(6) stat successfully unexpectedly"

	echo "Trigger namespace LFSCK to repair multiple-referenced name entry"
	$START_NAMESPACE -A -r ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(8) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^multiple_referenced_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
	error "(9) Fail to repair multiple referenced name entry: $repaired"

	echo "There should be an orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(10) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	local cname="$cfid-$pfid-D-0"
	ls -ail $MOUNT/.lustre/lost+found/MDT0000/$cname ||
		error "(11) .lustre/lost+found/MDT0000/ should not be empty"
}
run_test 24 "LFSCK can repair multiple-referenced name entry"
# test_25: a name entry is created with a wrong file type (fail_loc
# 0x1623). Runs only on an ldiskfs MDT. The namespace LFSCK is expected to
# report bad_file_type_repaired == 1 and the directory must stay listable.
test_25() {
	if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
		skip "Only support to inject failure on ldiskfs" && return
	fi

	printf '%s\n' \
		"The file type in the name entry does not match the file type" \
		"claimed by the referenced object. Then the LFSCK will update" \
		"the file type in the name entry."

	check_mount_and_prep

	if ! $LFS mkdir -i 0 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0"
	fi

	printf '%s\n' \
		"Inject failure stub on MDT0 to simulate the case that" \
		"the file type stored in the name entry is wrong."

	# fail_loc 0x1623 == OBD_FAIL_LFSCK_BAD_TYPE
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1623
	if ! touch $DIR/$tdir/d0/foo; then
		error "(2) Fail to touch $DIR/$tdir/d0/foo"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	echo "Trigger namespace LFSCK to repair bad file type in the name entry"
	if ! $START_NAMESPACE -r; then
		error "(3) Fail to start LFSCK for namespace"
	fi

	# Poll the namespace LFSCK status on the MDS until it is "completed".
	if ! wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32; then
		error "(4) unexpected status"
	fi

	local fixed=$($SHOW_NAMESPACE |
		      awk '/^bad_file_type_repaired/ { print $2 }')
	if ! [ $fixed -eq 1 ]; then
		error "(5) Fail to repair bad file type in name entry: $fixed"
	fi

	if ! ls -ail $DIR/$tdir/d0; then
		error "(6) Fail to 'ls' the $DIR/$tdir/d0"
	fi
}
run_test 25 "LFSCK can repair bad file type in the name entry"
# test_26a: the local name entry of "foo" is removed (fail_loc 0x1624)
# while the object and its linkEA stay behind. The namespace LFSCK is
# expected to report lost_dirent_repaired == 1 and to bring "foo" back
# with an unchanged FID.
test_26a() {
	echo "The local name entry back referenced by the MDT-object is lost."
	echo "The namespace LFSCK will add the missing local name entry back"
	echo "to the normal namespace."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	# Remember the FID so it can be compared after the repair.
	local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)

	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rm -f $DIR/$tdir/d0/foo || error "(4) Fail to unlink $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# The original line lacked "error", so an unexpected 'ls' success
	# tried to execute the message itself as a command.
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(5) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing local name entry"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(7) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	ls -ail $DIR/$tdir/d0/foo ||
		error "(9) Fail to 'ls' $DIR/$tdir/d0/foo"

	local foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
	[ "$foofid" == "$foofid2" ] ||
		error "(10) foo's FID changed: $foofid, $foofid2"
}
run_test 26a "LFSCK can add the missing local name entry back to the namespace"
# test_26b: remote variant of the lost-name-entry test. "d0" is created
# on MDT index 1 and "d0/foo" on MDT index 0; foo's name entry is removed
# with fail_loc 0x1624. Requires at least two MDTs. The namespace LFSCK is
# expected to report lost_dirent_repaired == 1 and to bring "foo" back
# with an unchanged FID.
test_26b() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "The remote name entry back referenced by the MDT-object is lost."
	echo "The namespace LFSCK will add the missing remote name entry back"
	echo "to the normal namespace."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	$LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"
	# Remember the FID so it can be compared after the repair.
	local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# The original line lacked "error", so an unexpected 'ls' success
	# tried to execute the message itself as a command.
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(4) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(6) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(7) Fail to repair lost dirent: $repaired"

	ls -ail $DIR/$tdir/d0/foo ||
		error "(8) Fail to 'ls' $DIR/$tdir/d0/foo"

	local foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
	[ "$foofid" == "$foofid2" ] ||
		error "(9) foo's FID changed: $foofid, $foofid2"
}
run_test 26b "LFSCK can add the missing remote name entry back to the namespace"
# test_27a: the local parent directory referenced by an object's linkEA
# is lost. Both hard links of the object are unlinked under fail_loc
# 0x1624 and the parent "d0" is removed. The namespace LFSCK is expected
# to report lost_dirent_repaired == 1 and to leave an entry matching
# "*-P-*" under .lustre/lost+found/MDT0000/.
test_27a() {
	echo "The local parent referenced by the MDT-object linkEA is lost."
	echo "The namespace LFSCK will re-create the lost parent as orphan."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system. And then remove"
	echo "another hard link and the parent directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rm -f $DIR/$tdir/d0/foo ||
		error "(4) Fail to unlink $DIR/$tdir/d0/foo"
	rm -f $DIR/$tdir/d0/dummy ||
		error "(5) Fail to unlink $DIR/$tdir/d0/dummy"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -rf $DIR/$tdir/d0 || error "(5) Fail to unlink the dir d0"
	# The original line lacked "error", so an unexpected 'ls' success
	# tried to execute the message itself as a command.
	ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 &&
		error "(6) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the lost parent"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(7) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	echo "There should be an orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(9) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0000/

	# Quote the pattern so the shell cannot expand it against the
	# current working directory before find sees it; keep the result
	# local to this test.
	local cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-P-*")
	[ ! -z "$cname" ] ||
		error "(10) .lustre/lost+found/MDT0000/ should not be empty"
}
run_test 27a "LFSCK can recreate the lost local parent directory as orphan"
# test_27b: remote variant of the lost-parent test. "d0" lives on MDT
# index 1 and "d0/foo" on MDT index 0; foo's name entry is removed under
# fail_loc 0x1624 and then "d0" itself is removed. Requires at least two
# MDTs. The namespace LFSCK is expected to report lost_dirent_repaired == 1
# and to leave an entry matching "*-P-*" under .lustre/lost+found/MDT0001/.
test_27b() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "The remote parent referenced by the MDT-object linkEA is lost."
	echo "The namespace LFSCK will re-create the lost parent as orphan."

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	$LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"

	$LFS path2fid $DIR/$tdir/d0

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system. And then remove"
	echo "the parent directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rmdir $DIR/$tdir/d0 || error "(4) Fail to unlink the dir d0"
	# The original line lacked "error", so an unexpected 'ls' success
	# tried to execute the message itself as a command.
	ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 &&
		error "(5) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(7) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	ls -ail $MOUNT/.lustre/lost+found/

	echo "There should be an orphan under .lustre/lost+found/MDT0001/"
	[ -d $MOUNT/.lustre/lost+found/MDT0001 ] ||
		error "(9) $MOUNT/.lustre/lost+found/MDT0001/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0001/

	# Quote the pattern so the shell cannot expand it against the
	# current working directory before find sees it; keep the result
	# local to this test.
	local cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name "*-P-*")
	[ ! -z "$cname" ] ||
		error "(10) .lustre/lost+found/MDT0001/ should not be empty"
}
run_test 27b "LFSCK can recreate the lost remote parent directory as orphan"
# test_28: orphan handling when one MDT failed during first-stage scanning.
# Requires at least two MDTs. Name entries d1/a1 and d2/a2 are removed
# under fail_loc 0x1624 on both MDSes, then fail_loc 0x161c is set on mds2
# while the LFSCK runs. First run: mds1 must end "partial" with 0 lost
# dirents repaired, mds2 "completed" with 1. Second run (no fault): all
# MDSes "completed", mds1 reports 1 repaired and mds2 reports 0.
test_28() {
	if [ $MDSCOUNT -lt 2 ]; then
		skip "The test needs at least 2 MDTs" && return
	fi

	printf '%s\n' \
		"The target name entry is lost. The LFSCK should insert the" \
		"orphan MDT-object under .lustre/lost+found/MDTxxxx. But if" \
		"the MDT (on which the orphan MDT-object resides) has ever" \
		"failed to respond some name entry verification during the" \
		"first stage-scanning, then the LFSCK should skip to handle" \
		"orphan MDT-object on this MDT. But other MDTs should not"

	check_mount_and_prep
	$LFS mkdir -i 0 $DIR/$tdir/d1
	$LFS mkdir -i 1 $DIR/$tdir/d1/a1
	$LFS mkdir -i 1 $DIR/$tdir/d1/a2

	$LFS mkdir -i 1 $DIR/$tdir/d2
	$LFS mkdir -i 0 $DIR/$tdir/d2/a1
	$LFS mkdir -i 0 $DIR/$tdir/d2/a2

	printf '%s\n' \
		"Inject failure stub on MDT0 to simulate the case that" \
		"d1/a1's name entry will be removed, but the d1/a1's object" \
		"and its linkEA are kept in the system. And the case that" \
		"d2/a2's name entry will be removed, but the d2/a2's object" \
		"and its linkEA are kept in the system."

	# fail_loc 0x1624 == OBD_FAIL_LFSCK_NO_NAMEENTRY
	do_facet mds1 $LCTL set_param fail_loc=0x1624
	do_facet mds2 $LCTL set_param fail_loc=0x1624
	if ! rmdir $DIR/$tdir/d1/a1; then
		error "(1) Fail to rmdir $DIR/$tdir/d1/a1"
	fi
	if ! rmdir $DIR/$tdir/d2/a2; then
		error "(2) Fail to rmdir $DIR/$tdir/d2/a2"
	fi
	do_facet mds1 $LCTL set_param fail_loc=0
	do_facet mds2 $LCTL set_param fail_loc=0

	cancel_lru_locks mdc
	cancel_lru_locks osc

	printf '%s\n' \
		"Inject failure, to simulate the MDT0 fail to handle" \
		"MDT1 LFSCK request during the first-stage scanning."
	# fail_loc 0x161c == OBD_FAIL_LFSCK_BAD_NETWORK
	do_facet mds2 $LCTL set_param fail_loc=0x161c fail_val=0

	echo "Trigger namespace LFSCK on all devices to find out orphan object"
	if ! $START_NAMESPACE -r -A; then
		error "(3) Fail to start LFSCK for namespace"
	fi

	if ! wait_update_facet mds1 "$LCTL get_param -n \
		mdd.$(facet_svc mds1).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "partial" 32; then
		error "(4) mds1 is not the expected 'partial'"
	fi

	if ! wait_update_facet mds2 "$LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32; then
		error "(5) mds2 is not the expected 'completed'"
	fi

	do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0

	local fixed=$(do_facet mds1 $LCTL get_param -n \
		      mdd.$(facet_svc mds1).lfsck_namespace |
		      awk '/^lost_dirent_repaired/ { print $2 }')
	if ! [ $fixed -eq 0 ]; then
		error "(6) Expect 0 fixed on mds1, but got: $fixed"
	fi

	fixed=$(do_facet mds2 $LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^lost_dirent_repaired/ { print $2 }')
	if ! [ $fixed -eq 1 ]; then
		error "(7) Expect 1 fixed on mds2, but got: $fixed"
	fi

	echo "Trigger namespace LFSCK on all devices again to cleanup"
	if ! $START_NAMESPACE -r -A; then
		error "(8) Fail to start LFSCK for namespace"
	fi

	for k in $(seq $MDSCOUNT); do
		# LFSCK status is queried every 30 seconds. If some
		# LFSCK_NOTIFY RPCs failed or were lost, wait long enough
		# for the status to be synced up.
		if ! wait_update_facet mds${k} "$LCTL get_param -n \
			mdd.$(facet_svc mds${k}).lfsck_namespace |
			awk '/^status/ { print \\\$2 }'" "completed" 32; then
			error "(9) MDS${k} is not the expected 'completed'"
		fi
	done

	local fixed=$(do_facet mds1 $LCTL get_param -n \
		      mdd.$(facet_svc mds1).lfsck_namespace |
		      awk '/^lost_dirent_repaired/ { print $2 }')
	if ! [ $fixed -eq 1 ]; then
		error "(10) Expect 1 fixed on mds1, but got: $fixed"
	fi

	fixed=$(do_facet mds2 $LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^lost_dirent_repaired/ { print $2 }')
	if ! [ $fixed -eq 0 ]; then
		error "(11) Expect 0 fixed on mds2, but got: $fixed"
	fi
}
run_test 28 "Skip the failed MDT(s) when handle orphan MDT-objects"
# test_29a: nlink larger than the number of name entries. A hard link is
# created under fail_loc 0x1625 so that stat reports 3 links; after the
# namespace LFSCK the test expects nlinks_repaired == 1 and a link count
# of 2.
test_29a() {
	printf '%s\n' \
		"The object's nlink attribute is larger than the object's known" \
		"name entries count. The LFSCK will repair the object's nlink" \
		"attribute to match the known name entries count"

	check_mount_and_prep

	if ! $LFS mkdir -i 0 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0"
	fi
	if ! touch $DIR/$tdir/d0/foo; then
		error "(2) Fail to create foo"
	fi

	printf '%s\n' \
		"Inject failure stub on MDT0 to simulate the case that foo's" \
		"nlink attribute is larger than its name entries count."

	# fail_loc 0x1625 == OBD_FAIL_LFSCK_MORE_NLINK
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1625
	if ! ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1; then
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	cancel_lru_locks mdc
	local links=$(stat --format=%h $DIR/$tdir/d0/foo)
	if ! [ $links -eq 3 ]; then
		error "(4) Cannot inject error: $links"
	fi

	echo "Trigger namespace LFSCK to repair the nlink count"
	if ! $START_NAMESPACE -r -A; then
		error "(5) Fail to start LFSCK for namespace"
	fi

	# Poll the namespace LFSCK status on the MDS until it is "completed".
	if ! wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32; then
		error "(6) unexpected status"
	fi

	local fixed=$($SHOW_NAMESPACE |
		      awk '/^nlinks_repaired/ { print $2 }')
	if ! [ $fixed -eq 1 ]; then
		error "(7) Fail to repair nlink count: $fixed"
	fi

	cancel_lru_locks mdc
	links=$(stat --format=%h $DIR/$tdir/d0/foo)
	if ! [ $links -eq 2 ]; then
		error "(8) Fail to repair nlink count: $links"
	fi
}
run_test 29a "LFSCK can repair bad nlink count (1)"
# test_29b: nlink smaller than the number of name entries. A hard link is
# created under fail_loc 0x1626 so that stat reports 1 link; after the
# namespace LFSCK the test expects nlinks_repaired == 1 and a link count
# of 2.
test_29b() {
	printf '%s\n' \
		"The object's nlink attribute is smaller than the object's known" \
		"name entries count. The LFSCK will repair the object's nlink" \
		"attribute to match the known name entries count"

	check_mount_and_prep

	if ! $LFS mkdir -i 0 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0"
	fi
	if ! touch $DIR/$tdir/d0/foo; then
		error "(2) Fail to create foo"
	fi

	printf '%s\n' \
		"Inject failure stub on MDT0 to simulate the case that foo's" \
		"nlink attribute is smaller than its name entries count."

	# fail_loc 0x1626 == OBD_FAIL_LFSCK_LESS_NLINK
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1626
	if ! ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1; then
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	cancel_lru_locks mdc
	local links=$(stat --format=%h $DIR/$tdir/d0/foo)
	if ! [ $links -eq 1 ]; then
		error "(4) Cannot inject error: $links"
	fi

	echo "Trigger namespace LFSCK to repair the nlink count"
	if ! $START_NAMESPACE -r -A; then
		error "(5) Fail to start LFSCK for namespace"
	fi

	# Poll the namespace LFSCK status on the MDS until it is "completed".
	if ! wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32; then
		error "(6) unexpected status"
	fi

	local fixed=$($SHOW_NAMESPACE |
		      awk '/^nlinks_repaired/ { print $2 }')
	if ! [ $fixed -eq 1 ]; then
		error "(7) Fail to repair nlink count: $fixed"
	fi

	cancel_lru_locks mdc
	links=$(stat --format=%h $DIR/$tdir/d0/foo)
	if ! [ $links -eq 2 ]; then
		error "(8) Fail to repair nlink count: $links"
	fi
}
run_test 29b "LFSCK can repair bad nlink count (2)"
# test_29c: hard links exceed what the linkEA records. With fail_loc
# 0x1627 set, a second hard link is added so that stat reports 3 links
# while "lfs fid2path" lists only 2 paths. The namespace LFSCK must then
# leave things alone: nlinks_repaired == 0, link count still 3 and
# fid2path still listing 2 paths.
test_29c() {
	echo "There are too many hard links to the object, and exceeds the"
	echo "object's linkEA limitation, as to NOT all the known name entries"
	echo "will be recorded in the linkEA. Under such case, LFSCK should"
	echo "skip the nlink verification for this object."

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's hard links exceed the object's linkEA limitation."

	#define OBD_FAIL_LFSCK_LINKEA_OVERFLOW	0x1627
	# fail_loc stays set until after the LFSCK run below.
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1627
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h2 ||
		error "(4) Fail to hard link to $DIR/$tdir/d0/foo"

	cancel_lru_locks mdc

	local count1=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ $count1 -eq 3 ] || error "(5) Stat failure: $count1"

	local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)
	$LFS fid2path $DIR $foofid
	local count2=$($LFS fid2path $DIR $foofid | wc -l)
	# The original line lacked "error", so a failed injection tried to
	# execute the message itself as a command.
	[ $count2 -eq 2 ] || error "(6) Fail to inject error: $count2"

	echo "Trigger namespace LFSCK to repair the nlink count"
	$START_NAMESPACE -r -A ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(8) unexpected status"
	}

	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
	local repaired=$($SHOW_NAMESPACE |
			 awk '/^nlinks_repaired/ { print $2 }')
	[ $repaired -eq 0 ] ||
		error "(9) Repair nlink count unexpcetedly: $repaired"

	cancel_lru_locks mdc

	count1=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ $count1 -eq 3 ] || error "(10) Stat failure: $count1"

	count2=$($LFS fid2path $DIR $foofid | wc -l)
	[ $count2 -eq 2 ] ||
		error "(11) Repaired something unexpectedly: $count2"
}
run_test 29c "Not verify nlink attr if hark links exceed linkEA limitation"
# test_30: recover orphans from the backend /lost+found (ldiskfs only).
# Objects are orphaned by removing their name entries under fail_loc
# 0x1624 (d0 was created without a linkEA via fail_loc 0x161d); the MDT
# is stopped, e2fsck is run on it, and the MDT is restarted. After the
# namespace LFSCK the test expects local_lost_found_moved >= 4, foo/f0
# back in place, and a "*-*-D-*" directory under
# .lustre/lost+found/MDT0000/ that contains d1 and f1.
test_30() {
	[ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
		skip "Only support backend /lost+found for ldiskfs" && return

	echo "The namespace LFSCK will move the orphans from backend"
	echo "/lost+found directory to normal client visible namespace"
	echo "or to global visible .lustre/lost+found/MDTxxxx/ directory"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/foo || error "(1) Fail to mkdir foo"
	# The message used to name f1 although f0 is the file being created.
	touch $DIR/$tdir/foo/f0 || error "(2) Fail to touch f0"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "directory d0 has no linkEA entry, then the LFSCK will"
	echo "move it into .lustre/lost+found/MDTxxxx/ later."

	#define OBD_FAIL_LFSCK_NO_LINKEA	0x161d
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
	mkdir $DIR/$tdir/foo/d0 || error "(3) Fail to mkdir d0"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	touch $DIR/$tdir/foo/d0/f1 || error "(4) Fail to touch f1"
	mkdir $DIR/$tdir/foo/d0/d1 || error "(5) Fail to mkdir d1"

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "object's name entry will be removed, but not destroy the"
	echo "object. Then backend e2fsck will handle it as orphan and"
	echo "add them into the backend /lost+found directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/foo/d0/d1 || error "(6) Fail to rmdir d1"
	rm -f $DIR/$tdir/foo/d0/f1 || error "(7) Fail to unlink f1"
	rmdir $DIR/$tdir/foo/d0 || error "(8) Fail to rmdir d0"
	rm -f $DIR/$tdir/foo/f0 || error "(9) Fail to unlink f0"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	umount_client $MOUNT || error "(10) Fail to stop client!"

	stop $SINGLEMDS || error "(11) Fail to stop MDT0"

	run_e2fsck $(facet_host $SINGLEMDS) $MDT_DEVNAME "-y" ||
		error "(12) Fail to run e2fsck"

	start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
		error "(13) Fail to start MDT0"

	echo "Trigger namespace LFSCK to recover backend orphans"
	$START_NAMESPACE -r -A ||
		error "(14) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(15) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^local_lost_found_moved/ { print $2 }')
	[ $repaired -ge 4 ] ||
		error "(16) Fail to recover backend orphans: $repaired"

	mount_client $MOUNT || error "(17) Fail to start client!"

	# The original line lacked "error", so a failed stat tried to
	# execute the message itself as a command.
	stat $DIR/$tdir/foo/f0 || error "(18) f0 is not recovered"

	ls -ail $MOUNT/.lustre/lost+found/

	echo "d0 should become orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(19) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0000/

	# Quote the pattern so the shell cannot expand it against the
	# current working directory before find sees it; keep the result
	# local to this test.
	local cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-*-D-*")
	[ ! -z "$cname" ] || error "(20) d0 is not recovered"

	stat ${cname}/d1 || error "(21) d0 is not recovered"
	stat ${cname}/f1 || error "(22) f1 is not recovered"
}
run_test 30 "LFSCK can recover the orphans from backend /lost+found"
# test_31a: bad name hash inside a striped directory, first shard.
# Requires at least two MDTs. Sub-directories are created with the client
# fail_loc 0x1628 and fail_val=0. After the namespace LFSCK the test
# expects name_hash_repaired >= 1 on the first MDT; after a client
# remount every entry must be stat-able and removable.
test_31a() {
	if [ $MDSCOUNT -lt 2 ]; then
		skip "The test needs at least 2 MDTs" && return
	fi

	printf '%s\n' \
		"For the name entry under a striped directory, if the name" \
		"hash does not match the shard, then the LFSCK will repair" \
		"the bad name entry"

	check_mount_and_prep

	if ! $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir; then
		error "(1) Fail to create striped directory"
	fi

	printf '%s\n' \
		"Inject failure stub on client to simulate the case that" \
		"some name entry should be inserted into other non-first" \
		"shard, but inserted into the first shard by wrong"

	# fail_loc 0x1628 == OBD_FAIL_LFSCK_BAD_NAME_HASH (set on the client)
	$LCTL set_param fail_loc=0x1628 fail_val=0
	if ! createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT; then
		error "(2) Fail to create file under striped directory"
	fi
	$LCTL set_param fail_loc=0 fail_val=0

	echo "Trigger namespace LFSCK to repair bad name hash"
	if ! $START_NAMESPACE -r -A; then
		error "(3) Fail to start LFSCK for namespace"
	fi

	# Poll the namespace LFSCK status on the MDS until it is "completed".
	if ! wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32; then
		error "(4) unexpected status"
	fi

	local fixed=$($SHOW_NAMESPACE |
		      awk '/^name_hash_repaired/ { print $2 }')
	if ! [ $fixed -ge 1 ]; then
		error "(5) Fail to repair bad name hash: $fixed"
	fi

	# Remount the client before checking the repaired entries.
	if ! umount_client $MOUNT; then
		error "(6) umount failed"
	fi
	if ! mount_client $MOUNT; then
		error "(7) mount failed"
	fi

	for ((i = 0; i < $MDSCOUNT; i++)); do
		if ! stat $DIR/$tdir/striped_dir/d$i; then
			error "(8) Fail to stat d$i after LFSCK"
		fi
		if ! rmdir $DIR/$tdir/striped_dir/d$i; then
			error "(9) Fail to unlink d$i after LFSCK"
		fi
	done

	if ! rmdir $DIR/$tdir/striped_dir; then
		error "(10) Fail to remove the striped directory after LFSCK"
	fi
}
run_test 31a "The LFSCK can find/repair the name entry with bad name hash (1)"
# test_31b: bad name hash inside a striped directory, second shard.
# Requires at least two MDTs. Same flow as the first-shard variant but
# with fail_val=1, and both the status and the name_hash_repaired counter
# (expected >= 1) are read from mds2.
test_31b() {
	if [ $MDSCOUNT -lt 2 ]; then
		skip "The test needs at least 2 MDTs" && return
	fi

	printf '%s\n' \
		"For the name entry under a striped directory, if the name" \
		"hash does not match the shard, then the LFSCK will repair" \
		"the bad name entry"

	check_mount_and_prep

	if ! $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir; then
		error "(1) Fail to create striped directory"
	fi

	printf '%s\n' \
		"Inject failure stub on client to simulate the case that" \
		"some name entry should be inserted into other non-second" \
		"shard, but inserted into the secod shard by wrong"

	# fail_loc 0x1628 == OBD_FAIL_LFSCK_BAD_NAME_HASH (set on the client)
	$LCTL set_param fail_loc=0x1628 fail_val=1
	if ! createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT; then
		error "(2) Fail to create file under striped directory"
	fi
	$LCTL set_param fail_loc=0 fail_val=0

	echo "Trigger namespace LFSCK to repair bad name hash"
	if ! $START_NAMESPACE -r -A; then
		error "(3) Fail to start LFSCK for namespace"
	fi

	# Poll the namespace LFSCK status on mds2 until it is "completed".
	if ! wait_update_facet mds2 "$LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32; then
		error "(4) unexpected status"
	fi

	local fixed=$(do_facet mds2 $LCTL get_param -n \
		      mdd.$(facet_svc mds2).lfsck_namespace |
		      awk '/^name_hash_repaired/ { print $2 }')
	if ! [ $fixed -ge 1 ]; then
		error "(5) Fail to repair bad name hash: $fixed"
	fi

	# Remount the client before checking the repaired entries.
	if ! umount_client $MOUNT; then
		error "(6) umount failed"
	fi
	if ! mount_client $MOUNT; then
		error "(7) mount failed"
	fi

	for ((i = 0; i < $MDSCOUNT; i++)); do
		if ! stat $DIR/$tdir/striped_dir/d$i; then
			error "(8) Fail to stat d$i after LFSCK"
		fi
		if ! rmdir $DIR/$tdir/striped_dir/d$i; then
			error "(9) Fail to unlink d$i after LFSCK"
		fi
	done

	if ! rmdir $DIR/$tdir/striped_dir; then
		error "(10) Fail to remove the striped directory after LFSCK"
	fi
}
run_test 31b "The LFSCK can find/repair the name entry with bad name hash (2)"
# Test 31c: when the master MDT-object of a striped directory lost its master
# LMV EA and nothing was created under it afterwards, namespace LFSCK must
# re-generate the master LMV EA.
# The test is skipped unless at least 2 MDTs are configured.
3863 [ $MDSCOUNT -lt 2 ] &&
3864 skip "The test needs at least 2 MDTs" && return
3867 echo "For some reason, the master MDT-object of the striped directory"
3868 echo "may lost its master LMV EA. If nobody created files under the"
3869 echo "master directly after the master LMV EA lost, then the LFSCK"
3870 echo "should re-generate the master LMV EA."
3873 check_mount_and_prep
3875 echo "Inject failure stub on MDT0 to simulate the case that the"
3876 echo "master MDT-object of the striped directory lost the LMV EA."
# Arm the fault on $SINGLEMDS (server side, via do_facet), create the striped
# directory while it is armed, then disarm it again.
3878 #define OBD_FAIL_LFSCK_LOST_MASTER_LMV 0x1629
3879 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
3880 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3881 error "(1) Fail to create striped directory"
3882 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
3884 echo "Trigger namespace LFSCK to re-generate master LMV EA"
# START_NAMESPACE runs "lctl lfsck_start -M $MDT_DEV -t namespace" on
# $SINGLEMDS; -r and -A are forwarded to it (reset the scan / cover all MDTs
# per the lctl usage -- confirm).
3885 $START_NAMESPACE -r -A ||
3886 error "(2) Fail to start LFSCK for namespace"
# Poll the "status" field of lfsck_namespace on $SINGLEMDS until it reads
# "completed"; 32 is the wait limit handed to wait_update_facet (presumably
# seconds -- confirm).
3888 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3889 mdd.${MDT_DEV}.lfsck_namespace |
3890 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3892 error "(3) unexpected status"
# Exactly one striped directory must be reported as repaired
# (striped_dirs_repaired counter of the namespace LFSCK report).
3895 local repaired=$($SHOW_NAMESPACE |
3896 awk '/^striped_dirs_repaired/ { print $2 }')
3897 [ $repaired -eq 1 ] ||
3898 error "(4) Fail to re-generate master LMV EA: $repaired"
# Remount the client before verifying (presumably to drop client-side cached
# state -- confirm).
3900 umount_client $MOUNT || error "(5) umount failed"
3901 mount_client $MOUNT || error "(6) mount failed"
# The repaired directory must list as empty; any ls output fails the test.
3903 local empty=$(ls $DIR/$tdir/striped_dir/)
3904 [ -z "$empty" ] || error "(7) The master LMV EA is not repaired: $empty"
3906 rmdir $DIR/$tdir/striped_dir ||
3907 error "(8) Fail to remove the striped directory after LFSCK"
# Register the test with the framework under id 31c.
3909 run_test 31c "Re-generate the lost master LMV EA for striped directory"
# Test 31d: when the master MDT-object of a striped directory lost its master
# LMV EA and a file WAS created under it afterwards, namespace LFSCK must not
# re-generate the EA; the directory is expected to become read-only instead.
# The test is skipped unless at least 2 MDTs are configured.
3912 [ $MDSCOUNT -lt 2 ] &&
3913 skip "The test needs at least 2 MDTs" && return
# NOTE(review): the fifth message below misspells "directory" as "dirctory";
# it is runtime output, so it is left untouched here.
3916 echo "For some reason, the master MDT-object of the striped directory"
3917 echo "may lost its master LMV EA. If somebody created files under the"
3918 echo "master directly after the master LMV EA lost, then the LFSCK"
3919 echo "should NOT re-generate the master LMV EA, instead, it should"
3920 echo "change the broken striped dirctory as read-only to prevent"
3921 echo "further damage"
3924 check_mount_and_prep
3926 echo "Inject failure stub on MDT0 to simulate the case that the"
3927 echo "master MDT-object of the striped directory lost the LMV EA."
# Arm the fault on $SINGLEMDS (server side, via do_facet), create the striped
# directory while it is armed, then disarm it again.
3929 #define OBD_FAIL_LFSCK_LOST_MASTER_LMV 0x1629
3930 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
3931 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3932 error "(1) Fail to create striped directory"
3933 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0
# Remount the client, then modify the broken directory by creating a file
# in it before LFSCK runs.
3935 umount_client $MOUNT || error "(2) umount failed"
3936 mount_client $MOUNT || error "(3) mount failed"
3938 touch $DIR/$tdir/striped_dir/dummy ||
3939 error "(4) Fail to touch under broken striped directory"
3941 echo "Trigger namespace LFSCK to find out the inconsistency"
# START_NAMESPACE runs "lctl lfsck_start -M $MDT_DEV -t namespace" on
# $SINGLEMDS; -r and -A are forwarded to it (reset the scan / cover all MDTs
# per the lctl usage -- confirm).
3942 $START_NAMESPACE -r -A ||
3943 error "(5) Fail to start LFSCK for namespace"
# Poll the "status" field of lfsck_namespace on $SINGLEMDS until it reads
# "completed"; 32 is the wait limit handed to wait_update_facet (presumably
# seconds -- confirm).
3945 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3946 mdd.${MDT_DEV}.lfsck_namespace |
3947 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
3949 error "(6) unexpected status"
# No striped directory may be reported as repaired in this scenario.
3952 local repaired=$($SHOW_NAMESPACE |
3953 awk '/^striped_dirs_repaired/ { print $2 }')
3954 [ $repaired -eq 0 ] ||
3955 error "(7) Re-generate master LMV EA unexpected: $repaired"
# The file created before LFSCK must still be visible.
3957 stat $DIR/$tdir/striped_dir/dummy ||
3958 error "(8) Fail to stat $DIR/$tdir/striped_dir/dummy"
# Creating a new file is expected to FAIL; a successful touch triggers
# error (9).
3960 touch $DIR/$tdir/striped_dir/foo &&
3961 error "(9) The broken striped directory should be read-only"
# Clear the immutable attribute so the directory can be cleaned up.
3963 chattr -i $DIR/$tdir/striped_dir ||
3964 error "(10) Fail to chattr on the broken striped directory"
# NOTE(review): "dummy" is not removed in the visible lines before this rmdir;
# confirm that rmdir is expected to succeed in that state.
3966 rmdir $DIR/$tdir/striped_dir ||
3967 error "(11) Fail to remove the striped directory after LFSCK"
# Register the test with the framework under id 31d.
3969 run_test 31d "Set broken striped directory (modified after broken) as read-only"
# Test 31e: a slave MDT-object of a striped directory lost its slave LMV EA;
# namespace LFSCK must re-generate it. This variant injects the fault with
# fail_val=0 and reads the result from $SINGLEMDS.
# The test is skipped unless at least 2 MDTs are configured.
3972 [ $MDSCOUNT -lt 2 ] &&
3973 skip "The test needs at least 2 MDTs" && return
3976 echo "For some reason, the slave MDT-object of the striped directory"
3977 echo "may lost its slave LMV EA. The LFSCK should re-generate the"
3978 echo "slave LMV EA."
3981 check_mount_and_prep
3983 echo "Inject failure stub on MDT0 to simulate the case that the"
3984 echo "slave MDT-object (that resides on the same MDT as the master"
3985 echo "MDT-object resides on) lost the LMV EA."
# Arm the fault on $SINGLEMDS (server side, via do_facet), create the striped
# directory while it is armed, then disarm it again. fail_val=0 presumably
# selects the shard co-located with the master object, per the message
# above -- confirm against the fail_loc handler.
3987 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
3988 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=0
3989 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
3990 error "(1) Fail to create striped directory"
3991 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
3993 echo "Trigger namespace LFSCK to re-generate slave LMV EA"
# START_NAMESPACE runs "lctl lfsck_start -M $MDT_DEV -t namespace" on
# $SINGLEMDS; -r and -A are forwarded to it (reset the scan / cover all MDTs
# per the lctl usage -- confirm).
3994 $START_NAMESPACE -r -A ||
3995 error "(2) Fail to start LFSCK for namespace"
# Poll the "status" field of lfsck_namespace on $SINGLEMDS until it reads
# "completed"; 32 is the wait limit handed to wait_update_facet (presumably
# seconds -- confirm).
3997 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
3998 mdd.${MDT_DEV}.lfsck_namespace |
3999 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4001 error "(3) unexpected status"
# Exactly one shard must be reported as repaired (striped_shards_repaired
# counter of the namespace LFSCK report).
4004 local repaired=$($SHOW_NAMESPACE |
4005 awk '/^striped_shards_repaired/ { print $2 }')
4006 [ $repaired -eq 1 ] ||
4007 error "(4) Fail to re-generate slave LMV EA: $repaired"
4009 rmdir $DIR/$tdir/striped_dir ||
4010 error "(5) Fail to remove the striped directory after LFSCK"
# Register the test with the framework under id 31e.
4012 run_test 31e "Re-generate the lost slave LMV EA for striped directory (1)"
# Test 31f: a slave MDT-object of a striped directory lost its slave LMV EA;
# namespace LFSCK must re-generate it. This variant injects the fault with
# fail_val=1 and reads the result from mds2.
# The test is skipped unless at least 2 MDTs are configured.
4015 [ $MDSCOUNT -lt 2 ] &&
4016 skip "The test needs at least 2 MDTs" && return
4019 echo "For some reason, the slave MDT-object of the striped directory"
4020 echo "may lost its slave LMV EA. The LFSCK should re-generate the"
4021 echo "slave LMV EA."
4024 check_mount_and_prep
# NOTE(review): the second message below misspells "different" as "differnt";
# it is runtime output, so it is left untouched here.
4026 echo "Inject failure stub on MDT0 to simulate the case that the"
4027 echo "slave MDT-object (that resides on differnt MDT as the master"
4028 echo "MDT-object resides on) lost the LMV EA."
# Arm the fault on $SINGLEMDS (server side, via do_facet), create the striped
# directory while it is armed, then disarm it again. fail_val=1 presumably
# selects a shard on another MDT than the master object, per the message
# above -- confirm against the fail_loc handler.
4030 #define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
4031 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=1
4032 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4033 error "(1) Fail to create striped directory"
4034 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4036 echo "Trigger namespace LFSCK to re-generate slave LMV EA"
# LFSCK is started on $SINGLEMDS via START_NAMESPACE with -r -A forwarded to
# "lctl lfsck_start" (reset the scan / cover all MDTs per the lctl usage --
# confirm); the outcome is then checked on mds2.
4037 $START_NAMESPACE -r -A ||
4038 error "(2) Fail to start LFSCK for namespace"
# Poll the "status" field of mds2's lfsck_namespace until it reads
# "completed"; 32 is the wait limit handed to wait_update_facet (presumably
# seconds -- confirm).
4040 wait_update_facet mds2 "$LCTL get_param -n \
4041 mdd.$(facet_svc mds2).lfsck_namespace |
4042 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
4043 error "(3) unexpected status"
# Exactly one shard must be reported as repaired by mds2
# (striped_shards_repaired counter).
4045 local repaired=$(do_facet mds2 $LCTL get_param -n \
4046 mdd.$(facet_svc mds2).lfsck_namespace |
4047 awk '/^striped_shards_repaired/ { print $2 }')
4048 [ $repaired -eq 1 ] ||
4049 error "(4) Fail to re-generate slave LMV EA: $repaired"
4051 rmdir $DIR/$tdir/striped_dir ||
4052 error "(5) Fail to remove the striped directory after LFSCK"
# Register the test with the framework under id 31f.
4054 run_test 31f "Re-generate the lost slave LMV EA for striped directory (2)"
# Test 31g: the stripe index stored in a slave LMV EA is corrupted; namespace
# LFSCK must repair the slave LMV EA so the directory is usable again.
# The test is skipped unless at least 2 MDTs are configured.
4057 [ $MDSCOUNT -lt 2 ] &&
4058 skip "The test needs at least 2 MDTs" && return
4061 echo "For some reason, the stripe index in the slave LMV EA is"
4062 echo "corrupted. The LFSCK should repair the slave LMV EA."
4065 check_mount_and_prep
4067 echo "Inject failure stub on MDT0 to simulate the case that the"
4068 echo "slave LMV EA on the first shard of the striped directory"
4069 echo "claims the same index as the second shard claims"
# Arm the fault on $SINGLEMDS (server side, via do_facet), create the striped
# directory while it is armed, then disarm it again.
4071 #define OBD_FAIL_LFSCK_BAD_SLAVE_LMV 0x162b
4072 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162b fail_val=0
4073 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4074 error "(1) Fail to create striped directory"
4075 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4077 echo "Trigger namespace LFSCK to repair the slave LMV EA"
# START_NAMESPACE runs "lctl lfsck_start -M $MDT_DEV -t namespace" on
# $SINGLEMDS; -r and -A are forwarded to it (reset the scan / cover all MDTs
# per the lctl usage -- confirm).
4078 $START_NAMESPACE -r -A ||
4079 error "(2) Fail to start LFSCK for namespace"
# Poll the "status" field of lfsck_namespace on $SINGLEMDS until it reads
# "completed"; 32 is the wait limit handed to wait_update_facet (presumably
# seconds -- confirm).
4081 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4082 mdd.${MDT_DEV}.lfsck_namespace |
4083 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4085 error "(3) unexpected status"
# Exactly one shard must be reported as repaired (striped_shards_repaired
# counter of the namespace LFSCK report).
4088 local repaired=$($SHOW_NAMESPACE |
4089 awk '/^striped_shards_repaired/ { print $2 }')
4090 [ $repaired -eq 1 ] ||
4091 error "(4) Fail to repair slave LMV EA: $repaired"
# Remount the client before verifying (presumably to drop client-side cached
# state -- confirm).
4093 umount_client $MOUNT || error "(5) umount failed"
4094 mount_client $MOUNT || error "(6) mount failed"
# The repaired directory must accept file creation and removal.
4096 touch $DIR/$tdir/striped_dir/foo ||
4097 error "(7) Fail to touch file after the LFSCK"
4099 rm -f $DIR/$tdir/striped_dir/foo ||
4100 error "(8) Fail to unlink file after the LFSCK"
4102 rmdir $DIR/$tdir/striped_dir ||
4103 error "(9) Fail to remove the striped directory after LFSCK"
# Register the test with the framework under id 31g.
4105 run_test 31g "Repair the corrupted slave LMV EA"
# Test 31h: a shard's name entry inside a striped directory is corrupted;
# namespace LFSCK must repair that name entry so the directory is usable.
# The test is skipped unless at least 2 MDTs are configured.
4108 [ $MDSCOUNT -lt 2 ] &&
4109 skip "The test needs at least 2 MDTs" && return
4112 echo "For some reason, the shard's name entry in the striped"
4113 echo "directory may be corrupted. The LFSCK should repair the"
4114 echo "bad shard's name entry."
4117 check_mount_and_prep
4119 echo "Inject failure stub on MDT0 to simulate the case that the"
4120 echo "first shard's name entry in the striped directory claims"
4121 echo "the same index as the second shard's name entry claims."
# Arm the fault on $SINGLEMDS (server side, via do_facet), create the striped
# directory while it is armed, then disarm it again.
4123 #define OBD_FAIL_LFSCK_BAD_SLAVE_NAME 0x162c
4124 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162c fail_val=0
4125 $LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
4126 error "(1) Fail to create striped directory"
4127 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
4129 echo "Trigger namespace LFSCK to repair the shard's name entry"
# START_NAMESPACE runs "lctl lfsck_start -M $MDT_DEV -t namespace" on
# $SINGLEMDS; -r and -A are forwarded to it (reset the scan / cover all MDTs
# per the lctl usage -- confirm).
4130 $START_NAMESPACE -r -A ||
4131 error "(2) Fail to start LFSCK for namespace"
# Poll the "status" field of lfsck_namespace on $SINGLEMDS until it reads
# "completed"; 32 is the wait limit handed to wait_update_facet (presumably
# seconds -- confirm).
4133 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
4134 mdd.${MDT_DEV}.lfsck_namespace |
4135 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
4137 error "(3) unexpected status"
# Exactly one directory entry must be reported as repaired (dirent_repaired
# counter of the namespace LFSCK report).
4140 local repaired=$($SHOW_NAMESPACE |
4141 awk '/^dirent_repaired/ { print $2 }')
4142 [ $repaired -eq 1 ] ||
4143 error "(4) Fail to repair shard's name entry: $repaired"
# Remount the client before verifying (presumably to drop client-side cached
# state -- confirm).
4145 umount_client $MOUNT || error "(5) umount failed"
4146 mount_client $MOUNT || error "(6) mount failed"
# The repaired directory must accept file creation and removal.
4148 touch $DIR/$tdir/striped_dir/foo ||
4149 error "(7) Fail to touch file after the LFSCK"
4151 rm -f $DIR/$tdir/striped_dir/foo ||
4152 error "(8) Fail to unlink file after the LFSCK"
4154 rmdir $DIR/$tdir/striped_dir ||
4155 error "(9) Fail to remove the striped directory after LFSCK"
# Register the test with the framework under id 31h.
4157 run_test 31h "Repair the corrupted shard's name entry"
# All tests are registered above. Put back the MDSSIZE, OSTSIZE and OSTCOUNT
# values captured in the SAVED_* variables at the top of the script, undoing
# the overrides applied for this test run (e.g. the OSTCOUNT cap of 4).
4159 # restore MDS/OST size
4160 MDSSIZE=${SAVED_MDSSIZE}
4161 OSTSIZE=${SAVED_OSTSIZE}
4162 OSTCOUNT=${SAVED_OSTCOUNT}
4164 # cleanup the system at last