3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
20 require_dsh_mds || exit 0
24 SAVED_MDSSIZE=${MDSSIZE}
25 SAVED_OSTSIZE=${OSTSIZE}
26 SAVED_OSTCOUNT=${OSTCOUNT}
27 # Use small MDS and OST sizes to speed up formatting.
28 # Do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size.
31 # There is no need for many OSTs; limit the count to reduce the format/start/stop overhead.
32 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
34 # build up a clean test environment.
38 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
39 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
# Server-version and backend gating: each check below appends test names
# to ALWAYS_EXCEPT when its condition holds.
# Test 2c is excluded when the $SINGLEMDS version is at or below 2.4.90
# (note the -le here, unlike the -lt comparisons that follow).
42 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
43 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
# Tests 11 through 21 are excluded when ost1 is older than 2.5.55.
45 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
46 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
# Tests 2d, 2e, 3 and 22 through 31 are excluded when $SINGLEMDS is older
# than 2.6.50.
48 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
49 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27 28 29 30 31"
51 # DNE does not support striped directories on a zfs-based backend yet,
# so test 31 is excluded whenever facet_fstype reports anything other
# than ldiskfs for $SINGLEMDS.
52 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
53 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 31"
# Names of the first MDT and the first OST of the filesystem; they are used
# below as the -M argument of lfsck_start/lfsck_stop and inside the
# get_param paths.
57 MDT_DEV="${FSNAME}-MDT0000"
58 OST_DEV="${FSNAME}-OST0000"
# Result of mdsdevname called with $SINGLEMDS after every "mds" substring
# has been removed from it; the tests pass this value to "start" when they
# restart the MDS.
59 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# The variables below hold command strings, not results. The tests expand
# them unquoted (e.g. "$START_NAMESPACE -r") so the string is word-split
# into a command and extra options can be appended at the call site.
# Start namespace / layout LFSCK on ${MDT_DEV}, run through do_facet on
# $SINGLEMDS.
60 START_NAMESPACE="do_facet $SINGLEMDS \
61 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
62 START_LAYOUT="do_facet $SINGLEMDS \
63 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
# Start layout LFSCK on ${OST_DEV}, run through do_facet on ost1.
64 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
# Stop LFSCK on ${MDT_DEV}.
65 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the lfsck_namespace / lfsck_layout parameter of ${MDT_DEV}
# (get_param -n); the tests pipe this output into awk to pick out single
# fields such as "status" or "flags".
66 SHOW_NAMESPACE="do_facet $SINGLEMDS \
67 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
68 SHOW_LAYOUT="do_facet $SINGLEMDS \
69 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
# Same for the layout LFSCK state of ${OST_DEV}, read under obdfilter on ost1.
70 SHOW_LAYOUT_ON_OST="do_facet ost1 \
71 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Mount options handed to "start" for the MDS. The noscrub variant is the
# one used where the tests announce starting the MDS "with disabling OI
# scrub".
72 MOUNT_OPTS_SCRUB="-o user_xattr"
73 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
82 echo "preparing... $nfiles * $ndirs files will be created $(date)."
83 if [ ! -z $igif ]; then
84 #define OBD_FAIL_FID_IGIF 0x1504
85 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
88 cp $LUSTRE/tests/*.sh $DIR/$tdir/
89 if [ $ndirs -gt 0 ]; then
90 createmany -d $DIR/$tdir/d $ndirs
91 createmany -m $DIR/$tdir/f $ndirs
92 if [ $nfiles -gt 0 ]; then
93 for ((i = 0; i < $ndirs; i++)); do
94 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
95 /dev/null || error "createmany $nfiles"
98 createmany -d $DIR/$tdir/e $ndirs
101 if [ ! -z $igif ]; then
102 touch $DIR/$tdir/dummy
103 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
106 echo "prepared $(date)."
112 #define OBD_FAIL_LFSCK_DELAY1 0x1600
113 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
114 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
116 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
118 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
119 [ "$STATUS" == "scanning-phase1" ] ||
120 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
122 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
124 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
125 [ "$STATUS" == "stopped" ] ||
126 error "(6) Expect 'stopped', but got '$STATUS'"
128 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
130 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
131 [ "$STATUS" == "scanning-phase1" ] ||
132 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
134 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
135 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
136 mdd.${MDT_DEV}.lfsck_namespace |
137 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
139 error "(9) unexpected status"
142 local repaired=$($SHOW_NAMESPACE |
143 awk '/^updated_phase1/ { print $2 }')
144 [ $repaired -eq 0 ] ||
145 error "(10) Expect nothing to be repaired, but got: $repaired"
147 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
148 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
149 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
150 mdd.${MDT_DEV}.lfsck_namespace |
151 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
153 error "(12) unexpected status"
156 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
157 [ $((scanned1 + 1)) -eq $scanned2 ] ||
158 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
160 echo "stopall, should NOT crash LU-3649"
161 stopall || error "(14) Fail to stopall"
163 run_test 0 "Control LFSCK manually"
166 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
167 skip "OI Scrub not implemented for ZFS" && return
171 #define OBD_FAIL_FID_INDIR 0x1501
172 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
173 touch $DIR/$tdir/dummy
175 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
177 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
178 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
179 mdd.${MDT_DEV}.lfsck_namespace |
180 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
182 error "(4) unexpected status"
185 local repaired=$($SHOW_NAMESPACE |
186 awk '/^dirent_repaired/ { print $2 }')
187 # for interop with old server
188 [ -z "$repaired" ] &&
189 repaired=$($SHOW_NAMESPACE |
190 awk '/^updated_phase1/ { print $2 }')
192 [ $repaired -eq 1 ] ||
193 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
195 mount_client $MOUNT || error "(6) Fail to start client!"
197 #define OBD_FAIL_FID_LOOKUP 0x1505
198 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
199 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
201 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
203 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
207 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
208 skip "OI Scrub not implemented for ZFS" && return
212 #define OBD_FAIL_FID_INLMA 0x1502
213 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
214 touch $DIR/$tdir/dummy
216 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
218 #define OBD_FAIL_FID_NOLMA 0x1506
219 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
220 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
221 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
222 mdd.${MDT_DEV}.lfsck_namespace |
223 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
225 error "(4) unexpected status"
228 local repaired=$($SHOW_NAMESPACE |
229 awk '/^dirent_repaired/ { print $2 }')
230 # for interop with old server
231 [ -z "$repaired" ] &&
232 repaired=$($SHOW_NAMESPACE |
233 awk '/^updated_phase1/ { print $2 }')
235 [ $repaired -eq 1 ] ||
236 error "(5) Fail to repair the missing FID-in-LMA: $repaired"
238 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
239 mount_client $MOUNT || error "(6) Fail to start client!"
241 #define OBD_FAIL_FID_LOOKUP 0x1505
242 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
243 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
245 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
247 run_test 1b "LFSCK can find out and repair the missing FID-in-LMA"
252 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
253 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
254 touch $DIR/$tdir/dummy
256 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
258 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
259 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
260 mdd.${MDT_DEV}.lfsck_namespace |
261 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
263 error "(4) unexpected status"
266 local repaired=$($SHOW_NAMESPACE |
267 awk '/^linkea_repaired/ { print $2 }')
268 # for interop with old server
269 [ -z "$repaired" ] &&
270 repaired=$($SHOW_NAMESPACE |
271 awk '/^updated_phase2/ { print $2 }')
273 [ $repaired -eq 1 ] ||
274 error "(5) Fail to repair crashed linkEA: $repaired"
276 mount_client $MOUNT || error "(6) Fail to start client!"
278 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
279 error "(7) Fail to stat $DIR/$tdir/dummy"
281 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
282 local dummyname=$($LFS fid2path $DIR $dummyfid)
283 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
284 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
286 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
292 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
293 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
294 touch $DIR/$tdir/dummy
296 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
298 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
299 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
300 mdd.${MDT_DEV}.lfsck_namespace |
301 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
303 error "(4) unexpected status"
306 local repaired=$($SHOW_NAMESPACE |
307 awk '/^updated_phase2/ { print $2 }')
308 [ $repaired -eq 1 ] ||
309 error "(5) Fail to repair crashed linkEA: $repaired"
311 mount_client $MOUNT || error "(6) Fail to start client!"
313 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
314 error "(7) Fail to stat $DIR/$tdir/dummy"
316 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
317 local dummyname=$($LFS fid2path $DIR $dummyfid)
318 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
319 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
321 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
327 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
328 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
329 touch $DIR/$tdir/dummy
331 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
333 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
334 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
335 mdd.${MDT_DEV}.lfsck_namespace |
336 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
338 error "(4) unexpected status"
341 local repaired=$($SHOW_NAMESPACE |
342 awk '/^updated_phase2/ { print $2 }')
343 [ $repaired -eq 1 ] ||
344 error "(5) Fail to repair crashed linkEA: $repaired"
346 mount_client $MOUNT || error "(6) Fail to start client!"
348 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
349 error "(7) Fail to stat $DIR/$tdir/dummy"
351 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
352 local dummyname=$($LFS fid2path $DIR $dummyfid)
353 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
354 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
356 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
362 #define OBD_FAIL_LFSCK_NO_LINKEA 0x161d
363 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
364 touch $DIR/$tdir/dummy
366 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
368 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
369 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
370 mdd.${MDT_DEV}.lfsck_namespace |
371 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
373 error "(4) unexpected status"
376 local repaired=$($SHOW_NAMESPACE |
377 awk '/^linkea_repaired/ { print $2 }')
378 [ $repaired -eq 1 ] ||
379 error "(5) Fail to repair crashed linkEA: $repaired"
381 mount_client $MOUNT || error "(6) Fail to start client!"
383 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
384 error "(7) Fail to stat $DIR/$tdir/dummy"
386 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
387 local dummyname=$($LFS fid2path $DIR $dummyfid)
388 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
389 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
391 run_test 2d "LFSCK can recover the missing linkEA entry"
395 [ $MDSCOUNT -lt 2 ] &&
396 skip "We need at least 2 MDSes for this test" && return
400 $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT1"
402 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
403 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
404 $LFS mkdir -i 0 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT0"
405 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
407 $START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace!"
408 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
409 mdd.${MDT_DEV}.lfsck_namespace |
410 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
412 error "(4) unexpected status"
415 local repaired=$($SHOW_NAMESPACE |
416 awk '/^linkea_repaired/ { print $2 }')
417 [ $repaired -eq 1 ] ||
418 error "(5) Fail to repair crashed linkEA: $repaired"
420 local fid=$($LFS path2fid $DIR/$tdir/d0/d1)
421 local name=$($LFS fid2path $DIR $fid)
422 [ "$name" == "$DIR/$tdir/d0/d1" ] ||
423 error "(6) Fail to repair linkEA: $fid $name"
425 run_test 2e "namespace LFSCK can verify remote object linkEA"
431 mkdir $DIR/$tdir/dummy || error "(1) Fail to mkdir"
432 ln $DIR/$tdir/d0/f0 $DIR/$tdir/dummy/f0 || error "(2) Fail to hardlink"
433 ln $DIR/$tdir/d0/f1 $DIR/$tdir/dummy/f1 || error "(3) Fail to hardlink"
435 $LFS mkdir -i 0 $DIR/$tdir/edir || error "(4) Fail to mkdir"
436 touch $DIR/$tdir/edir/f0 || error "(5) Fail to touch"
437 touch $DIR/$tdir/edir/f1 || error "(6) Fail to touch"
439 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
440 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
441 ln $DIR/$tdir/edir/f0 $DIR/$tdir/edir/w0 || error "(7) Fail to hardlink"
443 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
444 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
445 ln $DIR/$tdir/edir/f1 $DIR/$tdir/edir/w1 || error "(8) Fail to hardlink"
447 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
449 $START_NAMESPACE -r || error "(9) Fail to start LFSCK for namespace!"
450 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
451 mdd.${MDT_DEV}.lfsck_namespace |
452 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
454 error "(10) unexpected status"
457 local checked=$($SHOW_NAMESPACE |
458 awk '/^checked_phase2/ { print $2 }')
459 [ $checked -ge 4 ] ||
460 error "(11) Fail to check multiple-linked object: $checked"
462 local repaired=$($SHOW_NAMESPACE |
463 awk '/^multiple_linked_repaired/ { print $2 }')
464 [ $repaired -ge 2 ] ||
465 error "(12) Fail to repair multiple-linked object: $repaired"
467 run_test 3 "LFSCK can verify multiple-linked objects"
471 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
472 skip "OI Scrub not implemented for ZFS" && return
475 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
476 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
478 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
479 echo "start $SINGLEMDS with disabling OI scrub"
480 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
481 error "(2) Fail to start MDS!"
483 #define OBD_FAIL_LFSCK_DELAY2 0x1601
484 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
485 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
486 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
487 mdd.${MDT_DEV}.lfsck_namespace |
488 awk '/^flags/ { print \\\$2 }'" "inconsistent" 32 || {
490 error "(5) unexpected status"
493 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
494 [ "$STATUS" == "scanning-phase1" ] ||
495 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
497 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
498 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
499 mdd.${MDT_DEV}.lfsck_namespace |
500 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
502 error "(7) unexpected status"
505 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
506 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
508 local repaired=$($SHOW_NAMESPACE |
509 awk '/^dirent_repaired/ { print $2 }')
510 # for interop with old server
511 [ -z "$repaired" ] &&
512 repaired=$($SHOW_NAMESPACE |
513 awk '/^updated_phase1/ { print $2 }')
515 [ $repaired -ge 9 ] ||
516 error "(9) Fail to re-generate FID-in-dirent: $repaired"
518 mount_client $MOUNT || error "(10) Fail to start client!"
520 #define OBD_FAIL_FID_LOOKUP 0x1505
521 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
522 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
523 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
525 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
529 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
530 skip "OI Scrub not implemented for ZFS" && return
533 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
534 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
536 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
537 echo "start $SINGLEMDS with disabling OI scrub"
538 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
539 error "(2) Fail to start MDS!"
541 #define OBD_FAIL_LFSCK_DELAY2 0x1601
542 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
543 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
544 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
545 mdd.${MDT_DEV}.lfsck_namespace |
546 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 32 || {
548 error "(5) unexpected status"
551 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
552 [ "$STATUS" == "scanning-phase1" ] ||
553 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
555 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
556 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
557 mdd.${MDT_DEV}.lfsck_namespace |
558 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
560 error "(7) unexpected status"
563 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
564 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
566 local repaired=$($SHOW_NAMESPACE |
567 awk '/^dirent_repaired/ { print $2 }')
568 # for interop with old server
569 [ -z "$repaired" ] &&
570 repaired=$($SHOW_NAMESPACE |
571 awk '/^updated_phase1/ { print $2 }')
573 [ $repaired -ge 2 ] ||
574 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
576 mount_client $MOUNT || error "(10) Fail to start client!"
578 #define OBD_FAIL_FID_LOOKUP 0x1505
579 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
580 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
582 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
584 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
585 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
586 local dummyname=$($LFS fid2path $DIR $dummyfid)
587 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
588 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
590 run_test 5 "LFSCK can handle IGIF object upgrading"
595 #define OBD_FAIL_LFSCK_DELAY1 0x1600
596 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
597 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
599 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
600 [ "$STATUS" == "scanning-phase1" ] ||
601 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
603 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
605 # Fail the LFSCK to guarantee there is at least one checkpoint
606 #define OBD_FAIL_LFSCK_FATAL1 0x1608
607 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
608 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
609 mdd.${MDT_DEV}.lfsck_namespace |
610 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
612 error "(4) unexpected status"
615 local POS0=$($SHOW_NAMESPACE |
616 awk '/^last_checkpoint_position/ { print $2 }' |
619 #define OBD_FAIL_LFSCK_DELAY1 0x1600
620 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
621 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
623 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
624 [ "$STATUS" == "scanning-phase1" ] ||
625 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
627 local POS1=$($SHOW_NAMESPACE |
628 awk '/^latest_start_position/ { print $2 }' |
630 [[ $POS0 -lt $POS1 ]] ||
631 error "(7) Expect larger than: $POS0, but got $POS1"
633 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
634 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
635 mdd.${MDT_DEV}.lfsck_namespace |
636 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
638 error "(8) unexpected status"
641 run_test 6a "LFSCK resumes from last checkpoint (1)"
646 #define OBD_FAIL_LFSCK_DELAY2 0x1601
647 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
648 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
650 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
651 [ "$STATUS" == "scanning-phase1" ] ||
652 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
654 # Sleep 5 sec to guarantee that we are in the directory scanning
656 # Fail the LFSCK to guarantee there is at least one checkpoint
657 #define OBD_FAIL_LFSCK_FATAL2 0x1609
658 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
659 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
660 mdd.${MDT_DEV}.lfsck_namespace |
661 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
663 error "(4) unexpected status"
666 local O_POS0=$($SHOW_NAMESPACE |
667 awk '/^last_checkpoint_position/ { print $2 }' |
670 local D_POS0=$($SHOW_NAMESPACE |
671 awk '/^last_checkpoint_position/ { print $4 }')
673 #define OBD_FAIL_LFSCK_DELAY2 0x1601
674 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
675 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
677 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
678 [ "$STATUS" == "scanning-phase1" ] ||
679 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
681 local O_POS1=$($SHOW_NAMESPACE |
682 awk '/^latest_start_position/ { print $2 }' |
684 local D_POS1=$($SHOW_NAMESPACE |
685 awk '/^latest_start_position/ { print $4 }')
687 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
688 [[ $O_POS0 -lt $O_POS1 ]] ||
689 error "(7.1) $O_POS1 is not larger than $O_POS0"
691 [[ $D_POS0 -lt $D_POS1 ]] ||
692 error "(7.2) $D_POS1 is not larger than $D_POS0"
695 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
696 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
697 mdd.${MDT_DEV}.lfsck_namespace |
698 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
700 error "(8) unexpected status"
703 run_test 6b "LFSCK resumes from last checkpoint (2)"
710 #define OBD_FAIL_LFSCK_DELAY2 0x1601
711 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
712 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
714 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
715 [ "$STATUS" == "scanning-phase1" ] ||
716 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
718 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
720 echo "stop $SINGLEMDS"
721 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
723 echo "start $SINGLEMDS"
724 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
725 error "(5) Fail to start MDS!"
727 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
728 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
729 mdd.${MDT_DEV}.lfsck_namespace |
730 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
732 error "(6) unexpected status"
735 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
741 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
742 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
743 for ((i = 0; i < 20; i++)); do
744 touch $DIR/$tdir/dummy${i}
747 #define OBD_FAIL_LFSCK_DELAY3 0x1602
748 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
749 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
750 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
751 mdd.${MDT_DEV}.lfsck_namespace |
752 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
754 error "(4) unexpected status"
757 echo "stop $SINGLEMDS"
758 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
760 echo "start $SINGLEMDS"
761 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
762 error "(6) Fail to start MDS!"
764 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
765 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
766 mdd.${MDT_DEV}.lfsck_namespace |
767 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
769 error "(7) unexpected status"
772 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
777 formatall > /dev/null
783 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
784 [ "$STATUS" == "init" ] ||
785 error "(2) Expect 'init', but got '$STATUS'"
787 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
788 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
789 mkdir $DIR/$tdir/crashed
791 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
792 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
793 for ((i = 0; i < 5; i++)); do
794 touch $DIR/$tdir/dummy${i}
797 umount_client $MOUNT || error "(3) Fail to stop client!"
799 #define OBD_FAIL_LFSCK_DELAY2 0x1601
800 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
801 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
803 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
804 [ "$STATUS" == "scanning-phase1" ] ||
805 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
807 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
809 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
810 [ "$STATUS" == "stopped" ] ||
811 error "(7) Expect 'stopped', but got '$STATUS'"
813 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
815 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
816 [ "$STATUS" == "scanning-phase1" ] ||
817 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
819 #define OBD_FAIL_LFSCK_FATAL2 0x1609
820 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
821 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
822 mdd.${MDT_DEV}.lfsck_namespace |
823 awk '/^status/ { print \\\$2 }'" "failed" 32 || {
825 error "(10) unexpected status"
828 #define OBD_FAIL_LFSCK_DELAY1 0x1600
829 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
830 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
832 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
833 [ "$STATUS" == "scanning-phase1" ] ||
834 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
836 #define OBD_FAIL_LFSCK_CRASH 0x160a
837 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
840 echo "stop $SINGLEMDS"
841 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
843 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
844 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
846 echo "start $SINGLEMDS"
847 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
848 error "(14) Fail to start MDS!"
850 local timeout=$(max_recovery_time)
853 while [ $timer -lt $timeout ]; do
854 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
855 mdt.${MDT_DEV}.recovery_status |
856 awk '/^status/ { print \\\$2 }'")
857 [ "$STATUS" != "RECOVERING" ] && break;
862 [ $timer != $timeout ] ||
863 error "(14.1) recovery timeout"
865 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
866 [ "$STATUS" == "crashed" ] ||
867 error "(15) Expect 'crashed', but got '$STATUS'"
869 #define OBD_FAIL_LFSCK_DELAY2 0x1601
870 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
871 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
873 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
874 [ "$STATUS" == "scanning-phase1" ] ||
875 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
877 echo "stop $SINGLEMDS"
878 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
880 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
881 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
883 echo "start $SINGLEMDS"
884 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
885 error "(19) Fail to start MDS!"
888 while [ $timer -lt $timeout ]; do
889 STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
890 mdt.${MDT_DEV}.recovery_status |
891 awk '/^status/ { print \\\$2 }'")
892 [ "$STATUS" != "RECOVERING" ] && break;
897 [ $timer != $timeout ] ||
898 error "(19.1) recovery timeout"
900 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
901 [ "$STATUS" == "paused" ] ||
902 error "(20) Expect 'paused', but got '$STATUS'"
904 #define OBD_FAIL_LFSCK_DELAY3 0x1602
905 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
907 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
908 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
909 mdd.${MDT_DEV}.lfsck_namespace |
910 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 32 || {
912 error "(22) unexpected status"
915 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
916 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
917 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
919 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
920 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
921 mdd.${MDT_DEV}.lfsck_namespace |
922 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
924 error "(24) unexpected status"
927 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
928 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
930 run_test 8 "LFSCK state machine"
933 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
934 skip "Testing on UP system, the speed may be inaccurate."
940 local BASE_SPEED1=100
942 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
945 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
946 [ "$STATUS" == "scanning-phase1" ] ||
947 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
949 local SPEED=$($SHOW_NAMESPACE |
950 awk '/^average_speed_phase1/ { print $2 }')
952 # There may be a timing error; normally it should be less than 2 seconds.
953 # We allow another 20% for scheduling error.
955 # MAX_MARGIN = 1.2 = 12 / 10
956 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
957 RUN_TIME1 * 12 / 10))
958 [ $SPEED -lt $MAX_SPEED ] ||
959 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
962 local BASE_SPEED2=300
964 do_facet $SINGLEMDS \
965 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
968 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
969 # MIN_MARGIN = 0.8 = 8 / 10
970 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
971 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
972 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
973 [ $SPEED -gt $MIN_SPEED ] || {
974 if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
975 error_ignore LU-5624 \
976 "(5.1) Got speed $SPEED, expected more than $MIN_SPEED"
979 "(5.2) Got speed $SPEED, expected more than $MIN_SPEED"
983 # MAX_MARGIN = 1.2 = 12 / 10
984 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
985 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
986 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
987 [ $SPEED -lt $MAX_SPEED ] ||
988 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
990 do_facet $SINGLEMDS \
991 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
993 wait_update_facet $SINGLEMDS \
994 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
995 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
996 error "(7) Failed to get expected 'completed'"
998 run_test 9a "LFSCK speed control (1)"
# Test 9b ("LFSCK speed control (2)"): checks that the namespace LFSCK
# average_speed_phase2 follows the configured speed limit, first at
# BASE_SPEED1 and then after the limit is raised to BASE_SPEED2.
# NOTE(review): RUN_TIME1, RUN_TIME2 and TIME_DIFF are used below but are
# not assigned in the lines shown here - confirm where they are set.
# Skip when /proc/cpuinfo has no line matching "processor.*: 1".
1001 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
1002 skip "Testing on UP system, the speed may be inaccurate."
# Create 50 directories and 50 files, then 50 files in each directory,
# while fail_loc 0x1604 is set on the MDS.
1008 echo "Preparing another 50 * 50 files (with error) at $(date)."
1009 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1010 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
1011 createmany -d $DIR/$tdir/d 50
1012 createmany -m $DIR/$tdir/f 50
1013 for ((i = 0; i < 50; i++)); do
1014 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
# Start a reset (-r) scan with fail_loc 0x160c set and wait for it to
# report 'stopped'; fail_loc is cleared again afterwards.
1017 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
1018 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
1019 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
1020 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1021 mdd.${MDT_DEV}.lfsck_namespace |
1022 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
1024 error "(5) unexpected status"
1027 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1028 echo "Prepared at $(date)."
# First measurement: start the scan limited to BASE_SPEED1 (-s) and expect
# it to be in 'scanning-phase2'.
1030 local BASE_SPEED1=50
1032 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
1035 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1036 [ "$STATUS" == "scanning-phase2" ] ||
1037 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
# $(( )) is integer arithmetic, so every division in the bounds below
# truncates towards zero.
1039 local SPEED=$($SHOW_NAMESPACE |
1040 awk '/^average_speed_phase2/ { print $2 }')
1041 # There may be time error, normally it should be less than 2 seconds.
1042 # We allow another 20% schedule error.
1044 # MAX_MARGIN = 1.2 = 12 / 10
1045 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
1046 RUN_TIME1 * 12 / 10))
1047 [ $SPEED -lt $MAX_SPEED ] ||
1048 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
# Second measurement: raise the limit on the running scan through the
# lfsck_speed_limit parameter, then re-read the average speed.
1050 # adjust speed limit
1051 local BASE_SPEED2=150
1053 do_facet $SINGLEMDS \
1054 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
1057 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
1058 # MIN_MARGIN = 0.8 = 8 / 10
1059 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
1060 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
1061 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# A too-low speed on a non-ldiskfs MDS is reported through
# error_ignore LU-5624.
1062 [ $SPEED -gt $MIN_SPEED ] || {
1063 if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
1064 error_ignore LU-5624 \
1065 "(9.1) Got speed $SPEED, expected more than $MIN_SPEED"
1068 "(9.2) Got speed $SPEED, expected more than $MIN_SPEED"
1072 # MAX_MARGIN = 1.2 = 12 / 10
1073 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
1074 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
1075 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
1076 [ $SPEED -lt $MAX_SPEED ] ||
1077 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the speed limit to 0 and wait for the scan to report 'completed'.
1079 do_facet $SINGLEMDS \
1080 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1081 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1082 mdd.${MDT_DEV}.lfsck_namespace |
1083 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1085 error "(11) unexpected status"
1088 run_test 9b "LFSCK speed control (2)"
# Test 10 ("System is available during LFSCK scanning"): runs ordinary
# namespace operations from the client while a speed-limited namespace
# LFSCK is in 'scanning-phase1', then lets the scan finish.
# Skipped when the MDS backend is not ldiskfs.
1092 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
1093 skip "lookup(..)/linkea on ZFS issue" && return
# Even-numbered d<i>/f<i> entries are created with fail_loc 0x1603 set,
# odd-numbered ones with fail_loc 0x1604 set.
1097 echo "Preparing more files with error at $(date)."
1098 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
1099 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
1101 for ((i = 0; i < 1000; i = $((i+2)))); do
1102 mkdir -p $DIR/$tdir/d${i}
1103 touch $DIR/$tdir/f${i}
1104 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
1107 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
1108 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
1110 for ((i = 1; i < 1000; i = $((i+2)))); do
1111 mkdir -p $DIR/$tdir/d${i}
1112 touch $DIR/$tdir/f${i}
1113 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
1116 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1117 echo "Prepared at $(date)."
# Add a hard link to f200, then remount the client.
1119 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
1121 umount_client $MOUNT
1122 mount_client $MOUNT || error "(3) Fail to start client!"
# Start a reset scan limited with -s 100 and confirm it is in phase 1.
1124 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
1127 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1128 [ "$STATUS" == "scanning-phase1" ] ||
1129 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Each of the following client operations must succeed while the scan runs:
# recursive ls, create, mkdir, unlink, recursive remove, rename, hard link
# and symbolic link.
1131 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
1133 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
1135 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
1137 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
1139 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
1141 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
1143 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
1145 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
1146 error "(14) Fail to softlink!"
# The scan is expected to still be in phase 1 after those operations.
1148 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1149 [ "$STATUS" == "scanning-phase1" ] ||
1150 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# Set the speed limit to 0 and wait for the scan to report 'completed'.
1152 do_facet $SINGLEMDS \
1153 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1154 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1155 mdd.${MDT_DEV}.lfsck_namespace |
1156 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1158 error "(16) unexpected status"
1161 run_test 10 "System is available during LFSCK scanning"
# Remove the LAST_ID file (and d0/0) under O/<idx> on OST number <ost>,
# working on a local mount of that OST obtained through mount_fstype.
# Called below as "ost_remove_lastid 1 0".
# Returns 1 if the local mount fails and 2 if the unmount fails.
# NOTE(review): ${ost} and ${idx} are not assigned in the lines shown here;
# presumably they are taken from $1 and $2 - confirm.
1164 ost_remove_lastid() {
1167 local rcmd="do_facet ost${ost}"
1169 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1171 # step 1: local mount
1172 mount_fstype ost${ost} || return 1
1173 # step 2: remove the specified LAST_ID
# The brace expansion yields two paths: .../LAST_ID and .../d0/0.
1174 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
1176 unmount_fstype ost${ost} || return 2
# Test 11a ("LFSCK can rebuild lost last_id"): removes LAST_ID on ost1,
# runs the layout LFSCK on that OST and expects the 'crashed_lastid' flag
# to appear during the scan and to be gone once the scan has completed.
1180 check_mount_and_prep
1181 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1182 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
1187 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
# Bring ost1 back with $MOUNT_OPTS_NOSCRUB as its mount options.
1189 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1190 error "(2) Fail to start ost1"
# fail_loc 0x160e with fail_val=3 is set on ost1 before the scan starts and
# cleared once the 'crashed_lastid' flag has been observed.
1192 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1193 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1195 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1196 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
1198 wait_update_facet ost1 "$LCTL get_param -n \
1199 obdfilter.${OST_DEV}.lfsck_layout |
1200 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1202 error "(5) unexpected status"
1205 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1207 wait_update_facet ost1 "$LCTL get_param -n \
1208 obdfilter.${OST_DEV}.lfsck_layout |
1209 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1211 error "(6) unexpected status"
# After completion the 'flags' field must be empty.
1214 echo "the LAST_ID(s) should have been rebuilt"
1215 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1216 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1218 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b ("LFSCK can rebuild crashed last_id"): creates objects on ost1
# while fail_loc 0x160d is set, restarts ost1, checks that the reported
# last_id went backwards, then runs the layout LFSCK and expects last_id to
# be back at the earlier value after another restart.
1221 check_mount_and_prep
1222 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1224 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1225 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1226 do_facet ost1 $LCTL set_param fail_loc=0x160d
1227 createmany -o $DIR/$tdir/f 64
# lastid1: value after the ':' on the last_id line containing 0x100000000,
# read before ost1 is stopped.
1228 local lastid1=$(do_facet ost1 "lctl get_param -n \
1229 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1230 awk -F: '{ print $2 }')
1232 umount_client $MOUNT
1233 stop ost1 || error "(1) Fail to stop ost1"
# ost1 is restarted with fail_loc 0x215 set; it is cleared at the end of
# the test.
1235 #define OBD_FAIL_OST_ENOSPC 0x215
1236 do_facet ost1 $LCTL set_param fail_loc=0x215
1238 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1239 error "(2) Fail to start ost1"
# Poll up to 60 times until last_id can be read again.
# NOTE(review): $lastid2 is unquoted in the test below; with an empty value
# it collapses to `[ ! -z ]`, which is false in bash, so polling continues.
1241 for ((i = 0; i < 60; i++)); do
1242 lastid2=$(do_facet ost1 "lctl get_param -n \
1243 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1244 awk -F: '{ print $2 }')
1245 [ ! -z $lastid2 ] && break;
1249 echo "the on-disk LAST_ID should be smaller than the expected one"
1250 [ $lastid1 -gt $lastid2 ] ||
1251 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1253 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1254 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1256 wait_update_facet ost1 "$LCTL get_param -n \
1257 obdfilter.${OST_DEV}.lfsck_layout |
1258 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1260 error "(6) unexpected status"
# Restart ost1 once more and wait for last_id to equal lastid1.
1263 stop ost1 || error "(7) Fail to stop ost1"
1265 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1266 error "(8) Fail to start ost1"
1268 echo "the on-disk LAST_ID should have been rebuilt"
1269 wait_update_facet ost1 "$LCTL get_param -n \
1270 obdfilter.${ost1_svc}.last_id | grep 0x100000000 |
1271 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
1272 $LCTL get_param -n obdfilter.${ost1_svc}.last_id
1273 error "(9) expect lastid1 0x100000000:$lastid1"
1276 do_facet ost1 $LCTL set_param fail_loc=0
1277 stopall || error "(10) Fail to stopall"
1279 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12 ("single command to trigger LFSCK on all devices"): starts and
# stops the namespace LFSCK, then the layout LFSCK, on every target with a
# single lctl command using -A, checking the status on each MDT (and, for
# the layout stop, each OST). Skipped when MDSCOUNT is below 2.
1282 [ $MDSCOUNT -lt 2 ] &&
1283 skip "We need at least 2 MDSes for test_12" && return
# One directory per MDT (index k - 1), each holding 100 files.
1285 check_mount_and_prep
1286 for k in $(seq $MDSCOUNT); do
1287 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1288 createmany -o $DIR/$tdir/${k}/f 100 ||
1289 error "(0) Fail to create 100 files."
# Namespace LFSCK: start with -s 1 so every MDT can be observed in phase 1.
1292 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1293 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1294 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1296 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1297 for k in $(seq $MDSCOUNT); do
1298 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1299 mdd.$(facet_svc mds${k}).lfsck_namespace |
1300 awk '/^status/ { print $2 }')
1301 [ "$STATUS" == "scanning-phase1" ] ||
1302 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1305 echo "Stop namespace LFSCK on all targets by single lctl command."
1306 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1307 error "(4) Fail to stop LFSCK on all devices!"
1309 echo "All the LFSCK targets should be in 'stopped' status."
1310 for k in $(seq $MDSCOUNT); do
1311 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1312 mdd.$(facet_svc mds${k}).lfsck_namespace |
1313 awk '/^status/ { print $2 }')
1314 [ "$STATUS" == "stopped" ] ||
1315 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
# Restart with -s 0 and wait for every MDT to report 'completed'.
1318 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1319 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1320 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1322 echo "All the LFSCK targets should be in 'completed' status."
1323 for k in $(seq $MDSCOUNT); do
1324 wait_update_facet mds${k} "$LCTL get_param -n \
1325 mdd.$(facet_svc mds${k}).lfsck_namespace |
1326 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1327 error "(7) MDS${k} is not the expected 'completed'"
# Layout LFSCK: same start / stop / restart sequence as above.
1330 echo "Start layout LFSCK on all targets by single command (-s 1)."
1331 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1332 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1334 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1335 for k in $(seq $MDSCOUNT); do
1336 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1337 mdd.$(facet_svc mds${k}).lfsck_layout |
1338 awk '/^status/ { print $2 }')
1339 [ "$STATUS" == "scanning-phase1" ] ||
1340 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1343 echo "Stop layout LFSCK on all targets by single lctl command."
1344 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1345 error "(10) Fail to stop LFSCK on all devices!"
1347 echo "All the LFSCK targets should be in 'stopped' status."
1348 for k in $(seq $MDSCOUNT); do
1349 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1350 mdd.$(facet_svc mds${k}).lfsck_layout |
1351 awk '/^status/ { print $2 }')
1352 [ "$STATUS" == "stopped" ] ||
1353 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# For the layout LFSCK the OSTs are checked for 'stopped' as well.
1356 for k in $(seq $OSTCOUNT); do
1357 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1358 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1359 awk '/^status/ { print $2 }')
1360 [ "$STATUS" == "stopped" ] ||
1361 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1364 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1365 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1366 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1368 echo "All the LFSCK targets should be in 'completed' status."
1369 for k in $(seq $MDSCOUNT); do
1370 # The LFSCK status query interval is 30 seconds. For the case
1371 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1372 # time to guarantee the status sync up.
1373 wait_update_facet mds${k} "$LCTL get_param -n \
1374 mdd.$(facet_svc mds${k}).lfsck_layout |
1375 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1376 error "(14) MDS${k} is not the expected 'completed'"
1379 run_test 12 "single command to trigger LFSCK on all devices"
# Test 13 ("LFSCK can repair crashed lmm_oi"): creates 32 files while
# fail_loc 0x160f is set on the MDS, runs the layout LFSCK and expects
# repaired_others to be exactly 32.
1383 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1384 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1385 echo "MDT-object FID."
1388 check_mount_and_prep
1390 echo "Inject failure stub to simulate bad lmm_oi"
1391 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1392 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1393 createmany -o $DIR/$tdir/f 32
1394 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1396 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1397 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1399 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1400 mdd.${MDT_DEV}.lfsck_layout |
1401 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1403 error "(2) unexpected status"
# One repair is expected per file created above.
1406 local repaired=$($SHOW_LAYOUT |
1407 awk '/^repaired_others/ { print $2 }')
1408 [ $repaired -eq 32 ] ||
1409 error "(3) Fail to repair crashed lmm_oi: $repaired"
1411 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14 ("LFSCK can repair MDT-object with dangling reference"): creates
# files while fail_loc 0x1610 is set on ost1, checks that ls fails, runs the
# layout LFSCK once without -c (stat on 'guard' must still fail) and once
# with -c (stat on 'guard' must then report Size 0).
1415 echo "The OST-object referenced by the MDT-object should be there;"
1416 echo "otherwise, the LFSCK should re-create the missing OST-object."
1419 check_mount_and_prep
1420 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1422 echo "Inject failure stub to simulate dangling referenced MDT-object"
1423 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1424 do_facet ost1 $LCTL set_param fail_loc=0x1610
# Create 31 more files than precreated_ost_obj_count reports, plus 'guard'.
1425 local count=$(precreated_ost_obj_count 0 0)
1427 createmany -o $DIR/$tdir/f $((count + 31))
1428 touch $DIR/$tdir/guard
1429 do_facet ost1 $LCTL set_param fail_loc=0
1431 start_full_debug_logging
1433 # exhaust other pre-created dangling cases
1434 count=$(precreated_ost_obj_count 0 0)
1435 createmany -o $DIR/$tdir/a $count ||
1436 error "(0) Fail to create $count files."
1438 echo "'ls' should fail because of dangling referenced MDT-object"
1439 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
# First run: no -c option.
1441 echo "Trigger layout LFSCK to find out dangling reference"
1442 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1444 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1445 mdd.${MDT_DEV}.lfsck_layout |
1446 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1448 error "(3) unexpected status"
1451 local repaired=$($SHOW_LAYOUT |
1452 awk '/^repaired_dangling/ { print $2 }')
1453 [ $repaired -ge 32 ] ||
1454 error "(4) Fail to repair dangling reference: $repaired"
1456 echo "'stat' should fail because of not repair dangling by default"
1457 stat $DIR/$tdir/guard > /dev/null 2>&1 && error "(5) stat should fail"
# Second run: same scan with -c added.
1459 echo "Trigger layout LFSCK to repair dangling reference"
1460 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1462 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1463 mdd.${MDT_DEV}.lfsck_layout |
1464 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1466 error "(7) unexpected status"
1469 # There may be some async LFSCK updates in processing, wait for
1470 # a while until the target reparation has been done. LU-4970.
1472 echo "'stat' should success after layout LFSCK repairing"
1473 wait_update_facet client "stat $DIR/$tdir/guard |
1474 awk '/Size/ { print \\\$2 }'" "0" 32 || {
1475 stat $DIR/$tdir/guard
1477 error "(8) unexpected size"
1480 repaired=$($SHOW_LAYOUT |
1481 awk '/^repaired_dangling/ { print $2 }')
1482 [ $repaired -ge 32 ] ||
1483 error "(9) Fail to repair dangling reference: $repaired"
1485 stop_full_debug_logging
1487 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Test 15a ("LFSCK can repair unmatched MDT-object/OST-object pairs (1)"):
# writes one file while fail_loc 0x1611 is set on ost1, runs the layout
# LFSCK and expects repaired_unmatched_pair to be exactly 1.
1491 echo "If the OST-object referenced by the MDT-object back points"
1492 echo "to some non-exist MDT-object, then the LFSCK should repair"
1493 echo "the OST-object to back point to the right MDT-object."
1496 check_mount_and_prep
1497 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1499 echo "Inject failure stub to make the OST-object to back point to"
1500 echo "non-exist MDT-object."
1501 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
# The osc locks are cancelled after the write, before fail_loc is cleared.
1503 do_facet ost1 $LCTL set_param fail_loc=0x1611
1504 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1505 cancel_lru_locks osc
1506 do_facet ost1 $LCTL set_param fail_loc=0
1508 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1509 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1511 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1512 mdd.${MDT_DEV}.lfsck_layout |
1513 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1515 error "(2) unexpected status"
# Only f0 was written under the fail_loc, so one repair is expected.
1518 local repaired=$($SHOW_LAYOUT |
1519 awk '/^repaired_unmatched_pair/ { print $2 }')
1520 [ $repaired -eq 1 ] ||
1521 error "(3) Fail to repair unmatched pair: $repaired"
1523 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b ("LFSCK can repair unmatched MDT-object/OST-object pairs (2)"):
# writes 'guard' normally, then writes f0 while fail_loc 0x1612 is set on
# ost1, runs the layout LFSCK and expects repaired_unmatched_pair to be 1.
1527 echo "If the OST-object referenced by the MDT-object back points"
1528 echo "to other MDT-object that doesn't recognize the OST-object,"
1529 echo "then the LFSCK should repair it to back point to the right"
1530 echo "MDT-object (the first one)."
1533 check_mount_and_prep
1534 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# 'guard' is created before the fail_loc is set.
1535 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1536 cancel_lru_locks osc
1538 echo "Inject failure stub to make the OST-object to back point to"
1539 echo "other MDT-object"
1541 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1542 do_facet ost1 $LCTL set_param fail_loc=0x1612
1543 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1544 cancel_lru_locks osc
1545 do_facet ost1 $LCTL set_param fail_loc=0
1547 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1548 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1550 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1551 mdd.${MDT_DEV}.lfsck_layout |
1552 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1554 error "(2) unexpected status"
# Only f0 was written under the fail_loc, so one repair is expected.
1557 local repaired=$($SHOW_LAYOUT |
1558 awk '/^repaired_unmatched_pair/ { print $2 }')
1559 [ $repaired -eq 1 ] ||
1560 error "(3) Fail to repair unmatched pair: $repaired"
1562 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 16 ("LFSCK can repair inconsistent MDT-object/OST-object owner"):
# changes the owner of f0 while fail_loc 0x1613 is set on the MDS, runs the
# layout LFSCK and expects repaired_inconsistent_owner to be exactly 1.
1566 echo "If the OST-object's owner information does not match the owner"
1567 echo "information stored in the MDT-object, then the LFSCK trust the"
1568 echo "MDT-object and update the OST-object's owner information."
1571 check_mount_and_prep
1572 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1573 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1574 cancel_lru_locks osc
1576 echo "Inject failure stub to skip OST-object owner changing"
1577 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
# The chown to 1.1 (owner 1, group 1) happens while the fail_loc is set.
1578 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
1579 chown 1.1 $DIR/$tdir/f0
1580 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1582 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1585 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1587 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1588 mdd.${MDT_DEV}.lfsck_layout |
1589 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1591 error "(2) unexpected status"
# Only f0 was chowned under the fail_loc, so one repair is expected.
1594 local repaired=$($SHOW_LAYOUT |
1595 awk '/^repaired_inconsistent_owner/ { print $2 }')
1596 [ $repaired -eq 1 ] ||
1597 error "(3) Fail to repair inconsistent owner: $repaired"
1599 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17 ("LFSCK can repair multiple references"): with fail_loc 0x1614
# set on the MDS, writes 1 MiB to 'guard' and creates f0, checks that f0
# shows guard's size, runs the layout LFSCK and then checks that writing
# f0 no longer changes guard's size.
1603 echo "If more than one MDT-objects reference the same OST-object,"
1604 echo "and the OST-object only recognizes one MDT-object, then the"
1605 echo "LFSCK should create new OST-objects for such non-recognized"
1609 check_mount_and_prep
1610 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1612 echo "Inject failure stub to make two MDT-objects to refernce"
1613 echo "the OST-object"
1615 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1616 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1618 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1619 cancel_lru_locks osc
1621 createmany -o $DIR/$tdir/f 1
1623 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1625 cancel_lru_locks mdc
1626 cancel_lru_locks osc
# Field 5 of `ls -l` is the file size; f0 was never written, yet it is
# expected to show the 1048576 bytes written to guard.
1628 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
1629 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1630 [ $size -eq 1048576 ] ||
1631 error "(1) f0 (wrong) size should be 1048576, but got $size"
1633 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1636 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1638 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1639 mdd.${MDT_DEV}.lfsck_layout |
1640 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
1642 error "(3) unexpected status"
1645 local repaired=$($SHOW_LAYOUT |
1646 awk '/^repaired_multiple_referenced/ { print $2 }')
1647 [ $repaired -eq 1 ] ||
1648 error "(4) Fail to repair multiple references: $repaired"
# Writing 2 MiB to f0 must leave guard at 1 MiB.
1650 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
1651 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1652 error "(5) Fail to write f0."
1653 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1654 [ $size -eq 1048576 ] ||
1655 error "(6) guard size should be 1048576, but got $size"
1657 run_test 17 "LFSCK can repair multiple references"
# Set debug=+cache through lctl on the node running this script (no
# do_facet wrapper); the command's stdout is discarded.
1659 $LCTL set_param debug=+cache > /dev/null
# Test 18a ("Find out orphan OST-object and repair it (1)"): chowns files
# while fail_loc 0x1615 is set on the MDS(es), checks that the file size is
# then wrong, runs the layout LFSCK with -o and expects the sizes to be
# correct again. The a2/f2 steps only run when MDSCOUNT >= 2.
# NOTE(review): $LTIME is used below but is not assigned in the lines shown
# here - confirm where it is set.
1663 echo "The target MDT-object is there, but related stripe information"
1664 echo "is lost or partly lost. The LFSCK should regenerate the missing"
1665 echo "layout EA entries."
1668 check_mount_and_prep
1669 $LFS mkdir -i 0 $DIR/$tdir/a1
1670 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1671 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With `ls -il` the inode number is field 1, so the size is field 6.
1673 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1675 $LFS path2fid $DIR/$tdir/a1/f1
1676 $LFS getstripe $DIR/$tdir/a1/f1
# Second file: directory on MDT index 1, stripe count 2 starting at OST
# index 1, written with the same dd size as f1.
1678 if [ $MDSCOUNT -ge 2 ]; then
1679 $LFS mkdir -i 1 $DIR/$tdir/a2
1680 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
1681 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1682 $LFS path2fid $DIR/$tdir/a2/f2
1683 $LFS getstripe $DIR/$tdir/a2/f2
1686 cancel_lru_locks osc
1688 echo "Inject failure, to make the MDT-object lost its layout EA"
1689 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1690 do_facet mds1 $LCTL set_param fail_loc=0x1615
1691 chown 1.1 $DIR/$tdir/a1/f1
1693 if [ $MDSCOUNT -ge 2 ]; then
1694 do_facet mds2 $LCTL set_param fail_loc=0x1615
1695 chown 1.1 $DIR/$tdir/a2/f2
1701 do_facet mds1 $LCTL set_param fail_loc=0
1702 if [ $MDSCOUNT -ge 2 ]; then
1703 do_facet mds2 $LCTL set_param fail_loc=0
1706 cancel_lru_locks mdc
1707 cancel_lru_locks osc
# f2 is compared against saved_size as well; both files were written with
# the same dd parameters.
1709 echo "The file size should be incorrect since layout EA is lost"
1710 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1711 [ "$cur_size" != "$saved_size" ] ||
1712 error "(1) Expect incorrect file1 size"
1714 if [ $MDSCOUNT -ge 2 ]; then
1715 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1716 [ "$cur_size" != "$saved_size" ] ||
1717 error "(2) Expect incorrect file2 size"
1720 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1721 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
1723 for k in $(seq $MDSCOUNT); do
1724 # The LFSCK status query interval is 30 seconds. For the case
1725 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1726 # time to guarantee the status sync up.
1727 wait_update_facet mds${k} "$LCTL get_param -n \
1728 mdd.$(facet_svc mds${k}).lfsck_layout |
1729 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1730 error "(4) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
1733 for k in $(seq $OSTCOUNT); do
1734 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1735 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1736 awk '/^status/ { print $2 }')
1737 [ "$cur_status" == "completed" ] ||
1738 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan counts: 1 on mds1 and 2 on mds2.
1741 local repaired=$(do_facet mds1 $LCTL get_param -n \
1742 mdd.$(facet_svc mds1).lfsck_layout |
1743 awk '/^repaired_orphan/ { print $2 }')
1744 [ $repaired -eq 1 ] ||
1745 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
1747 if [ $MDSCOUNT -ge 2 ]; then
1748 repaired=$(do_facet mds2 $LCTL get_param -n \
1749 mdd.$(facet_svc mds2).lfsck_layout |
1750 awk '/^repaired_orphan/ { print $2 }')
1751 [ $repaired -eq 2 ] ||
1752 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
1755 $LFS path2fid $DIR/$tdir/a1/f1
1756 $LFS getstripe $DIR/$tdir/a1/f1
1758 if [ $MDSCOUNT -ge 2 ]; then
1759 $LFS path2fid $DIR/$tdir/a2/f2
1760 $LFS getstripe $DIR/$tdir/a2/f2
1763 echo "The file size should be correct after layout LFSCK scanning"
1764 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1765 [ "$cur_size" == "$saved_size" ] ||
1766 error "(7) Expect file1 size $saved_size, but got $cur_size"
1768 if [ $MDSCOUNT -ge 2 ]; then
1769 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1770 [ "$cur_size" == "$saved_size" ] ||
1771 error "(8) Expect file2 size $saved_size, but got $cur_size"
1774 run_test 18a "Find out orphan OST-object and repair it (1)"
# Test 18b ("Find out orphan OST-object and repair it (2)"): removes files
# while fail_loc 0x1616 is set on the MDS(es), runs the layout LFSCK with
# -o, then moves the <fid>-R-0 entries from .lustre/lost+found/MDTxxxx back
# to their original paths and checks the file sizes. The a2/f2 steps only
# run when MDSCOUNT >= 2.
# NOTE(review): $LTIME is used below but is not assigned in the lines shown
# here - confirm where it is set.
1778 echo "The target MDT-object is lost. The LFSCK should re-create the"
1779 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1780 echo "can move it back to normal namespace manually."
1783 check_mount_and_prep
1784 $LFS mkdir -i 0 $DIR/$tdir/a1
1785 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1786 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With `ls -il` the inode number is field 1, so the size is field 6.
# The FIDs are saved because they form the lost+found entry names below.
1787 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1788 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1790 $LFS getstripe $DIR/$tdir/a1/f1
1792 if [ $MDSCOUNT -ge 2 ]; then
1793 $LFS mkdir -i 1 $DIR/$tdir/a2
1794 $LFS setstripe -c 2 -i 1 -S 1M $DIR/$tdir/a2
1795 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1796 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1798 $LFS getstripe $DIR/$tdir/a2/f2
1801 cancel_lru_locks osc
1803 echo "Inject failure, to simulate the case of missing the MDT-object"
1804 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1805 do_facet mds1 $LCTL set_param fail_loc=0x1616
1806 rm -f $DIR/$tdir/a1/f1
1808 if [ $MDSCOUNT -ge 2 ]; then
1809 do_facet mds2 $LCTL set_param fail_loc=0x1616
1810 rm -f $DIR/$tdir/a2/f2
1816 do_facet mds1 $LCTL set_param fail_loc=0
1817 if [ $MDSCOUNT -ge 2 ]; then
1818 do_facet mds2 $LCTL set_param fail_loc=0
1821 cancel_lru_locks mdc
1822 cancel_lru_locks osc
1824 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1825 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1827 for k in $(seq $MDSCOUNT); do
1828 # The LFSCK status query interval is 30 seconds. For the case
1829 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1830 # time to guarantee the status sync up.
1831 wait_update_facet mds${k} "$LCTL get_param -n \
1832 mdd.$(facet_svc mds${k}).lfsck_layout |
1833 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1834 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
1837 for k in $(seq $OSTCOUNT); do
1838 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1839 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1840 awk '/^status/ { print $2 }')
1841 [ "$cur_status" == "completed" ] ||
1842 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected repaired_orphan counts: 1 on mds1 and 2 on mds2.
1845 local repaired=$(do_facet mds1 $LCTL get_param -n \
1846 mdd.$(facet_svc mds1).lfsck_layout |
1847 awk '/^repaired_orphan/ { print $2 }')
1848 [ $repaired -eq 1 ] ||
1849 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
1851 if [ $MDSCOUNT -ge 2 ]; then
1852 repaired=$(do_facet mds2 $LCTL get_param -n \
1853 mdd.$(facet_svc mds2).lfsck_layout |
1854 awk '/^repaired_orphan/ { print $2 }')
1855 [ $repaired -eq 2 ] ||
1856 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
# The re-created objects are looked up by name as <saved fid>-R-0.
1859 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
1860 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
1861 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
1863 if [ $MDSCOUNT -ge 2 ]; then
1864 local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0
1865 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
1868 $LFS path2fid $DIR/$tdir/a1/f1
1869 $LFS getstripe $DIR/$tdir/a1/f1
1871 if [ $MDSCOUNT -ge 2 ]; then
1872 $LFS path2fid $DIR/$tdir/a2/f2
1873 $LFS getstripe $DIR/$tdir/a2/f2
1876 echo "The file size should be correct after layout LFSCK scanning"
1877 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1878 [ "$cur_size" == "$saved_size" ] ||
1879 error "(7) Expect file1 size $saved_size, but got $cur_size"
1881 if [ $MDSCOUNT -ge 2 ]; then
1882 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1883 [ "$cur_size" == "$saved_size" ] ||
1884 error "(8) Expect file2 size $saved_size, but got $cur_size"
1887 run_test 18b "Find out orphan OST-object and repair it (2)"
# Test 18c ("Find out orphan OST-object and repair it (3)"): writes files
# while fail_loc 0x1617 is set on ost1, removes them while fail_loc 0x1616
# is set on the MDS(es), runs the layout LFSCK with -o and expects *-N-*
# entries under .lustre/lost+found/MDT0000 only.
# NOTE(review): $LTIME and $expected are used below but are not assigned in
# the lines shown here - confirm where they are set.
1891 echo "The target MDT-object is lost, and the OST-object FID is missing."
1892 echo "The LFSCK should re-create the MDT-object with new FID under the "
1893 echo "directory .lustre/lost+found/MDTxxxx."
1896 check_mount_and_prep
1897 $LFS mkdir -i 0 $DIR/$tdir/a1
1898 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
1900 echo "Inject failure, to simulate the case of missing parent FID"
1901 #define OBD_FAIL_LFSCK_NOPFID 0x1617
1902 do_facet ost1 $LCTL set_param fail_loc=0x1617
1904 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1905 $LFS getstripe $DIR/$tdir/a1/f1
# a2 lives on MDT index 1 but is striped on OST index 0, like a1.
1907 if [ $MDSCOUNT -ge 2 ]; then
1908 $LFS mkdir -i 1 $DIR/$tdir/a2
1909 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a2
1910 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1911 $LFS getstripe $DIR/$tdir/a2/f2
1914 cancel_lru_locks osc
1916 echo "Inject failure, to simulate the case of missing the MDT-object"
1917 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1918 do_facet mds1 $LCTL set_param fail_loc=0x1616
1919 rm -f $DIR/$tdir/a1/f1
1921 if [ $MDSCOUNT -ge 2 ]; then
1922 do_facet mds2 $LCTL set_param fail_loc=0x1616
1923 rm -f $DIR/$tdir/a2/f2
1929 do_facet mds1 $LCTL set_param fail_loc=0
1930 if [ $MDSCOUNT -ge 2 ]; then
1931 do_facet mds2 $LCTL set_param fail_loc=0
1934 cancel_lru_locks mdc
1935 cancel_lru_locks osc
1937 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1938 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1940 for k in $(seq $MDSCOUNT); do
1941 # The LFSCK status query interval is 30 seconds. For the case
1942 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1943 # time to guarantee the status sync up.
1944 wait_update_facet mds${k} "$LCTL get_param -n \
1945 mdd.$(facet_svc mds${k}).lfsck_layout |
1946 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
1947 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
1950 for k in $(seq $OSTCOUNT); do
1951 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1952 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1953 awk '/^status/ { print $2 }')
1954 [ "$cur_status" == "completed" ] ||
1955 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
1958 if [ $MDSCOUNT -ge 2 ]; then
# mds1 must report $expected repaired orphans; mds2 must report 0.
1964 local repaired=$(do_facet mds1 $LCTL get_param -n \
1965 mdd.$(facet_svc mds1).lfsck_layout |
1966 awk '/^repaired_orphan/ { print $2 }')
1967 [ $repaired -eq $expected ] ||
1968 error "(4) Expect $expected fixed on mds1, but got: $repaired"
1970 if [ $MDSCOUNT -ge 2 ]; then
1971 repaired=$(do_facet mds2 $LCTL get_param -n \
1972 mdd.$(facet_svc mds2).lfsck_layout |
1973 awk '/^repaired_orphan/ { print $2 }')
1974 [ $repaired -eq 0 ] ||
1975 error "(5) Expect 0 fixed on mds2, but got: $repaired"
1978 ls -ail $MOUNT/.lustre/lost+found/
# NOTE(review): the -name pattern *-N-* is unquoted in both find calls
# below, so the shell may expand it against the current directory before
# find sees it.
1980 echo "There should NOT be some stub under .lustre/lost+found/MDT0001/"
1981 if [ -d $MOUNT/.lustre/lost+found/MDT0001 ]; then
1982 cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name *-N-*)
1984 error "(6) .lustre/lost+found/MDT0001/ should be empty"
1987 echo "There should be some stub under .lustre/lost+found/MDT0000/"
1988 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
1989 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
1991 cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-N-*)
1992 [ ! -z "$cname" ] ||
1993 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
1995 run_test 18c "Find out orphan OST-object and repair it (3)"
# Test 18d body: f2 is made to reference f1's OST-object, f1 is removed,
# and layout LFSCK is then expected to hand f2 its original (orphan)
# OST-object back. Check numbers (1)..(6) match the error messages below.
1999 echo "The target MDT-object layout EA slot is occpuied by some new"
2000 echo "created OST-object when repair dangling reference case. Such"
2001 echo "conflict OST-object has never been modified. Then when found"
2002 echo "the orphan OST-object, LFSCK will replace it with the orphan"
2006 check_mount_and_prep
2008 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2009 echo "guard" > $DIR/$tdir/a1/f1
2010 echo "foo" > $DIR/$tdir/a1/f2
# Remember f2's size (6th column of 'ls -il') for the later comparisons.
2011 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2012 $LFS path2fid $DIR/$tdir/a1/f1
2013 $LFS getstripe $DIR/$tdir/a1/f1
2014 $LFS path2fid $DIR/$tdir/a1/f2
2015 $LFS getstripe $DIR/$tdir/a1/f2
2016 cancel_lru_locks osc
2018 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2019 echo "to reference the same OST-object (which is f1's OST-obejct)."
2020 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2021 echo "dangling reference case, but f2's old OST-object is there."
2024 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
2025 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
# The chown is issued while fail_loc 0x1618 is armed. Use the ':'
# owner/group separator; the legacy '.' form is deprecated in GNU chown.
2026 chown 1:1 $DIR/$tdir/a1/f2
2027 rm -f $DIR/$tdir/a1/f1
2030 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2032 echo "stopall to cleanup object cache"
2035 setupall > /dev/null
2037 echo "The file size should be incorrect since dangling referenced"
2038 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2039 [ "$cur_size" != "$saved_size" ] ||
2040 error "(1) Expect incorrect file2 size"
2042 #define OBD_FAIL_LFSCK_DELAY3 0x1602
2043 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
2045 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2046 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
# Wait for mds1 to report 'scanning-phase2' before clearing the delay.
2048 wait_update_facet mds1 "$LCTL get_param -n \
2049 mdd.$(facet_svc mds1).lfsck_layout |
2050 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2051 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
2053 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
2055 for k in $(seq $MDSCOUNT); do
2056 # The LFSCK status query interval is 30 seconds. For the case
2057 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2058 # time to guarantee the status sync up.
2059 wait_update_facet mds${k} "$LCTL get_param -n \
2060 mdd.$(facet_svc mds${k}).lfsck_layout |
2061 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2062 error "(3) MDS${k} is not the expected 'completed'"
# Every OST must also report 'completed' (checked once, without waiting).
2065 for k in $(seq $OSTCOUNT); do
2066 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2067 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2068 awk '/^status/ { print $2 }')
2069 [ "$cur_status" == "completed" ] ||
2070 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
# Exactly one orphan is expected in the 'repaired_orphan' counter.
2073 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2074 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2075 awk '/^repaired_orphan/ { print $2 }')
2076 [ $repaired -eq 1 ] ||
2077 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
2079 echo "The file size should be correct after layout LFSCK scanning"
2080 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2081 [ "$cur_size" == "$saved_size" ] ||
2082 error "(6) Expect file2 size $saved_size, but got $cur_size"
2084 echo "The LFSCK should find back the original data."
2085 cat $DIR/$tdir/a1/f2
2086 $LFS path2fid $DIR/$tdir/a1/f2
2087 $LFS getstripe $DIR/$tdir/a1/f2
2089 run_test 18d "Find out orphan OST-object and repair it (4)"
# Test 18e body: same setup as 18d, but f2 is appended to while LFSCK is
# held in 'scanning-phase2'. The old orphan OST-object is then expected to
# show up as a '*-C-*' stub under .lustre/lost+found/MDT0000/ while f2
# keeps the new data. Check numbers (1)..(9) match the error messages.
2093 echo "The target MDT-object layout EA slot is occpuied by some new"
2094 echo "created OST-object when repair dangling reference case. Such"
2095 echo "conflict OST-object has been modified by others. To keep the"
2096 echo "new data, the LFSCK will create a new file to refernece this"
2097 echo "old orphan OST-object."
2100 check_mount_and_prep
2102 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2103 echo "guard" > $DIR/$tdir/a1/f1
2104 echo "foo" > $DIR/$tdir/a1/f2
# Remember f2's size (6th column of 'ls -il') for the later comparisons.
2105 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2106 $LFS path2fid $DIR/$tdir/a1/f1
2107 $LFS getstripe $DIR/$tdir/a1/f1
2108 $LFS path2fid $DIR/$tdir/a1/f2
2109 $LFS getstripe $DIR/$tdir/a1/f2
2110 cancel_lru_locks osc
2112 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
2113 echo "to reference the same OST-object (which is f1's OST-obejct)."
2114 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
2115 echo "dangling reference case, but f2's old OST-object is there."
2118 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
2119 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
# The chown is issued while fail_loc 0x1618 is armed. Use the ':'
# owner/group separator; the legacy '.' form is deprecated in GNU chown.
2120 chown 1:1 $DIR/$tdir/a1/f2
2121 rm -f $DIR/$tdir/a1/f1
2124 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2126 echo "stopall to cleanup object cache"
2129 setupall > /dev/null
2131 echo "The file size should be incorrect since dangling referenced"
2132 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
2133 [ "$cur_size" != "$saved_size" ] ||
2134 error "(1) Expect incorrect file2 size"
2136 #define OBD_FAIL_LFSCK_DELAY3 0x1602
2137 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
2139 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2140 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
# Wait for mds1 to report 'scanning-phase2' before modifying f2.
2142 wait_update_facet mds1 "$LCTL get_param -n \
2143 mdd.$(facet_svc mds1).lfsck_layout |
2144 awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
2145 error "(3) MDS1 is not the expected 'scanning-phase2'"
2147 # to guarantee all updates are synced.
2151 echo "Write new data to f2 to modify the new created OST-object."
2152 echo "dummy" >> $DIR/$tdir/a1/f2
2154 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
2156 for k in $(seq $MDSCOUNT); do
2157 # The LFSCK status query interval is 30 seconds. For the case
2158 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2159 # time to guarantee the status sync up.
2160 wait_update_facet mds${k} "$LCTL get_param -n \
2161 mdd.$(facet_svc mds${k}).lfsck_layout |
2162 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2163 error "(4) MDS${k} is not the expected 'completed'"
# Every OST must also report 'completed' (checked once, without waiting).
2166 for k in $(seq $OSTCOUNT); do
2167 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2168 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2169 awk '/^status/ { print $2 }')
2170 [ "$cur_status" == "completed" ] ||
2171 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# Exactly one orphan is expected in the 'repaired_orphan' counter.
2174 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2175 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2176 awk '/^repaired_orphan/ { print $2 }')
2177 [ $repaired -eq 1 ] ||
2178 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2180 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2181 [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
2182 error "(7) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
# The pattern is quoted so that find, not the shell, expands it; an
# unquoted glob could match files in the current working directory first.
# cname is kept local so it does not leak into later tests.
2184 local cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-C-*")
2185 [ ! -z "$cname" ] ||
2186 error "(8) .lustre/lost+found/MDT0000/ should not be empty"
2188 echo "The stub file should keep the original f2 data"
2189 cur_size=$(ls -il $cname | awk '{ print $6 }')
2190 [ "$cur_size" == "$saved_size" ] ||
2191 error "(9) Expect file2 size $saved_size, but got $cur_size"
2194 $LFS path2fid $cname
2195 $LFS getstripe $cname
2197 echo "The f2 should contains new data."
2198 cat $DIR/$tdir/a1/f2
2199 $LFS path2fid $DIR/$tdir/a1/f2
2200 $LFS getstripe $DIR/$tdir/a1/f2
2202 run_test 18e "Find out orphan OST-object and repair it (5)"
# Test 18f body: MDT-objects are dropped via fail_loc 0x1616, then a first
# layout LFSCK run is made with fail_loc 0x161c armed on mds1. That run is
# expected to end 'partial' on the MDSes and ost1 and 'completed' on ost2.
# A second run without fault injection must end 'completed' everywhere.
# Needs at least two OSTs; check numbers (1)..(11) match the error messages.
2205 [ $OSTCOUNT -lt 2 ] &&
2206 skip "The test needs at least 2 OSTs" && return
2209 echo "The target MDT-object is lost. The LFSCK should re-create the"
2210 echo "MDT-object under .lustre/lost+found/MDTxxxx. If some OST fail"
2211 echo "to verify some OST-object(s) during the first stage-scanning,"
2212 echo "the LFSCK should skip orphan OST-objects for such OST. Others"
2213 echo "should not be affected."
2216 check_mount_and_prep
# a1 (1 stripe) and a2 (2 stripes) are created with 'lfs mkdir -i 0'.
2217 $LFS mkdir -i 0 $DIR/$tdir/a1
2218 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a1
2219 dd if=/dev/zero of=$DIR/$tdir/a1/guard bs=1M count=2
2220 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
2221 $LFS mkdir -i 0 $DIR/$tdir/a2
2222 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a2
2223 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
2224 $LFS getstripe $DIR/$tdir/a1/f1
2225 $LFS getstripe $DIR/$tdir/a2/f2
# With two or more MDTs, mirror the same layout in a3/a4 via 'lfs mkdir -i 1'.
2227 if [ $MDSCOUNT -ge 2 ]; then
2228 $LFS mkdir -i 1 $DIR/$tdir/a3
2229 $LFS setstripe -c 1 -i 0 -S 1M $DIR/$tdir/a3
2230 dd if=/dev/zero of=$DIR/$tdir/a3/guard bs=1M count=2
2231 dd if=/dev/zero of=$DIR/$tdir/a3/f3 bs=1M count=2
2232 $LFS mkdir -i 1 $DIR/$tdir/a4
2233 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a4
2234 dd if=/dev/zero of=$DIR/$tdir/a4/f4 bs=1M count=2
2235 $LFS getstripe $DIR/$tdir/a3/f3
2236 $LFS getstripe $DIR/$tdir/a4/f4
2239 cancel_lru_locks osc
2241 echo "Inject failure, to simulate the case of missing the MDT-object"
2242 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2243 do_facet mds1 $LCTL set_param fail_loc=0x1616
2244 rm -f $DIR/$tdir/a1/f1
2245 rm -f $DIR/$tdir/a2/f2
2247 if [ $MDSCOUNT -ge 2 ]; then
2248 do_facet mds2 $LCTL set_param fail_loc=0x1616
2249 rm -f $DIR/$tdir/a3/f3
2250 rm -f $DIR/$tdir/a4/f4
2256 do_facet mds1 $LCTL set_param fail_loc=0
2257 if [ $MDSCOUNT -ge 2 ]; then
2258 do_facet mds2 $LCTL set_param fail_loc=0
2261 cancel_lru_locks mdc
2262 cancel_lru_locks osc
2264 echo "Inject failure, to simulate the OST0 fail to handle"
2265 echo "MDT0 LFSCK request during the first-stage scanning."
2266 #define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c
2267 do_facet mds1 $LCTL set_param fail_loc=0x161c fail_val=0
2269 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2270 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2272 for k in $(seq $MDSCOUNT); do
2273 # The LFSCK status query interval is 30 seconds. For the case
2274 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2275 # time to guarantee the status sync up.
2276 wait_update_facet mds${k} "$LCTL get_param -n \
2277 mdd.$(facet_svc mds${k}).lfsck_layout |
2278 awk '/^status/ { print \\\$2 }'" "partial" $LTIME ||
2279 error "(2) MDS${k} is not the expected 'partial'"
# ost1 is expected to end 'partial', ost2 'completed'.
2282 wait_update_facet ost1 "$LCTL get_param -n \
2283 obdfilter.$(facet_svc ost1).lfsck_layout |
2284 awk '/^status/ { print \\\$2 }'" "partial" $LTIME || {
2285 error "(3) OST1 is not the expected 'partial'"
2288 wait_update_facet ost2 "$LCTL get_param -n \
2289 obdfilter.$(facet_svc ost2).lfsck_layout |
2290 awk '/^status/ { print \\\$2 }'" "completed" $LTIME || {
2291 error "(4) OST2 is not the expected 'completed'"
2294 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
# After the partial run, one repaired orphan is expected per MDS.
2296 local repaired=$(do_facet mds1 $LCTL get_param -n \
2297 mdd.$(facet_svc mds1).lfsck_layout |
2298 awk '/^repaired_orphan/ { print $2 }')
2299 [ $repaired -eq 1 ] ||
2300 error "(5) Expect 1 fixed on mds{1}, but got: $repaired"
2302 if [ $MDSCOUNT -ge 2 ]; then
2303 repaired=$(do_facet mds2 $LCTL get_param -n \
2304 mdd.$(facet_svc mds2).lfsck_layout |
2305 awk '/^repaired_orphan/ { print $2 }')
2306 [ $repaired -eq 1 ] ||
2307 error "(6) Expect 1 fixed on mds{2}, but got: $repaired"
2310 echo "Trigger layout LFSCK on all devices again to cleanup"
2311 $START_LAYOUT -r -o || error "(7) Fail to start LFSCK for layout!"
2313 for k in $(seq $MDSCOUNT); do
2314 # The LFSCK status query interval is 30 seconds. For the case
2315 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2316 # time to guarantee the status sync up.
2317 wait_update_facet mds${k} "$LCTL get_param -n \
2318 mdd.$(facet_svc mds${k}).lfsck_layout |
2319 awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
2320 error "(8) MDS${k} is not the expected 'completed'"
# cur_status is declared local so it does not leak into the global scope.
2323 for k in $(seq $OSTCOUNT); do
2324 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2325 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2326 awk '/^status/ { print $2 }')
2327 [ "$cur_status" == "completed" ] ||
2328 error "(9) OST${k} Expect 'completed', but got '$cur_status'"
# After the second run, two repaired orphans are expected per MDS.
# 'repaired' is already local (declared above), so it is only re-assigned.
2332 repaired=$(do_facet mds1 $LCTL get_param -n \
2333 mdd.$(facet_svc mds1).lfsck_layout |
2334 awk '/^repaired_orphan/ { print $2 }')
2335 [ $repaired -eq 2 ] ||
2336 error "(10) Expect 2 fixed on mds{1}, but got: $repaired"
2338 if [ $MDSCOUNT -ge 2 ]; then
2339 repaired=$(do_facet mds2 $LCTL get_param -n \
2340 mdd.$(facet_svc mds2).lfsck_layout |
2341 awk '/^repaired_orphan/ { print $2 }')
2342 [ $repaired -eq 2 ] ||
2343 error "(11) Expect 2 fixed on mds{2}, but got: $repaired"
2346 run_test 18f "Skip the failed OST(s) when handle orphan OST-objects"
# Top-level statement (runs between tests 18f and 19a): sets the local
# lctl 'debug' parameter to '-cache' and discards the command's stdout.
# NOTE(review): presumably removes the 'cache' flag from the debug mask; confirm.
2348 $LCTL set_param debug=-cache > /dev/null
# Test 19a body: with lfsck_verify_pfid enabled on OST0000 and fail_loc
# 0x1619 armed on the client, reading a0 is expected to fail.
2351 check_mount_and_prep
2352 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2354 echo "foo" > $DIR/$tdir/a0
2355 echo "guard" > $DIR/$tdir/a1
2356 cancel_lru_locks osc
2358 echo "Inject failure, then client will offer wrong parent FID when read"
# Set lfsck_verify_pfid to 1 for OST0000 on the ost1 facet.
2359 do_facet ost1 $LCTL set_param -n \
2360 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2361 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
# No do_facet here: fail_loc is set on the node running this script.
2362 $LCTL set_param fail_loc=0x1619
2364 echo "Read RPC with wrong parent FID should be denied"
# A successful 'cat' is the failure case and triggers error "(3)".
2365 cat $DIR/$tdir/a0 && error "(3) Read should be denied!"
# Disarm the local fault injection point again.
2366 $LCTL set_param fail_loc=0
2368 run_test 19a "OST-object inconsistency self detect"
# Test 19b body: f0 is written while fail_loc 0x1611 is armed on ost1.
# The 'repaired' counter of lfsck_verify_pfid must be 0 before and 1 after
# a read performed with lfsck_verify_pfid set to 1.
2371 check_mount_and_prep
2372 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2374 echo "Inject failure stub to make the OST-object to back point to"
2375 echo "non-exist MDT-object"
2377 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
2378 do_facet ost1 $LCTL set_param fail_loc=0x1611
# f0 is created while the fault injection point is armed.
2379 echo "foo" > $DIR/$tdir/f0
2380 cancel_lru_locks osc
2381 do_facet ost1 $LCTL set_param fail_loc=0
2383 echo "Nothing should be fixed since self detect and repair is disabled"
# Read the 'repaired' line from OST0000's lfsck_verify_pfid output.
2384 local repaired=$(do_facet ost1 $LCTL get_param -n \
2385 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2386 awk '/^repaired/ { print $2 }')
2387 [ $repaired -eq 0 ] ||
2388 error "(1) Expected 0 repaired, but got $repaired"
2390 echo "Read RPC with right parent FID should be accepted,"
2391 echo "and cause parent FID on OST to be fixed"
# Set lfsck_verify_pfid to 1, then read f0; the read must succeed.
2393 do_facet ost1 $LCTL set_param -n \
2394 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2395 cat $DIR/$tdir/f0 || error "(2) Read should not be denied!"
# The same counter is now expected to be exactly 1.
2397 repaired=$(do_facet ost1 $LCTL get_param -n \
2398 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2399 awk '/^repaired/ { print $2 }')
2400 [ $repaired -eq 1 ] ||
2401 error "(3) Expected 1 repaired, but got $repaired"
2403 run_test 19b "OST-object inconsistency self repair"
# Test 20 body: files f0..f2 (plus f3 when there are more than two OSTs)
# are removed under fail_loc 0x1616 / 0x161a with different fail_val
# values, the client is unmounted, and layout LFSCK is run. The files
# re-created under .lustre/lost+found/MDT0000/<fid>-R-0 are then checked
# for the LOV_PATTERN_F_HOLE flag, stripe count, size and read/write/
# chown/touch/unlink behaviour. Needs at least two OSTs.
2406 [ $OSTCOUNT -lt 2 ] &&
2407 skip "The test needs at least 2 OSTs" && return
2410 echo "The target MDT-object and some of its OST-object are lost."
2411 echo "The LFSCK should find out the left OST-objects and re-create"
2412 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2413 echo "with the partial OST-objects (LOV EA hole)."
2415 echo "New client can access the file with LOV EA hole via normal"
2416 echo "system tools or commands without crash the system."
2418 echo "For old client, even though it cannot access the file with"
2419 echo "LOV EA hole, it should not cause the system crash."
2422 check_mount_and_prep
2423 $LFS mkdir -i 0 $DIR/$tdir/a1
# Three stripes when more than two OSTs are available, otherwise two.
2424 if [ $OSTCOUNT -gt 2 ]; then
2425 $LFS setstripe -c 3 -i 0 -S 1M $DIR/$tdir/a1
2428 $LFS setstripe -c 2 -i 0 -S 1M $DIR/$tdir/a1
2432 # 256 blocks on the stripe0.
2433 # 1 block on the stripe1 for 2 OSTs case.
2434 # 256 blocks on the stripe1 for other cases.
2435 # 1 block on the stripe2 if OSTs > 2
# NOTE(review): $bcount is assigned on a line not visible in this listing.
2436 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2437 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2438 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# The FIDs are recorded now because the lost+found names checked later
# are built from them ("<fid>-R-0").
2440 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2441 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2442 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
2445 $LFS getstripe $DIR/$tdir/a1/f0
2447 $LFS getstripe $DIR/$tdir/a1/f1
2449 $LFS getstripe $DIR/$tdir/a1/f2
2451 if [ $OSTCOUNT -gt 2 ]; then
2452 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
# fid3 is declared local like fid0..fid2 so it does not leak globally.
2453 local fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2455 $LFS getstripe $DIR/$tdir/a1/f3
2458 cancel_lru_locks osc
2460 echo "Inject failure..."
2461 echo "To simulate f0 lost MDT-object"
2462 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2463 do_facet mds1 $LCTL set_param fail_loc=0x1616
2464 rm -f $DIR/$tdir/a1/f0
2466 echo "To simulate f1 lost MDT-object and OST-object0"
2467 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
2468 do_facet mds1 $LCTL set_param fail_loc=0x161a
2469 rm -f $DIR/$tdir/a1/f1
2471 echo "To simulate f2 lost MDT-object and OST-object1"
2472 do_facet mds1 $LCTL set_param fail_val=1
2473 rm -f $DIR/$tdir/a1/f2
2475 if [ $OSTCOUNT -gt 2 ]; then
2476 echo "To simulate f3 lost MDT-object and OST-object2"
2477 do_facet mds1 $LCTL set_param fail_val=2
2478 rm -f $DIR/$tdir/a1/f3
2481 umount_client $MOUNT
2484 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2486 echo "Inject failure to slow down the LFSCK on OST0"
2487 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2488 do_facet ost1 $LCTL set_param fail_loc=0x161b
2490 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2491 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2494 do_facet ost1 $LCTL set_param fail_loc=0
2496 for k in $(seq $MDSCOUNT); do
2497 # The LFSCK status query interval is 30 seconds. For the case
2498 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2499 # time to guarantee the status sync up.
2500 wait_update_facet mds${k} "$LCTL get_param -n \
2501 mdd.$(facet_svc mds${k}).lfsck_layout |
2502 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2503 error "(2) MDS${k} is not the expected 'completed'"
2506 for k in $(seq $OSTCOUNT); do
2507 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2508 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2509 awk '/^status/ { print $2 }')
2510 [ "$cur_status" == "completed" ] ||
2511 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected 'repaired_orphan' on mds1: 9 with more than two OSTs, else 4.
2514 local repaired=$(do_facet mds1 $LCTL get_param -n \
2515 mdd.$(facet_svc mds1).lfsck_layout |
2516 awk '/^repaired_orphan/ { print $2 }')
2517 if [ $OSTCOUNT -gt 2 ]; then
2518 [ $repaired -eq 9 ] ||
2519 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2521 [ $repaired -eq 4 ] ||
2522 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
2525 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Bit tested against the value printed by 'lfs getstripe -L' below.
2527 LOV_PATTERN_F_HOLE=0x40000000
2530 # ${fid0}-R-0 is the old f0
2532 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
2533 echo "Check $name, which is the old f0"
2535 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
# The getstripe -L output is prefixed with 0x so $(( )) parses it as hex.
2537 local pattern=0x$($LFS getstripe -L $name)
2538 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2539 error "(5.2) NOT expect pattern flag hole, but got $pattern"
2541 local stripes=$($LFS getstripe -c $name)
2542 if [ $OSTCOUNT -gt 2 ]; then
2543 [ $stripes -eq 3 ] ||
2544 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2546 [ $stripes -eq 2 ] ||
2547 error "(5.3.2) expect the stripe count is 2, but got $stripes"
2550 local size=$(stat $name | awk '/Size:/ { print $2 }')
2551 [ $size -eq $((4096 * $bcount)) ] ||
2552 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
2554 cat $name > /dev/null || error "(5.5) cannot read $name"
2556 echo "dummy" >> $name || error "(5.6) cannot write $name"
2558 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2560 touch $name || error "(5.8) cannot touch $name"
2562 rm -f $name || error "(5.9) cannot unlink $name"
2565 # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2567 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
2568 if [ $OSTCOUNT -gt 2 ]; then
2569 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2571 echo "Check $name, it contains the old f1's stripe1"
2574 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
2576 pattern=0x$($LFS getstripe -L $name)
2577 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2578 error "(6.2) expect pattern flag hole, but got $pattern"
2580 stripes=$($LFS getstripe -c $name)
2581 if [ $OSTCOUNT -gt 2 ]; then
2582 [ $stripes -eq 3 ] ||
2583 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2585 [ $stripes -eq 2 ] ||
2586 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2589 size=$(stat $name | awk '/Size:/ { print $2 }')
2590 [ $size -eq $((4096 * $bcount)) ] ||
2591 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
2593 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# Count the "Input/output error" lines dd prints while copying with
# conv=sync,noerror in 4096-byte blocks.
2595 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2596 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2599 [ $failures -eq 256 ] ||
2600 error "(6.6) expect 256 IO failures, but get $failures"
2602 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2603 [ $size -eq $((4096 * $bcount)) ] ||
2604 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
2606 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2607 error "(6.8) write to the LOV EA hole should fail"
2609 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2610 error "(6.9) write to normal stripe should NOT fail"
2612 echo "foo" >> $name && error "(6.10) append write $name should fail"
2614 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2616 touch $name || error "(6.12) cannot touch $name"
2618 rm -f $name || error "(6.13) cannot unlink $name"
2621 # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2623 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
2624 if [ $OSTCOUNT -gt 2 ]; then
2625 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2627 echo "Check $name, it contains the old f2's stripe0"
2630 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
2632 pattern=0x$($LFS getstripe -L $name)
2633 stripes=$($LFS getstripe -c $name)
2634 size=$(stat $name | awk '/Size:/ { print $2 }')
# With more than two OSTs a hole is expected (checks 7.x.1); the checks
# numbered 7.x.2 further down expect no hole and a single stripe.
2635 if [ $OSTCOUNT -gt 2 ]; then
2636 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2637 error "(7.2.1) expect pattern flag hole, but got $pattern"
2639 [ $stripes -eq 3 ] ||
2640 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2642 [ $size -eq $((4096 * $bcount)) ] ||
2643 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
2645 cat $name > /dev/null &&
2646 error "(7.5.1) normal read $name should fail"
2648 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2649 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2651 [ $failures -eq 256 ] ||
2652 error "(7.6) expect 256 IO failures, but get $failures"
2654 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2655 [ $size -eq $((4096 * $bcount)) ] ||
2656 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
2658 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2659 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
2661 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2662 error "(7.8.1) write to normal stripe should NOT fail"
2664 echo "foo" >> $name &&
2665 error "(7.8.3) append write $name should fail"
2667 chown $RUNAS_ID:$RUNAS_GID $name ||
2668 error "(7.9.1) cannot chown on $name"
2670 touch $name || error "(7.10.1) cannot touch $name"
2672 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2673 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2675 [ $stripes -eq 1 ] ||
2676 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2679 [ $size -eq $((4096 * (256 + 0))) ] ||
2680 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2682 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2684 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2686 chown $RUNAS_ID:$RUNAS_GID $name ||
2687 error "(7.9.2) cannot chown on $name"
2689 touch $name || error "(7.10.2) cannot touch $name"
2692 rm -f $name || error "(7.11) cannot unlink $name"
# f3 only exists when there are more than two OSTs.
2694 [ $OSTCOUNT -le 2 ] && return
2697 # ${fid3}-R-0 should contains the old f3's stripe0 and stripe1
2699 name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
2700 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2702 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
2704 pattern=0x$($LFS getstripe -L $name)
2705 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2706 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2708 stripes=$($LFS getstripe -c $name)
2709 # LFSCK does not know the old f3 had 3 stripes.
2710 # It only tries to find as much as possible.
2711 # The stripe count depends on the last stripe's offset.
2712 [ $stripes -eq 2 ] ||
2713 error "(8.3) expect the stripe count is 2, but got $stripes"
2715 size=$(stat $name | awk '/Size:/ { print $2 }')
2717 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2718 error "(8.4) expect the size $((4096 * 512)), but got $size"
2720 cat $name > /dev/null || error "(8.5) cannot read $name"
2722 echo "dummy" >> $name || error "(8.6) cannot write $name"
2724 chown $RUNAS_ID:$RUNAS_GID $name ||
2725 error "(8.7) cannot chown on $name"
2727 touch $name || error "(8.8) cannot touch $name"
2729 rm -f $name || error "(8.9) cannot unlink $name"
2731 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# Test 21 body: starts LFSCK on MDT0000 without a -t type option and checks
# that both the namespace and the layout component report
# 'scanning-phase1', then stops LFSCK again. Skipped for MDS < 2.5.59.
2734 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] &&
2735 skip "ignore the test if MDS is older than 2.5.59" && return
2737 check_mount_and_prep
2738 createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
2740 echo "Start all LFSCK components by default (-s 1)"
2741 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -s 1 -r ||
2742 error "Fail to start LFSCK"
2744 echo "namespace LFSCK should be in 'scanning-phase1' status"
# The status is sampled once, right after the start; there is no wait.
2745 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
2746 [ "$STATUS" == "scanning-phase1" ] ||
2747 error "Expect namespace 'scanning-phase1', but got '$STATUS'"
2749 echo "layout LFSCK should be in 'scanning-phase1' status"
# NOTE(review): $SHOW_LAYOUT is defined outside the visible excerpt.
2750 STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
2751 [ "$STATUS" == "scanning-phase1" ] ||
2752 error "Expect layout 'scanning-phase1', but got '$STATUS'"
2754 echo "Stop all LFSCK components by default"
2755 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 ||
2756 error "Fail to stop LFSCK"
2758 run_test 21 "run all LFSCK components by default"
# Test 22a body: foo/dummy is created on MDT0 while fail_loc 0x161e is
# armed, 'guard' is removed, and namespace LFSCK (-A -r) is expected to
# report exactly one 'unmatched_pairs_repaired'. Needs at least two MDSes.
2761 [ $MDSCOUNT -lt 2 ] &&
2762 skip "We need at least 2 MDSes for this test" && return
2765 echo "The parent_A references the child directory via some name entry,"
2766 echo "but the child directory back references another parent_B via its"
# The inner double quotes are escaped so that ".." is printed with its
# quotes; unescaped, the shell silently dropped them from the output.
2767 echo "\"..\" name entry. The parent_B does not exist. Then the namesapce"
2768 echo "LFSCK will repair the child directory's \"..\" name entry."
2771 check_mount_and_prep
2773 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
2774 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
2776 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
2777 echo "The dummy's dotdot name entry references the guard."
2778 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
2779 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
2780 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
2781 error "(3) Fail to mkdir on MDT0"
2782 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2784 rmdir $DIR/$tdir/guard || error "(4) Fail to rmdir $DIR/$tdir/guard"
2786 echo "Trigger namespace LFSCK to repair unmatched pairs"
2787 $START_NAMESPACE -A -r ||
2788 error "(5) Fail to start LFSCK for namespace"
# Wait for the namespace LFSCK status to become 'completed' (limit 32).
2790 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2791 mdd.${MDT_DEV}.lfsck_namespace |
2792 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2794 error "(6) unexpected status"
2797 local repaired=$($SHOW_NAMESPACE |
2798 awk '/^unmatched_pairs_repaired/ { print $2 }')
2799 [ $repaired -eq 1 ] ||
2800 error "(7) Fail to repair unmatched pairs: $repaired"
2802 echo "'ls' should success after namespace LFSCK repairing"
2803 ls -ail $DIR/$tdir/foo/dummy > /dev/null ||
2804 error "(8) ls should success."
2806 run_test 22a "LFSCK can repair unmatched pairs (1)"
# Test 22b body: like 22a, but 'guard' is kept and 'lfs fid2path' on
# dummy's FID is checked: it must not return foo/dummy before the
# namespace LFSCK run and must return it afterwards. Needs >= 2 MDSes.
2809 [ $MDSCOUNT -lt 2 ] &&
2810 skip "We need at least 2 MDSes for this test" && return
2813 echo "The parent_A references the child directory via the name entry_B,"
2814 echo "but the child directory back references another parent_C via its"
# The inner double quotes are escaped so that ".." is printed with its
# quotes; unescaped, the shell silently dropped them from the output.
2815 echo "\"..\" name entry. The parent_C exists, but there is no the name"
2816 echo "entry_B under the parent_C. Then the namesapce LFSCK will repair"
2817 echo "the child directory's \"..\" name entry and its linkEA."
2820 check_mount_and_prep
2822 $LFS mkdir -i 1 $DIR/$tdir/guard || error "(1) Fail to mkdir on MDT1"
2823 $LFS mkdir -i 1 $DIR/$tdir/foo || error "(2) Fail to mkdir on MDT1"
2825 echo "Inject failure stub on MDT0 to simulate bad dotdot name entry"
2826 echo "and bad linkEA. The dummy's dotdot name entry references the"
2827 echo "guard. The dummy's linkEA references n non-exist name entry."
2828 #define OBD_FAIL_LFSCK_BAD_PARENT 0x161e
2829 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161e
2830 $LFS mkdir -i 0 $DIR/$tdir/foo/dummy ||
2831 error "(3) Fail to mkdir on MDT0"
2832 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
2834 local dummyfid=$($LFS path2fid $DIR/$tdir/foo/dummy)
2835 echo "fid2path should NOT work on the dummy's FID $dummyfid"
2836 local dummyname=$($LFS fid2path $DIR $dummyfid)
2837 [ "$dummyname" != "$DIR/$tdir/foo/dummy" ] ||
2838 error "(4) fid2path works unexpectedly."
2840 echo "Trigger namespace LFSCK to repair unmatched pairs"
2841 $START_NAMESPACE -A -r ||
2842 error "(5) Fail to start LFSCK for namespace"
# Wait for the namespace LFSCK status to become 'completed' (limit 32).
2844 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2845 mdd.${MDT_DEV}.lfsck_namespace |
2846 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2848 error "(6) unexpected status"
2851 local repaired=$($SHOW_NAMESPACE |
2852 awk '/^unmatched_pairs_repaired/ { print $2 }')
2853 [ $repaired -eq 1 ] ||
2854 error "(7) Fail to repair unmatched pairs: $repaired"
2856 echo "fid2path should work on the dummy's FID $dummyfid after LFSCK"
# dummyname is already local (declared above), so it is only re-assigned.
2857 dummyname=$($LFS fid2path $DIR $dummyfid)
2858 [ "$dummyname" == "$DIR/$tdir/foo/dummy" ] ||
2859 error "(8) fid2path does not work"
2861 run_test 22b "LFSCK can repair unmatched pairs (2)"
# Test 23a body: d0/d1 is removed while fail_loc 0x1620 is armed on mds2.
# A first namespace LFSCK run (-A -r) must count one 'dangling_repaired'
# while 'ls' on d1 still fails; a second run with -C must again count one,
# after which 'ls' must succeed. Needs at least two MDSes.
2864 [ $MDSCOUNT -lt 2 ] &&
2865 skip "We need at least 2 MDSes for this test" && return
2868 echo "The name entry is there, but the MDT-object for such name "
2869 echo "entry does not exist. The namespace LFSCK should find out "
2870 echo "and repair the inconsistency as required."
2873 check_mount_and_prep
2875 $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
2876 $LFS mkdir -i 1 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT1"
2878 echo "Inject failure stub on MDT1 to simulate dangling name entry"
2879 #define OBD_FAIL_LFSCK_DANGLING2 0x1620
2880 do_facet mds2 $LCTL set_param fail_loc=0x1620
2881 rmdir $DIR/$tdir/d0/d1 || error "(3) Fail to rmdir d1"
2882 do_facet mds2 $LCTL set_param fail_loc=0
2884 echo "'ls' should fail because of dangling name entry"
# A successful 'ls' is the failure case here.
2885 ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(4) ls should fail."
2887 echo "Trigger namespace LFSCK to find out dangling name entry"
2888 $START_NAMESPACE -A -r ||
2889 error "(5) Fail to start LFSCK for namespace"
# Wait for the namespace LFSCK status to become 'completed' (limit 32).
2891 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2892 mdd.${MDT_DEV}.lfsck_namespace |
2893 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2895 error "(6) unexpected status"
2898 local repaired=$($SHOW_NAMESPACE |
2899 awk '/^dangling_repaired/ { print $2 }')
2900 [ $repaired -eq 1 ] ||
2901 error "(7) Fail to repair dangling name entry: $repaired"
2903 echo "'ls' should fail because not re-create MDT-object by default"
2904 ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(8) ls should fail."
2906 echo "Trigger namespace LFSCK again to repair dangling name entry"
# Second run adds -C to the same start command.
2907 $START_NAMESPACE -A -r -C ||
2908 error "(9) Fail to start LFSCK for namespace"
2910 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
2911 mdd.${MDT_DEV}.lfsck_namespace |
2912 awk '/^status/ { print \\\$2 }'" "completed" 32 || {
2914 error "(10) unexpected status"
2917 repaired=$($SHOW_NAMESPACE |
2918 awk '/^dangling_repaired/ { print $2 }')
2919 [ $repaired -eq 1 ] ||
2920 error "(11) Fail to repair dangling name entry: $repaired"
2922 echo "'ls' should success after namespace LFSCK repairing"
2923 ls -ail $DIR/$tdir/d0/d1 > /dev/null || error "(12) ls should success."
2925 run_test 23a "LFSCK can repair dangling name entry (1)"
# test_23b: namespace LFSCK versus a dangling name entry that really belongs
# to an existing multi-linked object.
#
# A hard link "foo" to f0 is created while fail_loc 0x1621 is armed on the
# MDS, so that 'ls' on foo fails.  LFSCK is then run with "-r -C" and is
# expected to report exactly one dangling repair and one multiple-linked
# repair; finally foo must read back the data that was written to f0.
test_23b() {
	echo "#####"
	echo "The objectA has multiple hard links, one of them corresponding"
	echo "to the name entry_B. But there is something wrong for the name"
	echo "entry_B and cause entry_B to references non-exist object_C."
	echo "In the first-stage scanning, the LFSCK will think the entry_B"
	echo "as dangling, and re-create the lost object_C. When the LFSCK"
	echo "comes to the second-stage scanning, it will find that the"
	echo "former re-creating object_C is not proper, and will try to"
	echo "replace the object_C with the real object_A."
	echo "#####"

	check_mount_and_prep

	if ! $LFS mkdir -i 0 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0 on MDT0"
	fi
	if ! echo "dummy" > $DIR/$tdir/d0/f0; then
		error "(2) Fail to touch on MDT0"
	fi
	if ! echo "dead" > $DIR/$tdir/d0/f1; then
		error "(3) Fail to touch on MDT0"
	fi

	echo "Inject failure stub on MDT0 to simulate dangling name entry"
	#define OBD_FAIL_LFSCK_DANGLING3	0x1621
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
	if ! ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo; then
		error "(4) Fail to hard link"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	if ! rm -f $DIR/$tdir/d0/f1; then
		error "(5) Fail to unlink $DIR/$tdir/d0/f1"
	fi

	echo "'ls' should fail because of dangling name entry"
	if ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1; then
		error "(6) ls should fail."
	fi

	echo "Trigger namespace LFSCK to find out dangling name entry"
	if ! $START_NAMESPACE -r -C; then
		error "(7) Fail to start LFSCK for namespace"
	fi

	# Poll the "status" line of the namespace LFSCK report on the MDS.
	local status_query="$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'"
	if ! wait_update_facet $SINGLEMDS "$status_query" "completed" 32; then
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	fi

	local nr_fixed
	nr_fixed=$($SHOW_NAMESPACE |
		   awk '/^dangling_repaired/ { print $2 }')
	if ! [ $nr_fixed -eq 1 ]; then
		error "(9) Fail to repair dangling name entry: $nr_fixed"
	fi

	nr_fixed=$($SHOW_NAMESPACE |
		   awk '/^multiple_linked_repaired/ { print $2 }')
	if ! [ $nr_fixed -eq 1 ]; then
		error "(10) Fail to drop the former created object: $nr_fixed"
	fi

	# foo must now expose the content of the real object (f0).
	local content
	content=$(cat $DIR/$tdir/d0/foo)
	[ "$content" == "dummy" ] ||
		error "(11) The $DIR/$tdir/d0/foo is not recovered: $content"
}
run_test 23b "LFSCK can repair dangling name entry (2)"
# test_23c: like test_23b, but the object re-created by LFSCK is modified by
# a client before the second-stage scanning, so LFSCK must not replace it.
#
# Steps:
#  - create the hard link "foo" to f0 while fail_loc 0x1621 is armed, then
#    check that 'ls' on foo fails;
#  - arm fail_loc 0x1602 with fail_val=10 (OBD_FAIL_LFSCK_DELAY3) and start
#    LFSCK with "-r -C";
#  - wait until 'stat' reports size 0 for foo, then append data to it;
#  - clear the fail_loc, wait for LFSCK to complete, and verify that one
#    dangling entry was repaired and that foo does NOT contain f0's data.
test_23c() {
	echo "#####"
	echo "The objectA has multiple hard links, one of them corresponding"
	echo "to the name entry_B. But there is something wrong for the name"
	echo "entry_B and cause entry_B to references non-exist object_C."
	echo "In the first-stage scanning, the LFSCK will think the entry_B"
	echo "as dangling, and re-create the lost object_C. And then others"
	echo "modified the re-created object_C. When the LFSCK comes to the"
	echo "second-stage scanning, it will find that the former re-creating"
	echo "object_C maybe wrong and try to replace the object_C with the"
	echo "real object_A. But because object_C has been modified, so the"
	echo "LFSCK cannot replace it."
	echo "#####"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
	echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
	echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"

	echo "Inject failure stub on MDT0 to simulate dangling name entry"
	#define OBD_FAIL_LFSCK_DANGLING3	0x1621
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
	ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"

	echo "'ls' should fail because of dangling name entry"
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(6) ls should fail."

	#define OBD_FAIL_LFSCK_DELAY3		0x1602
	do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602

	echo "Trigger namespace LFSCK to find out dangling name entry"
	$START_NAMESPACE -r -C ||
		error "(7) Fail to start LFSCK for namespace"

	# Wait for foo to become visible with size 0 before modifying it.
	wait_update_facet client "stat $DIR/$tdir/d0/foo |
		awk '/Size/ { print \\\$2 }'" "0" 32 || {
		# Dump the file that was being polled; the previous
		# diagnostic stat'ed "$DIR/$tdir/guard", which this test
		# never creates.
		stat $DIR/$tdir/d0/foo
		$SHOW_NAMESPACE
		error "(8) unexpected size"
	}

	echo "data" >> $DIR/$tdir/d0/foo || error "(9) Fail to write"
	cancel_lru_locks osc

	do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(10) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^dangling_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(11) Fail to repair dangling name entry: $repaired"

	# The modified object must have been kept, so foo is not f0's data.
	local data=$(cat $DIR/$tdir/d0/foo)
	[ "$data" != "dummy" ] ||
		error "(12) The $DIR/$tdir/d0/foo should not be recovered"
}
run_test 23c "LFSCK can repair dangling name entry (3)"
# test_24: namespace LFSCK versus a multiple-referenced name entry.
#
# Needs at least 2 MDSes.  With fail_loc 0x1622 armed on the MDS, the
# directory d0/dummy/foo is created and removed again; its FID is recorded
# first.  After "lfsck_start -A -r" completes, the report must show one
# multiple_referenced_repaired, and an entry named "<cfid>-<pfid>-D-0" must
# exist under .lustre/lost+found/MDT0000/.
test_24() {
	if [ $MDSCOUNT -lt 2 ]; then
		skip "We need at least 2 MDSes for this test" && return
	fi

	echo "#####"
	echo "Two MDT-objects back reference the same name entry via their"
	echo "each own linkEA entry, but the name entry only references one"
	echo "MDT-object. The namespace LFSCK will remove the linkEA entry"
	echo "for the MDT-object that is not recognized. If such MDT-object"
	echo "has no other linkEA entry after the removing, then the LFSCK"
	echo "will add it as orphan under the .lustre/lost+found/MDTxxxx/."
	echo "#####"

	check_mount_and_prep

	if ! $LFS mkdir -i 1 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0"
	fi

	if ! mkdir $DIR/$tdir/d0/guard; then
		error "(1) Fail to mkdir guard"
	fi
	$LFS path2fid $DIR/$tdir/d0/guard

	if ! mkdir $DIR/$tdir/d0/dummy; then
		error "(2) Fail to mkdir dummy"
	fi
	$LFS path2fid $DIR/$tdir/d0/dummy

	# The parent FID that ends up in the orphan's name is taken from
	# "guard" on non-ldiskfs backends and from "dummy" on ldiskfs.
	local pfid_dir
	pfid_dir=$DIR/$tdir/d0/dummy
	if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
		pfid_dir=$DIR/$tdir/d0/guard
	fi
	local pfid
	pfid=$($LFS path2fid $pfid_dir)

	if ! touch $DIR/$tdir/d0/guard/foo; then
		error "(3) Fail to touch $DIR/$tdir/d0/guard/foo"
	fi

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "the $DIR/$tdir/d0/dummy/foo has the 'bad' linkEA entry"
	echo "that references $DIR/$tdir/d0/guard/foo."
	echo "Then remove the name entry $DIR/$tdir/d0/dummy/foo."
	echo "So the MDT-object $DIR/$tdir/d0/dummy/foo will be left"
	echo "there with the same linkEA entry as another MDT-object"
	echo "$DIR/$tdir/d0/guard/foo has"

	#define OBD_FAIL_LFSCK_MUL_REF		0x1622
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1622
	if ! $LFS mkdir -i 0 $DIR/$tdir/d0/dummy/foo; then
		error "(4) Fail to mkdir $DIR/$tdir/d0/dummy/foo"
	fi
	$LFS path2fid $DIR/$tdir/d0/dummy/foo
	local cfid
	cfid=$($LFS path2fid $DIR/$tdir/d0/dummy/foo)
	if ! rmdir $DIR/$tdir/d0/dummy/foo; then
		error "(5) Fail to remove $DIR/$tdir/d0/dummy/foo name entry"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	echo "stat $DIR/$tdir/d0/dummy/foo should fail"
	if stat $DIR/$tdir/d0/dummy/foo > /dev/null 2>&1; then
		error "(6) stat successfully unexpectedly"
	fi

	echo "Trigger namespace LFSCK to repair multiple-referenced name entry"
	if ! $START_NAMESPACE -A -r; then
		error "(7) Fail to start LFSCK for namespace"
	fi

	# Poll the "status" line of the namespace LFSCK report on the MDS.
	local status_query="$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'"
	if ! wait_update_facet $SINGLEMDS "$status_query" "completed" 32; then
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	fi

	local nr_fixed
	nr_fixed=$($SHOW_NAMESPACE |
		   awk '/^multiple_referenced_repaired/ { print $2 }')
	if ! [ $nr_fixed -eq 1 ]; then
		error "(9) Fail to repair multiple referenced name entry: $nr_fixed"
	fi

	echo "There should be an orphan under .lustre/lost+found/MDT0000/"
	if ! [ -d $MOUNT/.lustre/lost+found/MDT0000 ]; then
		error "(10) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
	fi

	# Expected orphan name: <child FID>-<parent FID>-D-0
	local orphan="$cfid-$pfid-D-0"
	ls -ail $MOUNT/.lustre/lost+found/MDT0000/$orphan ||
		error "(11) .lustre/lost+found/MDT0000/ should not be empty"
}
run_test 24 "LFSCK can repair multiple-referenced name entry"
# test_25: namespace LFSCK versus a name entry with a wrong file type.
#
# Skipped unless the MDS backend is ldiskfs.  A file is created while
# fail_loc 0x1623 is armed on the MDS; after "lfsck_start -r" completes the
# report must show one bad_file_type_repaired and the directory must be
# listable.
test_25() {
	if [ $(facet_fstype $SINGLEMDS) != ldiskfs ]; then
		skip "Only support to inject failure on ldiskfs" && return
	fi

	echo "#####"
	echo "The file type in the name entry does not match the file type"
	echo "claimed by the referenced object. Then the LFSCK will update"
	echo "the file type in the name entry."
	echo "#####"

	check_mount_and_prep

	if ! $LFS mkdir -i 0 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0"
	fi

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "the file type stored in the name entry is wrong."

	#define OBD_FAIL_LFSCK_BAD_TYPE		0x1623
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1623
	if ! touch $DIR/$tdir/d0/foo; then
		error "(2) Fail to touch $DIR/$tdir/d0/foo"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	echo "Trigger namespace LFSCK to repair bad file type in the name entry"
	if ! $START_NAMESPACE -r; then
		error "(3) Fail to start LFSCK for namespace"
	fi

	# Poll the "status" line of the namespace LFSCK report on the MDS.
	local status_query="$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'"
	if ! wait_update_facet $SINGLEMDS "$status_query" "completed" 32; then
		$SHOW_NAMESPACE
		error "(4) unexpected status"
	fi

	local nr_fixed
	nr_fixed=$($SHOW_NAMESPACE |
		   awk '/^bad_file_type_repaired/ { print $2 }')
	if ! [ $nr_fixed -eq 1 ]; then
		error "(5) Fail to repair bad file type in name entry: $nr_fixed"
	fi

	ls -ail $DIR/$tdir/d0 || error "(6) Fail to 'ls' the $DIR/$tdir/d0"
}
run_test 25 "LFSCK can repair bad file type in the name entry"
# test_26a: namespace LFSCK re-adds a lost local name entry.
#
# foo is created together with a second hard link (dummy) and its FID is
# recorded.  foo is then unlinked while fail_loc 0x1624 is armed on the MDS.
# After "lfsck_start -r -A" completes, the report must show one
# lost_dirent_repaired, foo must be listable again and must still have the
# FID recorded before the unlink.
test_26a() {
	echo "#####"
	echo "The local name entry back referenced by the MDT-object is lost."
	echo "The namespace LFSCK will add the missing local name entry back"
	echo "to the normal namespace."
	echo "#####"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)

	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rm -f $DIR/$tdir/d0/foo || error "(4) Fail to unlink $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# The name entry is gone, so a lookup must fail.  The 'error' call
	# was missing here, which turned this check into a no-op that only
	# printed "command not found".
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(5) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	ls -ail $DIR/$tdir/d0/foo ||
		error "(9) Fail to 'ls' $DIR/$tdir/d0/foo"

	# The restored name must point at the very same object.
	local foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
	[ "$foofid" == "$foofid2" ] ||
		error "(10) foo's FID changed: $foofid, $foofid2"
}
run_test 26a "LFSCK can add the missing local name entry back to the namespace"
# test_26b: namespace LFSCK re-adds a lost remote name entry.
#
# Needs at least 2 MDSes.  d0 is created with "-i 1" and d0/foo with "-i 0";
# foo's FID is recorded, then foo is removed while fail_loc 0x1624 is armed
# on the MDS.  After "lfsck_start -r -A" completes, the report must show one
# lost_dirent_repaired, foo must be listable again and keep its FID.
test_26b() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "#####"
	echo "The remote name entry back referenced by the MDT-object is lost."
	echo "The namespace LFSCK will add the missing remote name entry back"
	echo "to the normal namespace."
	echo "#####"

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	$LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"
	local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# The name entry is gone, so a lookup must fail.  The 'error' call
	# was missing here, which made this check unable to fail the test.
	ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
		error "(4) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(7) Fail to repair lost dirent: $repaired"

	ls -ail $DIR/$tdir/d0/foo ||
		error "(8) Fail to 'ls' $DIR/$tdir/d0/foo"

	# The restored name must point at the very same object.
	local foofid2=$($LFS path2fid $DIR/$tdir/d0/foo)
	[ "$foofid" == "$foofid2" ] ||
		error "(9) foo's FID changed: $foofid, $foofid2"
}
run_test 26b "LFSCK can add the missing remote name entry back to the namespace"
# test_27a: namespace LFSCK re-creates a lost local parent as an orphan.
#
# foo and a hard link to it (dummy) are both unlinked while fail_loc 0x1624
# is armed on the MDS, then the parent directory d0 is removed.  After
# "lfsck_start -r -A" completes, the report must show one
# lost_dirent_repaired and an entry matching "*-P-*" must exist under
# .lustre/lost+found/MDT0000/.
test_27a() {
	echo "#####"
	echo "The local parent referenced by the MDT-object linkEA is lost."
	echo "The namespace LFSCK will re-create the lost parent as orphan."
	echo "#####"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system. And then remove"
	echo "another hard link and the parent directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rm -f $DIR/$tdir/d0/foo ||
		error "(4) Fail to unlink $DIR/$tdir/d0/foo"
	rm -f $DIR/$tdir/d0/dummy ||
		error "(5) Fail to unlink $DIR/$tdir/d0/dummy"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rm -rf $DIR/$tdir/d0 || error "(5) Fail to unlink the dir d0"
	# The 'error' call was missing here, so a surviving d0 was never
	# reported as a failure.
	ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 &&
		error "(6) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the lost parent"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	echo "There should be an orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(9) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0000/

	# Quote the pattern so that it reaches find(1) intact instead of
	# being expanded by the shell against the current directory.
	local cname
	cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-P-*")
	[ ! -z "$cname" ] ||
		error "(10) .lustre/lost+found/MDT0000/ should not be empty"
}
run_test 27a "LFSCK can recreate the lost local parent directory as orphan"
# test_27b: namespace LFSCK re-creates a lost remote parent as an orphan.
#
# Needs at least 2 MDSes.  d0 is created with "-i 1" and d0/foo with "-i 0".
# foo is removed while fail_loc 0x1624 is armed on the MDS, then d0 is
# removed.  After "lfsck_start -r -A" completes, the report must show one
# lost_dirent_repaired and an entry matching "*-P-*" must exist under
# .lustre/lost+found/MDT0001/.
test_27b() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "We need at least 2 MDSes for this test" && return

	echo "#####"
	echo "The remote parent referenced by the MDT-object linkEA is lost."
	echo "The namespace LFSCK will re-create the lost parent as orphan."
	echo "#####"

	check_mount_and_prep

	$LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	$LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"

	$LFS path2fid $DIR/$tdir/d0

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's name entry will be removed, but the foo's object"
	echo "and its linkEA are kept in the system. And then remove"
	echo "the parent directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	rmdir $DIR/$tdir/d0 || error "(4) Fail to unlink the dir d0"
	# The 'error' call was missing here, so a surviving d0 was never
	# reported as a failure.
	ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 &&
		error "(5) 'ls' should fail"

	echo "Trigger namespace LFSCK to repair the missing remote name entry"
	$START_NAMESPACE -r -A ||
		error "(6) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(7) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(8) Fail to repair lost dirent: $repaired"

	ls -ail $MOUNT/.lustre/lost+found/

	echo "There should be an orphan under .lustre/lost+found/MDT0001/"
	[ -d $MOUNT/.lustre/lost+found/MDT0001 ] ||
		error "(9) $MOUNT/.lustre/lost+found/MDT0001/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0001/

	# Quote the pattern so that it reaches find(1) intact instead of
	# being expanded by the shell against the current directory.
	local cname
	cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name "*-P-*")
	[ ! -z "$cname" ] ||
		error "(10) .lustre/lost+found/MDT0001/ should not be empty"
}
run_test 27b "LFSCK can recreate the lost remote parent directory as orphan"
# test_28: orphan handling must skip an MDT that failed during the first
# stage of LFSCK, without affecting the other MDTs.
#
# Needs at least 2 MDTs.  d1/a1 and d2/a2 are removed while fail_loc 0x1624
# is armed on both mds1 and mds2.  The first LFSCK run is started with
# fail_loc 0x161c armed on mds2; mds1 is expected to end as "partial" with 0
# lost_dirent_repaired and mds2 as "completed" with 1.  A second run without
# the fail_loc must complete on every MDS, with 1 repaired on mds1 and 0 on
# mds2.
test_28() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "#####"
	echo "The target name entry is lost. The LFSCK should insert the"
	echo "orphan MDT-object under .lustre/lost+found/MDTxxxx. But if"
	echo "the MDT (on which the orphan MDT-object resides) has ever"
	echo "failed to respond some name entry verification during the"
	echo "first stage-scanning, then the LFSCK should skip to handle"
	echo "orphan MDT-object on this MDT. But other MDTs should not"
	echo "be affected."
	echo "#####"

	check_mount_and_prep
	$LFS mkdir -i 0 $DIR/$tdir/d1
	$LFS mkdir -i 1 $DIR/$tdir/d1/a1
	$LFS mkdir -i 1 $DIR/$tdir/d1/a2

	$LFS mkdir -i 1 $DIR/$tdir/d2
	$LFS mkdir -i 0 $DIR/$tdir/d2/a1
	$LFS mkdir -i 0 $DIR/$tdir/d2/a2

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "d1/a1's name entry will be removed, but the d1/a1's object"
	echo "and its linkEA are kept in the system. And the case that"
	echo "d2/a2's name entry will be removed, but the d2/a2's object"
	echo "and its linkEA are kept in the system."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet mds1 $LCTL set_param fail_loc=0x1624
	do_facet mds2 $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/d1/a1 || error "(1) Fail to rmdir $DIR/$tdir/d1/a1"
	rmdir $DIR/$tdir/d2/a2 || error "(2) Fail to rmdir $DIR/$tdir/d2/a2"
	do_facet mds1 $LCTL set_param fail_loc=0
	do_facet mds2 $LCTL set_param fail_loc=0

	cancel_lru_locks mdc
	cancel_lru_locks osc

	echo "Inject failure, to simulate the MDT0 fail to handle"
	echo "MDT1 LFSCK request during the first-stage scanning."
	#define OBD_FAIL_LFSCK_BAD_NETWORK	0x161c
	do_facet mds2 $LCTL set_param fail_loc=0x161c fail_val=0

	echo "Trigger namespace LFSCK on all devices to find out orphan object"
	$START_NAMESPACE -r -A ||
		error "(3) Fail to start LFSCK for namespace"

	wait_update_facet mds1 "$LCTL get_param -n \
		mdd.$(facet_svc mds1).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "partial" 32 || {
		error "(4) mds1 is not the expected 'partial'"
	}

	wait_update_facet mds2 "$LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		error "(5) mds2 is not the expected 'completed'"
	}

	do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0

	local repaired=$(do_facet mds1 $LCTL get_param -n \
			 mdd.$(facet_svc mds1).lfsck_namespace |
			 awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 0 ] ||
		error "(6) Expect 0 fixed on mds1, but got: $repaired"

	repaired=$(do_facet mds2 $LCTL get_param -n \
		   mdd.$(facet_svc mds2).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(7) Expect 1 fixed on mds2, but got: $repaired"

	echo "Trigger namespace LFSCK on all devices again to cleanup"
	$START_NAMESPACE -r -A ||
		error "(8) Fail to start LFSCK for namespace"

	# Keep the loop index local so it does not leak into the caller.
	local k
	for k in $(seq $MDSCOUNT); do
		# The LFSCK status query internal is 30 seconds. For the case
		# of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
		# time to guarantee the status sync up.
		wait_update_facet mds${k} "$LCTL get_param -n \
			mdd.$(facet_svc mds${k}).lfsck_namespace |
			awk '/^status/ { print \\\$2 }'" "completed" 32 ||
			error "(9) MDS${k} is not the expected 'completed'"
	done

	# 'repaired' is already local; it is only re-assigned here.
	repaired=$(do_facet mds1 $LCTL get_param -n \
		   mdd.$(facet_svc mds1).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 1 ] ||
		error "(10) Expect 1 fixed on mds1, but got: $repaired"

	repaired=$(do_facet mds2 $LCTL get_param -n \
		   mdd.$(facet_svc mds2).lfsck_namespace |
		   awk '/^lost_dirent_repaired/ { print $2 }')
	[ $repaired -eq 0 ] ||
		error "(11) Expect 0 fixed on mds2, but got: $repaired"
}
run_test 28 "Skip the failed MDT(s) when handle orphan MDT-objects"
# test_29a: namespace LFSCK repairs an nlink count that is too large.
#
# A hard link to foo is created while fail_loc 0x1625 is armed on the MDS;
# 'stat' is then expected to report 3 links.  After "lfsck_start -r -A"
# completes, the report must show one nlinks_repaired and 'stat' must
# report 2 links.
test_29a() {
	echo "#####"
	echo "The object's nlink attribute is larger than the object's known"
	echo "name entries count. The LFSCK will repair the object's nlink"
	echo "attribute to match the known name entries count"
	echo "#####"

	check_mount_and_prep

	if ! $LFS mkdir -i 0 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0"
	fi
	if ! touch $DIR/$tdir/d0/foo; then
		error "(2) Fail to create foo"
	fi

	echo "Inject failure stub on MDT0 to simulate the case that foo's"
	echo "nlink attribute is larger than its name entries count."

	#define OBD_FAIL_LFSCK_MORE_NLINK	0x1625
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1625
	if ! ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1; then
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# Check that the injection took effect before running LFSCK.
	cancel_lru_locks mdc
	local nlink
	nlink=$(stat --format=%h $DIR/$tdir/d0/foo)
	if ! [ $nlink -eq 3 ]; then
		error "(4) Cannot inject error: $nlink"
	fi

	echo "Trigger namespace LFSCK to repair the nlink count"
	if ! $START_NAMESPACE -r -A; then
		error "(5) Fail to start LFSCK for namespace"
	fi

	# Poll the "status" line of the namespace LFSCK report on the MDS.
	local status_query="$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'"
	if ! wait_update_facet $SINGLEMDS "$status_query" "completed" 32; then
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	fi

	local nr_fixed
	nr_fixed=$($SHOW_NAMESPACE |
		   awk '/^nlinks_repaired/ { print $2 }')
	if ! [ $nr_fixed -eq 1 ]; then
		error "(7) Fail to repair nlink count: $nr_fixed"
	fi

	cancel_lru_locks mdc
	nlink=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ $nlink -eq 2 ] || error "(8) Fail to repair nlink count: $nlink"
}
run_test 29a "LFSCK can repair bad nlink count (1)"
# test_29b: namespace LFSCK repairs an nlink count that is too small.
#
# A hard link to foo is created while fail_loc 0x1626 is armed on the MDS;
# 'stat' is then expected to report 1 link.  After "lfsck_start -r -A"
# completes, the report must show one nlinks_repaired and 'stat' must
# report 2 links.
test_29b() {
	echo "#####"
	echo "The object's nlink attribute is smaller than the object's known"
	echo "name entries count. The LFSCK will repair the object's nlink"
	echo "attribute to match the known name entries count"
	echo "#####"

	check_mount_and_prep

	if ! $LFS mkdir -i 0 $DIR/$tdir/d0; then
		error "(1) Fail to mkdir d0"
	fi
	if ! touch $DIR/$tdir/d0/foo; then
		error "(2) Fail to create foo"
	fi

	echo "Inject failure stub on MDT0 to simulate the case that foo's"
	echo "nlink attribute is smaller than its name entries count."

	#define OBD_FAIL_LFSCK_LESS_NLINK	0x1626
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1626
	if ! ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1; then
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
	fi
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	# Check that the injection took effect before running LFSCK.
	cancel_lru_locks mdc
	local nlink
	nlink=$(stat --format=%h $DIR/$tdir/d0/foo)
	if ! [ $nlink -eq 1 ]; then
		error "(4) Cannot inject error: $nlink"
	fi

	echo "Trigger namespace LFSCK to repair the nlink count"
	if ! $START_NAMESPACE -r -A; then
		error "(5) Fail to start LFSCK for namespace"
	fi

	# Poll the "status" line of the namespace LFSCK report on the MDS.
	local status_query="$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'"
	if ! wait_update_facet $SINGLEMDS "$status_query" "completed" 32; then
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	fi

	local nr_fixed
	nr_fixed=$($SHOW_NAMESPACE |
		   awk '/^nlinks_repaired/ { print $2 }')
	if ! [ $nr_fixed -eq 1 ]; then
		error "(7) Fail to repair nlink count: $nr_fixed"
	fi

	cancel_lru_locks mdc
	nlink=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ $nlink -eq 2 ] || error "(8) Fail to repair nlink count: $nlink"
}
run_test 29b "LFSCK can repair bad nlink count (2)"
# test_29c: LFSCK must not touch nlink when the linkEA has overflowed.
#
# foo gets one normal hard link (h1) and a second one (h2) created while
# fail_loc 0x1627 is armed on the MDS.  'stat' is expected to report 3 links
# while "lfs fid2path" lists only 2 paths.  The fail_loc stays armed while
# "lfsck_start -r -A" runs and is cleared afterwards; the report must show
# 0 nlinks_repaired and both counts must be unchanged.
test_29c() {
	echo "#####"
	echo "There are too many hard links to the object, and exceeds the"
	echo "object's linkEA limitation, as to NOT all the known name entries"
	echo "will be recorded in the linkEA. Under such case, LFSCK should"
	echo "skip the nlink verification for this object."
	echo "#####"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
	touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h1 ||
		error "(3) Fail to hard link to $DIR/$tdir/d0/foo"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "foo's hard links exceed the object's linkEA limitation."

	#define OBD_FAIL_LFSCK_LINKEA_OVERFLOW	0x1627
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1627
	ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h2 ||
		error "(4) Fail to hard link to $DIR/$tdir/d0/foo"

	cancel_lru_locks mdc

	local count1=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ $count1 -eq 3 ] || error "(5) Stat failure: $count1"

	local foofid=$($LFS path2fid $DIR/$tdir/d0/foo)
	$LFS fid2path $DIR $foofid
	local count2=$($LFS fid2path $DIR $foofid | wc -l)
	# The 'error' call was missing here, so a failed injection was only
	# reported as "command not found" and the test carried on.
	[ $count2 -eq 2 ] || error "(6) Fail to inject error: $count2"

	echo "Trigger namespace LFSCK to repair the nlink count"
	$START_NAMESPACE -r -A ||
		error "(7) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(8) unexpected status"
	}

	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
	local repaired=$($SHOW_NAMESPACE |
			 awk '/^nlinks_repaired/ { print $2 }')
	[ $repaired -eq 0 ] ||
		error "(9) Repair nlink count unexpcetedly: $repaired"

	cancel_lru_locks mdc

	count1=$(stat --format=%h $DIR/$tdir/d0/foo)
	[ $count1 -eq 3 ] || error "(10) Stat failure: $count1"

	count2=$($LFS fid2path $DIR $foofid | wc -l)
	[ $count2 -eq 2 ] ||
		error "(11) Repaired something unexpectedly: $count2"
}
run_test 29c "Not verify nlink attr if hark links exceed linkEA limitation"
# test_30: namespace LFSCK recovers orphans from the backend /lost+found.
#
# Skipped unless the MDS backend is ldiskfs.  Directory foo/d0 is created
# while fail_loc 0x161d is armed, then d0/d1, d0/f1, d0 and foo/f0 are
# removed while fail_loc 0x1624 is armed.  The client is unmounted, the MDT
# is stopped, e2fsck is run with "-y" on the MDT device, and the MDT is
# started again.  After "lfsck_start -r -A" completes, the report must show
# local_lost_found_moved >= 4, foo/f0 must be stat-able again, and an entry
# matching "*-*-D-*" under .lustre/lost+found/MDT0000/ must contain d1
# and f1.
test_30() {
	[ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
		skip "Only support backend /lost+found for ldiskfs" && return

	echo "#####"
	echo "The namespace LFSCK will move the orphans from backend"
	echo "/lost+found directory to normal client visible namespace"
	echo "or to global visible ./lustre/lost+found/MDTxxxx/ directory"
	echo "#####"

	check_mount_and_prep

	$LFS mkdir -i 0 $DIR/$tdir/foo || error "(1) Fail to mkdir foo"
	touch $DIR/$tdir/foo/f0 || error "(2) Fail to touch f1"

	echo "Inject failure stub on MDT0 to simulate the case that"
	echo "directory d0 has no linkEA entry, then the LFSCK will"
	echo "move it into .lustre/lost+found/MDTxxxx/ later."

	#define OBD_FAIL_LFSCK_NO_LINKEA	0x161d
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
	mkdir $DIR/$tdir/foo/d0 || error "(3) Fail to mkdir d0"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	touch $DIR/$tdir/foo/d0/f1 || error "(4) Fail to touch f1"
	mkdir $DIR/$tdir/foo/d0/d1 || error "(5) Fail to mkdir d1"

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "object's name entry will be removed, but not destroy the"
	echo "object. Then backend e2fsck will handle it as orphan and"
	echo "add them into the backend /lost+found directory."

	#define OBD_FAIL_LFSCK_NO_NAMEENTRY	0x1624
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
	rmdir $DIR/$tdir/foo/d0/d1 || error "(6) Fail to rmdir d1"
	rm -f $DIR/$tdir/foo/d0/f1 || error "(7) Fail to unlink f1"
	rmdir $DIR/$tdir/foo/d0 || error "(8) Fail to rmdir d0"
	rm -f $DIR/$tdir/foo/f0 || error "(9) Fail to unlink f0"
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0

	umount_client $MOUNT || error "(10) Fail to stop client!"

	stop $SINGLEMDS || error "(11) Fail to stop MDT0"

	echo "run e2fsck"
	run_e2fsck $(facet_host $SINGLEMDS) $MDT_DEVNAME "-y" ||
		error "(12) Fail to run e2fsck"

	start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
		error "(13) Fail to start MDT0"

	echo "Trigger namespace LFSCK to recover backend orphans"
	$START_NAMESPACE -r -A ||
		error "(14) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(15) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^local_lost_found_moved/ { print $2 }')
	[ $repaired -ge 4 ] ||
		error "(16) Fail to recover backend orphans: $repaired"

	mount_client $MOUNT || error "(17) Fail to start client!"

	# The 'error' call was missing here, so an unrecovered f0 was never
	# reported as a failure.
	stat $DIR/$tdir/foo/f0 || error "(18) f0 is not recovered"

	ls -ail $MOUNT/.lustre/lost+found/

	echo "d0 should become orphan under .lustre/lost+found/MDT0000/"
	[ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
		error "(19) $MOUNT/.lustre/lost+found/MDT0000/ should be there"

	ls -ail $MOUNT/.lustre/lost+found/MDT0000/

	# Quote the pattern so that it reaches find(1) intact instead of
	# being expanded by the shell against the current directory.
	local cname
	cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name "*-*-D-*")
	[ ! -z "$cname" ] || error "(20) d0 is not recovered"

	stat ${cname}/d1 || error "(21) d0 is not recovered"
	stat ${cname}/f1 || error "(22) f1 is not recovered"
}
run_test 30 "LFSCK can recover the orphans from backend /lost+found"
# test_31a: namespace LFSCK repairs name entries with a bad name hash (1).
#
# Needs at least 2 MDTs.  A directory striped over $MDSCOUNT MDTs is created
# and, with fail_loc 0x1628 / fail_val=0 set on the client, $MDSCOUNT
# sub-directories d0..dN are created in it.  After "lfsck_start -r -A"
# completes, the report on $SINGLEMDS must show name_hash_repaired >= 1.
# The client is then remounted and every sub-directory, followed by the
# striped directory itself, must be stat-able and removable.
test_31a() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "#####"
	echo "For the name entry under a striped directory, if the name"
	echo "hash does not match the shard, then the LFSCK will repair"
	echo "the bad name entry"
	echo "#####"

	check_mount_and_prep

	$LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
		error "(1) Fail to create striped directory"

	echo "Inject failure stub on client to simulate the case that"
	echo "some name entry should be inserted into other non-first"
	echo "shard, but inserted into the first shard by wrong"

	#define OBD_FAIL_LFSCK_BAD_NAME_HASH	0x1628
	$LCTL set_param fail_loc=0x1628 fail_val=0
	createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT ||
		error "(2) Fail to create file under striped directory"
	$LCTL set_param fail_loc=0 fail_val=0

	echo "Trigger namespace LFSCK to repair bad name hash"
	$START_NAMESPACE -r -A ||
		error "(3) Fail to start LFSCK for namespace"

	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		$SHOW_NAMESPACE
		error "(4) unexpected status"
	}

	local repaired=$($SHOW_NAMESPACE |
			 awk '/^name_hash_repaired/ { print $2 }')
	[ $repaired -ge 1 ] ||
		error "(5) Fail to repair bad name hash: $repaired"

	umount_client $MOUNT || error "(6) umount failed"
	mount_client $MOUNT || error "(7) mount failed"

	# Keep the loop index local so it does not leak into the caller.
	local i
	for ((i = 0; i < $MDSCOUNT; i++)); do
		stat $DIR/$tdir/striped_dir/d$i ||
			error "(8) Fail to stat d$i after LFSCK"
		rmdir $DIR/$tdir/striped_dir/d$i ||
			error "(9) Fail to unlink d$i after LFSCK"
	done

	rmdir $DIR/$tdir/striped_dir ||
		error "(10) Fail to remove the striped directory after LFSCK"
}
run_test 31a "The LFSCK can find/repair the name entry with bad name hash (1)"
# test_31b: namespace LFSCK repairs name entries with a bad name hash (2).
#
# Same flow as test_31a, but the client-side injection uses fail_val=1 and
# both the LFSCK status and the name_hash_repaired counter (expected >= 1)
# are read from mds2.  After a client remount every sub-directory, followed
# by the striped directory itself, must be stat-able and removable.
test_31b() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "#####"
	echo "For the name entry under a striped directory, if the name"
	echo "hash does not match the shard, then the LFSCK will repair"
	echo "the bad name entry"
	echo "#####"

	check_mount_and_prep

	$LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir/striped_dir ||
		error "(1) Fail to create striped directory"

	echo "Inject failure stub on client to simulate the case that"
	echo "some name entry should be inserted into other non-second"
	echo "shard, but inserted into the secod shard by wrong"

	#define OBD_FAIL_LFSCK_BAD_NAME_HASH	0x1628
	$LCTL set_param fail_loc=0x1628 fail_val=1
	createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT ||
		error "(2) Fail to create file under striped directory"
	$LCTL set_param fail_loc=0 fail_val=0

	echo "Trigger namespace LFSCK to repair bad name hash"
	$START_NAMESPACE -r -A ||
		error "(3) Fail to start LFSCK for namespace"

	wait_update_facet mds2 "$LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 ||
		error "(4) unexpected status"

	local repaired=$(do_facet mds2 $LCTL get_param -n \
			 mdd.$(facet_svc mds2).lfsck_namespace |
			 awk '/^name_hash_repaired/ { print $2 }')
	[ $repaired -ge 1 ] ||
		error "(5) Fail to repair bad name hash: $repaired"

	umount_client $MOUNT || error "(6) umount failed"
	mount_client $MOUNT || error "(7) mount failed"

	# Keep the loop index local so it does not leak into the caller.
	local i
	for ((i = 0; i < $MDSCOUNT; i++)); do
		stat $DIR/$tdir/striped_dir/d$i ||
			error "(8) Fail to stat d$i after LFSCK"
		rmdir $DIR/$tdir/striped_dir/d$i ||
			error "(9) Fail to unlink d$i after LFSCK"
	done

	rmdir $DIR/$tdir/striped_dir ||
		error "(10) Fail to remove the striped directory after LFSCK"
}
run_test 31b "The LFSCK can find/repair the name entry with bad name hash (2)"
# test_31c: the namespace LFSCK must re-generate the master LMV EA of a
# striped directory when nothing was created under it after the EA was lost.
#
# Steps performed below:
#   1. with fail_loc=0x1629 set on $SINGLEMDS, create a directory striped
#      over $MDSCOUNT MDTs (the echoed text states that this simulates the
#      master MDT-object losing its LMV EA);
#   2. start the namespace LFSCK with "-r -A" and wait until $SINGLEMDS
#      reports the status "completed";
#   3. require striped_dirs_repaired == 1;
#   4. remount the client, check that listing the directory shows nothing,
#      then remove the directory.
#
# Skipped when fewer than 2 MDTs are configured.
# NOTE(review): presumably invoked as test_31c by "run_test 31c" -- confirm
# against the test framework.
test_31c() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "For some reason, the master MDT-object of the striped directory"
	echo "may lost its master LMV EA. If nobody created files under the"
	echo "master directly after the master LMV EA lost, then the LFSCK"
	echo "should re-generate the master LMV EA."

	check_mount_and_prep

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "master MDT-object of the striped directory lost the LMV EA."

	#define OBD_FAIL_LFSCK_LOST_MASTER_LMV	0x1629
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
	# Record the result and disarm the failure stub BEFORE reporting, so
	# that a failed setdirstripe cannot leave fail_loc set behind us.
	local rc=0
	$LFS setdirstripe -i 0 -c $MDSCOUNT "$DIR/$tdir/striped_dir" || rc=$?
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
	[ $rc -eq 0 ] ||
		error "(1) Fail to create striped directory"

	echo "Trigger namespace LFSCK to re-generate master LMV EA"
	# $START_NAMESPACE holds a whole command line; it must stay unquoted.
	$START_NAMESPACE -r -A ||
		error "(2) Fail to start LFSCK for namespace"

	# 32 is presumably the maximum wait in seconds -- TODO confirm.
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so that the failure can be diagnosed.
		$SHOW_NAMESPACE
		error "(3) unexpected status"
	}

	# Declared separately so the assignment does not hide a failure.
	local repaired
	repaired=$($SHOW_NAMESPACE |
		   awk '/^striped_dirs_repaired/ { print $2 }')
	# Quoted: an empty counter fails the check instead of breaking '['.
	[ "$repaired" -eq 1 ] ||
		error "(4) Fail to re-generate master LMV EA: $repaired"

	umount_client $MOUNT || error "(5) umount failed"
	mount_client $MOUNT || error "(6) mount failed"

	# A failing ls must not be mistaken for an empty directory, hence the
	# explicit status check in addition to the emptiness check.
	local empty
	empty=$(ls "$DIR/$tdir/striped_dir/") && [ -z "$empty" ] ||
		error "(7) The master LMV EA is not repaired: $empty"

	rmdir "$DIR/$tdir/striped_dir" ||
		error "(8) Fail to remove the striped directory after LFSCK"
}
3897 run_test 31c "Re-generate the lost master LMV EA for striped directory"
# test_31d: when a striped directory lost its master LMV EA and was modified
# afterwards, the namespace LFSCK must NOT re-generate the EA; per the echoed
# description it marks the broken directory read-only instead.
#
# Steps performed below:
#   1. with fail_loc=0x1629 set on $SINGLEMDS, create a directory striped
#      over $MDSCOUNT MDTs;
#   2. remount the client and create "dummy" under the broken directory;
#   3. start the namespace LFSCK with "-r -A" and wait until $SINGLEMDS
#      reports the status "completed";
#   4. require striped_dirs_repaired == 0;
#   5. "dummy" must still be stat-able, creating "foo" must fail, and after
#      "chattr -i" the directory is removed.
#
# Skipped when fewer than 2 MDTs are configured.
# NOTE(review): presumably invoked as test_31d by "run_test 31d" -- confirm
# against the test framework.
test_31d() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "For some reason, the master MDT-object of the striped directory"
	echo "may lost its master LMV EA. If somebody created files under the"
	echo "master directly after the master LMV EA lost, then the LFSCK"
	echo "should NOT re-generate the master LMV EA, instead, it should"
	echo "change the broken striped dirctory as read-only to prevent"
	echo "further damage"

	check_mount_and_prep

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "master MDT-object of the striped directory lost the LMV EA."

	#define OBD_FAIL_LFSCK_LOST_MASTER_LMV	0x1629
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1629
	# Record the result and disarm the failure stub BEFORE reporting, so
	# that a failed setdirstripe cannot leave fail_loc set behind us.
	local rc=0
	$LFS setdirstripe -i 0 -c $MDSCOUNT "$DIR/$tdir/striped_dir" || rc=$?
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0
	[ $rc -eq 0 ] ||
		error "(1) Fail to create striped directory"

	umount_client $MOUNT || error "(2) umount failed"
	mount_client $MOUNT || error "(3) mount failed"

	touch "$DIR/$tdir/striped_dir/dummy" ||
		error "(4) Fail to touch under broken striped directory"

	echo "Trigger namespace LFSCK to find out the inconsistency"
	# $START_NAMESPACE holds a whole command line; it must stay unquoted.
	$START_NAMESPACE -r -A ||
		error "(5) Fail to start LFSCK for namespace"

	# 32 is presumably the maximum wait in seconds -- TODO confirm.
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so that the failure can be diagnosed.
		$SHOW_NAMESPACE
		error "(6) unexpected status"
	}

	# Declared separately so the assignment does not hide a failure.
	local repaired
	repaired=$($SHOW_NAMESPACE |
		   awk '/^striped_dirs_repaired/ { print $2 }')
	# Quoted, and deliberately without a default: an empty counter must
	# fail this check rather than be taken for 0.
	[ "$repaired" -eq 0 ] ||
		error "(7) Re-generate master LMV EA unexpected: $repaired"

	stat "$DIR/$tdir/striped_dir/dummy" ||
		error "(8) Fail to stat $DIR/$tdir/striped_dir/dummy"

	# Success of this touch is the failure condition.
	touch "$DIR/$tdir/striped_dir/foo" &&
		error "(9) The broken striped directory should be read-only"

	chattr -i "$DIR/$tdir/striped_dir" ||
		error "(10) Fail to chattr on the broken striped directory"

	rmdir "$DIR/$tdir/striped_dir" ||
		error "(11) Fail to remove the striped directory after LFSCK"
}
3957 run_test 31d "Set broken striped directory (modified after broken) as read-only"
# test_31e: the namespace LFSCK must re-generate a lost slave LMV EA of a
# striped directory (case 1: per the echoed text, the affected slave
# MDT-object resides on the same MDT as the master MDT-object).
#
# Steps performed below:
#   1. with fail_loc=0x162a / fail_val=0 set on $SINGLEMDS, create a
#      directory striped over $MDSCOUNT MDTs;
#   2. start the namespace LFSCK with "-r -A" and wait until $SINGLEMDS
#      reports the status "completed";
#   3. require striped_shards_repaired == 1;
#   4. remove the striped directory.
#
# Skipped when fewer than 2 MDTs are configured.
# NOTE(review): presumably invoked as test_31e by "run_test 31e" -- confirm
# against the test framework.
test_31e() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "For some reason, the slave MDT-object of the striped directory"
	echo "may lost its slave LMV EA. The LFSCK should re-generate the"
	echo "slave LMV EA."

	check_mount_and_prep

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "slave MDT-object (that resides on the same MDT as the master"
	echo "MDT-object resides on) lost the LMV EA."

	#define OBD_FAIL_LFSCK_LOST_SLAVE_LMV	0x162a
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=0
	# Record the result and disarm the failure stub BEFORE reporting, so
	# that a failed setdirstripe cannot leave fail_loc set behind us.
	local rc=0
	$LFS setdirstripe -i 0 -c $MDSCOUNT "$DIR/$tdir/striped_dir" || rc=$?
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
	[ $rc -eq 0 ] ||
		error "(1) Fail to create striped directory"

	echo "Trigger namespace LFSCK to re-generate slave LMV EA"
	# $START_NAMESPACE holds a whole command line; it must stay unquoted.
	$START_NAMESPACE -r -A ||
		error "(2) Fail to start LFSCK for namespace"

	# 32 is presumably the maximum wait in seconds -- TODO confirm.
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so that the failure can be diagnosed.
		$SHOW_NAMESPACE
		error "(3) unexpected status"
	}

	# Declared separately so the assignment does not hide a failure.
	local repaired
	repaired=$($SHOW_NAMESPACE |
		   awk '/^striped_shards_repaired/ { print $2 }')
	# Quoted: an empty counter fails the check instead of breaking '['.
	[ "$repaired" -eq 1 ] ||
		error "(4) Fail to re-generate slave LMV EA: $repaired"

	rmdir "$DIR/$tdir/striped_dir" ||
		error "(5) Fail to remove the striped directory after LFSCK"
}
4000 run_test 31e "Re-generate the lost slave LMV EA for striped directory (1)"
# test_31f: the namespace LFSCK must re-generate a lost slave LMV EA of a
# striped directory (case 2: per the echoed text, the affected slave
# MDT-object resides on a different MDT than the master MDT-object).
#
# Steps performed below:
#   1. with fail_loc=0x162a / fail_val=1 set on $SINGLEMDS, create a
#      directory striped over $MDSCOUNT MDTs;
#   2. start the namespace LFSCK with "-r -A" and wait until mds2 reports
#      the status "completed";
#   3. require striped_shards_repaired == 1 on mds2;
#   4. remove the striped directory.
#
# Skipped when fewer than 2 MDTs are configured.
# NOTE(review): presumably invoked as test_31f by "run_test 31f" -- confirm
# against the test framework.
test_31f() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "For some reason, the slave MDT-object of the striped directory"
	echo "may lost its slave LMV EA. The LFSCK should re-generate the"
	echo "slave LMV EA."

	check_mount_and_prep

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "slave MDT-object (that resides on differnt MDT as the master"
	echo "MDT-object resides on) lost the LMV EA."

	#define OBD_FAIL_LFSCK_LOST_SLAVE_LMV	0x162a
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162a fail_val=1
	# Record the result and disarm the failure stub BEFORE reporting, so
	# that a failed setdirstripe cannot leave fail_loc set behind us.
	local rc=0
	$LFS setdirstripe -i 0 -c $MDSCOUNT "$DIR/$tdir/striped_dir" || rc=$?
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
	[ $rc -eq 0 ] ||
		error "(1) Fail to create striped directory"

	echo "Trigger namespace LFSCK to re-generate slave LMV EA"
	# $START_NAMESPACE holds a whole command line; it must stay unquoted.
	$START_NAMESPACE -r -A ||
		error "(2) Fail to start LFSCK for namespace"

	# 32 is presumably the maximum wait in seconds -- TODO confirm.
	wait_update_facet mds2 "$LCTL get_param -n \
		mdd.$(facet_svc mds2).lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 ||
		error "(3) unexpected status"

	# Declared separately so the assignment does not hide a failure.
	local repaired
	repaired=$(do_facet mds2 $LCTL get_param -n \
		   mdd.$(facet_svc mds2).lfsck_namespace |
		   awk '/^striped_shards_repaired/ { print $2 }')
	# Quoted: an empty counter fails the check instead of breaking '['.
	[ "$repaired" -eq 1 ] ||
		error "(4) Fail to re-generate slave LMV EA: $repaired"

	rmdir "$DIR/$tdir/striped_dir" ||
		error "(5) Fail to remove the striped directory after LFSCK"
}
4042 run_test 31f "Re-generate the lost slave LMV EA for striped directory (2)"
# test_31g: the namespace LFSCK must repair a slave LMV EA whose stripe index
# is corrupted (per the echoed text, the first shard claims the same index as
# the second shard).
#
# Steps performed below:
#   1. with fail_loc=0x162b / fail_val=0 set on $SINGLEMDS, create a
#      directory striped over $MDSCOUNT MDTs;
#   2. start the namespace LFSCK with "-r -A" and wait until $SINGLEMDS
#      reports the status "completed";
#   3. require striped_shards_repaired == 1;
#   4. remount the client, create and delete a file under the directory,
#      then remove the directory.
#
# Skipped when fewer than 2 MDTs are configured.
# NOTE(review): presumably invoked as test_31g by "run_test 31g" -- confirm
# against the test framework.
test_31g() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "For some reason, the stripe index in the slave LMV EA is"
	echo "corrupted. The LFSCK should repair the slave LMV EA."

	check_mount_and_prep

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "slave LMV EA on the first shard of the striped directory"
	echo "claims the same index as the second shard claims"

	#define OBD_FAIL_LFSCK_BAD_SLAVE_LMV	0x162b
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162b fail_val=0
	# Record the result and disarm the failure stub BEFORE reporting, so
	# that a failed setdirstripe cannot leave fail_loc set behind us.
	local rc=0
	$LFS setdirstripe -i 0 -c $MDSCOUNT "$DIR/$tdir/striped_dir" || rc=$?
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
	[ $rc -eq 0 ] ||
		error "(1) Fail to create striped directory"

	echo "Trigger namespace LFSCK to repair the slave LMV EA"
	# $START_NAMESPACE holds a whole command line; it must stay unquoted.
	$START_NAMESPACE -r -A ||
		error "(2) Fail to start LFSCK for namespace"

	# 32 is presumably the maximum wait in seconds -- TODO confirm.
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so that the failure can be diagnosed.
		$SHOW_NAMESPACE
		error "(3) unexpected status"
	}

	# Declared separately so the assignment does not hide a failure.
	local repaired
	repaired=$($SHOW_NAMESPACE |
		   awk '/^striped_shards_repaired/ { print $2 }')
	# Quoted: an empty counter fails the check instead of breaking '['.
	[ "$repaired" -eq 1 ] ||
		error "(4) Fail to repair slave LMV EA: $repaired"

	umount_client $MOUNT || error "(5) umount failed"
	mount_client $MOUNT || error "(6) mount failed"

	touch "$DIR/$tdir/striped_dir/foo" ||
		error "(7) Fail to touch file after the LFSCK"

	rm -f "$DIR/$tdir/striped_dir/foo" ||
		error "(8) Fail to unlink file after the LFSCK"

	rmdir "$DIR/$tdir/striped_dir" ||
		error "(9) Fail to remove the striped directory after LFSCK"
}
4093 run_test 31g "Repair the corrupted slave LMV EA"
# test_31h: the namespace LFSCK must repair a corrupted shard name entry of a
# striped directory (per the echoed text, the first shard's name entry claims
# the same index as the second shard's name entry).
#
# Steps performed below:
#   1. with fail_loc=0x162c / fail_val=0 set on $SINGLEMDS, create a
#      directory striped over $MDSCOUNT MDTs;
#   2. start the namespace LFSCK with "-r -A" and wait until $SINGLEMDS
#      reports the status "completed";
#   3. require dirent_repaired == 1;
#   4. remount the client, create and delete a file under the directory,
#      then remove the directory.
#
# Skipped when fewer than 2 MDTs are configured.
# NOTE(review): presumably invoked as test_31h by "run_test 31h" -- confirm
# against the test framework.
test_31h() {
	[ $MDSCOUNT -lt 2 ] &&
		skip "The test needs at least 2 MDTs" && return

	echo "For some reason, the shard's name entry in the striped"
	echo "directory may be corrupted. The LFSCK should repair the"
	echo "bad shard's name entry."

	check_mount_and_prep

	echo "Inject failure stub on MDT0 to simulate the case that the"
	echo "first shard's name entry in the striped directory claims"
	echo "the same index as the second shard's name entry claims."

	#define OBD_FAIL_LFSCK_BAD_SLAVE_NAME	0x162c
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x162c fail_val=0
	# Record the result and disarm the failure stub BEFORE reporting, so
	# that a failed setdirstripe cannot leave fail_loc set behind us.
	local rc=0
	$LFS setdirstripe -i 0 -c $MDSCOUNT "$DIR/$tdir/striped_dir" || rc=$?
	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0 fail_val=0
	[ $rc -eq 0 ] ||
		error "(1) Fail to create striped directory"

	echo "Trigger namespace LFSCK to repair the shard's name entry"
	# $START_NAMESPACE holds a whole command line; it must stay unquoted.
	$START_NAMESPACE -r -A ||
		error "(2) Fail to start LFSCK for namespace"

	# 32 is presumably the maximum wait in seconds -- TODO confirm.
	wait_update_facet $SINGLEMDS "$LCTL get_param -n \
		mdd.${MDT_DEV}.lfsck_namespace |
		awk '/^status/ { print \\\$2 }'" "completed" 32 || {
		# Dump the LFSCK state so that the failure can be diagnosed.
		$SHOW_NAMESPACE
		error "(3) unexpected status"
	}

	# Declared separately so the assignment does not hide a failure.
	local repaired
	repaired=$($SHOW_NAMESPACE |
		   awk '/^dirent_repaired/ { print $2 }')
	# Quoted: an empty counter fails the check instead of breaking '['.
	[ "$repaired" -eq 1 ] ||
		error "(4) Fail to repair shard's name entry: $repaired"

	umount_client $MOUNT || error "(5) umount failed"
	mount_client $MOUNT || error "(6) mount failed"

	touch "$DIR/$tdir/striped_dir/foo" ||
		error "(7) Fail to touch file after the LFSCK"

	rm -f "$DIR/$tdir/striped_dir/foo" ||
		error "(8) Fail to unlink file after the LFSCK"

	rmdir "$DIR/$tdir/striped_dir" ||
		error "(9) Fail to remove the striped directory after LFSCK"
}
4145 run_test 31h "Repair the corrupted shard's name entry"
4147 # restore MDS/OST size
4148 MDSSIZE=${SAVED_MDSSIZE}
4149 OSTSIZE=${SAVED_OSTSIZE}
4150 OSTCOUNT=${SAVED_OSTCOUNT}
4152 # cleanup the system at last