3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Locate the Lustre tree: keep a caller-supplied LUSTRE, otherwise use the
# parent directory of the directory that contains this script.
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Source test-framework.sh from that tree.
15 . $LUSTRE/tests/test-framework.sh
# Source the test configuration. When CONFIG is unset or empty, the := form
# assigns it $LUSTRE/tests/cfg/$NAME.sh before the file is sourced.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
20 # remove the check when ZFS backend iteration is ready
# Leave the script with status 0 unless facet_fstype reports "ldiskfs" for
# both the MDS facet ($SINGLEMDS) and ost1. Because the commands are chained
# with &&, "exit 0" only runs when skip itself returns success.
21 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
22 skip "test LFSCK only for ldiskfs" && exit 0
23 [ $(facet_fstype ost1) != ldiskfs ] &&
24 skip "test LFSCK only for ldiskfs" && exit 0
# Exit with status 0 when require_dsh_mds fails.
# NOTE(review): presumably this verifies remote-shell access to the MDS;
# confirm against the helper's definition.
26 require_dsh_mds || exit 0
# Name of the mcreate command; a value already set in the environment wins.
28 MCREATE=${MCREATE:-mcreate}
# Keep copies of the incoming MDSSIZE/OSTSIZE values.
# NOTE(review): presumably so they can be restored later; the restore is not
# visible in this excerpt.
29 SAVED_MDSSIZE=${MDSSIZE}
30 SAVED_OSTSIZE=${OSTSIZE}
31 # use small MDS + OST size to speed formatting time
32 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
# NOTE(review): the size assignments that the two comments above describe are
# not visible in this excerpt.
36 check_and_setup_lustre
# Version gating: each check compares the version code reported for a facet
# (lustre_version_code) against a fixed release (version_code).
#  - MDS older than 2.3.60: skip, then run check_and_cleanup_lustre.
#    NOTE(review): that statement ends with a trailing "&&"; its final command
#    is not visible in this excerpt.
#  - MDS at or below 2.4.90: append test 2c to ALWAYS_EXCEPT.
38 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
39 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
42 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
43 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
# ost1 older than 2.5.55: append tests 11 through 15 to ALWAYS_EXCEPT.
45 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
46 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15"
# Add "lfsck" to the debug parameter; output is discarded and a failure is
# deliberately ignored (|| true).
50 $LCTL set_param debug=+lfsck > /dev/null || true
# Names of the MDT0000 and OST0000 targets of the filesystem under test.
52 MDT_DEV="${FSNAME}-MDT0000"
53 OST_DEV="${FSNAME}-OST0000"
# Device of the MDS facet: mdsdevname is called with $SINGLEMDS after every
# "mds" substring has been removed (e.g. "mds1" becomes "1").
54 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# The variables below store complete command lines as plain strings. The
# tests expand them unquoted (for example "$START_NAMESPACE -r"), so the
# string is word-split into a command and extra options can be appended at
# the call site.
# Start LFSCK on the MDT via do_facet: namespace type, then layout type.
55 START_NAMESPACE="do_facet $SINGLEMDS \
56 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
57 START_LAYOUT="do_facet $SINGLEMDS \
58 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
# Start layout LFSCK on ost1 against OST_DEV.
59 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
# Stop LFSCK on the MDT.
60 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Read the lfsck_namespace / lfsck_layout parameters (get_param -n). The
# tests pipe this output through awk to pick out fields such as "status".
61 SHOW_NAMESPACE="do_facet $SINGLEMDS \
62 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
63 SHOW_LAYOUT="do_facet $SINGLEMDS \
64 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
65 SHOW_LAYOUT_ON_OST="do_facet ost1 \
66 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Mount options passed to "start": identical except for the extra "noscrub"
# option, which the tests use when starting "with disabling OI scrub".
67 MOUNT_OPTS_SCRUB="-o user_xattr"
68 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
81 if [ ! -z $igif ]; then
82 #define OBD_FAIL_FID_IGIF 0x1504
83 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
86 echo "preparing... ${nfiles} * ${ndirs} files will be created."
88 cp $LUSTRE/tests/*.sh $DIR/
89 for ((i = 0; i < ${ndirs}; i++)); do
90 mkdir $DIR/$tdir/d${i}
91 touch $DIR/$tdir/f${i}
92 for ((j = 0; j < ${nfiles}; j++)); do
93 touch $DIR/$tdir/d${i}/f${j}
95 mkdir $DIR/$tdir/e${i}
98 if [ ! -z $igif ]; then
99 touch $DIR/$tdir/dummy
100 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
104 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
105 echo "stop $SINGLEMDS"
106 stop $SINGLEMDS > /dev/null || error "Fail to stop MDS!"
111 echo "start $SINGLEMDS"
112 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
113 error "(1) Fail to start MDS!"
115 #define OBD_FAIL_LFSCK_DELAY1 0x1600
116 do_facet $SINGLEMDS $LCTL set_param fail_val=3
117 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
118 $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
120 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
122 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
123 [ "$STATUS" == "scanning-phase1" ] ||
124 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
126 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
128 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
129 [ "$STATUS" == "stopped" ] ||
130 error "(6) Expect 'stopped', but got '$STATUS'"
132 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
134 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
135 [ "$STATUS" == "scanning-phase1" ] ||
136 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
138 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
139 do_facet $SINGLEMDS $LCTL set_param fail_val=0
141 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
142 [ "$STATUS" == "completed" ] ||
143 error "(9) Expect 'completed', but got '$STATUS'"
145 local repaired=$($SHOW_NAMESPACE |
146 awk '/^updated_phase1/ { print $2 }')
147 [ $repaired -eq 0 ] ||
148 error "(10) Expect nothing to be repaired, but got: $repaired"
150 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
151 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
154 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
155 [ "$STATUS" == "completed" ] ||
156 error "(12) Expect 'completed', but got '$STATUS'"
158 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
159 [ $((scanned1 + 1)) -eq $scanned2 ] ||
160 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
162 echo "stopall, should NOT crash LU-3649"
165 run_test 0 "Control LFSCK manually"
169 echo "start $SINGLEMDS"
170 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
171 error "(1) Fail to start MDS!"
173 mount_client $MOUNT || error "(2) Fail to start client!"
175 #define OBD_FAIL_FID_INDIR 0x1501
176 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
177 touch $DIR/$tdir/dummy
179 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
181 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
184 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
185 [ "$STATUS" == "completed" ] ||
186 error "(4) Expect 'completed', but got '$STATUS'"
188 local repaired=$($SHOW_NAMESPACE |
189 awk '/^updated_phase1/ { print $2 }')
190 [ $repaired -eq 1 ] ||
191 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
193 mount_client $MOUNT || error "(6) Fail to start client!"
195 #define OBD_FAIL_FID_LOOKUP 0x1505
196 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
197 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
199 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
201 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
206 echo "start $SINGLEMDS"
207 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
208 error "(1) Fail to start MDS!"
210 mount_client $MOUNT || error "(2) Fail to start client!"
212 #define OBD_FAIL_FID_INLMA 0x1502
213 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
214 touch $DIR/$tdir/dummy
216 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
218 #define OBD_FAIL_FID_NOLMA 0x1506
219 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
220 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
223 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
224 [ "$STATUS" == "completed" ] ||
225 error "(4) Expect 'completed', but got '$STATUS'"
227 local repaired=$($SHOW_NAMESPACE |
228 awk '/^updated_phase1/ { print $2 }')
229 [ $repaired -eq 1 ] ||
230 error "(5) Fail to repair missed FID-in-LMA: $repaired"
232 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
233 mount_client $MOUNT || error "(6) Fail to start client!"
235 #define OBD_FAIL_FID_LOOKUP 0x1505
236 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
237 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
239 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
241 run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
245 echo "start $SINGLEMDS"
246 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
247 error "(1) Fail to start MDS!"
249 mount_client $MOUNT || error "(2) Fail to start client!"
251 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
252 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
253 touch $DIR/$tdir/dummy
255 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
257 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
260 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
261 [ "$STATUS" == "completed" ] ||
262 error "(4) Expect 'completed', but got '$STATUS'"
264 local repaired=$($SHOW_NAMESPACE |
265 awk '/^updated_phase1/ { print $2 }')
266 [ $repaired -eq 1 ] ||
267 error "(5) Fail to repair crashed linkEA: $repaired"
269 mount_client $MOUNT || error "(6) Fail to start client!"
271 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
272 error "(7) Fail to stat $DIR/$tdir/dummy"
274 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
275 local dummyname=$($LFS fid2path $DIR $dummyfid)
276 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
277 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
279 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
284 echo "start $SINGLEMDS"
285 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
286 error "(1) Fail to start MDS!"
288 mount_client $MOUNT || error "(2) Fail to start client!"
290 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
291 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
292 touch $DIR/$tdir/dummy
294 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
296 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
299 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
300 [ "$STATUS" == "completed" ] ||
301 error "(4) Expect 'completed', but got '$STATUS'"
303 local repaired=$($SHOW_NAMESPACE |
304 awk '/^updated_phase2/ { print $2 }')
305 [ $repaired -eq 1 ] ||
306 error "(5) Fail to repair crashed linkEA: $repaired"
308 mount_client $MOUNT || error "(6) Fail to start client!"
310 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
311 error "(7) Fail to stat $DIR/$tdir/dummy"
313 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
314 local dummyname=$($LFS fid2path $DIR $dummyfid)
315 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
316 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
318 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
323 echo "start $SINGLEMDS"
324 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
325 error "(1) Fail to start MDS!"
327 mount_client $MOUNT || error "(2) Fail to start client!"
329 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
330 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
331 touch $DIR/$tdir/dummy
333 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
335 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
338 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
339 [ "$STATUS" == "completed" ] ||
340 error "(4) Expect 'completed', but got '$STATUS'"
342 local repaired=$($SHOW_NAMESPACE |
343 awk '/^updated_phase2/ { print $2 }')
344 [ $repaired -eq 1 ] ||
345 error "(5) Fail to repair crashed linkEA: $repaired"
347 mount_client $MOUNT || error "(6) Fail to start client!"
349 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
350 error "(7) Fail to stat $DIR/$tdir/dummy"
352 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
353 local dummyname=$($LFS fid2path $DIR $dummyfid)
354 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
355 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
357 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
362 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
363 echo "start $SINGLEMDS with disabling OI scrub"
364 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
365 error "(2) Fail to start MDS!"
367 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
368 [ "$STATUS" == "init" ] ||
369 error "(3) Expect 'init', but got '$STATUS'"
371 #define OBD_FAIL_LFSCK_DELAY2 0x1601
372 do_facet $SINGLEMDS $LCTL set_param fail_val=1
373 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
374 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
377 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
378 [ "$STATUS" == "scanning-phase1" ] ||
379 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
381 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
382 [ "$FLAGS" == "inconsistent" ] ||
383 error "(6) Expect 'inconsistent', but got '$FLAGS'"
385 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
386 do_facet $SINGLEMDS $LCTL set_param fail_val=0
388 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
389 [ "$STATUS" == "completed" ] ||
390 error "(7) Expect 'completed', but got '$STATUS'"
392 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
393 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
395 local repaired=$($SHOW_NAMESPACE |
396 awk '/^updated_phase1/ { print $2 }')
397 [ $repaired -ge 9 ] ||
398 error "(9) Fail to repair crashed linkEA: $repaired"
400 mount_client $MOUNT || error "(10) Fail to start client!"
402 #define OBD_FAIL_FID_LOOKUP 0x1505
403 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
404 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
406 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
408 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
413 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
414 echo "start $SINGLEMDS with disabling OI scrub"
415 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
416 error "(2) Fail to start MDS!"
418 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
419 [ "$STATUS" == "init" ] ||
420 error "(3) Expect 'init', but got '$STATUS'"
422 #define OBD_FAIL_LFSCK_DELAY2 0x1601
423 do_facet $SINGLEMDS $LCTL set_param fail_val=1
424 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
425 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
428 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
429 [ "$STATUS" == "scanning-phase1" ] ||
430 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
432 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
433 [ "$FLAGS" == "inconsistent,upgrade" ] ||
434 error "(6) Expect 'inconsistent,upgrade', but got '$FLAGS'"
436 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
437 do_facet $SINGLEMDS $LCTL set_param fail_val=0
439 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
440 [ "$STATUS" == "completed" ] ||
441 error "(7) Expect 'completed', but got '$STATUS'"
443 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
444 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
446 local repaired=$($SHOW_NAMESPACE |
447 awk '/^updated_phase1/ { print $2 }')
448 [ $repaired -ge 2 ] ||
449 error "(9) Fail to repair crashed linkEA: $repaired"
451 mount_client $MOUNT || error "(10) Fail to start client!"
453 #define OBD_FAIL_FID_LOOKUP 0x1505
454 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
455 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
457 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
459 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
460 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
461 local dummyname=$($LFS fid2path $DIR $dummyfid)
462 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
463 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
465 run_test 5 "LFSCK can handle IFIG object upgrading"
469 echo "start $SINGLEMDS"
470 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
471 error "(1) Fail to start MDS!"
473 #define OBD_FAIL_LFSCK_DELAY1 0x1600
474 do_facet $SINGLEMDS $LCTL set_param fail_val=1
475 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
476 $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
478 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
479 [ "$STATUS" == "scanning-phase1" ] ||
480 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
482 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
484 # Fail the LFSCK to guarantee there is at least one checkpoint
485 #define OBD_FAIL_LFSCK_FATAL1 0x1608
486 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
488 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
489 [ "$STATUS" == "failed" ] ||
490 error "(4) Expect 'failed', but got '$STATUS'"
492 local POSITION0=$($SHOW_NAMESPACE |
493 awk '/^last_checkpoint_position/ { print $2 }' |
496 #define OBD_FAIL_LFSCK_DELAY1 0x1600
497 do_facet $SINGLEMDS $LCTL set_param fail_val=1
498 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
499 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
501 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
502 [ "$STATUS" == "scanning-phase1" ] ||
503 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
505 local POSITION1=$($SHOW_NAMESPACE |
506 awk '/^latest_start_position/ { print $2 }' |
508 [ $POSITION0 -lt $POSITION1 ] ||
509 error "(7) Expect larger than: $POSITION0, but got $POSITION1"
511 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
512 do_facet $SINGLEMDS $LCTL set_param fail_val=0
514 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
515 [ "$STATUS" == "completed" ] ||
516 error "(8) Expect 'completed', but got '$STATUS'"
518 run_test 6a "LFSCK resumes from last checkpoint (1)"
522 echo "start $SINGLEMDS"
523 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
524 error "(1) Fail to start MDS!"
526 #define OBD_FAIL_LFSCK_DELAY2 0x1601
527 do_facet $SINGLEMDS $LCTL set_param fail_val=1
528 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
529 $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
531 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
532 [ "$STATUS" == "scanning-phase1" ] ||
533 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
535 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
537 # Fail the LFSCK to guarantee there is at least one checkpoint
538 #define OBD_FAIL_LFSCK_FATAL2 0x1609
539 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
541 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
542 [ "$STATUS" == "failed" ] ||
543 error "(4) Expect 'failed', but got '$STATUS'"
545 local POSITION0=$($SHOW_NAMESPACE |
546 awk '/^last_checkpoint_position/ { print $4 }')
548 #define OBD_FAIL_LFSCK_DELAY2 0x1601
549 do_facet $SINGLEMDS $LCTL set_param fail_val=1
550 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
551 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
553 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
554 [ "$STATUS" == "scanning-phase1" ] ||
555 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
557 local POSITION1=$($SHOW_NAMESPACE |
558 awk '/^latest_start_position/ { print $4 }')
559 if [ $POSITION0 -gt $POSITION1 ]; then
560 [ $POSITION1 -eq 0 -a $POSITION0 -eq $((POSITION1 + 1)) ] ||
561 error "(7) Expect larger than: $POSITION0, but got $POSITION1"
564 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
565 do_facet $SINGLEMDS $LCTL set_param fail_val=0
567 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
568 [ "$STATUS" == "completed" ] ||
569 error "(8) Expect 'completed', but got '$STATUS'"
571 run_test 6b "LFSCK resumes from last checkpoint (2)"
576 echo "start $SINGLEMDS"
577 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
578 error "(1) Fail to start MDS!"
580 #define OBD_FAIL_LFSCK_DELAY2 0x1601
581 do_facet $SINGLEMDS $LCTL set_param fail_val=1
582 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
583 $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
585 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
586 [ "$STATUS" == "scanning-phase1" ] ||
587 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
589 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
591 echo "stop $SINGLEMDS"
592 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
594 echo "start $SINGLEMDS"
595 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
596 error "(5) Fail to start MDS!"
598 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
599 [ "$STATUS" == "scanning-phase1" ] ||
600 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
602 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
603 do_facet $SINGLEMDS $LCTL set_param fail_val=0
605 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
606 [ "$STATUS" == "completed" ] ||
607 error "(7) Expect 'completed', but got '$STATUS'"
609 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
614 echo "start $SINGLEMDS"
615 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
616 error "(1) Fail to start MDS!"
618 mount_client $MOUNT || error "(2) Fail to start client!"
620 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
621 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
622 for ((i = 0; i < 20; i++)); do
623 touch $DIR/$tdir/dummy${i}
626 #define OBD_FAIL_LFSCK_DELAY3 0x1602
627 do_facet $SINGLEMDS $LCTL set_param fail_val=1
628 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
629 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
632 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
633 [ "$STATUS" == "scanning-phase2" ] ||
634 error "(4) Expect 'scanning-phase2', but got '$STATUS'"
636 echo "stop $SINGLEMDS"
637 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
639 echo "start $SINGLEMDS"
640 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
641 error "(6) Fail to start MDS!"
643 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
644 [ "$STATUS" == "scanning-phase2" ] ||
645 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
647 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
648 do_facet $SINGLEMDS $LCTL set_param fail_val=0
650 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
651 [ "$STATUS" == "completed" ] ||
652 error "(8) Expect 'completed', but got '$STATUS'"
654 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
659 echo "start $SINGLEMDS"
660 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
661 error "(1) Fail to start MDS!"
663 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
664 [ "$STATUS" == "init" ] ||
665 error "(2) Expect 'init', but got '$STATUS'"
667 mount_client $MOUNT || error "(3) Fail to start client!"
669 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
670 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
671 mkdir $DIR/$tdir/crashed
673 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
674 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
675 for ((i = 0; i < 5; i++)); do
676 touch $DIR/$tdir/dummy${i}
679 #define OBD_FAIL_LFSCK_DELAY2 0x1601
680 do_facet $SINGLEMDS $LCTL set_param fail_val=2
681 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
682 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
684 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
685 [ "$STATUS" == "scanning-phase1" ] ||
686 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
688 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
690 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
691 [ "$STATUS" == "stopped" ] ||
692 error "(7) Expect 'stopped', but got '$STATUS'"
694 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
696 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
697 [ "$STATUS" == "scanning-phase1" ] ||
698 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
700 #define OBD_FAIL_LFSCK_FATAL2 0x1609
701 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
703 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
704 [ "$STATUS" == "failed" ] ||
705 error "(10) Expect 'failed', but got '$STATUS'"
707 #define OBD_FAIL_LFSCK_DELAY1 0x1600
708 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
709 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
711 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
712 [ "$STATUS" == "scanning-phase1" ] ||
713 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
715 #define OBD_FAIL_LFSCK_CRASH 0x160a
716 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
719 echo "stop $SINGLEMDS"
720 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
722 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
723 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
725 echo "start $SINGLEMDS"
726 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
727 error "(14) Fail to start MDS!"
729 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
730 [ "$STATUS" == "crashed" ] ||
731 error "(15) Expect 'crashed', but got '$STATUS'"
733 #define OBD_FAIL_LFSCK_DELAY2 0x1601
734 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
735 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
737 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
738 [ "$STATUS" == "scanning-phase1" ] ||
739 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
741 echo "stop $SINGLEMDS"
742 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
744 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
745 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
747 echo "start $SINGLEMDS"
748 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
749 error "(19) Fail to start MDS!"
751 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
752 [ "$STATUS" == "paused" ] ||
753 error "(20) Expect 'paused', but got '$STATUS'"
755 #define OBD_FAIL_LFSCK_DELAY3 0x1602
756 do_facet $SINGLEMDS $LCTL set_param fail_val=2
757 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
759 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
761 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
762 [ "$STATUS" == "scanning-phase2" ] ||
763 error "(22) Expect 'scanning-phase2', but got '$STATUS'"
765 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
766 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
767 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
769 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
770 do_facet $SINGLEMDS $LCTL set_param fail_val=0
772 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
773 [ "$STATUS" == "completed" ] ||
774 error "(24) Expect 'completed', but got '$STATUS'"
776 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
777 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
780 run_test 8 "LFSCK state machine"
783 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
784 skip "Testing on UP system, the speed may be inaccurate."
789 echo "start $SINGLEMDS"
790 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
791 error "(1) Fail to start MDS!"
793 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
794 [ "$STATUS" == "init" ] ||
795 error "(2) Expect 'init', but got '$STATUS'"
797 local BASE_SPEED1=100
799 $START_NAMESPACE -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
802 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
803 [ "$STATUS" == "scanning-phase1" ] ||
804 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
806 local SPEED=$($SHOW_NAMESPACE |
807 awk '/^average_speed_phase1/ { print $2 }')
809 # There may be a timing error; normally it should be less than 2 seconds.
810 # We allow another 20% for scheduling error.
812 # MAX_MARGIN = 1.2 = 12 / 10
813 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
814 RUN_TIME1 * 12 / 10))
815 [ $SPEED -lt $MAX_SPEED ] ||
816 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
819 local BASE_SPEED2=300
821 do_facet $SINGLEMDS \
822 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
825 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
826 # MIN_MARGIN = 0.8 = 8 / 10
827 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
828 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
829 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
830 [ $SPEED -gt $MIN_SPEED ] ||
831 error "(5) Got speed $SPEED, expected more than $MIN_SPEED"
833 # MAX_MARGIN = 1.2 = 12 / 10
834 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
835 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
836 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
837 [ $SPEED -lt $MAX_SPEED ] ||
838 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
840 do_facet $SINGLEMDS \
841 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
843 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
844 [ "$STATUS" == "completed" ] ||
845 error "(7) Expect 'completed', but got '$STATUS'"
847 run_test 9a "LFSCK speed control (1)"
850 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
851 skip "Testing on UP system, the speed may be inaccurate."
856 echo "start $SINGLEMDS"
857 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
858 error "(1) Fail to start MDS!"
860 mount_client $MOUNT || error "(2) Fail to start client!"
862 echo "Another preparing... 50 * 50 files (with error) will be created."
863 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
864 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
865 for ((i = 0; i < 50; i++)); do
866 mkdir -p $DIR/$tdir/d${i}
867 touch $DIR/$tdir/f${i}
868 for ((j = 0; j < 50; j++)); do
869 touch $DIR/$tdir/d${i}/f${j}
873 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
874 [ "$STATUS" == "init" ] ||
875 error "(3) Expect 'init', but got '$STATUS'"
877 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
878 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
879 $START_NAMESPACE || error "(4) Fail to start LFSCK!"
882 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
883 [ "$STATUS" == "stopped" ] ||
884 error "(5) Expect 'stopped', but got '$STATUS'"
886 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
890 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
893 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
894 [ "$STATUS" == "scanning-phase2" ] ||
895 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
897 local SPEED=$($SHOW_NAMESPACE |
898 awk '/^average_speed_phase2/ { print $2 }')
899 # There may be a timing error; normally it should be less than 2 seconds.
900 # We allow another 20% for scheduling error.
902 # MAX_MARGIN = 1.2 = 12 / 10
903 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
904 RUN_TIME1 * 12 / 10))
905 [ $SPEED -lt $MAX_SPEED ] ||
906 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
909 local BASE_SPEED2=150
911 do_facet $SINGLEMDS \
912 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
915 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
916 # MIN_MARGIN = 0.8 = 8 / 10
917 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
918 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
919 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
920 [ $SPEED -gt $MIN_SPEED ] ||
921 error "(9) Got speed $SPEED, expected more than $MIN_SPEED"
923 # MAX_MARGIN = 1.2 = 12 / 10
924 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
925 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
926 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
927 [ $SPEED -lt $MAX_SPEED ] ||
928 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
930 do_facet $SINGLEMDS \
931 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
933 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
934 [ "$STATUS" == "completed" ] ||
935 error "(11) Expect 'completed', but got '$STATUS'"
937 run_test 9b "LFSCK speed control (2)"
# Body of test_10 (name taken from the run_test call at the end; the function
# header, loop terminators and some other lines are not visible in this
# listing). Per its run_test description it checks that the file system
# remains usable while namespace LFSCK is scanning.
942 echo "start $SINGLEMDS"
943 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
944 error "(1) Fail to start MDS!"
946 mount_client $MOUNT || error "(2) Fail to start client!"
# With fail_loc 0x1603 set on the MDS, create the even-numbered entries:
# directory d${i}, file f${i}, and five files f0..f4 inside each d${i}.
948 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
949 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
950 for ((i = 0; i < 1000; i = $((i+2)))); do
951 mkdir -p $DIR/$tdir/d${i}
952 touch $DIR/$tdir/f${i}
953 for ((j = 0; j < 5; j++)); do
954 touch $DIR/$tdir/d${i}/f${j}
# Same layout for the odd-numbered entries, this time under fail_loc 0x1604.
958 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
959 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
960 for ((i = 1; i < 1000; i = $((i+2)))); do
961 mkdir -p $DIR/$tdir/d${i}
962 touch $DIR/$tdir/f${i}
963 for ((j = 0; j < 5; j++)); do
964 touch $DIR/$tdir/d${i}/f${j}
# Clear the fault injection, then give f200 a second hard link inside d200.
968 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
969 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
972 mount_client $MOUNT || error "(3) Fail to start client!"
# Before LFSCK is started the namespace status is required to be 'init'.
974 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
975 [ "$STATUS" == "init" ] ||
976 error "(4) Expect 'init', but got '$STATUS'"
# Start namespace LFSCK with "-s 100" (presumably a speed limit, given that
# lfsck_speed_limit is reset to 0 further down -- confirm meaning and units).
978 $START_NAMESPACE -s 100 || error "(5) Fail to start LFSCK!"
# The scan is required to be in phase 1 at this point.
981 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
982 [ "$STATUS" == "scanning-phase1" ] ||
983 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Exercise ordinary namespace operations on the entries created above
# (recursive ls, create, mkdir, unlink, recursive remove, rename, hard link,
# symlink); each one must succeed.
985 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
987 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
989 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
991 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
993 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
995 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
997 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
999 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
1000 error "(14) Fail to softlink!"
# The scan must still be in phase 1 after those operations.
1002 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1003 [ "$STATUS" == "scanning-phase1" ] ||
1004 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# Set lfsck_speed_limit to 0 (presumably "no limit" -- confirm), unmount the
# client, and require the final status 'completed'. Any wait between these
# steps is not visible in this listing.
1006 do_facet $SINGLEMDS \
1007 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1008 umount_client $MOUNT
1010 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1011 [ "$STATUS" == "completed" ] ||
1012 error "(16) Expect 'completed', but got '$STATUS'"
# Hand test number 10 and its description to the framework's run_test.
1014 run_test 10 "System is available during LFSCK scanning"
# Remove the LAST_ID file found under O/${idx} at the mount point of target
# ost${ost}.
# The assignments of `ost` and `idx` are not visible in this listing;
# presumably they come from $1 and $2 (the visible caller passes two
# arguments, "1 0") -- confirm.
# Returns 1 if mount_fstype fails and 2 if unmount_fstype fails; the exit
# status of the rm itself is not checked.
1017 ost_remove_lastid() {
# Prefix used to run a command on the chosen OST facet via do_facet.
1020 local rcmd="do_facet ost${ost}"
1022 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1024 # step 1: local mount
1025 mount_fstype ost${ost} || return 1
1026 # step 2: remove the specified LAST_ID
1027 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/LAST_ID
1029 unmount_fstype ost${ost} || return 2
# Body of test_11a (name taken from the run_test call at the end; the function
# header and some lines are not visible in this listing). Per its run_test
# description it checks that layout LFSCK on an OST can rebuild a lost LAST_ID.
# Start from a freshly formatted and set-up file system; output is discarded.
1036 formatall > /dev/null
1038 setupall > /dev/null
# Set striping on the test directory with "-c 1 -i 0" (presumably one stripe
# starting on OST index 0 -- confirm), then create 64 files named f* in it.
1041 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1042 createmany -o $DIR/$tdir/f 64
# Delete O/0/LAST_ID on ost1 using the helper (arguments: ost=1, idx=0).
1047 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
# Bring ost1 up with $MOUNT_OPTS_NOSCRUB; layout LFSCK must report 'init'.
1050 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1051 error "(2) Fail to start ost1"
1053 local STATUS=$($SHOW_LAYOUT_ON_OST | awk '/^status/ { print $2 }')
1054 [ "$STATUS" == "init" ] ||
1055 error "(3) Expect 'init', but got '$STATUS'"
# Arm fail_loc 0x160e with fail_val=3 on ost1 before starting LFSCK
# (presumably a delay so the intermediate flag can be observed -- confirm).
1057 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1058 do_facet ost1 $LCTL set_param fail_val=3
1059 do_facet ost1 $LCTL set_param fail_loc=0x160e
1061 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1062 $START_LAYOUT_ON_OST || error "(4) Fail to start LFSCK on OST!"
# Wait for the 'flags' field of lfsck_layout on ost1 to read "crashed_lastid";
# the trailing 60 is presumably a timeout in seconds (confirm in
# test-framework.sh). The body of the `|| {` failure handler is not visible.
1064 wait_update_facet ost1 "$LCTL get_param -n \
1065 obdfilter.${OST_DEV}.lfsck_layout |
1066 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
# Disarm the fault injection on ost1.
1071 do_facet ost1 $LCTL set_param fail_val=0
1072 do_facet ost1 $LCTL set_param fail_loc=0
# Wait for the layout LFSCK status on ost1 to become "completed" (limit 3).
1074 wait_update_facet ost1 "$LCTL get_param -n \
1075 obdfilter.${OST_DEV}.lfsck_layout |
1076 awk '/^status/ { print \\\$2 }'" "completed" 3 || {
# After completion the 'flags' field must be empty again.
1081 echo "the LAST_ID(s) should have been rebuilt"
1082 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1083 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
# Hand test number 11a and its description to the framework's run_test.
1085 run_test 11a "LFSCK can rebuild lost last_id"
# Body of test_11b (name taken from the run_test call at the end; the function
# header, the loop terminator and some lines are not visible in this listing).
# Per its run_test description it checks that layout LFSCK on an OST can
# rebuild a crashed (stale on-disk) LAST_ID.
1091 formatall > /dev/null
1093 setupall > /dev/null
1096 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
# Create 64 files while fail_loc 0x160d is set on ost1, then record the
# in-memory last_id: the value after the colon on the last_id line that
# contains 0x100000000.
1098 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1099 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1100 do_facet ost1 $LCTL set_param fail_loc=0x160d
1101 createmany -o $DIR/$tdir/f 64
1102 local lastid1=$(do_facet ost1 "lctl get_param -n \
1103 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1104 awk -F: '{ print $2 }')
# Unmount the client and stop ost1.
1106 umount_client $MOUNT
1108 stop ost1 || error "(1) Fail to stop ost1"
# Switch the fault injection on ost1 to 0x215 before restarting it
# (NOTE(review): the reason is not stated here -- confirm intent).
1110 #define OBD_FAIL_OST_ENOSPC 0x215
1111 do_facet ost1 $LCTL set_param fail_loc=0x215
1114 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1115 error "(2) Fail to start ost1"
1117 local STATUS=$($SHOW_LAYOUT_ON_OST | awk '/^status/ { print $2 }')
1118 [ "$STATUS" == "init" ] ||
1119 error "(3) Expect 'init', but got '$STATUS'"
# Poll up to 60 times until last_id yields a non-empty value after the
# restart (any delay between polls is not visible in this listing).
1121 for ((i = 0; i < 60; i++)); do
1122 lastid2=$(do_facet ost1 "lctl get_param -n \
1123 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1124 awk -F: '{ print $2 }')
# NOTE(review): $lastid2 is unquoted in this test; quoting would be safer.
1125 [ ! -z $lastid2 ] && break;
# The value read after the restart must be smaller than the one recorded
# before it. NOTE(review): both operands are unquoted, so an empty value
# makes the `[` test itself error out.
1129 echo "the on-disk LAST_ID should be smaller than the expected one"
1130 [ $lastid1 -gt $lastid2 ] ||
1131 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
# Run layout LFSCK on ost1 and wait for status "completed" (limit 3). The body
# of the `|| {` failure handler is not visible in this listing.
1133 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1134 $START_LAYOUT_ON_OST || error "(5) Fail to start LFSCK on OST!"
1136 wait_update_facet ost1 "$LCTL get_param -n \
1137 obdfilter.${OST_DEV}.lfsck_layout |
1138 awk '/^status/ { print \\\$2 }'" "completed" 3 || {
# Restart ost1 once more so that last_id is read again after a fresh start.
1144 stop ost1 || error "(7) Fail to stop ost1"
1147 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1148 error "(8) Fail to start ost1"
# Wait (limit 60) until last_id matches the value recorded as lastid1.
1150 echo "the on-disk LAST_ID should have been rebuilt"
1151 wait_update_facet ost1 "$LCTL get_param -n \
1152 obdfilter.${ost1_svc}.last_id | grep 0x100000000 |
1153 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
# NOTE(review): this diagnostic invokes $LCTL directly rather than through
# do_facet ost1 like the other last_id reads in this test -- confirm intended.
1154 $LCTL get_param -n obdfilter.${ost1_svc}.last_id
1155 error "(9) expect lastid1 0x100000000:$lastid1"
# Disarm the fault injection on ost1.
1158 do_facet ost1 $LCTL set_param fail_loc=0
# Hand test number 11b and its description to the framework's run_test.
1160 run_test 11b "LFSCK can rebuild crashed last_id"
# Body of test_12 (name taken from the skip message and the run_test call; the
# function header and loop terminators are not visible in this listing). Per
# its run_test description it checks that a single lctl command can start and
# stop layout LFSCK on all devices.
# Requires at least two MDSes. NOTE(review): the skip path uses `exit 0`,
# which ends the invoking shell rather than just this test -- confirm intended.
1163 [ $MDSCOUNT -lt 2 ] &&
1164 skip "We need at least 2 MDSes for test_12" && exit 0
1169 formatall > /dev/null
1171 setupall > /dev/null
# For every MDS: require layout LFSCK status 'init', then create directory
# $DIR/${k} with "-i $((k - 1))" (presumably the MDT index -- confirm) and
# put 100 files in it.
1173 echo "All the LFSCK targets should be in 'init' status."
1174 for k in $(seq $MDSCOUNT); do
1175 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1176 mdd.$(facet_svc mds${k}).lfsck_layout |
1177 awk '/^status/ { print $2 }')
1178 [ "$STATUS" == "init" ] ||
1179 error "(1) MDS${k} Expect 'init', but got '$STATUS'"
1181 $LFS mkdir -i $((k - 1)) $DIR/${k}
1182 createmany -o $DIR/${k}/f 100
# Start layout LFSCK from mds1 with -A (all targets, per the echo text) and
# "-s 10" (limited speed, per the echo text).
1185 echo "Trigger LFSCK on all targets by single command (limited speed)."
1186 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1187 -s 10 || error "(2) Fail to start LFSCK on all devices!"
# Every MDS must now report 'scanning-phase1'.
1189 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1190 for k in $(seq $MDSCOUNT); do
1191 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1192 mdd.$(facet_svc mds${k}).lfsck_layout |
1193 awk '/^status/ { print $2 }')
1194 [ "$STATUS" == "scanning-phase1" ] ||
1195 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
# Stop LFSCK everywhere with one lfsck_stop -A issued on mds1.
1198 echo "Stop layout LFSCK on all targets by single lctl command."
1199 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1200 error "(4) Fail to stop LFSCK on all devices!"
# Every MDS and every OST must report 'stopped'.
1202 echo "All the LFSCK targets should be in 'stopped' status."
1203 for k in $(seq $MDSCOUNT); do
1204 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1205 mdd.$(facet_svc mds${k}).lfsck_layout |
1206 awk '/^status/ { print $2 }')
1207 [ "$STATUS" == "stopped" ] ||
1208 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
1211 for k in $(seq $OSTCOUNT); do
1212 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1213 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1214 awk '/^status/ { print $2 }')
1215 [ "$STATUS" == "stopped" ] ||
1216 error "(6) OST${k} Expect 'stopped', but got '$STATUS'"
# Start again on all targets, this time with "-s 0" (full speed, per the
# echo text).
1219 echo "Re-trigger LFSCK on all targets by single command (full speed)."
1220 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1221 -s 0 || error "(7) Fail to start LFSCK on all devices!"
# Wait (limit 32) for every MDS to report 'completed'.
1223 echo "All the LFSCK targets should be in 'completed' status."
1224 for k in $(seq $MDSCOUNT); do
1225 # The LFSCK status query interval is 30 seconds. In case some
1226 # LFSCK_NOTIFY RPCs fail or are lost, we wait long enough
1227 # to guarantee that the status is synced up.
1228 wait_update_facet mds${k} "$LCTL get_param -n \
1229 mdd.$(facet_svc mds${k}).lfsck_layout |
1230 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1231 error "(8) MDS${k} is not the expected 'completed'"
# Hand test number 12 and its description to the framework's run_test.
1234 run_test 12 "single command to trigger LFSCK on all devices"
# Body of test_13 (name taken from the run_test call at the end; the function
# header and some lines are not visible in this listing). Per its run_test
# description it checks that layout LFSCK repairs a crashed lmm_oi.
1238 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1239 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1240 echo "MDT-object FID."
1246 formatall > /dev/null
1248 setupall > /dev/null
# Create 32 files while fail_loc 0x160f is set on the MDS, then clear it.
1252 echo "Inject failure stub to simulate bad lmm_oi"
1253 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1254 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1255 createmany -o $DIR/$tdir/f 32
1256 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Only the setup half of the restart is visible in this listing; the stop
# step announced by the echo is presumably on a line not shown.
1258 echo "stopall to cleanup object cache"
1261 setupall > /dev/null
# Run layout LFSCK on the MDS and wait (limit 3) for 'completed'; on timeout
# the test body returns 2.
1263 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1264 $START_LAYOUT || error "(1) Fail to start LFSCK for layout!"
1266 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1267 mdd.${MDT_DEV}.lfsck_layout |
1268 awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2
# The repaired_others counter must equal the 32 files created above.
1270 local repaired=$($SHOW_LAYOUT |
1271 awk '/^repaired_others/ { print $2 }')
1272 [ $repaired -eq 32 ] ||
1273 error "(3) Fail to repair crashed lmm_oi: $repaired"
# Hand test number 13 and its description to the framework's run_test.
1275 run_test 13 "LFSCK can repair crashed lmm_oi"
# Body of test_14 (name taken from the run_test call at the end; the function
# header and some lines are not visible in this listing). Per its run_test
# description it checks that layout LFSCK repairs MDT-objects whose
# referenced OST-object is missing (dangling reference).
1279 echo "The OST-object referenced by the MDT-object should be there;"
1280 echo "otherwise, the LFSCK should re-create the missed OST-object."
1286 formatall > /dev/null
1288 setupall > /dev/null
1291 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Create 64 files while fail_loc 0x1610 is set on ost1, then clear it.
1293 echo "Inject failure stub to simulate dangling referenced MDT-object"
1294 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1295 do_facet ost1 $LCTL set_param fail_loc=0x1610
1296 createmany -o $DIR/$tdir/f 64
1297 do_facet ost1 $LCTL set_param fail_loc=0
# Only the setup half of the restart is visible in this listing; the stop
# step announced by the echo is presumably on a line not shown.
1299 echo "stopall to cleanup object cache"
1302 setupall > /dev/null
# Before the repair, a long listing of the directory is required to fail.
1304 echo "'ls' should fail because of dangling referenced MDT-object"
1305 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
# Run layout LFSCK on the MDS and wait (limit 6) for 'completed'; on timeout
# the test body returns 3.
1307 echo "Trigger layout LFSCK to find out dangling reference and fix them"
1308 $START_LAYOUT || error "(2) Fail to start LFSCK for layout!"
1310 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1311 mdd.${MDT_DEV}.lfsck_layout |
1312 awk '/^status/ { print \\\$2 }'" "completed" 6 || return 3
# Exactly 32 repairs are expected although 64 files were created; presumably
# the failure stub affects only half of the creates -- confirm against the
# OBD_FAIL_LFSCK_DANGLING implementation.
1314 local repaired=$($SHOW_LAYOUT |
1315 awk '/^repaired_dangling/ { print $2 }')
1316 [ $repaired -eq 32 ] ||
1317 error "(4) Fail to repair dangling reference: $repaired"
# After the repair the same listing must succeed.
1319 echo "'ls' should success after layout LFSCK repairing"
1320 ls -ail $DIR/$tdir > /dev/null || error "(5) ls should success."
# Hand test number 14 and its description to the framework's run_test.
1322 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Body of test_15a (name taken from the run_test call at the end; the function
# header and some lines are not visible in this listing). Per its echo text it
# covers an OST-object whose back pointer names a non-existent MDT-object.
1326 echo "If the OST-object referenced by the MDT-object back points"
1327 echo "to some non-exist MDT-object, then the LFSCK should repair"
1328 echo "the OST-object to back point to the right MDT-object."
1334 formatall > /dev/null
1336 setupall > /dev/null
1339 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# With fail_loc 0x1611 set on ost1, write a single 1 MiB file f0 and cancel
# the client's osc LRU locks (presumably so the data reaches the OST while
# the stub is active -- confirm), then clear fail_loc.
1341 echo "Inject failure stub to make the OST-object to back point to"
1342 echo "non-exist MDT-object."
1343 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
1345 do_facet ost1 $LCTL set_param fail_loc=0x1611
1346 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1347 cancel_lru_locks osc
1350 do_facet ost1 $LCTL set_param fail_loc=0
# Only the setup half of the restart is visible in this listing; the stop
# step announced by the echo is presumably on a line not shown.
1352 echo "stopall to cleanup object cache"
1355 setupall > /dev/null
# Run layout LFSCK on the MDS and wait (limit 3) for 'completed'; on timeout
# the test body returns 2.
1357 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1358 $START_LAYOUT || error "(1) Fail to start LFSCK for layout!"
1360 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1361 mdd.${MDT_DEV}.lfsck_layout |
1362 awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2
# Exactly one repair is expected, matching the single file written above.
1364 local repaired=$($SHOW_LAYOUT |
1365 awk '/^repaired_unmatched_pair/ { print $2 }')
1366 [ $repaired -eq 1 ] ||
1367 error "(3) Fail to repair unmatched pair: $repaired"
# Hand test number 15a and its description to the framework's run_test.
1369 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Body of test_15b (name taken from the run_test call at the end; the function
# header and some lines are not visible in this listing). Per its echo text it
# covers an OST-object whose back pointer names a different MDT-object that
# does not reference it.
1373 echo "If the OST-object referenced by the MDT-object back points"
1374 echo "to other MDT-object that doesn't recognize the OST-object,"
1375 echo "then the LFSCK should repair it to back point to the right"
1376 echo "MDT-object (the first one)."
1382 formatall > /dev/null
1384 setupall > /dev/null
1387 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Create an extra file named "guard" before the fault is injected
# (presumably the "other MDT-object" the stub points at -- confirm).
1388 touch $DIR/$tdir/guard
# With fail_loc 0x1612 set on ost1, write a single 1 MiB file f0 and cancel
# the client's osc LRU locks, then clear fail_loc.
1390 echo "Inject failure stub to make the OST-object to back point to"
1391 echo "other MDT-object"
1393 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1394 do_facet ost1 $LCTL set_param fail_loc=0x1612
1395 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1396 cancel_lru_locks osc
1399 do_facet ost1 $LCTL set_param fail_loc=0
# Only the setup half of the restart is visible in this listing; the stop
# step announced by the echo is presumably on a line not shown.
1401 echo "stopall to cleanup object cache"
1404 setupall > /dev/null
# Run layout LFSCK on the MDS and wait (limit 3) for 'completed'; on timeout
# the test body returns 2.
1406 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1407 $START_LAYOUT || error "(1) Fail to start LFSCK for layout!"
1409 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1410 mdd.${MDT_DEV}.lfsck_layout |
1411 awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2
# Exactly one repair is expected, matching the single file f0 written above.
1413 local repaired=$($SHOW_LAYOUT |
1414 awk '/^repaired_unmatched_pair/ { print $2 }')
1415 [ $repaired -eq 1 ] ||
1416 error "(3) Fail to repair unmatched pair: $repaired"
# Hand test number 15b and its description to the framework's run_test.
1418 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Script-level teardown: remove the lfsck flag from the debug mask again (it
# was added with debug=+lfsck near the top of the script); a failure of
# set_param is deliberately ignored via `|| true`.
1420 $LCTL set_param debug=-lfsck > /dev/null || true
1422 # restore the MDS/OST sizes saved in SAVED_MDSSIZE/SAVED_OSTSIZE at startup
1423 MDSSIZE=${SAVED_MDSSIZE}
1424 OSTSIZE=${SAVED_OSTSIZE}
1426 # finally, clean up the system