3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Unless the caller already set LUSTRE, default it to the parent of the
# directory that contains this script.
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Source the shared test framework; presumably it provides the helpers used
# below (skip, error, do_facet, start, stop, ...), none of which are defined
# in the visible part of this file.
15 . $LUSTRE/tests/test-framework.sh
# Source the test configuration. ":=" also assigns the default path to CONFIG
# when CONFIG is unset or empty.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# LFSCK is only tested on ldiskfs: when either $SINGLEMDS or ost1 reports a
# different backend type, call skip and leave the script with status 0.
20 # remove the check when ZFS backend iteration is ready
21 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
22 skip "test LFSCK only for ldiskfs" && exit 0
23 [ $(facet_fstype ost1) != ldiskfs ] &&
24 skip "test LFSCK only for ldiskfs" && exit 0
# Leave with status 0 when require_dsh_mds fails.
# NOTE(review): presumably this checks that commands can be run on the MDS
# node - confirm in test-framework.sh.
26 require_dsh_mds || exit 0
# MCREATE defaults to "mcreate" unless the caller already set it.
28 MCREATE=${MCREATE:-mcreate}
# Keep copies of the incoming MDSSIZE/OSTSIZE values.
# NOTE(review): presumably they are restored once the tests are done; the
# restore is not in the visible part of this file - confirm.
29 SAVED_MDSSIZE=${MDSSIZE}
30 SAVED_OSTSIZE=${OSTSIZE}
31 # use a small MDS + OST size to speed up formatting
32 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
# Set up the filesystem before any version checks or tests run
# (helper is defined outside this file).
36 check_and_setup_lustre
# MDS older than 2.3.60: report the skip and clean up.
# NOTE(review): this chain ends in "&&" and its final command is not visible
# here; as shown it would continue into the next test - confirm what
# terminates it (likely an exit).
38 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
39 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
# MDS at or below 2.4.90: add test 2c to the always-excluded list.
42 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
43 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
# ost1 older than 2.5.55: add tests 11 through 18 to the always-excluded list.
45 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
46 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18"
# Best effort: add "lfsck" to the debug mask. Output is discarded and a
# failure is ignored.
50 $LCTL set_param debug=+lfsck > /dev/null || true
# Names of the first MDT and OST, derived from FSNAME.
52 MDT_DEV="${FSNAME}-MDT0000"
53 OST_DEV="${FSNAME}-OST0000"
# Device name for $SINGLEMDS; the "mds" text is stripped from the facet name
# before it is passed to mdsdevname.
54 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# Command strings for controlling LFSCK. The tests expand them unquoted so
# that they word-split into a command line, and may append extra options
# (e.g. "$START_NAMESPACE -r", "$START_NAMESPACE -s <speed>").
55 START_NAMESPACE="do_facet $SINGLEMDS \
56 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
57 START_LAYOUT="do_facet $SINGLEMDS \
58 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
59 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
60 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Command strings that print the LFSCK state; the tests pipe their output
# through awk to pick out single fields such as "status" or "flags".
61 SHOW_NAMESPACE="do_facet $SINGLEMDS \
62 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
63 SHOW_LAYOUT="do_facet $SINGLEMDS \
64 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
65 SHOW_LAYOUT_ON_OST="do_facet ost1 \
66 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Mount options passed to "start"; the NOSCRUB variant adds "noscrub" and is
# used where a target is started with OI scrub disabled.
67 MOUNT_OPTS_SCRUB="-o user_xattr"
68 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
81 if [ ! -z $igif ]; then
82 #define OBD_FAIL_FID_IGIF 0x1504
83 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
86 echo "preparing... ${nfiles} * ${ndirs} files will be created."
88 cp $LUSTRE/tests/*.sh $DIR/
89 for ((i = 0; i < ${ndirs}; i++)); do
90 mkdir $DIR/$tdir/d${i}
91 touch $DIR/$tdir/f${i}
92 for ((j = 0; j < ${nfiles}; j++)); do
93 touch $DIR/$tdir/d${i}/f${j}
95 mkdir $DIR/$tdir/e${i}
98 if [ ! -z $igif ]; then
99 touch $DIR/$tdir/dummy
100 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
104 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
105 echo "stop $SINGLEMDS"
106 stop $SINGLEMDS > /dev/null || error "Fail to stop MDS!"
111 echo "start $SINGLEMDS"
112 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
113 error "(1) Fail to start MDS!"
115 #define OBD_FAIL_LFSCK_DELAY1 0x1600
116 do_facet $SINGLEMDS $LCTL set_param fail_val=3
117 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
118 $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
120 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
122 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
123 [ "$STATUS" == "scanning-phase1" ] ||
124 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
126 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
128 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
129 [ "$STATUS" == "stopped" ] ||
130 error "(6) Expect 'stopped', but got '$STATUS'"
132 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
134 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
135 [ "$STATUS" == "scanning-phase1" ] ||
136 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
138 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
139 do_facet $SINGLEMDS $LCTL set_param fail_val=0
141 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
142 [ "$STATUS" == "completed" ] ||
143 error "(9) Expect 'completed', but got '$STATUS'"
145 local repaired=$($SHOW_NAMESPACE |
146 awk '/^updated_phase1/ { print $2 }')
147 [ $repaired -eq 0 ] ||
148 error "(10) Expect nothing to be repaired, but got: $repaired"
150 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
151 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
154 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
155 [ "$STATUS" == "completed" ] ||
156 error "(12) Expect 'completed', but got '$STATUS'"
158 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
159 [ $((scanned1 + 1)) -eq $scanned2 ] ||
160 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
162 echo "stopall, should NOT crash LU-3649"
165 run_test 0 "Control LFSCK manually"
169 echo "start $SINGLEMDS"
170 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
171 error "(1) Fail to start MDS!"
173 mount_client $MOUNT || error "(2) Fail to start client!"
175 #define OBD_FAIL_FID_INDIR 0x1501
176 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
177 touch $DIR/$tdir/dummy
179 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
181 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
184 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
185 [ "$STATUS" == "completed" ] ||
186 error "(4) Expect 'completed', but got '$STATUS'"
188 local repaired=$($SHOW_NAMESPACE |
189 awk '/^updated_phase1/ { print $2 }')
190 [ $repaired -eq 1 ] ||
191 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
193 mount_client $MOUNT || error "(6) Fail to start client!"
195 #define OBD_FAIL_FID_LOOKUP 0x1505
196 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
197 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
199 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
201 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
206 echo "start $SINGLEMDS"
207 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
208 error "(1) Fail to start MDS!"
210 mount_client $MOUNT || error "(2) Fail to start client!"
212 #define OBD_FAIL_FID_INLMA 0x1502
213 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
214 touch $DIR/$tdir/dummy
216 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
218 #define OBD_FAIL_FID_NOLMA 0x1506
219 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
220 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
223 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
224 [ "$STATUS" == "completed" ] ||
225 error "(4) Expect 'completed', but got '$STATUS'"
227 local repaired=$($SHOW_NAMESPACE |
228 awk '/^updated_phase1/ { print $2 }')
229 [ $repaired -eq 1 ] ||
230 error "(5) Fail to repair missed FID-in-LMA: $repaired"
232 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
233 mount_client $MOUNT || error "(6) Fail to start client!"
235 #define OBD_FAIL_FID_LOOKUP 0x1505
236 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
237 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
239 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
241 run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
245 echo "start $SINGLEMDS"
246 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
247 error "(1) Fail to start MDS!"
249 mount_client $MOUNT || error "(2) Fail to start client!"
251 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
252 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
253 touch $DIR/$tdir/dummy
255 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
257 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
260 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
261 [ "$STATUS" == "completed" ] ||
262 error "(4) Expect 'completed', but got '$STATUS'"
264 local repaired=$($SHOW_NAMESPACE |
265 awk '/^updated_phase1/ { print $2 }')
266 [ $repaired -eq 1 ] ||
267 error "(5) Fail to repair crashed linkEA: $repaired"
269 mount_client $MOUNT || error "(6) Fail to start client!"
271 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
272 error "(7) Fail to stat $DIR/$tdir/dummy"
274 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
275 local dummyname=$($LFS fid2path $DIR $dummyfid)
276 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
277 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
279 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
284 echo "start $SINGLEMDS"
285 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
286 error "(1) Fail to start MDS!"
288 mount_client $MOUNT || error "(2) Fail to start client!"
290 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
291 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
292 touch $DIR/$tdir/dummy
294 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
296 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
299 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
300 [ "$STATUS" == "completed" ] ||
301 error "(4) Expect 'completed', but got '$STATUS'"
303 local repaired=$($SHOW_NAMESPACE |
304 awk '/^updated_phase2/ { print $2 }')
305 [ $repaired -eq 1 ] ||
306 error "(5) Fail to repair crashed linkEA: $repaired"
308 mount_client $MOUNT || error "(6) Fail to start client!"
310 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
311 error "(7) Fail to stat $DIR/$tdir/dummy"
313 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
314 local dummyname=$($LFS fid2path $DIR $dummyfid)
315 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
316 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
318 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
323 echo "start $SINGLEMDS"
324 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
325 error "(1) Fail to start MDS!"
327 mount_client $MOUNT || error "(2) Fail to start client!"
329 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
330 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
331 touch $DIR/$tdir/dummy
333 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
335 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
338 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
339 [ "$STATUS" == "completed" ] ||
340 error "(4) Expect 'completed', but got '$STATUS'"
342 local repaired=$($SHOW_NAMESPACE |
343 awk '/^updated_phase2/ { print $2 }')
344 [ $repaired -eq 1 ] ||
345 error "(5) Fail to repair crashed linkEA: $repaired"
347 mount_client $MOUNT || error "(6) Fail to start client!"
349 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
350 error "(7) Fail to stat $DIR/$tdir/dummy"
352 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
353 local dummyname=$($LFS fid2path $DIR $dummyfid)
354 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
355 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
357 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
362 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
363 echo "start $SINGLEMDS with disabling OI scrub"
364 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
365 error "(2) Fail to start MDS!"
367 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
368 [ "$STATUS" == "init" ] ||
369 error "(3) Expect 'init', but got '$STATUS'"
371 #define OBD_FAIL_LFSCK_DELAY2 0x1601
372 do_facet $SINGLEMDS $LCTL set_param fail_val=1
373 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
374 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
377 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
378 [ "$STATUS" == "scanning-phase1" ] ||
379 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
381 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
382 [ "$FLAGS" == "inconsistent" ] ||
383 error "(6) Expect 'inconsistent', but got '$FLAGS'"
385 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
386 do_facet $SINGLEMDS $LCTL set_param fail_val=0
388 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
389 [ "$STATUS" == "completed" ] ||
390 error "(7) Expect 'completed', but got '$STATUS'"
392 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
393 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
395 local repaired=$($SHOW_NAMESPACE |
396 awk '/^updated_phase1/ { print $2 }')
397 [ $repaired -ge 9 ] ||
398 error "(9) Fail to repair crashed linkEA: $repaired"
400 mount_client $MOUNT || error "(10) Fail to start client!"
402 #define OBD_FAIL_FID_LOOKUP 0x1505
403 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
404 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
406 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
408 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
413 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
414 echo "start $SINGLEMDS with disabling OI scrub"
415 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
416 error "(2) Fail to start MDS!"
418 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
419 [ "$STATUS" == "init" ] ||
420 error "(3) Expect 'init', but got '$STATUS'"
422 #define OBD_FAIL_LFSCK_DELAY2 0x1601
423 do_facet $SINGLEMDS $LCTL set_param fail_val=1
424 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
425 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
428 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
429 [ "$STATUS" == "scanning-phase1" ] ||
430 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
432 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
433 [ "$FLAGS" == "inconsistent,upgrade" ] ||
434 error "(6) Expect 'inconsistent,upgrade', but got '$FLAGS'"
436 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
437 do_facet $SINGLEMDS $LCTL set_param fail_val=0
439 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
440 [ "$STATUS" == "completed" ] ||
441 error "(7) Expect 'completed', but got '$STATUS'"
443 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
444 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
446 local repaired=$($SHOW_NAMESPACE |
447 awk '/^updated_phase1/ { print $2 }')
448 [ $repaired -ge 2 ] ||
449 error "(9) Fail to repair crashed linkEA: $repaired"
451 mount_client $MOUNT || error "(10) Fail to start client!"
453 #define OBD_FAIL_FID_LOOKUP 0x1505
454 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
455 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
457 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
459 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
460 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
461 local dummyname=$($LFS fid2path $DIR $dummyfid)
462 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
463 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
# Test 5 covers IGIF objects (cf. OBD_FAIL_FID_IGIF, 0x1504); the description
# uses that spelling.
465 run_test 5 "LFSCK can handle IGIF object upgrading"
469 echo "start $SINGLEMDS"
470 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
471 error "(1) Fail to start MDS!"
473 #define OBD_FAIL_LFSCK_DELAY1 0x1600
474 do_facet $SINGLEMDS $LCTL set_param fail_val=1
475 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
476 $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
478 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
479 [ "$STATUS" == "scanning-phase1" ] ||
480 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
482 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
484 # Fail the LFSCK to guarantee there is at least one checkpoint
485 #define OBD_FAIL_LFSCK_FATAL1 0x1608
486 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
488 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
489 [ "$STATUS" == "failed" ] ||
490 error "(4) Expect 'failed', but got '$STATUS'"
492 local POSITION0=$($SHOW_NAMESPACE |
493 awk '/^last_checkpoint_position/ { print $2 }' |
496 #define OBD_FAIL_LFSCK_DELAY1 0x1600
497 do_facet $SINGLEMDS $LCTL set_param fail_val=1
498 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
499 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
501 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
502 [ "$STATUS" == "scanning-phase1" ] ||
503 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
505 local POSITION1=$($SHOW_NAMESPACE |
506 awk '/^latest_start_position/ { print $2 }' |
508 [ $POSITION0 -lt $POSITION1 ] ||
509 error "(7) Expect larger than: $POSITION0, but got $POSITION1"
511 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
512 do_facet $SINGLEMDS $LCTL set_param fail_val=0
514 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
515 [ "$STATUS" == "completed" ] ||
516 error "(8) Expect 'completed', but got '$STATUS'"
518 run_test 6a "LFSCK resumes from last checkpoint (1)"
522 echo "start $SINGLEMDS"
523 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
524 error "(1) Fail to start MDS!"
526 #define OBD_FAIL_LFSCK_DELAY2 0x1601
527 do_facet $SINGLEMDS $LCTL set_param fail_val=1
528 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
529 $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
531 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
532 [ "$STATUS" == "scanning-phase1" ] ||
533 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
535 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
537 # Fail the LFSCK to guarantee there is at least one checkpoint
538 #define OBD_FAIL_LFSCK_FATAL2 0x1609
539 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
541 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
542 [ "$STATUS" == "failed" ] ||
543 error "(4) Expect 'failed', but got '$STATUS'"
545 local POSITION0=$($SHOW_NAMESPACE |
546 awk '/^last_checkpoint_position/ { print $4 }')
548 #define OBD_FAIL_LFSCK_DELAY2 0x1601
549 do_facet $SINGLEMDS $LCTL set_param fail_val=1
550 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
551 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
553 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
554 [ "$STATUS" == "scanning-phase1" ] ||
555 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
557 local POSITION1=$($SHOW_NAMESPACE |
558 awk '/^latest_start_position/ { print $4 }')
559 if [ $POSITION0 -gt $POSITION1 ]; then
560 [ $POSITION1 -eq 0 -a $POSITION0 -eq $((POSITION1 + 1)) ] ||
561 error "(7) Expect larger than: $POSITION0, but got $POSITION1"
564 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
565 do_facet $SINGLEMDS $LCTL set_param fail_val=0
567 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
568 [ "$STATUS" == "completed" ] ||
569 error "(8) Expect 'completed', but got '$STATUS'"
571 run_test 6b "LFSCK resumes from last checkpoint (2)"
576 echo "start $SINGLEMDS"
577 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
578 error "(1) Fail to start MDS!"
580 #define OBD_FAIL_LFSCK_DELAY2 0x1601
581 do_facet $SINGLEMDS $LCTL set_param fail_val=1
582 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
583 $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
585 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
586 [ "$STATUS" == "scanning-phase1" ] ||
587 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
589 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
591 echo "stop $SINGLEMDS"
592 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
594 echo "start $SINGLEMDS"
595 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
596 error "(5) Fail to start MDS!"
598 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
599 [ "$STATUS" == "scanning-phase1" ] ||
600 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
602 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
603 do_facet $SINGLEMDS $LCTL set_param fail_val=0
605 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
606 [ "$STATUS" == "completed" ] ||
607 error "(7) Expect 'completed', but got '$STATUS'"
609 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
614 echo "start $SINGLEMDS"
615 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
616 error "(1) Fail to start MDS!"
618 mount_client $MOUNT || error "(2) Fail to start client!"
620 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
621 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
622 for ((i = 0; i < 20; i++)); do
623 touch $DIR/$tdir/dummy${i}
626 #define OBD_FAIL_LFSCK_DELAY3 0x1602
627 do_facet $SINGLEMDS $LCTL set_param fail_val=1
628 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
629 $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
632 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
633 [ "$STATUS" == "scanning-phase2" ] ||
634 error "(4) Expect 'scanning-phase2', but got '$STATUS'"
636 echo "stop $SINGLEMDS"
637 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
639 echo "start $SINGLEMDS"
640 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
641 error "(6) Fail to start MDS!"
643 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
644 [ "$STATUS" == "scanning-phase2" ] ||
645 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
647 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
648 do_facet $SINGLEMDS $LCTL set_param fail_val=0
650 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
651 [ "$STATUS" == "completed" ] ||
652 error "(8) Expect 'completed', but got '$STATUS'"
654 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
659 echo "start $SINGLEMDS"
660 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
661 error "(1) Fail to start MDS!"
663 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
664 [ "$STATUS" == "init" ] ||
665 error "(2) Expect 'init', but got '$STATUS'"
667 mount_client $MOUNT || error "(3) Fail to start client!"
669 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
670 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
671 mkdir $DIR/$tdir/crashed
673 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
674 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
675 for ((i = 0; i < 5; i++)); do
676 touch $DIR/$tdir/dummy${i}
679 #define OBD_FAIL_LFSCK_DELAY2 0x1601
680 do_facet $SINGLEMDS $LCTL set_param fail_val=2
681 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
682 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
684 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
685 [ "$STATUS" == "scanning-phase1" ] ||
686 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
688 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
690 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
691 [ "$STATUS" == "stopped" ] ||
692 error "(7) Expect 'stopped', but got '$STATUS'"
694 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
696 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
697 [ "$STATUS" == "scanning-phase1" ] ||
698 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
700 #define OBD_FAIL_LFSCK_FATAL2 0x1609
701 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
703 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
704 [ "$STATUS" == "failed" ] ||
705 error "(10) Expect 'failed', but got '$STATUS'"
707 #define OBD_FAIL_LFSCK_DELAY1 0x1600
708 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
709 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
711 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
712 [ "$STATUS" == "scanning-phase1" ] ||
713 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
715 #define OBD_FAIL_LFSCK_CRASH 0x160a
716 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
719 echo "stop $SINGLEMDS"
720 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
722 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
723 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
725 echo "start $SINGLEMDS"
726 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
727 error "(14) Fail to start MDS!"
729 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
730 [ "$STATUS" == "crashed" ] ||
731 error "(15) Expect 'crashed', but got '$STATUS'"
733 #define OBD_FAIL_LFSCK_DELAY2 0x1601
734 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
735 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
737 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
738 [ "$STATUS" == "scanning-phase1" ] ||
739 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
741 echo "stop $SINGLEMDS"
742 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
744 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
745 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
747 echo "start $SINGLEMDS"
748 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
749 error "(19) Fail to start MDS!"
751 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
752 [ "$STATUS" == "paused" ] ||
753 error "(20) Expect 'paused', but got '$STATUS'"
755 #define OBD_FAIL_LFSCK_DELAY3 0x1602
756 do_facet $SINGLEMDS $LCTL set_param fail_val=2
757 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
759 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
761 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
762 [ "$STATUS" == "scanning-phase2" ] ||
763 error "(22) Expect 'scanning-phase2', but got '$STATUS'"
765 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
766 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
767 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
769 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
770 do_facet $SINGLEMDS $LCTL set_param fail_val=0
772 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
773 [ "$STATUS" == "completed" ] ||
774 error "(24) Expect 'completed', but got '$STATUS'"
776 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
777 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
780 run_test 8 "LFSCK state machine"
783 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
784 skip "Testing on UP system, the speed may be inaccurate."
789 echo "start $SINGLEMDS"
790 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
791 error "(1) Fail to start MDS!"
793 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
794 [ "$STATUS" == "init" ] ||
795 error "(2) Expect 'init', but got '$STATUS'"
797 local BASE_SPEED1=100
799 $START_NAMESPACE -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
802 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
803 [ "$STATUS" == "scanning-phase1" ] ||
804 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
806 local SPEED=$($SHOW_NAMESPACE |
807 awk '/^average_speed_phase1/ { print $2 }')
809 # The timing may be off; normally the error is less than 2 seconds.
810 # On top of that we allow another 20% for scheduling error.
812 # MAX_MARGIN = 1.2 = 12 / 10
813 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
814 RUN_TIME1 * 12 / 10))
815 [ $SPEED -lt $MAX_SPEED ] ||
816 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
819 local BASE_SPEED2=300
821 do_facet $SINGLEMDS \
822 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
825 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
826 # MIN_MARGIN = 0.8 = 8 / 10
827 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
828 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
829 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
830 [ $SPEED -gt $MIN_SPEED ] ||
831 error "(5) Got speed $SPEED, expected more than $MIN_SPEED"
833 # MAX_MARGIN = 1.2 = 12 / 10
834 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
835 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
836 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
837 [ $SPEED -lt $MAX_SPEED ] ||
838 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
840 do_facet $SINGLEMDS \
841 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
843 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
844 [ "$STATUS" == "completed" ] ||
845 error "(7) Expect 'completed', but got '$STATUS'"
847 run_test 9a "LFSCK speed control (1)"
850 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
851 skip "Testing on UP system, the speed may be inaccurate."
856 echo "start $SINGLEMDS"
857 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
858 error "(1) Fail to start MDS!"
860 mount_client $MOUNT || error "(2) Fail to start client!"
862 echo "Another preparing... 50 * 50 files (with error) will be created."
863 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
864 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
865 for ((i = 0; i < 50; i++)); do
866 mkdir -p $DIR/$tdir/d${i}
867 touch $DIR/$tdir/f${i}
868 for ((j = 0; j < 50; j++)); do
869 touch $DIR/$tdir/d${i}/f${j}
873 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
874 [ "$STATUS" == "init" ] ||
875 error "(3) Expect 'init', but got '$STATUS'"
877 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
878 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
879 $START_NAMESPACE || error "(4) Fail to start LFSCK!"
882 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
883 [ "$STATUS" == "stopped" ] ||
884 error "(5) Expect 'stopped', but got '$STATUS'"
886 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
890 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
893 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
894 [ "$STATUS" == "scanning-phase2" ] ||
895 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
897 local SPEED=$($SHOW_NAMESPACE |
898 awk '/^average_speed_phase2/ { print $2 }')
899 # The timing may be off; normally the error is less than 2 seconds.
900 # On top of that we allow another 20% for scheduling error.
902 # MAX_MARGIN = 1.2 = 12 / 10
903 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
904 RUN_TIME1 * 12 / 10))
905 [ $SPEED -lt $MAX_SPEED ] ||
906 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
909 local BASE_SPEED2=150
911 do_facet $SINGLEMDS \
912 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
915 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
916 # MIN_MARGIN = 0.8 = 8 / 10
917 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
918 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
919 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
920 [ $SPEED -gt $MIN_SPEED ] ||
921 error "(9) Got speed $SPEED, expected more than $MIN_SPEED"
923 # MAX_MARGIN = 1.2 = 12 / 10
924 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
925 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
926 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
927 [ $SPEED -lt $MAX_SPEED ] ||
928 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
930 do_facet $SINGLEMDS \
931 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
933 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
934 [ "$STATUS" == "completed" ] ||
935 error "(11) Expect 'completed', but got '$STATUS'"
937 run_test 9b "LFSCK speed control (2)"
# Test 10: the filesystem must remain usable while namespace LFSCK is in
# scanning-phase1.
# NOTE(review): this listing elides the function header, the loop `done`
# lines and some intermediate steps; comments describe only visible lines.

# Bring up the MDS with $MOUNT_OPTS_SCRUB and mount the client.
942 echo "start $SINGLEMDS"
943 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
944 error "(1) Fail to start MDS!"
946 mount_client $MOUNT || error "(2) Fail to start client!"
# With fail_loc 0x1603 armed on the MDS, create the even-numbered entries
# (i = 0, 2, ..., 998): directory d${i}, file f${i}, and f0..f4 inside d${i}.
948 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
949 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
950 for ((i = 0; i < 1000; i = $((i+2)))); do
951 mkdir -p $DIR/$tdir/d${i}
952 touch $DIR/$tdir/f${i}
953 for ((j = 0; j < 5; j++)); do
954 touch $DIR/$tdir/d${i}/f${j}
# Same layout for the odd-numbered entries (i = 1, 3, ..., 999), this time
# with fail_loc 0x1604 armed.
958 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
959 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
960 for ((i = 1; i < 1000; i = $((i+2)))); do
961 mkdir -p $DIR/$tdir/d${i}
962 touch $DIR/$tdir/f${i}
963 for ((j = 0; j < 5; j++)); do
964 touch $DIR/$tdir/d${i}/f${j}
# Disarm the failure injection, then add a second link to f200.
968 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
969 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
# NOTE(review): a client unmount / MDS restart presumably sits in the elided
# lines before this second mount_client - confirm against the full file.
972 mount_client $MOUNT || error "(3) Fail to start client!"
# Before LFSCK is started the namespace status must read 'init'.
974 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
975 [ "$STATUS" == "init" ] ||
976 error "(4) Expect 'init', but got '$STATUS'"
# Start namespace LFSCK with "-s 100"; presumably a speed limit that keeps
# the scan in phase 1 long enough for the checks below - confirm.
978 $START_NAMESPACE -s 100 || error "(5) Fail to start LFSCK!"
981 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
982 [ "$STATUS" == "scanning-phase1" ] ||
983 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Exercise ordinary client operations while the scan is running: recursive
# listing, create, mkdir, unlink, recursive remove, rename, hard link and
# symlink. Each failure is reported with its own error number.
985 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
987 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
989 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
991 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
993 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
995 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
997 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
999 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
1000 error "(14) Fail to softlink!"
# The scan must still be in phase 1 after all of the operations above.
1002 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1003 [ "$STATUS" == "scanning-phase1" ] ||
1004 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# Set lfsck_speed_limit to 0 and unmount the client, then expect the scan to
# have reached 'completed'.
# NOTE(review): no wait is visible between these steps; one may be elided.
1006 do_facet $SINGLEMDS \
1007 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1008 umount_client $MOUNT
1010 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
1011 [ "$STATUS" == "completed" ] ||
1012 error "(16) Expect 'completed', but got '$STATUS'"
1014 run_test 10 "System is available during LFSCK scanning"
# ost_remove_lastid: mount the backing filesystem of ost${ost} locally, delete
# O/${idx}/LAST_ID beneath its mount point, and unmount again.
# Returns: 1 if the local mount fails, 2 if the unmount fails.
# NOTE(review): the assignments of `ost` and `idx` and the closing brace are
# elided from this listing; the caller `ost_remove_lastid 1 0` suggests they
# are taken from $1 and $2 - confirm against the full file.
1017 ost_remove_lastid() {
1020 local rcmd="do_facet ost${ost}"
1022 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1024 # step 1: local mount
1025 mount_fstype ost${ost} || return 1
1026 # step 2: remove the specified LAST_ID
# NOTE(review): the exit status of this rm is not checked, and -f also hides
# a missing file, so a failed removal would go unnoticed here.
1027 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/LAST_ID
1029 unmount_fstype ost${ost} || return 2
# Test 11a: layout LFSCK on the OST must rebuild a LAST_ID file that was
# removed from disk.
# NOTE(review): the function header, the bodies of the `|| {` failure
# handlers and some intermediate steps are elided from this listing.

# Reformat and set up from scratch.
1036 formatall > /dev/null
1038 setupall > /dev/null
# Single-stripe layout on OST index 0, then create 64 files under it.
1041 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1042 createmany -o $DIR/$tdir/f 64
# Delete O/0/LAST_ID on ost1 (arguments: ost=1, idx=0).
1047 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
# Bring ost1 back with $MOUNT_OPTS_NOSCRUB.
1050 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1051 error "(2) Fail to start ost1"
# Layout LFSCK has not run yet, so the OST-side status must read 'init'.
1053 local STATUS=$($SHOW_LAYOUT_ON_OST | awk '/^status/ { print $2 }')
1054 [ "$STATUS" == "init" ] ||
1055 error "(3) Expect 'init', but got '$STATUS'"
# Arm OBD_FAIL_LFSCK_DELAY4 on ost1 with fail_val=3; presumably this delays
# the scan so the intermediate flags can be observed - confirm.
1057 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1058 do_facet ost1 $LCTL set_param fail_val=3
1059 do_facet ost1 $LCTL set_param fail_loc=0x160e
1061 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1062 $START_LAYOUT_ON_OST || error "(4) Fail to start LFSCK on OST!"
# Poll ost1 until the 'flags' field reads "crashed_lastid" (limit argument
# 60). The \\\$2 escaping leaves \$2 in the string handed to
# wait_update_facet; presumably a later evaluation turns it into $2 for awk.
1064 wait_update_facet ost1 "$LCTL get_param -n \
1065 obdfilter.${OST_DEV}.lfsck_layout |
1066 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
# Disarm the failure injection so the scan can proceed.
1071 do_facet ost1 $LCTL set_param fail_val=0
1072 do_facet ost1 $LCTL set_param fail_loc=0
# Poll ost1 until the status reads "completed" (limit argument 3).
1074 wait_update_facet ost1 "$LCTL get_param -n \
1075 obdfilter.${OST_DEV}.lfsck_layout |
1076 awk '/^status/ { print \\\$2 }'" "completed" 3 || {
# After a successful rebuild no flags may remain set.
1081 echo "the LAST_ID(s) should have been rebuilt"
1082 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1083 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1085 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b: layout LFSCK on the OST must repair an on-disk LAST_ID that is
# behind the value the OST had in memory.
# NOTE(review): the function header, the `done` of the polling loop, the
# bodies of the `|| {` failure handlers and some steps are elided here.

# Reformat and set up from scratch, single stripe on OST index 0.
1091 formatall > /dev/null
1093 setupall > /dev/null
1096 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
# Create 64 files while fail_loc 0x160d is armed on ost1, then record the
# last_id value reported for sequence 0x100000000 (the text after the ':').
1098 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1099 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1100 do_facet ost1 $LCTL set_param fail_loc=0x160d
1101 createmany -o $DIR/$tdir/f 64
# NOTE(review): this query uses a literal `lctl` while the later queries in
# this test use $LCTL - confirm whether that difference is intended.
1102 local lastid1=$(do_facet ost1 "lctl get_param -n \
1103 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1104 awk -F: '{ print $2 }')
# Take the client and ost1 down.
1106 umount_client $MOUNT
1108 stop ost1 || error "(1) Fail to stop ost1"
# Arm OBD_FAIL_OST_ENOSPC (0x215) on ost1 before it is started again.
1110 #define OBD_FAIL_OST_ENOSPC 0x215
1111 do_facet ost1 $LCTL set_param fail_loc=0x215
1114 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1115 error "(2) Fail to start ost1"
# Layout LFSCK has not run yet, so the status must read 'init'.
1117 local STATUS=$($SHOW_LAYOUT_ON_OST | awk '/^status/ { print $2 }')
1118 [ "$STATUS" == "init" ] ||
1119 error "(3) Expect 'init', but got '$STATUS'"
# Poll up to 60 times until last_id for 0x100000000 is reported again.
# NOTE(review): $lastid2 is unquoted in the test below; with an empty value
# `[ ! -z ]` still evaluates to false, so the loop keeps polling, but quoting
# would make that explicit.
1121 for ((i = 0; i < 60; i++)); do
1122 lastid2=$(do_facet ost1 "lctl get_param -n \
1123 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1124 awk -F: '{ print $2 }')
1125 [ ! -z $lastid2 ] && break;
# The value read back after the restart must be lower than the one recorded
# before it.
# NOTE(review): if $lastid2 is still empty the unquoted numeric test errors
# out and falls through to error "(4)" with a blank value in the message.
1129 echo "the on-disk LAST_ID should be smaller than the expected one"
1130 [ $lastid1 -gt $lastid2 ] ||
1131 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1133 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1134 $START_LAYOUT_ON_OST || error "(5) Fail to start LFSCK on OST!"
# Poll ost1 until the layout LFSCK status reads "completed" (limit 3).
1136 wait_update_facet ost1 "$LCTL get_param -n \
1137 obdfilter.${OST_DEV}.lfsck_layout |
1138 awk '/^status/ { print \\\$2 }'" "completed" 3 || {
# Restart ost1 so that last_id is read again after the repair.
1144 stop ost1 || error "(7) Fail to stop ost1"
1147 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1148 error "(8) Fail to start ost1"
# Poll (limit 60) until last_id for 0x100000000 equals the value recorded
# before the first restart; the visible handler lines dump the current
# value and report error "(9)".
1150 echo "the on-disk LAST_ID should have been rebuilt"
1151 wait_update_facet ost1 "$LCTL get_param -n \
1152 obdfilter.${ost1_svc}.last_id | grep 0x100000000 |
1153 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
1154 $LCTL get_param -n obdfilter.${ost1_svc}.last_id
1155 error "(9) expect lastid1 0x100000000:$lastid1"
# Disarm the failure injection on ost1.
1158 do_facet ost1 $LCTL set_param fail_loc=0
1160 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12: one lctl command with -A must start and stop LFSCK on every
# target, for both the namespace and the layout component.
# NOTE(review): the function header and the `done` lines of the loops are
# elided from this listing.

# Needs at least two MDSes.
# NOTE(review): if this guard runs inside the test function, `exit 0` ends
# the whole script instead of only skipping this test - confirm intent.
1163 [ $MDSCOUNT -lt 2 ] &&
1164 skip "We need at least 2 MDSes for test_12" && exit 0
# Reformat and set up from scratch.
1169 formatall > /dev/null
1171 setupall > /dev/null
# For every MDS index k: the layout LFSCK status must be 'init'; then a
# directory named ${k} is created with `-i $((k - 1))` and filled with 100
# files.
1175 echo "All the LFSCK targets should be in 'init' status."
1176 for k in $(seq $MDSCOUNT); do
1177 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1178 mdd.$(facet_svc mds${k}).lfsck_layout |
1179 awk '/^status/ { print $2 }')
1180 [ "$STATUS" == "init" ] ||
1181 error "(1) MDS${k} Expect 'init', but got '$STATUS'"
1183 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1184 createmany -o $DIR/$tdir/${k}/f 100
# --- namespace LFSCK: start on all targets (-A) with -s 1 ---
1187 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1188 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1189 -s 1 || error "(2) Fail to start LFSCK on all devices!"
# Every MDS must report scanning-phase1.
1191 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1192 for k in $(seq $MDSCOUNT); do
1193 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1194 mdd.$(facet_svc mds${k}).lfsck_namespace |
1195 awk '/^status/ { print $2 }')
1196 [ "$STATUS" == "scanning-phase1" ] ||
1197 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
# Stop on all targets with one command; every MDS must report 'stopped'.
1200 echo "Stop namespace LFSCK on all targets by single lctl command."
1201 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1202 error "(4) Fail to stop LFSCK on all devices!"
1204 echo "All the LFSCK targets should be in 'stopped' status."
1205 for k in $(seq $MDSCOUNT); do
1206 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1207 mdd.$(facet_svc mds${k}).lfsck_namespace |
1208 awk '/^status/ { print $2 }')
1209 [ "$STATUS" == "stopped" ] ||
1210 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
# Start again with -s 0 -r and wait (limit 8) for every MDS to complete.
1213 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1214 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1215 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1217 echo "All the LFSCK targets should be in 'completed' status."
1218 for k in $(seq $MDSCOUNT); do
1219 wait_update_facet mds${k} "$LCTL get_param -n \
1220 mdd.$(facet_svc mds${k}).lfsck_namespace |
1221 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1222 error "(7) MDS${k} is not the expected 'completed'"
# --- layout LFSCK: same start / stop / restart sequence ---
1225 echo "Start layout LFSCK on all targets by single command (-s 1)."
1226 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1227 -s 1 || error "(8) Fail to start LFSCK on all devices!"
1229 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1230 for k in $(seq $MDSCOUNT); do
1231 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1232 mdd.$(facet_svc mds${k}).lfsck_layout |
1233 awk '/^status/ { print $2 }')
1234 [ "$STATUS" == "scanning-phase1" ] ||
1235 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1238 echo "Stop layout LFSCK on all targets by single lctl command."
1239 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1240 error "(10) Fail to stop LFSCK on all devices!"
# For layout LFSCK the 'stopped' state is checked on the MDSes and on the
# OSTs (obdfilter.*.lfsck_layout).
1242 echo "All the LFSCK targets should be in 'stopped' status."
1243 for k in $(seq $MDSCOUNT); do
1244 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1245 mdd.$(facet_svc mds${k}).lfsck_layout |
1246 awk '/^status/ { print $2 }')
1247 [ "$STATUS" == "stopped" ] ||
1248 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
1251 for k in $(seq $OSTCOUNT); do
1252 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1253 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1254 awk '/^status/ { print $2 }')
1255 [ "$STATUS" == "stopped" ] ||
1256 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1259 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1260 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1261 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1263 echo "All the LFSCK targets should be in 'completed' status."
1264 for k in $(seq $MDSCOUNT); do
1265 # The LFSCK status query interval is 30 seconds. For the case
1266 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1267 # time to guarantee the status sync up.
1268 wait_update_facet mds${k} "$LCTL get_param -n \
1269 mdd.$(facet_svc mds${k}).lfsck_layout |
1270 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1271 error "(14) MDS${k} is not the expected 'completed'"
1274 run_test 12 "single command to trigger LFSCK on all devices"
# Test 13: layout LFSCK must regenerate a corrupted lmm_oi in the layout EA.
# NOTE(review): the function header and some steps (for example the stopall
# announced by the echo below) are elided from this listing.
1278 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1279 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1280 echo "MDT-object FID."
# Reformat and set up from scratch.
1286 formatall > /dev/null
1288 setupall > /dev/null
# Create 32 files while fail_loc 0x160f is armed on the MDS, then disarm it.
1292 echo "Inject failure stub to simulate bad lmm_oi"
1293 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1294 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1295 createmany -o $DIR/$tdir/f 32
1296 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1298 echo "stopall to cleanup object cache"
1301 setupall > /dev/null
# Run layout LFSCK on the MDS and wait (limit 3) for 'completed'.
# NOTE(review): on timeout this returns 2 without calling error(), so the
# "(2)" step has no failure message.
1303 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1304 $START_LAYOUT || error "(1) Fail to start LFSCK for layout!"
1306 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1307 mdd.${MDT_DEV}.lfsck_layout |
1308 awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2
# The repaired_others counter must equal the 32 files created above.
1310 local repaired=$($SHOW_LAYOUT |
1311 awk '/^repaired_others/ { print $2 }')
1312 [ $repaired -eq 32 ] ||
1313 error "(3) Fail to repair crashed lmm_oi: $repaired"
1315 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14: layout LFSCK must re-create OST-objects that an MDT-object
# references but that do not exist.
# NOTE(review): the function header and some steps (for example the stopall
# announced by the echo below) are elided from this listing.
1319 echo "The OST-object referenced by the MDT-object should be there;"
1320 echo "otherwise, the LFSCK should re-create the missed OST-object."
# Reformat and set up from scratch, single stripe on OST index 0.
1326 formatall > /dev/null
1328 setupall > /dev/null
1331 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Create 64 files while fail_loc 0x1610 is armed on ost1, then disarm it.
1333 echo "Inject failure stub to simulate dangling referenced MDT-object"
1334 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1335 do_facet ost1 $LCTL set_param fail_loc=0x1610
1336 createmany -o $DIR/$tdir/f 64
1337 do_facet ost1 $LCTL set_param fail_loc=0
1339 echo "stopall to cleanup object cache"
1342 setupall > /dev/null
# Before the repair a long listing of the directory is expected to fail.
1344 echo "'ls' should fail because of dangling referenced MDT-object"
1345 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
# Run layout LFSCK on the MDS and wait (limit 6) for 'completed'; a timeout
# returns 3 without calling error().
1347 echo "Trigger layout LFSCK to find out dangling reference and fix them"
1348 $START_LAYOUT || error "(2) Fail to start LFSCK for layout!"
1350 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1351 mdd.${MDT_DEV}.lfsck_layout |
1352 awk '/^status/ { print \\\$2 }'" "completed" 6 || return 3
# NOTE(review): 64 files were created but 32 repairs are expected;
# presumably the injected failure affects every other object - confirm.
1354 local repaired=$($SHOW_LAYOUT |
1355 awk '/^repaired_dangling/ { print $2 }')
1356 [ $repaired -eq 32 ] ||
1357 error "(4) Fail to repair dangling reference: $repaired"
# After the repair the same listing must succeed.
1359 echo "'ls' should success after layout LFSCK repairing"
1360 ls -ail $DIR/$tdir > /dev/null || error "(5) ls should success."
1362 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Test 15a: layout LFSCK must repair an OST-object whose back pointer names
# a non-existent MDT-object.
# NOTE(review): the function header and some steps (for example the stopall
# announced by the echo below) are elided from this listing.
1366 echo "If the OST-object referenced by the MDT-object back points"
1367 echo "to some non-exist MDT-object, then the LFSCK should repair"
1368 echo "the OST-object to back point to the right MDT-object."
# Reformat and set up from scratch, single stripe on OST index 0.
1374 formatall > /dev/null
1376 setupall > /dev/null
1379 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Write 1 MiB to f0 while fail_loc 0x1611 is armed on ost1, drop the osc
# locks, then disarm the failure injection.
1381 echo "Inject failure stub to make the OST-object to back point to"
1382 echo "non-exist MDT-object."
1383 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
1385 do_facet ost1 $LCTL set_param fail_loc=0x1611
1386 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1387 cancel_lru_locks osc
1390 do_facet ost1 $LCTL set_param fail_loc=0
1392 echo "stopall to cleanup object cache"
1395 setupall > /dev/null
# Run layout LFSCK on the MDS and wait (limit 3) for 'completed'; a timeout
# returns 2 without calling error().
1397 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1398 $START_LAYOUT || error "(1) Fail to start LFSCK for layout!"
1400 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1401 mdd.${MDT_DEV}.lfsck_layout |
1402 awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2
# Exactly one unmatched pair (the single file f0) must have been repaired.
1404 local repaired=$($SHOW_LAYOUT |
1405 awk '/^repaired_unmatched_pair/ { print $2 }')
1406 [ $repaired -eq 1 ] ||
1407 error "(3) Fail to repair unmatched pair: $repaired"
1409 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b: layout LFSCK must repair an OST-object whose back pointer names
# a different MDT-object that does not reference it.
# NOTE(review): the function header and some steps (for example the stopall
# announced by the echo below) are elided from this listing.
1413 echo "If the OST-object referenced by the MDT-object back points"
1414 echo "to other MDT-object that doesn't recognize the OST-object,"
1415 echo "then the LFSCK should repair it to back point to the right"
1416 echo "MDT-object (the first one)."
# Reformat and set up from scratch, single stripe on OST index 0, and create
# an extra file named 'guard' before the failure is injected.
1422 formatall > /dev/null
1424 setupall > /dev/null
1427 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1428 touch $DIR/$tdir/guard
# Write 1 MiB to f0 while fail_loc 0x1612 is armed on ost1, drop the osc
# locks, then disarm the failure injection.
1430 echo "Inject failure stub to make the OST-object to back point to"
1431 echo "other MDT-object"
1433 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1434 do_facet ost1 $LCTL set_param fail_loc=0x1612
1435 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1436 cancel_lru_locks osc
1439 do_facet ost1 $LCTL set_param fail_loc=0
1441 echo "stopall to cleanup object cache"
1444 setupall > /dev/null
# Run layout LFSCK on the MDS and wait (limit 3) for 'completed'; a timeout
# returns 2 without calling error().
1446 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1447 $START_LAYOUT || error "(1) Fail to start LFSCK for layout!"
1449 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1450 mdd.${MDT_DEV}.lfsck_layout |
1451 awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2
# Exactly one unmatched pair must have been repaired.
1453 local repaired=$($SHOW_LAYOUT |
1454 awk '/^repaired_unmatched_pair/ { print $2 }')
1455 [ $repaired -eq 1 ] ||
1456 error "(3) Fail to repair unmatched pair: $repaired"
1458 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 16: layout LFSCK must bring the OST-object owner in line with the
# owner recorded on the MDT-object.
# NOTE(review): the function header and some intermediate steps are elided
# from this listing.
1462 echo "If the OST-object's owner information does not match the owner"
1463 echo "information stored in the MDT-object, then the LFSCK trust the"
1464 echo "MDT-object and update the OST-object's owner information."
# Reformat and set up from scratch, single stripe on OST index 0, then write
# 1 MiB to f0 and drop the osc locks.
1470 formatall > /dev/null
1472 setupall > /dev/null
1475 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1476 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1477 cancel_lru_locks osc
# Change f0's owner to 1.1 while fail_loc 0x1613 is armed on the MDS, then
# disarm the failure injection.
1481 echo "Inject failure stub to skip OST-object owner changing"
1482 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1483 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
1484 chown 1.1 $DIR/$tdir/f0
1485 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run layout LFSCK on the MDS and wait (limit 3) for 'completed'; a timeout
# returns 2 without calling error().
1487 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1490 $START_LAYOUT || error "(1) Fail to start LFSCK for layout!"
1492 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1493 mdd.${MDT_DEV}.lfsck_layout |
1494 awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2
# Exactly one inconsistent owner (file f0) must have been repaired.
1496 local repaired=$($SHOW_LAYOUT |
1497 awk '/^repaired_inconsistent_owner/ { print $2 }')
1498 [ $repaired -eq 1 ] ||
1499 error "(3) Fail to repair inconsistent owner: $repaired"
1501 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17: when two MDT-objects reference the same OST-object, layout LFSCK
# must give the unrecognized one its own OST-object.
# NOTE(review): the function header, the end of the introductory echo text
# and some steps (for example the stopall announced below) are elided.
1505 echo "If more than one MDT-objects reference the same OST-object,"
1506 echo "and the OST-object only recognizes one MDT-object, then the"
1507 echo "LFSCK should create new OST-objects for such non-recognized"
# Reformat and set up from scratch, single stripe on OST index 0.
1514 formatall > /dev/null
1516 setupall > /dev/null
1519 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# NOTE(review): "refernce" in the message below is a typo for "reference";
# it is a runtime string and is left untouched here.
1521 echo "Inject failure stub to make two MDT-objects to refernce"
1522 echo "the OST-object"
# With fail_val=0 and fail_loc 0x1614 armed on the MDS: write 1 MiB to
# 'guard', drop the osc locks, then create f0 via createmany.
1524 do_facet $SINGLEMDS $LCTL set_param fail_val=0
1525 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1526 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1614
1528 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1529 cancel_lru_locks osc
1533 createmany -o $DIR/$tdir/f 1 > /dev/null 2>&1
# Disarm the failure injection.
1535 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1536 do_facet $SINGLEMDS $LCTL set_param fail_val=0
1538 echo "stopall to cleanup object cache"
1541 setupall > /dev/null
# f0 was never written, yet it must report guard's 1 MiB size (field 5 of
# `ls -l`), which shows that both files share one OST-object.
1543 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
1544 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1545 [ $size -eq 1048576 ] ||
1546 error "(1) f0 (wrong) size should be 1048576, but got $size"
# Run layout LFSCK on the MDS and wait (limit 3) for 'completed'; a timeout
# returns 3 without calling error().
1548 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1551 $START_LAYOUT || error "(2) Fail to start LFSCK for layout!"
1553 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1554 mdd.${MDT_DEV}.lfsck_layout |
1555 awk '/^status/ { print \\\$2 }'" "completed" 3 || return 3
# Exactly one multiple-reference case must have been repaired.
1557 local repaired=$($SHOW_LAYOUT |
1558 awk '/^repaired_multiple_referenced/ { print $2 }')
1559 [ $repaired -eq 1 ] ||
1560 error "(4) Fail to repair multiple references: $repaired"
# Writing 2 MiB to f0 must leave guard at 1 MiB, proving that the two files
# no longer share an OST-object.
1562 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
1563 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1564 error "(5) Fail to write f0."
1565 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1566 [ $size -eq 1048576 ] ||
1567 error "(6) guard size should be 1048576, but got $size"
1569 run_test 17 "LFSCK can repair multiple references"
# Test 18a: when an MDT-object has lost its layout EA, layout LFSCK must
# find the orphan OST-objects and regenerate the layout entries.
# NOTE(review): the function header, the `done` lines, one `for` header and
# some steps (for example the stopall announced below) are elided.

# Needs at least two MDSes and two OSTs.
# NOTE(review): if these guards run inside the test function, `exit 0` ends
# the whole script instead of only skipping this test - confirm intent.
1572 [ $MDSCOUNT -lt 2 ] &&
1573 skip "We need at least 2 MDSes for test_18a" && exit 0
1575 [ $OSTCOUNT -lt 2 ] &&
1576 skip "We need at least 2 OSTs for test_18a" && exit 0
1579 echo "The target MDT-object is there, but related stripe information"
1580 echo "is lost or partly lost. The LFSCK should regenerate the missed"
1581 echo "layout EA entries."
# Reformat and set up from scratch.
1587 formatall > /dev/null
1589 setupall > /dev/null
# a1 is created with `-i 0` and gets a 1-stripe layout starting at OST index
# 0; a2 is created with `-i 1` and gets a 2-stripe layout starting at OST
# index 1. Each directory receives one 2 MiB file.
1592 $LFS mkdir -i 0 $DIR/$tdir/a1
1593 $LFS mkdir -i 1 $DIR/$tdir/a2
1594 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1595 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1596 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1597 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
# Field 6 of `ls -il` is the file size. Only f1's size is saved; it is also
# used as the reference for f2, which was written with the same dd size.
1599 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
# Print FID and stripe information of both files before the damage.
1601 $LFS path2fid $DIR/$tdir/a1/f1
1602 $LFS getstripe $DIR/$tdir/a1/f1
1603 $LFS path2fid $DIR/$tdir/a2/f2
1604 $LFS getstripe $DIR/$tdir/a2/f2
1606 cancel_lru_locks osc
# Arm fail_loc 0x1615 on mds1 and chown f1, then arm it on mds2 and chown
# f2; afterwards disarm it on both.
1608 echo "Inject failure, to make the MDT-object lost its layout EA"
1609 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1610 do_facet mds1 $LCTL set_param fail_loc=0x1615
1611 chown 1.1 $DIR/$tdir/a1/f1
1612 do_facet mds2 $LCTL set_param fail_loc=0x1615
1613 chown 1.1 $DIR/$tdir/a2/f2
1616 do_facet mds1 $LCTL set_param fail_loc=0
1617 do_facet mds2 $LCTL set_param fail_loc=0
1619 echo "stopall to cleanup object cache"
1622 setupall > /dev/null
# With the layout EA gone, both files must report a size different from the
# saved one.
1624 echo "The file size should be incorrect since layout EA is lost"
1625 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1626 [ "$cur_size" != "$saved_size" ] ||
1627 error "(1) Expect incorrect file1 size"
1629 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1630 [ "$cur_size" != "$saved_size" ] ||
1631 error "(2) Expect incorrect file2 size"
# Start layout LFSCK with the extra "-o" flag; presumably this enables
# orphan OST-object handling - confirm against lctl lfsck_start.
1633 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1634 $START_LAYOUT -o || error "(3) Fail to start LFSCK for layout!"
1636 for k in $(seq $MDSCOUNT); do
1637 # The LFSCK status query interval is 30 seconds. For the case
1638 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1639 # time to guarantee the status sync up.
1640 wait_update_facet mds${k} "$LCTL get_param -n \
1641 mdd.$(facet_svc mds${k}).lfsck_layout |
1642 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1643 error "(4) MDS${k} is not the expected 'completed'"
# Every OST must report 'completed' as well.
1646 for k in $(seq $OSTCOUNT); do
1647 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1648 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1649 awk '/^status/ { print $2 }')
1650 [ "$cur_status" == "completed" ] ||
1651 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): the `for` header that drives k for the check below is elided
# from this listing; the check expects mds${k} to report exactly k repaired
# orphans, which matches the 1-stripe f1 and 2-stripe f2 if k runs over the
# MDS indices - confirm against the full file.
1655 local repaired=$(do_facet mds${k} $LCTL get_param -n \
1656 mdd.$(facet_svc mds${k}).lfsck_layout |
1657 awk '/^repaired_orphan/ { print $2 }')
1658 [ $repaired -eq ${k} ] ||
1659 error "(6) Expect ${k} fixed on mds${k}, but got: $repaired"
# Print FID and stripe information of both files again after the repair.
1662 $LFS path2fid $DIR/$tdir/a1/f1
1663 $LFS getstripe $DIR/$tdir/a1/f1
1664 $LFS path2fid $DIR/$tdir/a2/f2
1665 $LFS getstripe $DIR/$tdir/a2/f2
# Both files must report the saved size again.
1667 echo "The file size should be correct after layout LFSCK scanning"
1668 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1669 [ "$cur_size" == "$saved_size" ] ||
1670 error "(7) Expect file1 size $saved_size, but got $cur_size"
1672 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1673 [ "$cur_size" == "$saved_size" ] ||
1674 error "(8) Expect file2 size $saved_size, but got $cur_size"
1676 run_test 18a "Find out orphan OST-object and repair it (1)"
# Script teardown: remove the lfsck flag from the debug mask again (failure
# is ignored via `|| true`) and restore the MDS/OST sizes that were saved in
# SAVED_MDSSIZE / SAVED_OSTSIZE at the top of the script.
1678 $LCTL set_param debug=-lfsck > /dev/null || true
1680 # restore MDS/OST size
1681 MDSSIZE=${SAVED_MDSSIZE}
1682 OSTSIZE=${SAVED_OSTSIZE}
1684 # cleanup the system at last