3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# LUSTRE defaults to the parent of this script's directory; the shared test
# framework and the CONFIG file (default: cfg/$NAME.sh) are sourced from it.
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Skip and exit when the $SINGLEMDS facet is not on ldiskfs.
20 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
21 skip "test LFSCK only for ldiskfs" && exit 0
22 require_dsh_mds || exit 0
24 MCREATE=${MCREATE:-mcreate}
# Save the incoming sizes and OST count; the end of the script restores them.
25 SAVED_MDSSIZE=${MDSSIZE}
26 SAVED_OSTSIZE=${OSTSIZE}
27 SAVED_OSTCOUNT=${OSTCOUNT}
28 # Use a small MDS + OST size to speed up formatting.
29 # Do not make MDSSIZE/OSTSIZE too small: that affects the default journal size.
32 # There is no need for many OSTs; cap them at 4 to reduce the format/start/stop overhead.
33 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
35 # Build up a clean test environment.
39 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
40 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
43 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
44 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
# Add "lfsck" to the debug setting; "|| true" means a failure here is ignored.
48 $LCTL set_param debug=+lfsck > /dev/null || true
# MDT device name plus command strings that the tests expand unquoted
# (e.g. "$START_NAMESPACE -r"), and the two sets of MDS mount options.
50 MDT_DEV="${FSNAME}-MDT0000"
51 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
52 START_NAMESPACE="do_facet $SINGLEMDS \
53 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
54 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
55 SHOW_NAMESPACE="do_facet $SINGLEMDS \
56 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
57 MOUNT_OPTS_SCRUB="-o user_xattr"
58 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
# Populate $DIR/$tdir: copies of the test scripts, $ndirs "d" directories,
# $ndirs "f" files, $nfiles files inside each "d" directory, and $ndirs "e"
# directories. ndirs, nfiles and igif are set outside this excerpt.
67 echo "preparing... $nfiles * $ndirs files will be created $(date)."
# When igif is non-empty, the files are created with fail_loc 0x1504 set.
68 if [ ! -z $igif ]; then
69 #define OBD_FAIL_FID_IGIF 0x1504
70 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
73 cp $LUSTRE/tests/*.sh $DIR/$tdir/
74 if [ $ndirs -gt 0 ]; then
75 createmany -d $DIR/$tdir/d $ndirs
76 createmany -m $DIR/$tdir/f $ndirs
77 if [ $nfiles -gt 0 ]; then
78 for ((i = 0; i < $ndirs; i++)); do
79 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
80 /dev/null || error "createmany $nfiles"
83 createmany -d $DIR/$tdir/e $ndirs
# In the igif case, also create "dummy" and then clear the fail_loc.
86 if [ ! -z $igif ]; then
87 touch $DIR/$tdir/dummy
88 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
91 echo "prepared $(date)."
# Test 0 steps: start namespace LFSCK with fail_loc 0x1600 set, stop it,
# start it again, let it complete, then reset it with -r and verify that
# success_count increased by exactly one.
97 #define OBD_FAIL_LFSCK_DELAY1 0x1600
98 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
99 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
101 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
103 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
104 [ "$STATUS" == "scanning-phase1" ] ||
105 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
107 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
109 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
110 [ "$STATUS" == "stopped" ] ||
111 error "(6) Expect 'stopped', but got '$STATUS'"
# Start again without -r.
113 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
115 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
116 [ "$STATUS" == "scanning-phase1" ] ||
117 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
# Clear fail_loc/fail_val, then wait for the status to become "completed".
119 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
120 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
121 mdd.${MDT_DEV}.lfsck_namespace |
122 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
124 error "(9) unexpected status"
127 local repaired=$($SHOW_NAMESPACE |
128 awk '/^updated_phase1/ { print $2 }')
129 [ $repaired -eq 0 ] ||
130 error "(10) Expect nothing to be repaired, but got: $repaired"
# Record success_count, run a reset scan to completion, and compare.
132 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
133 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
134 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
135 mdd.${MDT_DEV}.lfsck_namespace |
136 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
138 error "(12) unexpected status"
141 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
142 [ $((scanned1 + 1)) -eq $scanned2 ] ||
143 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
145 echo "stopall, should NOT crash LU-3649"
146 stopall || error "(14) Fail to stopall"
148 run_test 0 "Control LFSCK manually"
# Test 1a steps: create "dummy" with fail_loc 0x1501 set, run a reset
# namespace LFSCK to completion, expect updated_phase1 == 1, then list the
# directory from a client with fail_loc 0x1505 set.
153 #define OBD_FAIL_FID_INDIR 0x1501
154 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
155 touch $DIR/$tdir/dummy
157 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
159 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
160 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
161 mdd.${MDT_DEV}.lfsck_namespace |
162 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
164 error "(4) unexpected status"
167 local repaired=$($SHOW_NAMESPACE |
168 awk '/^updated_phase1/ { print $2 }')
169 [ $repaired -eq 1 ] ||
170 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
172 mount_client $MOUNT || error "(6) Fail to start client!"
174 #define OBD_FAIL_FID_LOOKUP 0x1505
175 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
176 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
178 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
181 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
# Test 1b steps: create "dummy" with fail_loc 0x1502 set, run a reset
# namespace LFSCK with fail_loc 0x1506 set, expect updated_phase1 == 1, then
# stat the file from a client with fail_loc 0x1505 set.
187 #define OBD_FAIL_FID_INLMA 0x1502
188 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
189 touch $DIR/$tdir/dummy
191 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
193 #define OBD_FAIL_FID_NOLMA 0x1506
194 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
195 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
196 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
197 mdd.${MDT_DEV}.lfsck_namespace |
198 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
200 error "(4) unexpected status"
203 local repaired=$($SHOW_NAMESPACE |
204 awk '/^updated_phase1/ { print $2 }')
205 [ $repaired -eq 1 ] ||
206 error "(5) Fail to repair missed FID-in-LMA: $repaired"
208 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
209 mount_client $MOUNT || error "(6) Fail to start client!"
211 #define OBD_FAIL_FID_LOOKUP 0x1505
212 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
213 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
215 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
218 run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
# Test 2a steps: create "dummy" with fail_loc 0x1603 set, run a reset
# namespace LFSCK to completion, expect updated_phase1 == 1, then check from a
# client that the link count is 1 and that path2fid/fid2path round-trips.
223 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
224 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
225 touch $DIR/$tdir/dummy
227 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
229 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
230 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
231 mdd.${MDT_DEV}.lfsck_namespace |
232 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
234 error "(4) unexpected status"
237 local repaired=$($SHOW_NAMESPACE |
238 awk '/^updated_phase1/ { print $2 }')
239 [ $repaired -eq 1 ] ||
240 error "(5) Fail to repair crashed linkEA: $repaired"
242 mount_client $MOUNT || error "(6) Fail to start client!"
244 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
245 error "(7) Fail to stat $DIR/$tdir/dummy"
# Map the path to a FID and back; the result must be the original path.
247 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
248 local dummyname=$($LFS fid2path $DIR $dummyfid)
249 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
250 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
252 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
# Test 2b steps: create "dummy" with fail_loc 0x1604 set, run a reset
# namespace LFSCK to completion, expect updated_phase2 == 1, then check from a
# client that the link count is 1 and that path2fid/fid2path round-trips.
258 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
259 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
260 touch $DIR/$tdir/dummy
262 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
264 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
265 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
266 mdd.${MDT_DEV}.lfsck_namespace |
267 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
269 error "(4) unexpected status"
# Unlike test 2a, the counter checked here is updated_phase2.
272 local repaired=$($SHOW_NAMESPACE |
273 awk '/^updated_phase2/ { print $2 }')
274 [ $repaired -eq 1 ] ||
275 error "(5) Fail to repair crashed linkEA: $repaired"
277 mount_client $MOUNT || error "(6) Fail to start client!"
279 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
280 error "(7) Fail to stat $DIR/$tdir/dummy"
282 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
283 local dummyname=$($LFS fid2path $DIR $dummyfid)
284 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
285 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
287 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
# Test 2c steps: create "dummy" with fail_loc 0x1605 set, run a reset
# namespace LFSCK to completion, expect updated_phase2 == 1, then check from a
# client that the link count is 1 and that path2fid/fid2path round-trips.
# The setup section adds 2c to ALWAYS_EXCEPT for MDS versions <= 2.4.90.
293 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
294 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
295 touch $DIR/$tdir/dummy
297 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
299 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
300 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
301 mdd.${MDT_DEV}.lfsck_namespace |
302 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
304 error "(4) unexpected status"
307 local repaired=$($SHOW_NAMESPACE |
308 awk '/^updated_phase2/ { print $2 }')
309 [ $repaired -eq 1 ] ||
310 error "(5) Fail to repair crashed linkEA: $repaired"
312 mount_client $MOUNT || error "(6) Fail to start client!"
314 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
315 error "(7) Fail to stat $DIR/$tdir/dummy"
317 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
318 local dummyname=$($LFS fid2path $DIR $dummyfid)
319 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
320 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
322 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
# Test 4 steps: stop the client and the MDS, run mds_backup_restore, restart
# the MDS with the noscrub mount options, run a reset namespace LFSCK and
# check its flags, status and repair count, then list the directory from a
# client with fail_loc 0x1505 set.
327 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
328 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
330 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
331 echo "start $SINGLEMDS with disabling OI scrub"
332 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
333 error "(2) Fail to start MDS!"
335 #define OBD_FAIL_LFSCK_DELAY2 0x1601
336 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
337 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
# Wait for the flags field (not the status) to read "inconsistent".
338 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
339 mdd.${MDT_DEV}.lfsck_namespace |
340 awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || {
342 error "(5) unexpected status"
345 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
346 [ "$STATUS" == "scanning-phase1" ] ||
347 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
349 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
350 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
351 mdd.${MDT_DEV}.lfsck_namespace |
352 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
354 error "(7) unexpected status"
# After completion the flags field must be empty.
357 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
358 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
360 local repaired=$($SHOW_NAMESPACE |
361 awk '/^updated_phase1/ { print $2 }')
362 [ $repaired -ge 9 ] ||
363 error "(9) Fail to repair crashed linkEA: $repaired"
365 mount_client $MOUNT || error "(10) Fail to start client!"
367 #define OBD_FAIL_FID_LOOKUP 0x1505
368 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
369 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
# The listing-size check runs only for MDS versions >= 2.5.58, or in the
# range [2.5.4, 2.5.11).
371 local server_version=$(lustre_version_code $SINGLEMDS)
372 if [[ $server_version -ge $(version_code 2.5.58) ]] ||
373 [[ $server_version -ge $(version_code 2.5.4) &&
374 $server_version -lt $(version_code 2.5.11) ]]; then
375 local count=$(ls -al $DIR/$tdir | wc -l)
376 [ $count -gt 9 ] || error "(12) namespace LFSCK failed"
379 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
382 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
# Test 5 steps: same flow as test 4, but mds_backup_restore gets an extra
# argument "1", the expected flags are "inconsistent,upgrade", at least 2
# repairs are required, and the client checks stat, ls and a
# path2fid/fid2path round trip on "dummy".
387 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
388 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
390 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
391 echo "start $SINGLEMDS with disabling OI scrub"
392 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
393 error "(2) Fail to start MDS!"
395 #define OBD_FAIL_LFSCK_DELAY2 0x1601
396 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
397 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
398 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
399 mdd.${MDT_DEV}.lfsck_namespace |
400 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || {
402 error "(5) unexpected status"
405 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
406 [ "$STATUS" == "scanning-phase1" ] ||
407 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
409 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
410 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
411 mdd.${MDT_DEV}.lfsck_namespace |
412 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
414 error "(7) unexpected status"
417 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
418 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
420 local repaired=$($SHOW_NAMESPACE |
421 awk '/^updated_phase1/ { print $2 }')
422 [ $repaired -ge 2 ] ||
423 error "(9) Fail to repair crashed linkEA: $repaired"
425 mount_client $MOUNT || error "(10) Fail to start client!"
427 #define OBD_FAIL_FID_LOOKUP 0x1505
428 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
429 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
431 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
433 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
435 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
436 local dummyname=$($LFS fid2path $DIR $dummyfid)
437 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
438 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
440 run_test 5 "LFSCK can handle IGIF object upgrading"
# Test 6a steps: start a reset namespace LFSCK with fail_loc 0x1600 set, force
# it to "failed" with fail_loc 0x80001608, record last_checkpoint_position,
# start again without -r and require latest_start_position to be larger, then
# let the scan complete.
445 #define OBD_FAIL_LFSCK_DELAY1 0x1600
446 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
447 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
449 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
450 [ "$STATUS" == "scanning-phase1" ] ||
451 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
453 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
455 # Fail the LFSCK to guarantee there is at least one checkpoint
456 #define OBD_FAIL_LFSCK_FATAL1 0x1608
457 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
458 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
459 mdd.${MDT_DEV}.lfsck_namespace |
460 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
462 error "(4) unexpected status"
465 local POS0=$($SHOW_NAMESPACE |
466 awk '/^last_checkpoint_position/ { print $2 }' |
469 #define OBD_FAIL_LFSCK_DELAY1 0x1600
470 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
471 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
473 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
474 [ "$STATUS" == "scanning-phase1" ] ||
475 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
477 local POS1=$($SHOW_NAMESPACE |
478 awk '/^latest_start_position/ { print $2 }' |
480 [ $POS0 -lt $POS1 ] ||
481 error "(7) Expect larger than: $POS0, but got $POS1"
483 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
484 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
485 mdd.${MDT_DEV}.lfsck_namespace |
486 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
488 error "(8) unexpected status"
491 run_test 6a "LFSCK resumes from last checkpoint (1)"
# Test 6b steps: like test 6a but with fail_loc 0x1601 and 0x80001609. Both
# the 2nd and the 4th field of the position lines are recorded (O_POS*, D_POS*).
# If either 4th field is "N/A" the 2nd fields are compared, otherwise the 4th.
496 #define OBD_FAIL_LFSCK_DELAY2 0x1601
497 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
498 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
500 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
501 [ "$STATUS" == "scanning-phase1" ] ||
502 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
504 # Sleep 5 sec to guarantee that we are in the directory scanning
506 # Fail the LFSCK to guarantee there is at least one checkpoint
507 #define OBD_FAIL_LFSCK_FATAL2 0x1609
508 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
509 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
510 mdd.${MDT_DEV}.lfsck_namespace |
511 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
513 error "(4) unexpected status"
516 local O_POS0=$($SHOW_NAMESPACE |
517 awk '/^last_checkpoint_position/ { print $2 }' |
520 local D_POS0=$($SHOW_NAMESPACE |
521 awk '/^last_checkpoint_position/ { print $4 }')
523 #define OBD_FAIL_LFSCK_DELAY2 0x1601
524 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
525 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
527 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
528 [ "$STATUS" == "scanning-phase1" ] ||
529 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
531 local O_POS1=$($SHOW_NAMESPACE |
532 awk '/^latest_start_position/ { print $2 }' |
534 local D_POS1=$($SHOW_NAMESPACE |
535 awk '/^latest_start_position/ { print $4 }')
537 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
538 [ $O_POS0 -lt $O_POS1 ] ||
539 error "(7.1) $O_POS1 is not larger than $O_POS0"
541 [ $D_POS0 -lt $D_POS1 ] ||
542 error "(7.2) $D_POS1 is not larger than $D_POS0"
545 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
546 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
547 mdd.${MDT_DEV}.lfsck_namespace |
548 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
550 error "(8) unexpected status"
553 run_test 6b "LFSCK resumes from last checkpoint (2)"
# Test 7a steps: start a reset namespace LFSCK with fail_loc 0x1601 set, stop
# and restart the MDS while the status is "scanning-phase1", expect the same
# status after the restart, then let the scan complete.
560 #define OBD_FAIL_LFSCK_DELAY2 0x1601
561 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
562 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
564 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
565 [ "$STATUS" == "scanning-phase1" ] ||
566 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
568 # Sleep 3 sec to guarantee that at least one object is processed by LFSCK
570 echo "stop $SINGLEMDS"
571 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
573 echo "start $SINGLEMDS"
574 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
575 error "(5) Fail to start MDS!"
# No lfsck_start is issued here; the status is read straight after the restart.
577 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
578 [ "$STATUS" == "scanning-phase1" ] ||
579 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
581 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
582 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
583 mdd.${MDT_DEV}.lfsck_namespace |
584 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
586 error "(7) unexpected status"
589 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
# Test 7b steps: create 20 files with fail_loc 0x1604 set, start a reset
# namespace LFSCK with fail_loc 0x1602 set, wait for "scanning-phase2",
# restart the MDS, expect "scanning-phase2" again, then let the scan complete.
595 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
596 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
597 for ((i = 0; i < 20; i++)); do
598 touch $DIR/$tdir/dummy${i}
601 #define OBD_FAIL_LFSCK_DELAY3 0x1602
602 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
603 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
604 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
605 mdd.${MDT_DEV}.lfsck_namespace |
606 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
608 error "(4) unexpected status"
611 echo "stop $SINGLEMDS"
612 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
614 echo "start $SINGLEMDS"
615 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
616 error "(6) Fail to start MDS!"
618 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
619 [ "$STATUS" == "scanning-phase2" ] ||
620 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
622 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
623 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
624 mdd.${MDT_DEV}.lfsck_namespace |
625 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
627 error "(8) unexpected status"
630 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
# Test 8 steps: walk the namespace LFSCK status through, in order, init,
# scanning-phase1, stopped, scanning-phase1, failed, scanning-phase1, crashed,
# scanning-phase1, paused, scanning-phase2 and completed, using the fail_loc
# values set below and two MDS restarts.
635 formatall > /dev/null
641 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
642 [ "$STATUS" == "init" ] ||
643 error "(2) Expect 'init', but got '$STATUS'"
# Create one directory with fail_loc 0x1603 set and five files with 0x1604 set.
645 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
646 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
647 mkdir $DIR/$tdir/crashed
649 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
650 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
651 for ((i = 0; i < 5; i++)); do
652 touch $DIR/$tdir/dummy${i}
655 umount_client $MOUNT || error "(3) Fail to stop client!"
657 #define OBD_FAIL_LFSCK_DELAY2 0x1601
658 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
659 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
661 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
662 [ "$STATUS" == "scanning-phase1" ] ||
663 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
665 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
667 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
668 [ "$STATUS" == "stopped" ] ||
669 error "(7) Expect 'stopped', but got '$STATUS'"
671 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
673 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
674 [ "$STATUS" == "scanning-phase1" ] ||
675 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
677 #define OBD_FAIL_LFSCK_FATAL2 0x1609
678 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
679 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
680 mdd.${MDT_DEV}.lfsck_namespace |
681 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
683 error "(10) unexpected status"
686 #define OBD_FAIL_LFSCK_DELAY1 0x1600
687 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
688 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
690 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
691 [ "$STATUS" == "scanning-phase1" ] ||
692 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
# First MDS restart: fail_loc 0x160a before the stop, 0x160b before the start;
# the status afterwards must be "crashed".
694 #define OBD_FAIL_LFSCK_CRASH 0x160a
695 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
698 echo "stop $SINGLEMDS"
699 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
701 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
702 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
704 echo "start $SINGLEMDS"
705 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
706 error "(14) Fail to start MDS!"
708 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
709 [ "$STATUS" == "crashed" ] ||
710 error "(15) Expect 'crashed', but got '$STATUS'"
712 #define OBD_FAIL_LFSCK_DELAY2 0x1601
713 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
714 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
716 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
717 [ "$STATUS" == "scanning-phase1" ] ||
718 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
# Second MDS restart: only fail_loc 0x160b is set before the start; the status
# afterwards must be "paused".
720 echo "stop $SINGLEMDS"
721 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
723 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
724 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
726 echo "start $SINGLEMDS"
727 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
728 error "(19) Fail to start MDS!"
730 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
731 [ "$STATUS" == "paused" ] ||
732 error "(20) Expect 'paused', but got '$STATUS'"
734 #define OBD_FAIL_LFSCK_DELAY3 0x1602
735 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
737 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
738 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
739 mdd.${MDT_DEV}.lfsck_namespace |
740 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
742 error "(22) unexpected status"
745 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
746 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
747 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
749 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
750 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
751 mdd.${MDT_DEV}.lfsck_namespace |
752 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
754 error "(24) unexpected status"
757 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
758 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
760 run_test 8 "LFSCK state machine"
# Test 9a steps: start a reset namespace LFSCK with "-s $BASE_SPEED1", check
# that average_speed_phase1 stays under a computed maximum, raise the limit to
# $BASE_SPEED2 via lfsck_speed_limit and check the average against a minimum
# and a maximum, then set the limit to 0 and expect "completed".
# RUN_TIME1, RUN_TIME2 and TIME_DIFF are assigned on lines not shown here.
763 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
764 skip "Testing on UP system, the speed may be inaccurate."
770 local BASE_SPEED1=100
772 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
775 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
776 [ "$STATUS" == "scanning-phase1" ] ||
777 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
779 local SPEED=$($SHOW_NAMESPACE |
780 awk '/^average_speed_phase1/ { print $2 }')
782 # There may be a timing error; normally it should be less than 2 seconds.
783 # We allow another 20% for scheduling error.
785 # MAX_MARGIN = 1.2 = 12 / 10
786 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
787 RUN_TIME1 * 12 / 10))
788 [ $SPEED -lt $MAX_SPEED ] ||
789 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
792 local BASE_SPEED2=300
794 do_facet $SINGLEMDS \
795 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
798 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
799 # MIN_MARGIN = 0.8 = 8 / 10
800 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
801 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
802 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
803 [ $SPEED -gt $MIN_SPEED ] ||
804 error "(5) Got speed $SPEED, expected more than $MIN_SPEED"
806 # MAX_MARGIN = 1.2 = 12 / 10
807 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
808 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
809 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
810 [ $SPEED -lt $MAX_SPEED ] ||
811 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
813 do_facet $SINGLEMDS \
814 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
816 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
817 [ "$STATUS" == "completed" ] ||
818 error "(7) Expect 'completed', but got '$STATUS'"
820 run_test 9a "LFSCK speed control (1)"
# Test 9b steps: create 50 directories with 50 files each with fail_loc 0x1604
# set, run a reset LFSCK with fail_loc 0x160c set until the status is
# "stopped", then restart with "-s $BASE_SPEED1" and apply the same speed
# checks as test 9a to average_speed_phase2 while in "scanning-phase2".
# BASE_SPEED1, RUN_TIME1, RUN_TIME2 and TIME_DIFF are assigned on lines not
# shown here.
823 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
824 skip "Testing on UP system, the speed may be inaccurate."
830 echo "Preparing another 50 * 50 files (with error) at $(date)."
831 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
832 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
833 createmany -d $DIR/$tdir/d 50
834 createmany -m $DIR/$tdir/f 50
835 for ((i = 0; i < 50; i++)); do
836 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
839 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
840 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
841 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
# This wait passes 10 as its last argument where the other waits pass 6.
842 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
843 mdd.${MDT_DEV}.lfsck_namespace |
844 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
846 error "(5) unexpected status"
849 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
850 echo "Prepared at $(date)."
854 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
857 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
858 [ "$STATUS" == "scanning-phase2" ] ||
859 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
861 local SPEED=$($SHOW_NAMESPACE |
862 awk '/^average_speed_phase2/ { print $2 }')
863 # There may be a timing error; normally it should be less than 2 seconds.
864 # We allow another 20% for scheduling error.
866 # MAX_MARGIN = 1.2 = 12 / 10
867 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
868 RUN_TIME1 * 12 / 10))
869 [ $SPEED -lt $MAX_SPEED ] ||
870 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
873 local BASE_SPEED2=150
875 do_facet $SINGLEMDS \
876 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
879 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
880 # MIN_MARGIN = 0.8 = 8 / 10
881 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
882 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
883 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
884 [ $SPEED -gt $MIN_SPEED ] ||
885 error "(9) Got speed $SPEED, expected more than $MIN_SPEED"
887 # MAX_MARGIN = 1.2 = 12 / 10
888 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
889 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
890 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
891 [ $SPEED -lt $MAX_SPEED ] ||
892 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
894 do_facet $SINGLEMDS \
895 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
896 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
897 mdd.${MDT_DEV}.lfsck_namespace |
898 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
900 error "(11) unexpected status"
903 run_test 9b "LFSCK speed control (2)"
# Test 10 steps: create even-numbered entries with fail_loc 0x1603 set and
# odd-numbered entries with fail_loc 0x1604 set, start a reset LFSCK limited
# with "-s 100", and run ordinary file operations from a client while the
# status is "scanning-phase1"; finally set the speed limit to 0 and wait for
# "completed".
909 echo "Preparing more files with error at $(date)."
910 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
911 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
913 for ((i = 0; i < 1000; i = $((i+2)))); do
914 mkdir -p $DIR/$tdir/d${i}
915 touch $DIR/$tdir/f${i}
916 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
919 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
920 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
922 for ((i = 1; i < 1000; i = $((i+2)))); do
923 mkdir -p $DIR/$tdir/d${i}
924 touch $DIR/$tdir/f${i}
925 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
928 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
929 echo "Prepared at $(date)."
931 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
934 mount_client $MOUNT || error "(3) Fail to start client!"
936 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
939 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
940 [ "$STATUS" == "scanning-phase1" ] ||
941 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Client-side operations issued during the scan: list, create, mkdir, unlink,
# recursive remove, rename, hard link and symlink.
943 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
945 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
947 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
949 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
951 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
953 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
955 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
957 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
958 error "(14) Fail to softlink!"
# The scan must still be in phase 1 after those operations.
960 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
961 [ "$STATUS" == "scanning-phase1" ] ||
962 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
964 do_facet $SINGLEMDS \
965 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
966 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
967 mdd.${MDT_DEV}.lfsck_namespace |
968 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
970 error "(16) unexpected status"
973 run_test 10 "System is available during LFSCK scanning"
# Test 20a steps: write a striped file, touch it with chown while fail_loc
# 0x1406 is set on mds1, remount the client, then run getstripe and dd on the
# file. The dd result is not checked; the block returns 0 unconditionally.
976 local server_version=$(lustre_version_code $SINGLEMDS)
# NOTE(review): the range check below starts at 2.5.3 while the skip message
# says 2.5.4+ -- confirm which lower bound is intended.
978 [[ $server_version -ge $(version_code 2.5.60) ]] ||
979 [[ $server_version -ge $(version_code 2.5.3) &&
980 $server_version -lt $(version_code 2.5.11) ]] ||
981 { skip "Need MDS version 2.5.4+ or 2.5.60+"; return; }
983 [ $OSTCOUNT -lt 2 ] &&
984 skip "The test needs at least 2 OSTs" && return
987 echo "For old client, even though it cannot access the file with"
988 echo "LOV EA hole, it should not cause the system crash."
993 $LFS mkdir -i 0 $DIR/$tdir/a1
# Stripe count is 3 when there are more than 2 OSTs; the 2-stripe setstripe
# below is presumably the else branch, which is not visible here.
994 if [ $OSTCOUNT -gt 2 ]; then
995 $LFS setstripe -c 3 -i 0 -s 1M $DIR/$tdir/a1
998 $LFS setstripe -c 2 -i 0 -s 1M $DIR/$tdir/a1
1002 # 256 blocks on stripe0.
1003 # 1 block on stripe1 in the 2-OST case.
1004 # 256 blocks on stripe1 in the other cases.
1005 # 1 block on stripe2 if there are more than 2 OSTs.
1006 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
1008 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
1011 $LFS getstripe $DIR/$tdir/a1/f0
1013 cancel_lru_locks osc
1015 echo "Inject failure..."
1016 echo "To make a LOV EA hole..."
1017 #define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406
1018 do_facet mds1 $LCTL set_param fail_loc=0x1406
1019 chown 1.1 $DIR/$tdir/a1/f0
1021 umount_client $MOUNT
1024 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
1026 mount_client $MOUNT || error "Fail to start client!"
1028 $LFS getstripe $DIR/$tdir/a1/f0
1029 dd if=$DIR/$tdir/a1/f0 of=/dev/null
1030 return 0 # reaching this point means the client did not crash
1032 run_test 20a "Don't crash client while access with LOV EA hole"
# Remove "lfsck" from the debug setting again; a failure here is ignored.
1034 $LCTL set_param debug=-lfsck > /dev/null || true
1036 # Restore the MDS/OST sizes and OST count saved during setup.
1037 MDSSIZE=${SAVED_MDSSIZE}
1038 OSTSIZE=${SAVED_OSTSIZE}
1039 OSTCOUNT=${SAVED_OSTCOUNT}
1041 # Finally, clean up the system.