3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
# Suite preamble: build the exclusion list, load the test framework and the
# site configuration, then exclude tests the running servers cannot support.
# NOTE(review): the embedded line numbers show gaps, i.e. some original
# lines are missing from this listing; statements that look truncated
# (e.g. a trailing '&&') presumably continue on lines not shown here.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# LUSTRE defaults to the parent of the directory that holds this script.
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
# Source the configuration; CONFIG defaults to $LUSTRE/tests/cfg/$NAME.sh.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with status 0 when require_dsh_mds does not succeed.
20 require_dsh_mds || exit 0
# Keep copies of the incoming values; OSTCOUNT is capped further down.
22 SAVED_MDSSIZE=${MDSSIZE}
23 SAVED_OSTSIZE=${OSTSIZE}
24 SAVED_OSTCOUNT=${OSTCOUNT}
25 # use small MDS + OST sizes to speed up formatting
26 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
29 # there is no need for many OSTs; cap the count to reduce the format/start/stop overhead
30 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
32 # build up a clean test environment.
# MDS older than 2.3.60: the whole suite is skipped.
36 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
37 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
# MDS at or below 2.4.90: test 2c is added to the exclusion list.
40 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
41 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
# ost1 older than 2.5.55: tests 11 through 21 are added to the exclusion list.
43 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
44 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
# Turn on the lfsck debug flag; a failure here is deliberately ignored.
48 $LCTL set_param debug=+lfsck > /dev/null || true
# Device names and command strings shared by the tests below.
# The *_NAMESPACE / *_LAYOUT / STOP_LFSCK variables hold whole command lines
# and are expanded unquoted at the call sites, where extra options such as
# -r or -s are appended (e.g. "$START_NAMESPACE -r").
50 MDT_DEV="${FSNAME}-MDT0000"
51 OST_DEV="${FSNAME}-OST0000"
# Strip the "mds" text from $SINGLEMDS and pass the remainder to mdsdevname.
52 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# Start commands: namespace and layout LFSCK on the MDT, layout LFSCK on ost1.
53 START_NAMESPACE="do_facet $SINGLEMDS \
54 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
55 START_LAYOUT="do_facet $SINGLEMDS \
56 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
57 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
58 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Status readers: each prints the corresponding lfsck_* parameter (-n: value only).
59 SHOW_NAMESPACE="do_facet $SINGLEMDS \
60 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
61 SHOW_LAYOUT="do_facet $SINGLEMDS \
62 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
63 SHOW_LAYOUT_ON_OST="do_facet ost1 \
64 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Server mount options; the NOSCRUB variant adds "noscrub".
65 MOUNT_OPTS_SCRUB="-o user_xattr"
66 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
# File-population helper body: fills $DIR/$tdir with directories and files.
# NOTE(review): nfiles, ndirs and igif are assigned on lines not shown in
# this listing, as are the function header and the closing 'done'/'fi'
# keywords - confirm against the full file.
75 echo "preparing... $nfiles * $ndirs files will be created $(date)."
# When igif is non-empty, the files are created under fail_loc 0x1504.
76 if [ ! -z $igif ]; then
77 #define OBD_FAIL_FID_IGIF 0x1504
78 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
# Seed the directory with the test scripts themselves.
81 cp $LUSTRE/tests/*.sh $DIR/$tdir/
82 if [ $ndirs -gt 0 ]; then
# createmany with prefix "d" (-d) and prefix "f" (-m), $ndirs entries each.
83 createmany -d $DIR/$tdir/d $ndirs
84 createmany -m $DIR/$tdir/f $ndirs
85 if [ $nfiles -gt 0 ]; then
# Populate every d${i} with $nfiles entries; abort the test on failure.
86 for ((i = 0; i < $ndirs; i++)); do
87 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
88 /dev/null || error "createmany $nfiles"
# A second createmany -d run, this time with prefix "e".
91 createmany -d $DIR/$tdir/e $ndirs
# In the igif case, add one more file and then clear the fail_loc.
94 if [ ! -z $igif ]; then
95 touch $DIR/$tdir/dummy
96 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
99 echo "prepared $(date)."
# Test 0: manual control of the namespace LFSCK.
# Visible flow: start with -r under fail_loc 0x1600 and expect
# 'scanning-phase1'; stop and expect 'stopped'; start again and expect
# 'scanning-phase1'; clear fail_loc/fail_val and wait for 'completed';
# check that updated_phase1 is 0; reset with -r, wait for 'completed' again
# and check that success_count grew by exactly one; finally stop everything.
# NOTE(review): the function header, setup and closing braces are on lines
# not shown in this listing.
105 #define OBD_FAIL_LFSCK_DELAY1 0x1600
106 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
107 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
# The status parameter must be readable at all before it is parsed.
109 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
# STATUS is the second field of the line starting with "status".
111 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
112 [ "$STATUS" == "scanning-phase1" ] ||
113 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
115 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
117 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
118 [ "$STATUS" == "stopped" ] ||
119 error "(6) Expect 'stopped', but got '$STATUS'"
# Restart without -r (no reset requested).
121 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
123 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
124 [ "$STATUS" == "scanning-phase1" ] ||
125 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
# Remove the fault injection and wait for the status to read "completed".
127 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
128 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
129 mdd.${MDT_DEV}.lfsck_namespace |
130 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
132 error "(9) unexpected status"
# Nothing was corrupted in this test, so no repair is expected.
135 local repaired=$($SHOW_NAMESPACE |
136 awk '/^updated_phase1/ { print $2 }')
137 [ $repaired -eq 0 ] ||
138 error "(10) Expect nothing to be repaired, but got: $repaired"
# Record success_count, rerun with -r, and expect the count to be one higher.
140 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
141 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
142 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
143 mdd.${MDT_DEV}.lfsck_namespace |
144 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
146 error "(12) unexpected status"
149 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
150 [ $((scanned1 + 1)) -eq $scanned2 ] ||
151 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
153 echo "stopall, should NOT crash LU-3649"
154 stopall || error "(14) Fail to stopall"
156 run_test 0 "Control LFSCK manually"
# Test 1a: repair of a crashed FID-in-dirent (only run when the MDS backing
# filesystem is ldiskfs; skipped otherwise).
# Visible flow: create $DIR/$tdir/dummy under fail_loc 0x1501, run the
# namespace LFSCK with -r until 'completed', expect exactly one repair,
# mount the client and list the directory under fail_loc 0x1505.
# NOTE(review): header, setup and closing braces are on lines not shown.
159 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
160 skip "OI Scrub not implemented for ZFS" && return
164 #define OBD_FAIL_FID_INDIR 0x1501
165 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
166 touch $DIR/$tdir/dummy
168 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
170 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
171 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
172 mdd.${MDT_DEV}.lfsck_namespace |
173 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
175 error "(4) unexpected status"
# Prefer the dirent_repaired counter; fall back to updated_phase1 when the
# server does not report dirent_repaired.
178 local repaired=$($SHOW_NAMESPACE |
179 awk '/^dirent_repaired/ { print $2 }')
180 # for interop with old server
181 [ -z "$repaired" ] &&
182 repaired=$($SHOW_NAMESPACE |
183 awk '/^updated_phase1/ { print $2 }')
185 [ $repaired -eq 1 ] ||
186 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
188 mount_client $MOUNT || error "(6) Fail to start client!"
# The listing must succeed while fail_loc 0x1505 is set.
190 #define OBD_FAIL_FID_LOOKUP 0x1505
191 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
192 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
194 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
196 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
# Test 1b: repair of a missed FID-in-LMA (only run when the MDS backing
# filesystem is ldiskfs; skipped otherwise).
# Visible flow: create $DIR/$tdir/dummy under fail_loc 0x1502, run the
# namespace LFSCK with -r under fail_loc 0x1506 until 'completed', expect
# exactly one repair, mount the client and stat the file under fail_loc 0x1505.
# NOTE(review): header, setup and closing braces are on lines not shown.
200 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
201 skip "OI Scrub not implemented for ZFS" && return
205 #define OBD_FAIL_FID_INLMA 0x1502
206 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
207 touch $DIR/$tdir/dummy
209 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Unlike test 1a, a second fail_loc stays active while the LFSCK runs.
211 #define OBD_FAIL_FID_NOLMA 0x1506
212 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
213 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
214 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
215 mdd.${MDT_DEV}.lfsck_namespace |
216 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
218 error "(4) unexpected status"
# Prefer dirent_repaired; fall back to updated_phase1 when it is absent.
221 local repaired=$($SHOW_NAMESPACE |
222 awk '/^dirent_repaired/ { print $2 }')
223 # for interop with old server
224 [ -z "$repaired" ] &&
225 repaired=$($SHOW_NAMESPACE |
226 awk '/^updated_phase1/ { print $2 }')
228 [ $repaired -eq 1 ] ||
229 error "(5) Fail to repair missed FID-in-LMA: $repaired"
# Clear fail_loc 0x1506 before bringing the client back.
231 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
232 mount_client $MOUNT || error "(6) Fail to start client!"
# The stat must succeed while fail_loc 0x1505 is set.
234 #define OBD_FAIL_FID_LOOKUP 0x1505
235 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
236 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
238 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
240 run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
# Test 2a: repair of a crashed linkEA entry.
# Visible flow: create $DIR/$tdir/dummy under fail_loc 0x1603, run the
# namespace LFSCK with -r until 'completed', expect exactly one repair,
# mount the client, check the link count and the FID <-> path round trip.
# NOTE(review): header, setup and closing braces are on lines not shown.
245 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
246 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
247 touch $DIR/$tdir/dummy
249 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
251 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
252 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
253 mdd.${MDT_DEV}.lfsck_namespace |
254 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
256 error "(4) unexpected status"
# Prefer linkea_repaired; fall back to updated_phase1 when it is absent.
259 local repaired=$($SHOW_NAMESPACE |
260 awk '/^linkea_repaired/ { print $2 }')
261 # for interop with old server
262 [ -z "$repaired" ] &&
263 repaired=$($SHOW_NAMESPACE |
264 awk '/^updated_phase1/ { print $2 }')
266 [ $repaired -eq 1 ] ||
267 error "(5) Fail to repair crashed linkEA: $repaired"
269 mount_client $MOUNT || error "(6) Fail to start client!"
# stat output must contain "Links: 1".
271 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
272 error "(7) Fail to stat $DIR/$tdir/dummy"
# path2fid followed by fid2path must give back the original path.
274 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
275 local dummyname=$($LFS fid2path $DIR $dummyfid)
276 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
277 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
279 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
# Test 2b: removal of an invalid linkEA entry.
# Visible flow: create $DIR/$tdir/dummy under fail_loc 0x1604, run the
# namespace LFSCK with -r until 'completed', expect updated_phase2 to be 1,
# mount the client, check the link count and the FID <-> path round trip.
# NOTE(review): header, setup and closing braces are on lines not shown.
285 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
286 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
287 touch $DIR/$tdir/dummy
289 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
291 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
292 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
293 mdd.${MDT_DEV}.lfsck_namespace |
294 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
296 error "(4) unexpected status"
# This test reads the phase-2 counter, not the phase-1 one.
299 local repaired=$($SHOW_NAMESPACE |
300 awk '/^updated_phase2/ { print $2 }')
301 [ $repaired -eq 1 ] ||
302 error "(5) Fail to repair crashed linkEA: $repaired"
304 mount_client $MOUNT || error "(6) Fail to start client!"
# stat output must contain "Links: 1".
306 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
307 error "(7) Fail to stat $DIR/$tdir/dummy"
# path2fid followed by fid2path must give back the original path.
309 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
310 local dummyname=$($LFS fid2path $DIR $dummyfid)
311 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
312 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
314 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
# Test 2c: removal of a repeated linkEA entry.
# Visible flow: create $DIR/$tdir/dummy under fail_loc 0x1605, run the
# namespace LFSCK with -r until 'completed', expect updated_phase2 to be 1,
# mount the client, check the link count and the FID <-> path round trip.
# NOTE(review): header, setup and closing braces are on lines not shown.
320 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
321 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
322 touch $DIR/$tdir/dummy
324 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
326 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
327 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
328 mdd.${MDT_DEV}.lfsck_namespace |
329 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
331 error "(4) unexpected status"
# This test reads the phase-2 counter, not the phase-1 one.
334 local repaired=$($SHOW_NAMESPACE |
335 awk '/^updated_phase2/ { print $2 }')
336 [ $repaired -eq 1 ] ||
337 error "(5) Fail to repair crashed linkEA: $repaired"
339 mount_client $MOUNT || error "(6) Fail to start client!"
# stat output must contain "Links: 1".
341 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
342 error "(7) Fail to stat $DIR/$tdir/dummy"
# path2fid followed by fid2path must give back the original path.
344 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
345 local dummyname=$($LFS fid2path $DIR $dummyfid)
346 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
347 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
349 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
# Test 4: FID-in-dirent rebuild after an MDT file-level backup/restore
# (only run when the MDS backing filesystem is ldiskfs; skipped otherwise).
# Visible flow: stop client and MDS, run mds_backup_restore, restart the MDS
# with the noscrub mount options, start the namespace LFSCK with -r under
# fail_loc 0x1601, expect flags 'inconsistent' and status 'scanning-phase1',
# clear the fault and wait for 'completed', expect empty flags and at least
# 9 repairs, then mount the client and list the directory under fail_loc 0x1505.
# NOTE(review): header, setup and closing braces are on lines not shown.
353 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
354 skip "OI Scrub not implemented for ZFS" && return
357 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
358 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
360 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
361 echo "start $SINGLEMDS with disabling OI scrub"
362 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
363 error "(2) Fail to start MDS!"
365 #define OBD_FAIL_LFSCK_DELAY2 0x1601
366 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
367 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
# Here the awk filter watches the "flags" line rather than "status".
368 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
369 mdd.${MDT_DEV}.lfsck_namespace |
370 awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || {
372 error "(5) unexpected status"
375 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
376 [ "$STATUS" == "scanning-phase1" ] ||
377 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Remove the fault injection and let the scan run to completion.
379 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
380 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
381 mdd.${MDT_DEV}.lfsck_namespace |
382 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
384 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): FLAGS is assigned without 'local' here.
387 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
388 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Prefer dirent_repaired; fall back to updated_phase1 when it is absent.
390 local repaired=$($SHOW_NAMESPACE |
391 awk '/^dirent_repaired/ { print $2 }')
392 # for interop with old server
393 [ -z "$repaired" ] &&
394 repaired=$($SHOW_NAMESPACE |
395 awk '/^updated_phase1/ { print $2 }')
397 [ $repaired -ge 9 ] ||
398 error "(9) Fail to re-generate FID-in-dirent: $repaired"
400 mount_client $MOUNT || error "(10) Fail to start client!"
# The listing must succeed while fail_loc 0x1505 is set.
402 #define OBD_FAIL_FID_LOOKUP 0x1505
403 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
404 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
405 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
407 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
# Test 5: IGIF object upgrade (only run when the MDS backing filesystem is
# ldiskfs; skipped otherwise).
# Visible flow: stop client and MDS, run mds_backup_restore with a second
# argument of 1, restart the MDS with the noscrub mount options, start the
# namespace LFSCK with -r under fail_loc 0x1601, expect flags
# 'inconsistent,upgrade' and status 'scanning-phase1', clear the fault and
# wait for 'completed', expect empty flags and at least 2 repairs, then mount
# the client, stat/ls under fail_loc 0x1505 and check the FID <-> path round trip.
# NOTE(review): header, setup and closing braces are on lines not shown.
411 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
412 skip "OI Scrub not implemented for ZFS" && return
415 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
416 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
418 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
419 echo "start $SINGLEMDS with disabling OI scrub"
420 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
421 error "(2) Fail to start MDS!"
423 #define OBD_FAIL_LFSCK_DELAY2 0x1601
424 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
425 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
# Here the awk filter watches the "flags" line rather than "status".
426 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
427 mdd.${MDT_DEV}.lfsck_namespace |
428 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || {
430 error "(5) unexpected status"
433 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
434 [ "$STATUS" == "scanning-phase1" ] ||
435 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Remove the fault injection and let the scan run to completion.
437 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
438 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
439 mdd.${MDT_DEV}.lfsck_namespace |
440 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
442 error "(7) unexpected status"
# After completion the flags field must be empty.
# NOTE(review): FLAGS is assigned without 'local' here.
445 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
446 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Prefer dirent_repaired; fall back to updated_phase1 when it is absent.
448 local repaired=$($SHOW_NAMESPACE |
449 awk '/^dirent_repaired/ { print $2 }')
450 # for interop with old server
451 [ -z "$repaired" ] &&
452 repaired=$($SHOW_NAMESPACE |
453 awk '/^updated_phase1/ { print $2 }')
455 [ $repaired -ge 2 ] ||
456 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
458 mount_client $MOUNT || error "(10) Fail to start client!"
# Both the stat and the listing must succeed while fail_loc 0x1505 is set.
460 #define OBD_FAIL_FID_LOOKUP 0x1505
461 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
462 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
464 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
466 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# path2fid followed by fid2path must give back the original path.
467 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
468 local dummyname=$($LFS fid2path $DIR $dummyfid)
469 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
470 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
472 run_test 5 "LFSCK can handle IGIF object upgrading"
# Test 6a: the namespace LFSCK resumes from its last checkpoint (case 1).
# Visible flow: start with -r under fail_loc 0x1600 and expect
# 'scanning-phase1'; switch to fail_loc 0x80001608 and wait for 'failed';
# record last_checkpoint_position as POS0; start again (no -r) under
# fail_loc 0x1600, expect 'scanning-phase1' and record latest_start_position
# as POS1; require POS0 < POS1; clear the fault and wait for 'completed'.
# NOTE(review): header, setup, the sleep and the tail of the POS0/POS1
# pipelines are on lines not shown in this listing.
477 #define OBD_FAIL_LFSCK_DELAY1 0x1600
478 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
479 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
481 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
482 [ "$STATUS" == "scanning-phase1" ] ||
483 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
485 # Sleep 3 sec to guarantee at least one object processed by LFSCK
487 # Fail the LFSCK to guarantee there is at least one checkpoint
488 #define OBD_FAIL_LFSCK_FATAL1 0x1608
489 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
490 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
491 mdd.${MDT_DEV}.lfsck_namespace |
492 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
494 error "(4) unexpected status"
497 local POS0=$($SHOW_NAMESPACE |
498 awk '/^last_checkpoint_position/ { print $2 }' |
501 #define OBD_FAIL_LFSCK_DELAY1 0x1600
502 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
503 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
505 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
506 [ "$STATUS" == "scanning-phase1" ] ||
507 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
509 local POS1=$($SHOW_NAMESPACE |
510 awk '/^latest_start_position/ { print $2 }' |
512 [[ $POS0 -lt $POS1 ]] ||
513 error "(7) Expect larger than: $POS0, but got $POS1"
515 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
516 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
517 mdd.${MDT_DEV}.lfsck_namespace |
518 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
520 error "(8) unexpected status"
523 run_test 6a "LFSCK resumes from last checkpoint (1)"
# Test 6b: the namespace LFSCK resumes from its last checkpoint (case 2).
# Visible flow: start with -r under fail_loc 0x1601 and expect
# 'scanning-phase1'; switch to fail_loc 0x80001609 and wait for 'failed';
# record fields 2 and 4 of last_checkpoint_position (O_POS0 / D_POS0);
# start again (no -r) under fail_loc 0x1601, expect 'scanning-phase1' and
# record the same fields of latest_start_position (O_POS1 / D_POS1);
# compare the O_* values when either D_* value is "N/A", otherwise (per the
# 7.1/7.2 error labels) the D_* values; clear the fault and wait for 'completed'.
# NOTE(review): header, setup, the sleep, the tails of the O_POS pipelines
# and the 'else'/'fi' keywords are on lines not shown in this listing.
528 #define OBD_FAIL_LFSCK_DELAY2 0x1601
529 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
530 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
532 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
533 [ "$STATUS" == "scanning-phase1" ] ||
534 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
536 # Sleep 5 sec to guarantee that we are in the directory scanning
538 # Fail the LFSCK to guarantee there is at least one checkpoint
539 #define OBD_FAIL_LFSCK_FATAL2 0x1609
540 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
541 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
542 mdd.${MDT_DEV}.lfsck_namespace |
543 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
545 error "(4) unexpected status"
548 local O_POS0=$($SHOW_NAMESPACE |
549 awk '/^last_checkpoint_position/ { print $2 }' |
552 local D_POS0=$($SHOW_NAMESPACE |
553 awk '/^last_checkpoint_position/ { print $4 }')
555 #define OBD_FAIL_LFSCK_DELAY2 0x1601
556 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
557 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
559 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
560 [ "$STATUS" == "scanning-phase1" ] ||
561 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
563 local O_POS1=$($SHOW_NAMESPACE |
564 awk '/^latest_start_position/ { print $2 }' |
566 local D_POS1=$($SHOW_NAMESPACE |
567 awk '/^latest_start_position/ { print $4 }')
569 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
570 [[ $O_POS0 -lt $O_POS1 ]] ||
571 error "(7.1) $O_POS1 is not larger than $O_POS0"
573 [[ $D_POS0 -lt $D_POS1 ]] ||
574 error "(7.2) $D_POS1 is not larger than $D_POS0"
577 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
578 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
579 mdd.${MDT_DEV}.lfsck_namespace |
580 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
582 error "(8) unexpected status"
585 run_test 6b "LFSCK resumes from last checkpoint (2)"
# Test 7a: an LFSCK that was not stopped restarts by itself after the MDS is
# remounted (case 1).
# Visible flow: start the namespace LFSCK with -r under fail_loc 0x1601 and
# expect 'scanning-phase1'; stop and start the MDS; clear the fault and wait
# up to 30 (the last wait_update_facet argument) for 'completed' without
# issuing another lfsck_start.
# NOTE(review): header, setup, the sleep and closing braces are on lines
# not shown in this listing.
592 #define OBD_FAIL_LFSCK_DELAY2 0x1601
593 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
594 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
596 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
597 [ "$STATUS" == "scanning-phase1" ] ||
598 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
600 # Sleep 3 sec to guarantee at least one object processed by LFSCK
602 echo "stop $SINGLEMDS"
603 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
605 echo "start $SINGLEMDS"
606 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
607 error "(5) Fail to start MDS!"
609 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
610 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
611 mdd.${MDT_DEV}.lfsck_namespace |
612 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
614 error "(6) unexpected status"
617 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
# Test 7b: an LFSCK that was not stopped restarts by itself after the MDS is
# remounted (case 2, interrupted in phase 2).
# Visible flow: create dummy0..dummy19 under fail_loc 0x1604; start the
# namespace LFSCK with -r under fail_loc 0x1602 and wait for
# 'scanning-phase2'; stop and start the MDS; clear the fault and wait for
# 'completed' without issuing another lfsck_start.
# NOTE(review): header, setup, 'done' and closing braces are on lines not
# shown in this listing.
623 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
624 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
625 for ((i = 0; i < 20; i++)); do
626 touch $DIR/$tdir/dummy${i}
629 #define OBD_FAIL_LFSCK_DELAY3 0x1602
630 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
631 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
632 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
633 mdd.${MDT_DEV}.lfsck_namespace |
634 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
636 error "(4) unexpected status"
639 echo "stop $SINGLEMDS"
640 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
642 echo "start $SINGLEMDS"
643 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
644 error "(6) Fail to start MDS!"
646 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
647 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
648 mdd.${MDT_DEV}.lfsck_namespace |
649 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
651 error "(7) unexpected status"
654 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
# Test 8: walk the namespace LFSCK through its status values.
# Sequence of statuses asserted below, in order:
#   init -> scanning-phase1 -> stopped -> scanning-phase1 -> failed ->
#   scanning-phase1 -> crashed (after MDS restart) -> scanning-phase1 ->
#   paused (after MDS restart) -> scanning-phase2 -> completed
# The flags field must read 'scanned-once,inconsistent' in phase 2 and be
# empty once the run has completed.
# NOTE(review): header, setup after formatall, 'done' keywords, sleeps and
# closing braces are on lines not shown in this listing.
659 formatall > /dev/null
# A freshly formatted filesystem must report status 'init'.
665 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
666 [ "$STATUS" == "init" ] ||
667 error "(2) Expect 'init', but got '$STATUS'"
# Create one directory under fail_loc 0x1603 and five files under 0x1604.
669 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
670 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
671 mkdir $DIR/$tdir/crashed
673 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
674 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
675 for ((i = 0; i < 5; i++)); do
676 touch $DIR/$tdir/dummy${i}
679 umount_client $MOUNT || error "(3) Fail to stop client!"
# First start, then an explicit stop.
681 #define OBD_FAIL_LFSCK_DELAY2 0x1601
682 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
683 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
685 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
686 [ "$STATUS" == "scanning-phase1" ] ||
687 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
689 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
691 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
692 [ "$STATUS" == "stopped" ] ||
693 error "(7) Expect 'stopped', but got '$STATUS'"
# Start again, then force the 'failed' status with fail_loc 0x80001609.
695 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
697 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
698 [ "$STATUS" == "scanning-phase1" ] ||
699 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
701 #define OBD_FAIL_LFSCK_FATAL2 0x1609
702 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
703 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
704 mdd.${MDT_DEV}.lfsck_namespace |
705 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
707 error "(10) unexpected status"
# Start again from 'failed'.
710 #define OBD_FAIL_LFSCK_DELAY1 0x1600
711 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
712 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
714 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
715 [ "$STATUS" == "scanning-phase1" ] ||
716 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the MDS with fail_loc 0x160a set, restart it with fail_loc 0x160b
# set, and expect the status 'crashed'.
718 #define OBD_FAIL_LFSCK_CRASH 0x160a
719 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
722 echo "stop $SINGLEMDS"
723 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
725 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
726 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
728 echo "start $SINGLEMDS"
729 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
730 error "(14) Fail to start MDS!"
732 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
733 [ "$STATUS" == "crashed" ] ||
734 error "(15) Expect 'crashed', but got '$STATUS'"
# Start again from 'crashed'.
736 #define OBD_FAIL_LFSCK_DELAY2 0x1601
737 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
738 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
740 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
741 [ "$STATUS" == "scanning-phase1" ] ||
742 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
# A plain MDS stop/start (fail_loc 0x160b set before the start) must leave
# the status at 'paused'.
744 echo "stop $SINGLEMDS"
745 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
747 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
748 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
750 echo "start $SINGLEMDS"
751 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
752 error "(19) Fail to start MDS!"
754 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
755 [ "$STATUS" == "paused" ] ||
756 error "(20) Expect 'paused', but got '$STATUS'"
# Start again from 'paused' under fail_loc 0x1602 and wait for phase 2.
758 #define OBD_FAIL_LFSCK_DELAY3 0x1602
759 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
761 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
762 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
763 mdd.${MDT_DEV}.lfsck_namespace |
764 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
766 error "(22) unexpected status"
769 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
770 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
771 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
# Remove the fault injection and wait for the final 'completed' status.
773 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
774 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
775 mdd.${MDT_DEV}.lfsck_namespace |
776 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
778 error "(24) unexpected status"
781 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
782 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
784 run_test 8 "LFSCK state machine"
# Test 9a: speed limit of the namespace LFSCK during phase 1.
# Visible flow: start with -r and a limit of BASE_SPEED1, expect
# 'scanning-phase1' and an average_speed_phase1 below MAX_SPEED; raise the
# limit to BASE_SPEED2 and check the average against MIN_SPEED and MAX_SPEED;
# finally remove the limit (0) and wait for 'completed'.
# The lower bound is not enforced when the MDS backing filesystem is zfs.
# NOTE(review): RUN_TIME1, RUN_TIME2 and TIME_DIFF, the sleeps, the setup
# and the 'return'/'fi' after the skip are on lines not shown in this listing.
# The test is skipped when /proc/cpuinfo has no "processor ... : 1" line.
787 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
788 skip "Testing on UP system, the speed may be inaccurate."
794 local BASE_SPEED1=100
796 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
799 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
800 [ "$STATUS" == "scanning-phase1" ] ||
801 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
803 local SPEED=$($SHOW_NAMESPACE |
804 awk '/^average_speed_phase1/ { print $2 }')
806 # There may be a timing error; normally it should be less than 2 seconds.
807 # We allow another 20% scheduling error.
809 # MAX_MARGIN = 1.2 = 12 / 10
810 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
811 RUN_TIME1 * 12 / 10))
812 [ $SPEED -lt $MAX_SPEED ] ||
813 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
816 local BASE_SPEED2=300
818 do_facet $SINGLEMDS \
819 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
822 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
823 # MIN_MARGIN = 0.8 = 8 / 10
824 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
825 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
826 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
827 # Account for slow ZFS performance - LU-4934
# The fstype is a string, so it is compared with '==' (quoted); the integer
# operator '-eq' makes '[' fail with "integer expression expected", which
# would turn the ZFS exemption into an unconditional error.
828 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" == zfs ] ||
829 error "(5) Got speed $SPEED, expected more than $MIN_SPEED"
831 # MAX_MARGIN = 1.2 = 12 / 10
832 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
833 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
834 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
835 [ $SPEED -lt $MAX_SPEED ] ||
836 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
# A limit of 0 is written before waiting for completion.
838 do_facet $SINGLEMDS \
839 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
841 wait_update_facet $SINGLEMDS \
842 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
843 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
844 error "(7) Failed to get expected 'completed'"
846 run_test 9a "LFSCK speed control (1)"
# Test 9b: speed limit of the namespace LFSCK during phase 2.
# Visible flow: create 50 directories with 50 files each under fail_loc
# 0x1604; run the LFSCK with -r under fail_loc 0x160c until it reports
# 'stopped'; start it again with a limit of BASE_SPEED1, expect
# 'scanning-phase2' and an average_speed_phase2 below MAX_SPEED; raise the
# limit to BASE_SPEED2 and check the average against MIN_SPEED and MAX_SPEED;
# finally remove the limit (0) and wait for 'completed'.
# The lower bound is not enforced when the MDS backing filesystem is zfs.
# NOTE(review): BASE_SPEED1, RUN_TIME1, RUN_TIME2 and TIME_DIFF, the sleeps,
# 'done'/'fi' keywords and closing braces are on lines not shown in this listing.
# The test is skipped when /proc/cpuinfo has no "processor ... : 1" line.
849 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
850 skip "Testing on UP system, the speed may be inaccurate."
856 echo "Preparing another 50 * 50 files (with error) at $(date)."
857 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
858 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
859 createmany -d $DIR/$tdir/d 50
860 createmany -m $DIR/$tdir/f 50
861 for ((i = 0; i < 50; i++)); do
862 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
# First pass: with fail_loc 0x160c the run is expected to end as 'stopped'.
865 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
866 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
867 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
868 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
869 mdd.${MDT_DEV}.lfsck_namespace |
870 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
872 error "(5) unexpected status"
875 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
876 echo "Prepared at $(date)."
# Second pass (no -r): it must be in phase 2 when checked.
880 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
883 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
884 [ "$STATUS" == "scanning-phase2" ] ||
885 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
887 local SPEED=$($SHOW_NAMESPACE |
888 awk '/^average_speed_phase2/ { print $2 }')
889 # There may be a timing error; normally it should be less than 2 seconds.
890 # We allow another 20% scheduling error.
892 # MAX_MARGIN = 1.2 = 12 / 10
893 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
894 RUN_TIME1 * 12 / 10))
895 [ $SPEED -lt $MAX_SPEED ] ||
896 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
899 local BASE_SPEED2=150
901 do_facet $SINGLEMDS \
902 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
905 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
906 # MIN_MARGIN = 0.8 = 8 / 10
907 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
908 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
909 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# The fstype is a string, so it is compared with '==' (quoted); the integer
# operator '-eq' makes '[' fail with "integer expression expected", which
# would turn the ZFS exemption into an unconditional error.
910 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" == zfs ] ||
911 error "(9) Got speed $SPEED, expected more than $MIN_SPEED"
913 # MAX_MARGIN = 1.2 = 12 / 10
914 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
915 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
916 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
917 [ $SPEED -lt $MAX_SPEED ] ||
918 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
# A limit of 0 is written before waiting for completion.
920 do_facet $SINGLEMDS \
921 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
922 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
923 mdd.${MDT_DEV}.lfsck_namespace |
924 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
926 error "(11) unexpected status"
929 run_test 9b "LFSCK speed control (2)"
# Test 10: the filesystem stays usable while the namespace LFSCK is scanning
# (only run when the MDS backing filesystem is ldiskfs; skipped otherwise).
# Visible flow: create d<N>/f<N> entries for even N under fail_loc 0x1603
# and for odd N under fail_loc 0x1604 (N < 1000), add one hard link, mount
# the client, start the LFSCK with -r and a speed limit of 100, run a series
# of client operations while the status is 'scanning-phase1', then remove
# the speed limit and wait for 'completed'.
# NOTE(review): header, setup, 'done' keywords, the client unmount and
# closing braces are on lines not shown in this listing.
933 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
934 skip "lookup(..)/linkea on ZFS issue" && return
938 echo "Preparing more files with error at $(date)."
939 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
940 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
# Even indices: 0, 2, 4, ...
942 for ((i = 0; i < 1000; i = $((i+2)))); do
943 mkdir -p $DIR/$tdir/d${i}
944 touch $DIR/$tdir/f${i}
945 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
948 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
949 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
# Odd indices: 1, 3, 5, ...
951 for ((i = 1; i < 1000; i = $((i+2)))); do
952 mkdir -p $DIR/$tdir/d${i}
953 touch $DIR/$tdir/f${i}
954 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
957 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
958 echo "Prepared at $(date)."
960 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
963 mount_client $MOUNT || error "(3) Fail to start client!"
965 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
968 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
969 [ "$STATUS" == "scanning-phase1" ] ||
970 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Client operations issued during the scan: recursive listing, create,
# mkdir, unlink, recursive remove, rename, hard link and symbolic link.
972 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
974 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
976 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
978 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
980 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
982 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
984 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
986 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
987 error "(14) Fail to softlink!"
# The scan must still be in phase 1 after all of the operations above.
989 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
990 [ "$STATUS" == "scanning-phase1" ] ||
991 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# A limit of 0 is written before waiting for completion.
993 do_facet $SINGLEMDS \
994 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
995 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
996 mdd.${MDT_DEV}.lfsck_namespace |
997 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
999 error "(16) unexpected status"
1002 run_test 10 "System is available during LFSCK scanning"
# ost_remove_lastid: delete the LAST_ID file and the d0/0 entry under
# O/${idx} on the locally mounted backing filesystem of ost${ost}.
# The test below calls it as "ost_remove_lastid 1 0".
# NOTE(review): 'ost' and 'idx' are presumably assigned from $1 and $2 on
# lines not shown in this listing, as is the closing brace - confirm.
# Returns 1 when mount_fstype fails and 2 when unmount_fstype fails.
1005 ost_remove_lastid() {
# Prefix used to run the removal on the OST facet.
1008 local rcmd="do_facet ost${ost}"
1010 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1012 # step 1: local mount
1013 mount_fstype ost${ost} || return 1
1014 # step 2: remove the specified LAST_ID
# rm runs with -f, so its own result is not checked here.
1015 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
1017 unmount_fstype ost${ost} || return 2
# Test 11a: the layout LFSCK on an OST rebuilds a lost LAST_ID.
# Visible flow: create 64 files striped on OST index 0 (-c 1 -i 0), remove
# LAST_ID via ost_remove_lastid, start ost1 with the noscrub mount options,
# start the layout LFSCK on ost1 with -r under fail_loc 0x160e, wait for the
# flags to read 'crashed_lastid', clear the fault, wait for 'completed' and
# require the flags field to be empty.
# NOTE(review): header, the steps that stop the client/ost1 and closing
# braces are on lines not shown in this listing.
1021 check_mount_and_prep
1022 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1023 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
1028 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1030 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1031 error "(2) Fail to start ost1"
1033 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1034 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1036 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1037 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
# Here the awk filter watches the "flags" line rather than "status".
1039 wait_update_facet ost1 "$LCTL get_param -n \
1040 obdfilter.${OST_DEV}.lfsck_layout |
1041 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1043 error "(5) unexpected status"
# Remove the fault injection and let the run finish.
1046 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1048 wait_update_facet ost1 "$LCTL get_param -n \
1049 obdfilter.${OST_DEV}.lfsck_layout |
1050 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1052 error "(6) unexpected status"
1055 echo "the LAST_ID(s) should have been rebuilt"
# NOTE(review): FLAGS is assigned without 'local' here.
1056 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1057 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1059 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b (registered by run_test below): files are created while fail_loc
# 0x160d is set on ost1, so that (per the echo below) the on-disk LAST_ID is
# not updated. After ost1 is restarted the reported last_id must be smaller
# than the value sampled before, and a layout LFSCK on the OST is expected to
# bring it back to that earlier value.
1062 check_mount_and_prep
1063 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1065 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1066 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1067 do_facet ost1 $LCTL set_param fail_loc=0x160d
1068 createmany -o $DIR/$tdir/f 64
# lastid1: second ':'-separated field of the last_id line that contains
# 0x100000000, sampled before ost1 is stopped.
1069 local lastid1=$(do_facet ost1 "lctl get_param -n \
1070 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1071 awk -F: '{ print $2 }')
1073 umount_client $MOUNT
1074 stop ost1 || error "(1) Fail to stop ost1"
1076 #define OBD_FAIL_OST_ENOSPC 0x215
# NOTE(review): the reason for injecting ENOSPC before the restart is not
# stated here; presumably it keeps new object creation from advancing the
# LAST_ID -- confirm.
1077 do_facet ost1 $LCTL set_param fail_loc=0x215
1079 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1080 error "(2) Fail to start ost1"
# Poll (at most 60 iterations) until a non-empty last_id value can be read.
# With an empty $lastid2 the unquoted test collapses to `[ ! -z ]`, which is
# false, so the loop keeps going.
# NOTE(review): no `local` declaration for lastid2 or i is visible -- confirm
# they are not meant to be local.
1082 for ((i = 0; i < 60; i++)); do
1083 lastid2=$(do_facet ost1 "lctl get_param -n \
1084 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1085 awk -F: '{ print $2 }')
1086 [ ! -z $lastid2 ] && break;
1090 echo "the on-disk LAST_ID should be smaller than the expected one"
1091 [ $lastid1 -gt $lastid2 ] ||
1092 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1094 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1095 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1097 wait_update_facet ost1 "$LCTL get_param -n \
1098 obdfilter.${OST_DEV}.lfsck_layout |
1099 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1101 error "(6) unexpected status"
# Restart ost1 so that last_id is read again after the LFSCK run.
1104 stop ost1 || error "(7) Fail to stop ost1"
1106 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1107 error "(8) Fail to start ost1"
1109 echo "the on-disk LAST_ID should have been rebuilt"
1110 wait_update_facet ost1 "$LCTL get_param -n \
1111 obdfilter.${ost1_svc}.last_id | grep 0x100000000 |
1112 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
# NOTE(review): this diagnostic runs $LCTL directly instead of through
# do_facet ost1 -- confirm that is intended when ost1 is on another node.
1113 $LCTL get_param -n obdfilter.${ost1_svc}.last_id
1114 error "(9) expect lastid1 0x100000000:$lastid1"
1117 do_facet ost1 $LCTL set_param fail_loc=0
1118 stopall || error "(10) Fail to stopall"
1120 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12 (registered by run_test below): start and stop namespace LFSCK and
# then layout LFSCK on all targets with single lctl commands (-A) issued on
# mds1, and check the status reported by every MDT (and, after the layout
# stop, every OST) after each step. Needs at least 2 MDSes.
# NOTE(review): `exit 0` ends the whole shell process, not only this test. If
# this body runs inside a function called by run_test, every later test is
# skipped as well -- confirm whether `return 0` was intended.
1123 [ $MDSCOUNT -lt 2 ] &&
1124 skip "We need at least 2 MDSes for test_12" && exit 0
1126 check_mount_and_prep
# One directory per MDT index (0 .. MDSCOUNT-1), each filled with 100 files.
1127 for k in $(seq $MDSCOUNT); do
1128 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1129 createmany -o $DIR/$tdir/${k}/f 100 ||
1130 error "(0) Fail to create 100 files."
1133 echo "Start namespace LFSCK on all targets by single command (-s 1)."
# NOTE(review): -s 1 presumably throttles the scan so that it is still in
# phase 1 when the status is sampled below -- confirm.
1134 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1135 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1137 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1138 for k in $(seq $MDSCOUNT); do
1139 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1140 mdd.$(facet_svc mds${k}).lfsck_namespace |
1141 awk '/^status/ { print $2 }')
1142 [ "$STATUS" == "scanning-phase1" ] ||
1143 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1146 echo "Stop namespace LFSCK on all targets by single lctl command."
1147 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1148 error "(4) Fail to stop LFSCK on all devices!"
1150 echo "All the LFSCK targets should be in 'stopped' status."
1151 for k in $(seq $MDSCOUNT); do
1152 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1153 mdd.$(facet_svc mds${k}).lfsck_namespace |
1154 awk '/^status/ { print $2 }')
1155 [ "$STATUS" == "stopped" ] ||
1156 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
1159 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1160 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1161 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1163 echo "All the LFSCK targets should be in 'completed' status."
1164 for k in $(seq $MDSCOUNT); do
1165 wait_update_facet mds${k} "$LCTL get_param -n \
1166 mdd.$(facet_svc mds${k}).lfsck_namespace |
1167 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1168 error "(7) MDS${k} is not the expected 'completed'"
# Same start / stop / restart sequence again, this time for layout LFSCK.
1171 echo "Start layout LFSCK on all targets by single command (-s 1)."
1172 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1173 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1175 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1176 for k in $(seq $MDSCOUNT); do
1177 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1178 mdd.$(facet_svc mds${k}).lfsck_layout |
1179 awk '/^status/ { print $2 }')
1180 [ "$STATUS" == "scanning-phase1" ] ||
1181 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1184 echo "Stop layout LFSCK on all targets by single lctl command."
1185 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1186 error "(10) Fail to stop LFSCK on all devices!"
1188 echo "All the LFSCK targets should be in 'stopped' status."
1189 for k in $(seq $MDSCOUNT); do
1190 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1191 mdd.$(facet_svc mds${k}).lfsck_layout |
1192 awk '/^status/ { print $2 }')
1193 [ "$STATUS" == "stopped" ] ||
1194 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# For layout LFSCK the OSTs are checked too, through obdfilter.*.lfsck_layout.
1197 for k in $(seq $OSTCOUNT); do
1198 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1199 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1200 awk '/^status/ { print $2 }')
1201 [ "$STATUS" == "stopped" ] ||
1202 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1205 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1206 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1207 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1209 echo "All the LFSCK targets should be in 'completed' status."
1210 for k in $(seq $MDSCOUNT); do
1211 # The LFSCK status query interval is 30 seconds. For the case
1212 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1213 # time to guarantee the status sync up.
1214 wait_update_facet mds${k} "$LCTL get_param -n \
1215 mdd.$(facet_svc mds${k}).lfsck_layout |
1216 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1217 error "(14) MDS${k} is not the expected 'completed'"
1220 run_test 12 "single command to trigger LFSCK on all devices"
# Test 13 (registered by run_test below): 32 files are created while fail_loc
# 0x160f is set on the MDS. Once layout LFSCK completes, the repaired_others
# counter is expected to be exactly 32.
1224 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1225 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1226 echo "MDT-object FID."
1229 check_mount_and_prep
1231 echo "Inject failure stub to simulate bad lmm_oi"
1232 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
# The injection is active only while the 32 files are being created.
1233 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1234 createmany -o $DIR/$tdir/f 32
1235 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1237 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1238 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1240 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1241 mdd.${MDT_DEV}.lfsck_layout |
1242 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1244 error "(2) unexpected status"
# One repair is expected for each file created under the injected failure.
1247 local repaired=$($SHOW_LAYOUT |
1248 awk '/^repaired_others/ { print $2 }')
1249 [ $repaired -eq 32 ] ||
1250 error "(3) Fail to repair crashed lmm_oi: $repaired"
1252 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14 (registered by run_test below): files created while fail_loc 0x1610
# is set on ost1 are meant to end up with dangling references. A first layout
# LFSCK run (without -c) must count at least 32 of them while `ls` still
# fails; a second run with -c must make `ls` succeed.
1256 echo "The OST-object referenced by the MDT-object should be there;"
1257 echo "otherwise, the LFSCK should re-create the missed OST-object."
1260 check_mount_and_prep
1261 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# count: value reported by precreated_ost_obj_count 0 0. count + 32 files are
# created under the injected failure.
1263 local count=$(precreated_ost_obj_count 0 0)
1265 echo "Inject failure stub to simulate dangling referenced MDT-object"
1266 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1267 do_facet ost1 $LCTL set_param fail_loc=0x1610
1268 createmany -o $DIR/$tdir/f $((count + 32))
1269 do_facet ost1 $LCTL set_param fail_loc=0
1271 start_full_debug_logging
1273 # exhaust other pre-created dangling cases
1274 count=$(precreated_ost_obj_count 0 0)
1275 createmany -o $DIR/$tdir/a $count ||
1276 error "(0) Fail to create $count files."
1278 echo "'ls' should fail because of dangling referenced MDT-object"
1279 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
1281 echo "Trigger layout LFSCK to find out dangling reference"
1282 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1284 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1285 mdd.${MDT_DEV}.lfsck_layout |
1286 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1288 error "(3) unexpected status"
# NOTE(review): repaired_dangling is required to be >= 32 after this run too,
# although the echo below says dangling references are not repaired by
# default; presumably the counter also counts detected entries -- confirm.
1291 local repaired=$($SHOW_LAYOUT |
1292 awk '/^repaired_dangling/ { print $2 }')
1293 [ $repaired -ge 32 ] ||
1294 error "(4) Fail to repair dangling reference: $repaired"
1296 echo "'ls' should fail because it will not repair dangling by default"
1297 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail."
1299 echo "Trigger layout LFSCK to repair dangling reference"
# Second run: the same command with -c added.
1300 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1302 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1303 mdd.${MDT_DEV}.lfsck_layout |
1304 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1306 error "(7) unexpected status"
1309 repaired=$($SHOW_LAYOUT |
1310 awk '/^repaired_dangling/ { print $2 }')
1311 [ $repaired -ge 32 ] ||
1312 error "(8) Fail to repair dangling reference: $repaired"
1314 echo "'ls' should success after layout LFSCK repairing"
1315 ls -ail $DIR/$tdir > /dev/null || error "(9) ls should success."
1316 stop_full_debug_logging
1318 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Test 15a (registered by run_test below): one file (f0) is written while
# fail_loc 0x1611 is set on ost1. After layout LFSCK completes, exactly one
# repaired_unmatched_pair is expected.
1322 echo "If the OST-object referenced by the MDT-object back points"
1323 echo "to some non-exist MDT-object, then the LFSCK should repair"
1324 echo "the OST-object to back point to the right MDT-object."
1327 check_mount_and_prep
1328 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1330 echo "Inject failure stub to make the OST-object to back point to"
1331 echo "non-exist MDT-object."
1332 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
# The injection covers the 1 MiB write and the osc lock cancel that follows.
1334 do_facet ost1 $LCTL set_param fail_loc=0x1611
1335 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1336 cancel_lru_locks osc
1337 do_facet ost1 $LCTL set_param fail_loc=0
1339 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1340 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1342 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1343 mdd.${MDT_DEV}.lfsck_layout |
1344 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1346 error "(2) unexpected status"
# Only f0 was written under the injected failure, hence exactly one repair.
1349 local repaired=$($SHOW_LAYOUT |
1350 awk '/^repaired_unmatched_pair/ { print $2 }')
1351 [ $repaired -eq 1 ] ||
1352 error "(3) Fail to repair unmatched pair: $repaired"
1354 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b (registered by run_test below): a "guard" file is written first
# without any injection, then f0 is written while fail_loc 0x1612 is set on
# ost1. After layout LFSCK completes, exactly one repaired_unmatched_pair is
# expected.
1358 echo "If the OST-object referenced by the MDT-object back points"
1359 echo "to other MDT-object that doesn't recognize the OST-object,"
1360 echo "then the LFSCK should repair it to back point to the right"
1361 echo "MDT-object (the first one)."
1364 check_mount_and_prep
1365 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# NOTE(review): guard is presumably the "other MDT-object" that f0's
# OST-object is made to point back to -- confirm against the fail_loc handler.
1366 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1367 cancel_lru_locks osc
1369 echo "Inject failure stub to make the OST-object to back point to"
1370 echo "other MDT-object"
1372 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1373 do_facet ost1 $LCTL set_param fail_loc=0x1612
1374 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1375 cancel_lru_locks osc
1376 do_facet ost1 $LCTL set_param fail_loc=0
1378 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1379 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1381 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1382 mdd.${MDT_DEV}.lfsck_layout |
1383 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1385 error "(2) unexpected status"
1388 local repaired=$($SHOW_LAYOUT |
1389 awk '/^repaired_unmatched_pair/ { print $2 }')
1390 [ $repaired -eq 1 ] ||
1391 error "(3) Fail to repair unmatched pair: $repaired"
1393 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 16 (registered by run_test below): f0 is chown'ed while fail_loc
# 0x1613 is set on the MDS, which per the echo below skips the owner change
# on the OST-object. After layout LFSCK completes, exactly one
# repaired_inconsistent_owner is expected.
1397 echo "If the OST-object's owner information does not match the owner"
1398 echo "information stored in the MDT-object, then the LFSCK trust the"
1399 echo "MDT-object and update the OST-object's owner information."
1402 check_mount_and_prep
1403 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1404 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1405 cancel_lru_locks osc
1407 echo "Inject failure stub to skip OST-object owner changing"
1408 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1409 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
# "1.1" is the owner.group form: uid 1 and gid 1.
1410 chown 1.1 $DIR/$tdir/f0
1411 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1413 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1416 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1418 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1419 mdd.${MDT_DEV}.lfsck_layout |
1420 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1422 error "(2) unexpected status"
1425 local repaired=$($SHOW_LAYOUT |
1426 awk '/^repaired_inconsistent_owner/ { print $2 }')
1427 [ $repaired -eq 1 ] ||
1428 error "(3) Fail to repair inconsistent owner: $repaired"
1430 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17 (registered by run_test below): with fail_loc 0x1614 set on the
# MDS, "guard" (1 MiB) and f0 are created so that, per the echoes below, both
# reference the same OST-object. After layout LFSCK reports one
# repaired_multiple_referenced, writing 2 MiB into f0 must leave the size of
# guard at 1048576.
1434 echo "If more than one MDT-objects reference the same OST-object,"
1435 echo "and the OST-object only recognizes one MDT-object, then the"
1436 echo "LFSCK should create new OST-objects for such non-recognized"
1440 check_mount_and_prep
1441 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1443 echo "Inject failure stub to make two MDT-objects to refernce"
1444 echo "the OST-object"
1446 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1447 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1449 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1450 cancel_lru_locks osc
# createmany with prefix "f" and count 1 yields f0, the file checked below.
1452 createmany -o $DIR/$tdir/f 1
1454 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1456 cancel_lru_locks mdc
1457 cancel_lru_locks osc
1459 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
# Field 5 of `ls -l` is the size; f0 was never written to, yet it is expected
# to show the 1 MiB that was written to guard.
1460 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1461 [ $size -eq 1048576 ] ||
1462 error "(1) f0 (wrong) size should be 1048576, but got $size"
1464 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1467 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1469 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1470 mdd.${MDT_DEV}.lfsck_layout |
1471 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1473 error "(3) unexpected status"
1476 local repaired=$($SHOW_LAYOUT |
1477 awk '/^repaired_multiple_referenced/ { print $2 }')
1478 [ $repaired -eq 1 ] ||
1479 error "(4) Fail to repair multiple references: $repaired"
1481 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
# Writing 2 MiB to f0 must not change guard if the two no longer share data.
1482 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1483 error "(5) Fail to write f0."
1484 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1485 [ $size -eq 1048576 ] ||
1486 error "(6) guard size should be 1048576, but got $size"
1488 run_test 17 "LFSCK can repair multiple references"
# Test 18a (registered by run_test below): f1 (and f2 with 2 or more MDSes)
# are chown'ed while fail_loc 0x1615 is set on their MDS, which per the echo
# below makes the MDT-object lose its layout EA. The file sizes must then be
# wrong. After a layout LFSCK run with -o, the repaired_orphan counters and
# the restored sizes are checked.
1492 echo "The target MDT-object is there, but related stripe information"
1493 echo "is lost or partly lost. The LFSCK should regenerate the missed"
1494 echo "layout EA entries."
1497 check_mount_and_prep
1498 $LFS mkdir -i 0 $DIR/$tdir/a1
1499 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1500 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With `ls -il` the inode number is field 1, so the size is field 6.
1502 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1504 $LFS path2fid $DIR/$tdir/a1/f1
1505 $LFS getstripe $DIR/$tdir/a1/f1
# Second file on MDT index 1 with stripe count 2, same amount of data as f1.
1507 if [ $MDSCOUNT -ge 2 ]; then
1508 $LFS mkdir -i 1 $DIR/$tdir/a2
1509 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1510 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1511 $LFS path2fid $DIR/$tdir/a2/f2
1512 $LFS getstripe $DIR/$tdir/a2/f2
1515 cancel_lru_locks osc
1517 echo "Inject failure, to make the MDT-object lost its layout EA"
1518 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1519 do_facet mds1 $LCTL set_param fail_loc=0x1615
1520 chown 1.1 $DIR/$tdir/a1/f1
1522 if [ $MDSCOUNT -ge 2 ]; then
1523 do_facet mds2 $LCTL set_param fail_loc=0x1615
1524 chown 1.1 $DIR/$tdir/a2/f2
1530 do_facet mds1 $LCTL set_param fail_loc=0
1531 if [ $MDSCOUNT -ge 2 ]; then
1532 do_facet mds2 $LCTL set_param fail_loc=0
1535 cancel_lru_locks mdc
1536 cancel_lru_locks osc
1538 echo "The file size should be incorrect since layout EA is lost"
1539 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1540 [ "$cur_size" != "$saved_size" ] ||
1541 error "(1) Expect incorrect file1 size"
# f2 is compared against saved_size of f1; both were written with the same dd.
1543 if [ $MDSCOUNT -ge 2 ]; then
1544 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1545 [ "$cur_size" != "$saved_size" ] ||
1546 error "(2) Expect incorrect file2 size"
1549 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1550 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
1552 for k in $(seq $MDSCOUNT); do
1553 # The LFSCK status query interval is 30 seconds. For the case
1554 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1555 # time to guarantee the status sync up.
1556 wait_update_facet mds${k} "$LCTL get_param -n \
1557 mdd.$(facet_svc mds${k}).lfsck_layout |
1558 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1559 error "(4) MDS${k} is not the expected 'completed'"
# The OSTs are sampled once, without waiting, after all MDTs have completed.
1562 for k in $(seq $OSTCOUNT); do
1563 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1564 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1565 awk '/^status/ { print $2 }')
1566 [ "$cur_status" == "completed" ] ||
1567 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
1570 local repaired=$(do_facet mds1 $LCTL get_param -n \
1571 mdd.$(facet_svc mds1).lfsck_layout |
1572 awk '/^repaired_orphan/ { print $2 }')
1573 [ $repaired -eq 1 ] ||
1574 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
# NOTE(review): 2 is presumably one repair per stripe of f2 (-c 2) -- confirm.
1576 if [ $MDSCOUNT -ge 2 ]; then
1577 repaired=$(do_facet mds2 $LCTL get_param -n \
1578 mdd.$(facet_svc mds2).lfsck_layout |
1579 awk '/^repaired_orphan/ { print $2 }')
1580 [ $repaired -eq 2 ] ||
1581 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
1584 $LFS path2fid $DIR/$tdir/a1/f1
1585 $LFS getstripe $DIR/$tdir/a1/f1
1587 if [ $MDSCOUNT -ge 2 ]; then
1588 $LFS path2fid $DIR/$tdir/a2/f2
1589 $LFS getstripe $DIR/$tdir/a2/f2
1592 echo "The file size should be correct after layout LFSCK scanning"
1593 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1594 [ "$cur_size" == "$saved_size" ] ||
1595 error "(7) Expect file1 size $saved_size, but got $cur_size"
1597 if [ $MDSCOUNT -ge 2 ]; then
1598 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1599 [ "$cur_size" == "$saved_size" ] ||
1600 error "(8) Expect file2 size $saved_size, but got $cur_size"
1603 run_test 18a "Find out orphan OST-object and repair it (1)"
# Test 18b (registered by run_test below): f1 (and f2 with 2 or more MDSes)
# are removed while fail_loc 0x1616 is set on their MDS, which per the echo
# below simulates a missing MDT-object. After a layout LFSCK run with -o, the
# files are expected under .lustre/lost+found/MDTxxxx/ as <fid>-R-0. They are
# moved back to their old paths and their sizes are compared with the saved
# one.
1607 echo "The target MDT-object is lost. The LFSCK should re-create the"
1608 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1609 echo "can move it back to normal namespace manually."
1612 check_mount_and_prep
1613 $LFS mkdir -i 0 $DIR/$tdir/a1
1614 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1615 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# The size (field 6 of `ls -il`) and the FID are saved before the removal;
# the FID is needed to build the lost+found entry name below.
1616 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1617 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1619 $LFS getstripe $DIR/$tdir/a1/f1
# NOTE(review): no `local` declaration for fid2 is visible -- confirm.
1621 if [ $MDSCOUNT -ge 2 ]; then
1622 $LFS mkdir -i 1 $DIR/$tdir/a2
1623 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1624 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1625 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1627 $LFS getstripe $DIR/$tdir/a2/f2
1630 cancel_lru_locks osc
1632 echo "Inject failure, to simulate the case of missing the MDT-object"
1633 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1634 do_facet mds1 $LCTL set_param fail_loc=0x1616
1635 rm -f $DIR/$tdir/a1/f1
1637 if [ $MDSCOUNT -ge 2 ]; then
1638 do_facet mds2 $LCTL set_param fail_loc=0x1616
1639 rm -f $DIR/$tdir/a2/f2
1645 do_facet mds1 $LCTL set_param fail_loc=0
1646 if [ $MDSCOUNT -ge 2 ]; then
1647 do_facet mds2 $LCTL set_param fail_loc=0
1650 cancel_lru_locks mdc
1651 cancel_lru_locks osc
1653 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1654 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1656 for k in $(seq $MDSCOUNT); do
1657 # The LFSCK status query interval is 30 seconds. For the case
1658 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1659 # time to guarantee the status sync up.
1660 wait_update_facet mds${k} "$LCTL get_param -n \
1661 mdd.$(facet_svc mds${k}).lfsck_layout |
1662 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1663 error "(2) MDS${k} is not the expected 'completed'"
1666 for k in $(seq $OSTCOUNT); do
1667 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1668 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1669 awk '/^status/ { print $2 }')
1670 [ "$cur_status" == "completed" ] ||
1671 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
1674 local repaired=$(do_facet mds1 $LCTL get_param -n \
1675 mdd.$(facet_svc mds1).lfsck_layout |
1676 awk '/^repaired_orphan/ { print $2 }')
1677 [ $repaired -eq 1 ] ||
1678 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
# NOTE(review): 2 is presumably one repair per stripe of f2 (-c 2) -- confirm.
1680 if [ $MDSCOUNT -ge 2 ]; then
1681 repaired=$(do_facet mds2 $LCTL get_param -n \
1682 mdd.$(facet_svc mds2).lfsck_layout |
1683 awk '/^repaired_orphan/ { print $2 }')
1684 [ $repaired -eq 2 ] ||
1685 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
1688 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
# The recovered entry is looked up by the FID saved earlier, plus "-R-0".
1689 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
1690 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
1692 if [ $MDSCOUNT -ge 2 ]; then
1693 local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0
1694 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
1697 $LFS path2fid $DIR/$tdir/a1/f1
1698 $LFS getstripe $DIR/$tdir/a1/f1
1700 if [ $MDSCOUNT -ge 2 ]; then
1701 $LFS path2fid $DIR/$tdir/a2/f2
1702 $LFS getstripe $DIR/$tdir/a2/f2
1705 echo "The file size should be correct after layout LFSCK scanning"
1706 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1707 [ "$cur_size" == "$saved_size" ] ||
1708 error "(7) Expect file1 size $saved_size, but got $cur_size"
1710 if [ $MDSCOUNT -ge 2 ]; then
1711 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1712 [ "$cur_size" == "$saved_size" ] ||
1713 error "(8) Expect file2 size $saved_size, but got $cur_size"
1716 run_test 18b "Find out orphan OST-object and repair it (2)"
# Test 18c (registered by run_test below): f1 (and f2 with 2 or more MDSes)
# are written while fail_loc 0x1617 is set on ost1, which per the echo below
# simulates a missing parent FID. They are then removed under fail_loc 0x1616
# on their MDS. After a layout LFSCK run with -o, the repairs are expected on
# mds1 only, and the *-N-0 entries are expected under lost+found/MDT0000 but
# not under MDT0001.
1720 echo "The target MDT-object is lost, and the OST-object FID is missing."
1721 echo "The LFSCK should re-create the MDT-object with new FID under the "
1722 echo "directory .lustre/lost+found/MDTxxxx."
1725 check_mount_and_prep
1726 $LFS mkdir -i 0 $DIR/$tdir/a1
1727 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1729 echo "Inject failure, to simulate the case of missing parent FID"
1730 #define OBD_FAIL_LFSCK_NOPFID 0x1617
# NOTE(review): no reset of this ost1 fail_loc is visible in this block --
# confirm whether it is cleared elsewhere.
1731 do_facet ost1 $LCTL set_param fail_loc=0x1617
1733 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1734 $LFS getstripe $DIR/$tdir/a1/f1
# a2 lives on MDT index 1 but, unlike in 18a/18b, uses one stripe on OST
# index 0.
1736 if [ $MDSCOUNT -ge 2 ]; then
1737 $LFS mkdir -i 1 $DIR/$tdir/a2
1738 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a2
1739 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1740 $LFS getstripe $DIR/$tdir/a2/f2
1743 cancel_lru_locks osc
1745 echo "Inject failure, to simulate the case of missing the MDT-object"
1746 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1747 do_facet mds1 $LCTL set_param fail_loc=0x1616
1748 rm -f $DIR/$tdir/a1/f1
1750 if [ $MDSCOUNT -ge 2 ]; then
1751 do_facet mds2 $LCTL set_param fail_loc=0x1616
1752 rm -f $DIR/$tdir/a2/f2
1758 do_facet mds1 $LCTL set_param fail_loc=0
1759 if [ $MDSCOUNT -ge 2 ]; then
1760 do_facet mds2 $LCTL set_param fail_loc=0
1763 cancel_lru_locks mdc
1764 cancel_lru_locks osc
1766 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1767 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1769 for k in $(seq $MDSCOUNT); do
1770 # The LFSCK status query interval is 30 seconds. For the case
1771 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1772 # time to guarantee the status sync up.
1773 wait_update_facet mds${k} "$LCTL get_param -n \
1774 mdd.$(facet_svc mds${k}).lfsck_layout |
1775 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1776 error "(2) MDS${k} is not the expected 'completed'"
1779 for k in $(seq $OSTCOUNT); do
1780 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1781 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1782 awk '/^status/ { print $2 }')
1783 [ "$cur_status" == "completed" ] ||
1784 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): the assignment of `expected` used below is not visible here;
# presumably it is set by this MDSCOUNT conditional -- confirm.
1787 if [ $MDSCOUNT -ge 2 ]; then
1793 local repaired=$(do_facet mds1 $LCTL get_param -n \
1794 mdd.$(facet_svc mds1).lfsck_layout |
1795 awk '/^repaired_orphan/ { print $2 }')
1796 [ $repaired -eq $expected ] ||
1797 error "(4) Expect $expected fixed on mds1, but got: $repaired"
1799 if [ $MDSCOUNT -ge 2 ]; then
1800 repaired=$(do_facet mds2 $LCTL get_param -n \
1801 mdd.$(facet_svc mds2).lfsck_layout |
1802 awk '/^repaired_orphan/ { print $2 }')
1803 [ $repaired -eq 0 ] ||
1804 error "(5) Expect 0 fixed on mds2, but got: $repaired"
1807 echo "There should NOT be some stub under .lustre/lost+found/MDT0001/"
# When nothing matches, the glob stays literal and ls fails, so error (6) is
# not raised.
1808 ls -ail $MOUNT/.lustre/lost+found/MDT0001/*-N-0 &&
1809 error "(6) .lustre/lost+found/MDT0001/ should be empty"
1811 echo "There should be some stub under .lustre/lost+found/MDT0000/"
1812 ls -ail $MOUNT/.lustre/lost+found/MDT0000/*-N-0 ||
1813 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
1815 run_test 18c "Find out orphan OST-object and repair it (3)"
# Test 18d (registered by run_test below): with fail_loc 0x1618 set on the
# MDS, f2 is chown'ed and f1 is removed so that, per the echoes below, f2 is
# left with a dangling reference while its old OST-object still exists. A
# layout LFSCK run with -r -o -c is expected to report one repaired_orphan
# and to restore the saved size of f2.
1819 echo "The target MDT-object layout EA slot is occpuied by some new"
1820 echo "created OST-object when repair dangling reference case. Such"
1821 echo "conflict OST-object has never been modified. Then when found"
1822 echo "the orphan OST-object, LFSCK will replace it with the orphan"
1826 check_mount_and_prep
1828 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1829 echo "guard" > $DIR/$tdir/a1/f1
1830 echo "foo" > $DIR/$tdir/a1/f2
# Size of f2 (field 6 of `ls -il`) while it still holds "foo".
1831 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1832 $LFS path2fid $DIR/$tdir/a1/f1
1833 $LFS getstripe $DIR/$tdir/a1/f1
1834 $LFS path2fid $DIR/$tdir/a1/f2
1835 $LFS getstripe $DIR/$tdir/a1/f2
1836 cancel_lru_locks osc
1838 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1839 echo "to reference the same OST-object (which is f1's OST-obejct)."
1840 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1841 echo "dangling reference case, but f2's old OST-object is there."
1844 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1845 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1846 chown 1.1 $DIR/$tdir/a1/f2
1847 rm -f $DIR/$tdir/a1/f1
1850 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1852 echo "stopall to cleanup object cache"
1855 setupall > /dev/null
1857 echo "The file size should be incorrect since dangling referenced"
1858 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1859 [ "$cur_size" != "$saved_size" ] ||
1860 error "(1) Expect incorrect file2 size"
1862 #define OBD_FAIL_LFSCK_DELAY3 0x1602
# NOTE(review): judging by the name, this presumably delays LFSCK so that the
# 'scanning-phase2' status can be observed below -- confirm what fail_val=5
# means for this fail_loc.
1863 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
1865 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1866 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1868 wait_update_facet mds1 "$LCTL get_param -n \
1869 mdd.$(facet_svc mds1).lfsck_layout |
1870 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1871 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
# Reset the failure injection once phase 2 has been seen.
1873 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1875 for k in $(seq $MDSCOUNT); do
1876 # The LFSCK status query interval is 30 seconds. For the case
1877 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1878 # time to guarantee the status sync up.
1879 wait_update_facet mds${k} "$LCTL get_param -n \
1880 mdd.$(facet_svc mds${k}).lfsck_layout |
1881 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1882 error "(3) MDS${k} is not the expected 'completed'"
1885 for k in $(seq $OSTCOUNT); do
1886 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1887 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1888 awk '/^status/ { print $2 }')
1889 [ "$cur_status" == "completed" ] ||
1890 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
1893 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
1894 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
1895 awk '/^repaired_orphan/ { print $2 }')
1896 [ $repaired -eq 1 ] ||
1897 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
1899 echo "The file size should be correct after layout LFSCK scanning"
1900 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1901 [ "$cur_size" == "$saved_size" ] ||
1902 error "(6) Expect file2 size $saved_size, but got $cur_size"
1904 echo "The LFSCK should find back the original data."
# The content of f2 is only printed; the code shown does not compare it.
1905 cat $DIR/$tdir/a1/f2
1906 $LFS path2fid $DIR/$tdir/a1/f2
1907 $LFS getstripe $DIR/$tdir/a1/f2
1909 run_test 18d "Find out orphan OST-object and repair it (4)"
1913 echo "The target MDT-object layout EA slot is occpuied by some new"
1914 echo "created OST-object when repair dangling reference case. Such"
1915 echo "conflict OST-object has been modified by others. To keep the"
1916 echo "new data, the LFSCK will create a new file to refernece this"
1917 echo "old orphan OST-object."
1920 check_mount_and_prep
1922 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1923 echo "guard" > $DIR/$tdir/a1/f1
1924 echo "foo" > $DIR/$tdir/a1/f2
1925 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1926 $LFS path2fid $DIR/$tdir/a1/f1
1927 $LFS getstripe $DIR/$tdir/a1/f1
1928 $LFS path2fid $DIR/$tdir/a1/f2
1929 $LFS getstripe $DIR/$tdir/a1/f2
1930 cancel_lru_locks osc
1932 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1933 echo "to reference the same OST-object (which is f1's OST-obejct)."
1934 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1935 echo "dangling reference case, but f2's old OST-object is there."
1938 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1939 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1940 chown 1.1 $DIR/$tdir/a1/f2
1941 rm -f $DIR/$tdir/a1/f1
1944 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1946 echo "stopall to cleanup object cache"
1949 setupall > /dev/null
1951 echo "The file size should be incorrect since dangling referenced"
1952 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1953 [ "$cur_size" != "$saved_size" ] ||
1954 error "(1) Expect incorrect file2 size"
1956 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1957 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
1959 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1960 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1962 wait_update_facet mds1 "$LCTL get_param -n \
1963 mdd.$(facet_svc mds1).lfsck_layout |
1964 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1965 error "(3) MDS1 is not the expected 'scanning-phase2'"
1967 # to guarantee all updates are synced.
1971 echo "Write new data to f2 to modify the new created OST-object."
1972 echo "dummy" >> $DIR/$tdir/a1/f2
1974 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1976 for k in $(seq $MDSCOUNT); do
1977 # The LFSCK status query interval is 30 seconds. In case some
1978 # LFSCK_NOTIFY RPCs fail or get lost, wait long enough
1979 # to guarantee that the status is synced up.
1980 wait_update_facet mds${k} "$LCTL get_param -n \
1981 mdd.$(facet_svc mds${k}).lfsck_layout |
1982 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1983 error "(4) MDS${k} is not the expected 'completed'"
1986 for k in $(seq $OSTCOUNT); do
1987 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1988 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1989 awk '/^status/ { print $2 }')
1990 [ "$cur_status" == "completed" ] ||
1991 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
1994 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
1995 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
1996 awk '/^repaired_orphan/ { print $2 }')
1997 [ $repaired -eq 1 ] ||
1998 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2000 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2001 local cname=$(ls $MOUNT/.lustre/lost+found/MDT0000/*-C-0)
2003 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
2005 echo "The stub file should keep the original f2 data"
2006 cur_size=$(ls -il $cname | awk '{ print $6 }')
2007 [ "$cur_size" == "$saved_size" ] ||
2008 error "(8) Expect file2 size $saved_size, but got $cur_size"
2011 $LFS path2fid $cname
2012 $LFS getstripe $cname
2014 echo "The f2 should contains new data."
2015 cat $DIR/$tdir/a1/f2
2016 $LFS path2fid $DIR/$tdir/a1/f2
2017 $LFS getstripe $DIR/$tdir/a1/f2
2019 run_test 18e "Find out orphan OST-object and repair it (5)"
# Test 19a body: with lfsck_verify_pfid set to 1 on OST0000, a read that
# carries a wrong parent FID is expected to be refused.
# NOTE(review): the function header and closing brace are not part of this
# listing; confirm the boundaries against the full script.
2022 check_mount_and_prep
2023 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# a0 is the file read below; a1 is only created here and not used again in
# this test.
2025 echo "foo" > $DIR/$tdir/a0
2026 echo "guard" > $DIR/$tdir/a1
# NOTE(review): presumably drops cached client locks so the later read has
# to go to the OST -- confirm against test-framework.sh.
2027 cancel_lru_locks osc
2029 echo "Inject failure, then client will offer wrong parent FID when read"
# Set lfsck_verify_pfid to 1 for OST0000, executed on the ost1 facet.
2030 do_facet ost1 $LCTL set_param -n \
2031 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2032 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
# fail_loc is set with the local $LCTL (no do_facet), i.e. on the node that
# runs this script.
2033 $LCTL set_param fail_loc=0x1619
2035 echo "Read RPC with wrong parent FID should be denied"
# A successful cat means the read was NOT refused, which is reported as an
# error.
2036 cat $DIR/$tdir/a0 && error "(3) Read should be denied!"
# Clear the injected failure again.
2037 $LCTL set_param fail_loc=0
2039 run_test 19a "OST-object inconsistency self detect"
# Test 19b body: an OST-object whose parent FID points to a non-existent
# MDT-object must stay unrepaired until lfsck_verify_pfid is set to 1; after
# that, a normal read is expected to succeed and to bump the "repaired"
# counter to 1.
# NOTE(review): the function header and closing brace are not part of this
# listing; confirm the boundaries against the full script.
2042 check_mount_and_prep
2043 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2045 echo "Inject failure stub to make the OST-object to back point to"
2046 echo "non-exist MDT-object"
2048 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
# The failure is armed on ost1 only while f0 is written, then cleared.
2049 do_facet ost1 $LCTL set_param fail_loc=0x1611
2050 echo "foo" > $DIR/$tdir/f0
2051 cancel_lru_locks osc
2052 do_facet ost1 $LCTL set_param fail_loc=0
2054 echo "Nothing should be fixed since self detect and repair is disabled"
# Reading lfsck_verify_pfid yields a line starting with "repaired"; its second
# field is taken as the repair count.
2055 local repaired=$(do_facet ost1 $LCTL get_param -n \
2056 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2057 awk '/^repaired/ { print $2 }')
2058 [ $repaired -eq 0 ] ||
2059 error "(1) Expected 0 repaired, but got $repaired"
2061 echo "Read RPC with right parent FID should be accepted,"
2062 echo "and cause parent FID on OST to be fixed"
# Set lfsck_verify_pfid to 1 on OST0000, then read the file.
2064 do_facet ost1 $LCTL set_param -n \
2065 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2066 cat $DIR/$tdir/f0 || error "(2) Read should not be denied!"
# Exactly one repair is expected after the read.
2068 repaired=$(do_facet ost1 $LCTL get_param -n \
2069 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2070 awk '/^repaired/ { print $2 }')
2071 [ $repaired -eq 1 ] ||
2072 error "(3) Expected 1 repaired, but got $repaired"
2074 run_test 19b "OST-object inconsistency self repair"
# Test 20 body: files lose their MDT-object (and, for f1..f3, one OST-object).
# Layout LFSCK is then expected to re-create them under
# .lustre/lost+found/MDT0000/ as <fid>-R-0, flagging a missing stripe with the
# LOV EA hole pattern bit, and the recovered files are exercised with normal
# tools (getstripe, stat, cat, dd, chown, touch, rm).
# NOTE(review): this listing omits some short lines of the original script
# (function header, else/fi/done, the bcount assignment, ...), so part of the
# control flow below is not visible here.
2077 [ $OSTCOUNT -lt 2 ] &&
2078 skip "The test needs at least 2 OSTs" && return
2081 echo "The target MDT-object and some of its OST-object are lost."
2082 echo "The LFSCK should find out the left OST-objects and re-create"
2083 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2084 echo "with the partial OST-objects (LOV EA hole)."
2086 echo "New client can access the file with LOV EA hole via normal"
2087 echo "system tools or commands without crash the system."
2089 echo "For old client, even though it cannot access the file with"
2090 echo "LOV EA hole, it should not cause the system crash."
2093 check_mount_and_prep
2094 $LFS mkdir -i 0 $DIR/$tdir/a1
# Files under a1 get 3 stripes when more than 2 OSTs exist, otherwise 2;
# 1M stripe size, first stripe on OST index 0.
# NOTE(review): bcount, used by the dd commands below, is assigned on a line
# that is not shown in this listing.
2095 if [ $OSTCOUNT -gt 2 ]; then
2096 $LFS setstripe -c 3 -i 0 -s 1M $DIR/$tdir/a1
2099 $LFS setstripe -c 2 -i 0 -s 1M $DIR/$tdir/a1
2103 # 256 blocks on the stripe0.
2104 # 1 block on the stripe1 for 2 OSTs case.
2105 # 256 blocks on the stripe1 for other cases.
2106 # 1 block on the stripe2 if OSTs > 2
2107 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2108 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2109 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# Remember the FIDs: the recovered files are looked up later as <fid>-R-0.
2111 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2112 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2113 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
2116 $LFS getstripe $DIR/$tdir/a1/f0
2118 $LFS getstripe $DIR/$tdir/a1/f1
2120 $LFS getstripe $DIR/$tdir/a1/f2
# f3 only exists when there are more than 2 OSTs. fid3 is declared local like
# fid0..fid2 so that it does not leak into the global scope of the script.
2122 if [ $OSTCOUNT -gt 2 ]; then
2123 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
2124 local fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2126 $LFS getstripe $DIR/$tdir/a1/f3
2129 cancel_lru_locks osc
2131 echo "Inject failure..."
2132 echo "To simulate f0 lost MDT-object"
2133 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2134 do_facet mds1 $LCTL set_param fail_loc=0x1616
2135 rm -f $DIR/$tdir/a1/f0
2137 echo "To simulate f1 lost MDT-object and OST-object0"
2138 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
2139 do_facet mds1 $LCTL set_param fail_loc=0x161a
2140 rm -f $DIR/$tdir/a1/f1
# NOTE(review): judging by the echo texts, fail_val selects which stripe's
# OST-object is dropped together with the MDT-object -- confirm.
2142 echo "To simulate f2 lost MDT-object and OST-object1"
2143 do_facet mds1 $LCTL set_param fail_val=1
2144 rm -f $DIR/$tdir/a1/f2
2146 if [ $OSTCOUNT -gt 2 ]; then
2147 echo "To simulate f3 lost MDT-object and OST-object2"
2148 do_facet mds1 $LCTL set_param fail_val=2
2149 rm -f $DIR/$tdir/a1/f3
# The client stays unmounted while LFSCK runs; it is mounted again after the
# repaired-orphan count has been checked.
2152 umount_client $MOUNT
2155 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2157 echo "Inject failure to slow down the LFSCK on OST0"
2158 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2159 do_facet ost1 $LCTL set_param fail_loc=0x161b
2161 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2162 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2165 do_facet ost1 $LCTL set_param fail_loc=0
2167 for k in $(seq $MDSCOUNT); do
2168 # The LFSCK status query interval is 30 seconds. For the case
2169 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2170 # time to guarantee the status sync up.
2171 wait_update_facet mds${k} "$LCTL get_param -n \
2172 mdd.$(facet_svc mds${k}).lfsck_layout |
2173 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2174 error "(2) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" as well; this is a single check without
# waiting.
2177 for k in $(seq $OSTCOUNT); do
2178 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2179 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2180 awk '/^status/ { print $2 }')
2181 [ "$cur_status" == "completed" ] ||
2182 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Number of orphans repaired on mds1: 9 with more than 2 OSTs, else 4.
# NOTE(review): presumably one per surviving OST-object (3+2+2+2 and 2+1+1)
# -- confirm.
2185 local repaired=$(do_facet mds1 $LCTL get_param -n \
2186 mdd.$(facet_svc mds1).lfsck_layout |
2187 awk '/^repaired_orphan/ { print $2 }')
2188 if [ $OSTCOUNT -gt 2 ]; then
2189 [ $repaired -eq 9 ] ||
2190 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2192 [ $repaired -eq 4 ] ||
2193 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
2196 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Pattern bit tested against the value printed by "lfs getstripe -L".
2198 LOV_PATTERN_F_HOLE=0x40000000
2201 # ${fid0}-R-0 is the old f0
2203 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
2204 echo "Check $name, which is the old f0"
2206 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
# "0x" is prepended so the arithmetic expansion below reads the pattern as a
# hexadecimal number.
2208 local pattern=0x$($LFS getstripe -L $name)
2209 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2210 error "(5.2) NOT expect pattern flag hole, but got $pattern"
2212 local stripes=$($LFS getstripe -c $name)
2213 if [ $OSTCOUNT -gt 2 ]; then
2214 [ $stripes -eq 3 ] ||
2215 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2217 [ $stripes -eq 2 ] ||
2218 error "(5.3.2) expect the stripe count is 2, but got $stripes"
2221 local size=$(stat $name | awk '/Size:/ { print $2 }')
2222 [ $size -eq $((4096 * $bcount)) ] ||
2223 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
# f0 lost no OST-object, so every normal operation is expected to work.
2225 cat $name > /dev/null || error "(5.5) cannot read $name"
2227 echo "dummy" >> $name || error "(5.6) cannot write $name"
2229 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2231 touch $name || error "(5.8) cannot touch $name"
2233 rm -f $name || error "(5.9) cannot unlink $name"
2236 # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2238 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
2239 if [ $OSTCOUNT -gt 2 ]; then
2240 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2242 echo "Check $name, it contains the old f1's stripe1"
2245 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
2247 pattern=0x$($LFS getstripe -L $name)
2248 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2249 error "(6.2) expect pattern flag hole, but got $pattern"
2251 stripes=$($LFS getstripe -c $name)
2252 if [ $OSTCOUNT -gt 2 ]; then
2253 [ $stripes -eq 3 ] ||
2254 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2256 [ $stripes -eq 2 ] ||
2257 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2260 size=$(stat $name | awk '/Size:/ { print $2 }')
2261 [ $size -eq $((4096 * $bcount)) ] ||
2262 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
2264 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# Read the whole file while skipping errors (conv=sync,noerror) and count the
# "Input/output error" messages: stripe0 is the hole and holds 256 blocks of
# 4096 bytes.
2266 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2267 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2270 [ $failures -eq 256 ] ||
2271 error "(6.6) expect 256 IO failures, but get $failures"
2273 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2274 [ $size -eq $((4096 * $bcount)) ] ||
2275 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
# Block 0 lies in the missing stripe0; block 300 (300 * 4096 > 1M) lies in
# stripe1, which still exists.
2277 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2278 error "(6.8) write to the LOV EA hole should fail"
2280 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2281 error "(6.9) write to normal stripe should NOT fail"
2283 echo "foo" >> $name && error "(6.10) append write $name should fail"
2285 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2287 touch $name || error "(6.12) cannot touch $name"
2289 rm -f $name || error "(6.13) cannot unlink $name"
2292 # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2294 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
2295 if [ $OSTCOUNT -gt 2 ]; then
2296 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2298 echo "Check $name, it contains the old f2's stripe0"
2301 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
2303 pattern=0x$($LFS getstripe -L $name)
2304 stripes=$($LFS getstripe -c $name)
2305 size=$(stat $name | awk '/Size:/ { print $2 }')
# More than 2 OSTs: stripe1 is a hole between stripe0 and stripe2, so the
# (7.x.1) checks mirror the f1 checks with the hole moved to stripe1.
2306 if [ $OSTCOUNT -gt 2 ]; then
2307 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2308 error "(7.2.1) expect pattern flag hole, but got $pattern"
2310 [ $stripes -eq 3 ] ||
2311 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2313 [ $size -eq $((4096 * $bcount)) ] ||
2314 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
2316 cat $name > /dev/null &&
2317 error "(7.5.1) normal read $name should fail"
2319 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2320 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2322 [ $failures -eq 256 ] ||
2323 error "(7.6) expect 256 IO failures, but get $failures"
2325 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2326 [ $size -eq $((4096 * $bcount)) ] ||
2327 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
# Here block 300 falls into the missing stripe1, while block 0 is in the
# intact stripe0.
2329 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2330 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
2332 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2333 error "(7.8.1) write to normal stripe should NOT fail"
2335 echo "foo" >> $name &&
2336 error "(7.8.3) append write $name should fail"
2338 chown $RUNAS_ID:$RUNAS_GID $name ||
2339 error "(7.9.1) cannot chown on $name"
2341 touch $name || error "(7.10.1) cannot touch $name"
# The (7.x.2) checks: no hole flag, a single stripe and a size of 256 blocks
# are expected, and normal read/append must work.
# NOTE(review): presumably the 2-OST branch, where the lost stripe1 was the
# last stripe -- the else keyword is not visible in this listing.
2343 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2344 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2346 [ $stripes -eq 1 ] ||
2347 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2350 [ $size -eq $((4096 * (256 + 0))) ] ||
2351 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2353 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2355 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2357 chown $RUNAS_ID:$RUNAS_GID $name ||
2358 error "(7.9.2) cannot chown on $name"
2360 touch $name || error "(7.10.2) cannot touch $name"
2363 rm -f $name || error "(7.11) cannot unlink $name"
# f3 was only created when more than 2 OSTs exist; nothing more to check
# otherwise.
2365 [ $OSTCOUNT -le 2 ] && return
2368 # ${fid3}-R-0 should contain the old f3's stripe0 and stripe1
2370 name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
2371 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2373 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
2375 pattern=0x$($LFS getstripe -L $name)
2376 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2377 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2379 stripes=$($LFS getstripe -c $name)
2380 # LFSCK does not know the old f3 had 3 stripes.
2381 # It only tries to find as much as possible.
2382 # The stripe count depends on the last stripe's offset.
2383 [ $stripes -eq 2 ] ||
2384 error "(8.3) expect the stripe count is 2, but got $stripes"
2386 size=$(stat $name | awk '/Size:/ { print $2 }')
2388 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2389 error "(8.4) expect the size $((4096 * 512)), but got $size"
2391 cat $name > /dev/null || error "(8.5) cannot read $name"
2393 echo "dummy" >> $name || error "(8.6) cannot write $name"
2395 chown $RUNAS_ID:$RUNAS_GID $name ||
2396 error "(8.7) cannot chown on $name"
2398 touch $name || error "(8.8) cannot touch $name"
2400 rm -f $name || error "(8.9) cannot unlink $name"
2402 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# Test 21 body: start LFSCK on MDT0000 without a "-t" type argument and verify
# that both the namespace and the layout component report 'scanning-phase1',
# then stop LFSCK again.
# NOTE(review): the function header and closing brace are not part of this
# listing; confirm the boundaries against the full script.
#
# Skip on an MDS older than 2.5.59. The skip path leaves with "return", as the
# skip in test 20 does, instead of "exit 0": this code runs inside a function
# (it declares a local below), and "exit" would end the executing shell rather
# than just this test.
2405 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] &&
2406 skip "ignore the test if MDS is older than 2.5.59" && return
2408 check_mount_and_prep
2409 createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
2411 echo "Start all LFSCK components by default (-s 1)"
# NOTE(review): "-s 1" presumably throttles the scan so that it is still in
# phase 1 when the status is read below, and "-r" presumably resets the scan
# position -- confirm against the lctl lfsck_start documentation.
2412 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -s 1 -r ||
2413 error "Fail to start LFSCK"
2415 echo "namespace LFSCK should be in 'scanning-phase1' status"
# SHOW_NAMESPACE / SHOW_LAYOUT read the mdd lfsck_namespace / lfsck_layout
# parameters on the MDS; the second field of the "status" line is compared.
2416 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
2417 [ "$STATUS" == "scanning-phase1" ] ||
2418 error "Expect namespace 'scanning-phase1', but got '$STATUS'"
2420 echo "layout LFSCK should be in 'scanning-phase1' status"
2421 STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
2422 [ "$STATUS" == "scanning-phase1" ] ||
2423 error "Expect layout 'scanning-phase1', but got '$STATUS'"
2425 echo "Stop all LFSCK components by default"
2426 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 ||
2427 error "Fail to stop LFSCK"
2429 run_test 21 "run all LFSCK components by default"
# Remove the lfsck debug flag that was added near the top of the script; a
# failure of this command is ignored (|| true).
2431 $LCTL set_param debug=-lfsck > /dev/null || true
2433 # restore the MDS/OST sizes and the OST count saved at the top of the script
2434 MDSSIZE=${SAVED_MDSSIZE}
2435 OSTSIZE=${SAVED_OSTSIZE}
2436 OSTCOUNT=${SAVED_OSTCOUNT}
2438 # finally, clean up the system