3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
20 require_dsh_mds || exit 0
22 MCREATE=${MCREATE:-mcreate}
23 SAVED_MDSSIZE=${MDSSIZE}
24 SAVED_OSTSIZE=${OSTSIZE}
25 SAVED_OSTCOUNT=${OSTCOUNT}
26 # use small MDS + OST size to speed formatting time
27 # do not make MDSSIZE/OSTSIZE too small, because they affect the default journal size
30 # too many OSTs are not needed; limit them to reduce the format/start/stop overhead
31 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
33 # build up a clean test environment.
# MDS older than 2.3.60: report the skip, then call check_and_cleanup_lustre.
37 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
38 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
# MDS at version 2.4.90 or older: add test 2c to the exclusion list.
41 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
42 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
# ost1 older than 2.5.55: add tests 11 through 19 to the exclusion list.
44 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
45 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19"
49 $LCTL set_param debug=+lfsck > /dev/null || true
# Names of the MDT0000 and OST0000 targets of filesystem $FSNAME.
51 MDT_DEV="${FSNAME}-MDT0000"
52 OST_DEV="${FSNAME}-OST0000"
# Result of mdsdevname, called with $SINGLEMDS after every "mds" substring
# has been removed from it.
53 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# Command templates. $SINGLEMDS, $LCTL and the device names are expanded here,
# at assignment time; the tests later expand these variables unquoted and may
# append options (for example "$START_NAMESPACE -r").
# Start LFSCK of type "namespace" on ${MDT_DEV}, run on the $SINGLEMDS facet.
54 START_NAMESPACE="do_facet $SINGLEMDS \
55 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
# Start LFSCK of type "layout" on ${MDT_DEV}, run on the $SINGLEMDS facet.
56 START_LAYOUT="do_facet $SINGLEMDS \
57 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
# Start LFSCK of type "layout" on ${OST_DEV}, run on the ost1 facet.
58 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
# Stop LFSCK on ${MDT_DEV}.
59 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the mdd.${MDT_DEV}.lfsck_namespace parameter (values only, -n).
60 SHOW_NAMESPACE="do_facet $SINGLEMDS \
61 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
# Print the mdd.${MDT_DEV}.lfsck_layout parameter (values only, -n).
62 SHOW_LAYOUT="do_facet $SINGLEMDS \
63 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
# Print the obdfilter.${OST_DEV}.lfsck_layout parameter on ost1.
64 SHOW_LAYOUT_ON_OST="do_facet ost1 \
65 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Mount option strings handed to "start"; the NOSCRUB variant adds "noscrub".
66 MOUNT_OPTS_SCRUB="-o user_xattr"
67 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
76 echo "preparing... $nfiles * $ndirs files will be created $(date)."
77 if [ ! -z $igif ]; then
78 #define OBD_FAIL_FID_IGIF 0x1504
79 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
82 cp $LUSTRE/tests/*.sh $DIR/$tdir/
83 if [ $ndirs -gt 0 ]; then
84 createmany -d $DIR/$tdir/d $ndirs
85 createmany -m $DIR/$tdir/f $ndirs
86 if [ $nfiles -gt 0 ]; then
87 for ((i = 0; i < $ndirs; i++)); do
88 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
89 /dev/null || error "createmany $nfiles"
92 createmany -d $DIR/$tdir/e $ndirs
95 if [ ! -z $igif ]; then
96 touch $DIR/$tdir/dummy
97 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
100 echo "prepared $(date)."
106 #define OBD_FAIL_LFSCK_DELAY1 0x1600
107 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
108 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
110 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
112 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
113 [ "$STATUS" == "scanning-phase1" ] ||
114 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
116 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
118 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
119 [ "$STATUS" == "stopped" ] ||
120 error "(6) Expect 'stopped', but got '$STATUS'"
122 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
124 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
125 [ "$STATUS" == "scanning-phase1" ] ||
126 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
128 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
129 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
130 mdd.${MDT_DEV}.lfsck_namespace |
131 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
133 error "(9) unexpected status"
136 local repaired=$($SHOW_NAMESPACE |
137 awk '/^updated_phase1/ { print $2 }')
138 [ $repaired -eq 0 ] ||
139 error "(10) Expect nothing to be repaired, but got: $repaired"
141 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
142 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
143 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
144 mdd.${MDT_DEV}.lfsck_namespace |
145 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
147 error "(12) unexpected status"
150 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
151 [ $((scanned1 + 1)) -eq $scanned2 ] ||
152 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
154 echo "stopall, should NOT crash LU-3649"
155 stopall || error "(14) Fail to stopall"
157 run_test 0 "Control LFSCK manually"
160 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
161 skip "OI Scrub not implemented for ZFS" && return
165 #define OBD_FAIL_FID_INDIR 0x1501
166 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
167 touch $DIR/$tdir/dummy
169 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
171 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
172 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
173 mdd.${MDT_DEV}.lfsck_namespace |
174 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
176 error "(4) unexpected status"
179 local repaired=$($SHOW_NAMESPACE |
180 awk '/^dirent_repaired/ { print $2 }')
181 # for interop with old server
182 [ -z "$repaired" ] &&
183 repaired=$($SHOW_NAMESPACE |
184 awk '/^updated_phase1/ { print $2 }')
186 [ $repaired -eq 1 ] ||
187 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
189 mount_client $MOUNT || error "(6) Fail to start client!"
191 #define OBD_FAIL_FID_LOOKUP 0x1505
192 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
193 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
195 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
197 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
201 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
202 skip "OI Scrub not implemented for ZFS" && return
206 #define OBD_FAIL_FID_INLMA 0x1502
207 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
208 touch $DIR/$tdir/dummy
210 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
212 #define OBD_FAIL_FID_NOLMA 0x1506
213 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
214 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
215 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
216 mdd.${MDT_DEV}.lfsck_namespace |
217 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
219 error "(4) unexpected status"
222 local repaired=$($SHOW_NAMESPACE |
223 awk '/^dirent_repaired/ { print $2 }')
224 # for interop with old server
225 [ -z "$repaired" ] &&
226 repaired=$($SHOW_NAMESPACE |
227 awk '/^updated_phase1/ { print $2 }')
229 [ $repaired -eq 1 ] ||
230 error "(5) Fail to repair missed FID-in-LMA: $repaired"
232 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
233 mount_client $MOUNT || error "(6) Fail to start client!"
235 #define OBD_FAIL_FID_LOOKUP 0x1505
236 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
237 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
239 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
241 run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
246 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
247 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
248 touch $DIR/$tdir/dummy
250 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
252 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
253 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
254 mdd.${MDT_DEV}.lfsck_namespace |
255 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
257 error "(4) unexpected status"
260 local repaired=$($SHOW_NAMESPACE |
261 awk '/^linkea_repaired/ { print $2 }')
262 # for interop with old server
263 [ -z "$repaired" ] &&
264 repaired=$($SHOW_NAMESPACE |
265 awk '/^updated_phase1/ { print $2 }')
267 [ $repaired -eq 1 ] ||
268 error "(5) Fail to repair crashed linkEA: $repaired"
270 mount_client $MOUNT || error "(6) Fail to start client!"
272 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
273 error "(7) Fail to stat $DIR/$tdir/dummy"
275 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
276 local dummyname=$($LFS fid2path $DIR $dummyfid)
277 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
278 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
280 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
286 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
287 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
288 touch $DIR/$tdir/dummy
290 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
292 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
293 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
294 mdd.${MDT_DEV}.lfsck_namespace |
295 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
297 error "(4) unexpected status"
300 local repaired=$($SHOW_NAMESPACE |
301 awk '/^updated_phase2/ { print $2 }')
302 [ $repaired -eq 1 ] ||
303 error "(5) Fail to repair crashed linkEA: $repaired"
305 mount_client $MOUNT || error "(6) Fail to start client!"
307 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
308 error "(7) Fail to stat $DIR/$tdir/dummy"
310 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
311 local dummyname=$($LFS fid2path $DIR $dummyfid)
312 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
313 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
315 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
321 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
322 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
323 touch $DIR/$tdir/dummy
325 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
327 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
328 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
329 mdd.${MDT_DEV}.lfsck_namespace |
330 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
332 error "(4) unexpected status"
335 local repaired=$($SHOW_NAMESPACE |
336 awk '/^updated_phase2/ { print $2 }')
337 [ $repaired -eq 1 ] ||
338 error "(5) Fail to repair crashed linkEA: $repaired"
340 mount_client $MOUNT || error "(6) Fail to start client!"
342 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
343 error "(7) Fail to stat $DIR/$tdir/dummy"
345 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
346 local dummyname=$($LFS fid2path $DIR $dummyfid)
347 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
348 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
350 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
354 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
355 skip "OI Scrub not implemented for ZFS" && return
358 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
359 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
361 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
362 echo "start $SINGLEMDS with disabling OI scrub"
363 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
364 error "(2) Fail to start MDS!"
366 #define OBD_FAIL_LFSCK_DELAY2 0x1601
367 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
368 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
369 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
370 mdd.${MDT_DEV}.lfsck_namespace |
371 awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || {
373 error "(5) unexpected status"
376 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
377 [ "$STATUS" == "scanning-phase1" ] ||
378 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
380 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
381 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
382 mdd.${MDT_DEV}.lfsck_namespace |
383 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
385 error "(7) unexpected status"
# After LFSCK has completed, the "flags" field must be empty. FLAGS is
# declared local so that the value does not leak into the global scope.
388 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
389 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
391 local repaired=$($SHOW_NAMESPACE |
392 awk '/^dirent_repaired/ { print $2 }')
393 # for interop with old server
394 [ -z "$repaired" ] &&
395 repaired=$($SHOW_NAMESPACE |
396 awk '/^updated_phase1/ { print $2 }')
398 [ $repaired -ge 9 ] ||
399 error "(9) Fail to re-generate FID-in-dirent: $repaired"
401 mount_client $MOUNT || error "(10) Fail to start client!"
403 #define OBD_FAIL_FID_LOOKUP 0x1505
404 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
405 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
406 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
408 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
412 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
413 skip "OI Scrub not implemented for ZFS" && return
416 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
417 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
419 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
420 echo "start $SINGLEMDS with disabling OI scrub"
421 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
422 error "(2) Fail to start MDS!"
424 #define OBD_FAIL_LFSCK_DELAY2 0x1601
425 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
426 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
427 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
428 mdd.${MDT_DEV}.lfsck_namespace |
429 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || {
431 error "(5) unexpected status"
434 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
435 [ "$STATUS" == "scanning-phase1" ] ||
436 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
438 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
439 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
440 mdd.${MDT_DEV}.lfsck_namespace |
441 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
443 error "(7) unexpected status"
# After LFSCK has completed, the "flags" field must be empty. FLAGS is
# declared local so that the value does not leak into the global scope.
446 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
447 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
449 local repaired=$($SHOW_NAMESPACE |
450 awk '/^dirent_repaired/ { print $2 }')
451 # for interop with old server
452 [ -z "$repaired" ] &&
453 repaired=$($SHOW_NAMESPACE |
454 awk '/^updated_phase1/ { print $2 }')
456 [ $repaired -ge 2 ] ||
457 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
459 mount_client $MOUNT || error "(10) Fail to start client!"
461 #define OBD_FAIL_FID_LOOKUP 0x1505
462 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
463 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
465 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
467 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
468 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
469 local dummyname=$($LFS fid2path $DIR $dummyfid)
470 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
471 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
473 run_test 5 "LFSCK can handle IGIF object upgrading"
478 #define OBD_FAIL_LFSCK_DELAY1 0x1600
479 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
480 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
482 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
483 [ "$STATUS" == "scanning-phase1" ] ||
484 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
486 # Sleep 3 sec to guarantee at least one object processed by LFSCK
488 # Fail the LFSCK to guarantee there is at least one checkpoint
489 #define OBD_FAIL_LFSCK_FATAL1 0x1608
490 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
491 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
492 mdd.${MDT_DEV}.lfsck_namespace |
493 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
495 error "(4) unexpected status"
498 local POS0=$($SHOW_NAMESPACE |
499 awk '/^last_checkpoint_position/ { print $2 }' |
502 #define OBD_FAIL_LFSCK_DELAY1 0x1600
503 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
504 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
506 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
507 [ "$STATUS" == "scanning-phase1" ] ||
508 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
510 local POS1=$($SHOW_NAMESPACE |
511 awk '/^latest_start_position/ { print $2 }' |
513 [ $POS0 -lt $POS1 ] ||
514 error "(7) Expect larger than: $POS0, but got $POS1"
516 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
517 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
518 mdd.${MDT_DEV}.lfsck_namespace |
519 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
521 error "(8) unexpected status"
524 run_test 6a "LFSCK resumes from last checkpoint (1)"
529 #define OBD_FAIL_LFSCK_DELAY2 0x1601
530 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
531 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
533 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
534 [ "$STATUS" == "scanning-phase1" ] ||
535 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
537 # Sleep 5 sec to guarantee that we are in the directory scanning
539 # Fail the LFSCK to guarantee there is at least one checkpoint
540 #define OBD_FAIL_LFSCK_FATAL2 0x1609
541 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
542 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
543 mdd.${MDT_DEV}.lfsck_namespace |
544 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
546 error "(4) unexpected status"
549 local O_POS0=$($SHOW_NAMESPACE |
550 awk '/^last_checkpoint_position/ { print $2 }' |
553 local D_POS0=$($SHOW_NAMESPACE |
554 awk '/^last_checkpoint_position/ { print $4 }')
556 #define OBD_FAIL_LFSCK_DELAY2 0x1601
557 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
558 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
560 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
561 [ "$STATUS" == "scanning-phase1" ] ||
562 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
564 local O_POS1=$($SHOW_NAMESPACE |
565 awk '/^latest_start_position/ { print $2 }' |
567 local D_POS1=$($SHOW_NAMESPACE |
568 awk '/^latest_start_position/ { print $4 }')
570 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
571 [ $O_POS0 -lt $O_POS1 ] ||
572 error "(7.1) $O_POS1 is not larger than $O_POS0"
574 [ $D_POS0 -lt $D_POS1 ] ||
575 error "(7.2) $D_POS1 is not larger than $D_POS0"
578 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
579 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
580 mdd.${MDT_DEV}.lfsck_namespace |
581 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
583 error "(8) unexpected status"
586 run_test 6b "LFSCK resumes from last checkpoint (2)"
593 #define OBD_FAIL_LFSCK_DELAY2 0x1601
594 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
595 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
597 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
598 [ "$STATUS" == "scanning-phase1" ] ||
599 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
601 # Sleep 3 sec to guarantee at least one object processed by LFSCK
603 echo "stop $SINGLEMDS"
604 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
606 echo "start $SINGLEMDS"
607 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
608 error "(5) Fail to start MDS!"
610 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
611 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
612 mdd.${MDT_DEV}.lfsck_namespace |
613 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
615 error "(6) unexpected status"
618 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
624 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
625 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
626 for ((i = 0; i < 20; i++)); do
627 touch $DIR/$tdir/dummy${i}
630 #define OBD_FAIL_LFSCK_DELAY3 0x1602
631 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
632 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
633 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
634 mdd.${MDT_DEV}.lfsck_namespace |
635 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
637 error "(4) unexpected status"
640 echo "stop $SINGLEMDS"
641 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
643 echo "start $SINGLEMDS"
644 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
645 error "(6) Fail to start MDS!"
647 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
648 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
649 mdd.${MDT_DEV}.lfsck_namespace |
650 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
652 error "(7) unexpected status"
655 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
660 formatall > /dev/null
666 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
667 [ "$STATUS" == "init" ] ||
668 error "(2) Expect 'init', but got '$STATUS'"
670 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
671 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
672 mkdir $DIR/$tdir/crashed
674 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
675 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
676 for ((i = 0; i < 5; i++)); do
677 touch $DIR/$tdir/dummy${i}
680 umount_client $MOUNT || error "(3) Fail to stop client!"
682 #define OBD_FAIL_LFSCK_DELAY2 0x1601
683 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
684 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
686 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
687 [ "$STATUS" == "scanning-phase1" ] ||
688 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
690 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
692 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
693 [ "$STATUS" == "stopped" ] ||
694 error "(7) Expect 'stopped', but got '$STATUS'"
696 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
698 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
699 [ "$STATUS" == "scanning-phase1" ] ||
700 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
702 #define OBD_FAIL_LFSCK_FATAL2 0x1609
703 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
704 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
705 mdd.${MDT_DEV}.lfsck_namespace |
706 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
708 error "(10) unexpected status"
711 #define OBD_FAIL_LFSCK_DELAY1 0x1600
712 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
713 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
715 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
716 [ "$STATUS" == "scanning-phase1" ] ||
717 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
719 #define OBD_FAIL_LFSCK_CRASH 0x160a
720 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
723 echo "stop $SINGLEMDS"
724 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
726 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
727 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
729 echo "start $SINGLEMDS"
730 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
731 error "(14) Fail to start MDS!"
733 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
734 [ "$STATUS" == "crashed" ] ||
735 error "(15) Expect 'crashed', but got '$STATUS'"
737 #define OBD_FAIL_LFSCK_DELAY2 0x1601
738 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
739 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
741 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
742 [ "$STATUS" == "scanning-phase1" ] ||
743 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
745 echo "stop $SINGLEMDS"
746 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
748 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
749 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
751 echo "start $SINGLEMDS"
752 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
753 error "(19) Fail to start MDS!"
755 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
756 [ "$STATUS" == "paused" ] ||
757 error "(20) Expect 'paused', but got '$STATUS'"
759 #define OBD_FAIL_LFSCK_DELAY3 0x1602
760 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
762 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
763 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
764 mdd.${MDT_DEV}.lfsck_namespace |
765 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
767 error "(22) unexpected status"
770 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
771 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
772 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
774 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
775 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
776 mdd.${MDT_DEV}.lfsck_namespace |
777 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
779 error "(24) unexpected status"
782 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
783 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
785 run_test 8 "LFSCK state machine"
788 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
789 skip "Testing on UP system, the speed may be inaccurate."
795 local BASE_SPEED1=100
797 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
800 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
801 [ "$STATUS" == "scanning-phase1" ] ||
802 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
804 local SPEED=$($SHOW_NAMESPACE |
805 awk '/^average_speed_phase1/ { print $2 }')
807 # There may be time error, normally it should be less than 2 seconds.
808 # We allow another 20% schedule error.
810 # MAX_MARGIN = 1.2 = 12 / 10
811 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
812 RUN_TIME1 * 12 / 10))
813 [ $SPEED -lt $MAX_SPEED ] ||
814 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
817 local BASE_SPEED2=300
819 do_facet $SINGLEMDS \
820 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
823 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
824 # MIN_MARGIN = 0.8 = 8 / 10
825 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
826 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
827 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
828 # Account for slow ZFS performance - LU-4934
# The lower speed bound is not enforced when the MDS backend is zfs.
# facet_fstype yields a string, so it is compared with ==; the integer
# operator -eq made "[" fail with "integer expression expected", which
# meant the ZFS exemption could never take effect.
829 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" == "zfs" ] ||
830 error "(5) Got speed $SPEED, expected more than $MIN_SPEED"
832 # MAX_MARGIN = 1.2 = 12 / 10
833 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
834 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
835 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
836 [ $SPEED -lt $MAX_SPEED ] ||
837 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
839 do_facet $SINGLEMDS \
840 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
842 wait_update_facet $SINGLEMDS \
843 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
844 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
845 error "(7) Failed to get expected 'completed'"
847 run_test 9a "LFSCK speed control (1)"
850 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
851 skip "Testing on UP system, the speed may be inaccurate."
857 echo "Preparing another 50 * 50 files (with error) at $(date)."
858 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
859 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
860 createmany -d $DIR/$tdir/d 50
861 createmany -m $DIR/$tdir/f 50
862 for ((i = 0; i < 50; i++)); do
863 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
866 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
867 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
868 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
869 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
870 mdd.${MDT_DEV}.lfsck_namespace |
871 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
873 error "(5) unexpected status"
876 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
877 echo "Prepared at $(date)."
881 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
884 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
885 [ "$STATUS" == "scanning-phase2" ] ||
886 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
888 local SPEED=$($SHOW_NAMESPACE |
889 awk '/^average_speed_phase2/ { print $2 }')
890 # There may be time error, normally it should be less than 2 seconds.
891 # We allow another 20% schedule error.
893 # MAX_MARGIN = 1.2 = 12 / 10
894 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
895 RUN_TIME1 * 12 / 10))
896 [ $SPEED -lt $MAX_SPEED ] ||
897 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
900 local BASE_SPEED2=150
902 do_facet $SINGLEMDS \
903 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
906 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
907 # MIN_MARGIN = 0.8 = 8 / 10
908 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
909 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
910 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# The lower speed bound is not enforced when the MDS backend is zfs.
# facet_fstype yields a string, so it is compared with ==; the integer
# operator -eq made "[" fail with "integer expression expected", which
# meant the ZFS exemption could never take effect.
911 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" == "zfs" ] ||
912 error "(9) Got speed $SPEED, expected more than $MIN_SPEED"
914 # MAX_MARGIN = 1.2 = 12 / 10
915 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
916 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
917 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
918 [ $SPEED -lt $MAX_SPEED ] ||
919 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
921 do_facet $SINGLEMDS \
922 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
923 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
924 mdd.${MDT_DEV}.lfsck_namespace |
925 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
927 error "(11) unexpected status"
930 run_test 9b "LFSCK speed control (2)"
934 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
935 skip "lookup(..)/linkea on ZFS issue" && return
939 echo "Preparing more files with error at $(date)."
940 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
941 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
943 for ((i = 0; i < 1000; i = $((i+2)))); do
944 mkdir -p $DIR/$tdir/d${i}
945 touch $DIR/$tdir/f${i}
946 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
949 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
950 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
952 for ((i = 1; i < 1000; i = $((i+2)))); do
953 mkdir -p $DIR/$tdir/d${i}
954 touch $DIR/$tdir/f${i}
955 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
958 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
959 echo "Prepared at $(date)."
961 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
964 mount_client $MOUNT || error "(3) Fail to start client!"
966 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
969 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
970 [ "$STATUS" == "scanning-phase1" ] ||
971 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
973 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
975 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
977 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
979 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
981 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
983 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
985 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
987 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
988 error "(14) Fail to softlink!"
990 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
991 [ "$STATUS" == "scanning-phase1" ] ||
992 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
994 do_facet $SINGLEMDS \
995 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
996 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
997 mdd.${MDT_DEV}.lfsck_namespace |
998 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1000 error "(16) unexpected status"
1003 run_test 10 "System is available during LFSCK scanning"
# Remove the LAST_ID file and the d0/0 entry under O/${idx} on ost${ost}.
# NOTE(review): ${ost} and ${idx} are not assigned on the lines shown here;
# presumably they are taken from $1 and $2 (cf. "ost_remove_lastid 1 0") -
# confirm.
# Returns 1 if mount_fstype fails and 2 if unmount_fstype fails.
1006 ost_remove_lastid() {
# All remote commands below are run on the ost${ost} facet.
1009 local rcmd="do_facet ost${ost}"
1011 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1013 # step 1: local mount
1014 mount_fstype ost${ost} || return 1
1015 # step 2: remove the specified LAST_ID
# The brace list expands into two paths: .../LAST_ID and .../d0/0.
1016 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
# Unmount again; a failure is reported with a distinct return code.
1018 unmount_fstype ost${ost} || return 2
# Test 11a: create 64 files, remove ost1's LAST_ID, restart ost1, then run a
# layout LFSCK on the OST. The run must first show the "crashed_lastid" flag
# and afterwards reach status "completed" with an empty flags field.
1022 check_mount_and_prep
# -c 1 -i 0: one stripe per file, starting at OST index 0.
1023 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1024 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
1029 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1031 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1032 error "(2) Fail to start ost1"
# Arm the fail_loc named OBD_FAIL_LFSCK_DELAY4 with fail_val=3 before LFSCK is
# started. Presumably this delays the scan long enough for the flag check
# below to observe "crashed_lastid" -- TODO confirm against the LFSCK code.
1034 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1035 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1037 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1038 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
# Poll the "flags" field on ost1 until it reads "crashed_lastid"; the trailing
# 60 is presumably a timeout in seconds (TODO confirm). Inside the double
# quotes \\\$2 collapses to \$2, presumably so that $2 survives one more
# round of shell evaluation before it reaches awk.
1040 wait_update_facet ost1 "$LCTL get_param -n \
1041 obdfilter.${OST_DEV}.lfsck_layout |
1042 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1044 error "(5) unexpected status"
# Clear the injected fail_val/fail_loc again.
1047 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
# Now wait for the "status" field to become "completed".
1049 wait_update_facet ost1 "$LCTL get_param -n \
1050 obdfilter.${OST_DEV}.lfsck_layout |
1051 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1053 error "(6) unexpected status"
1056 echo "the LAST_ID(s) should have been rebuilt"
# NOTE(review): FLAGS is assigned without "local" in the visible lines.
1057 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1058 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1060 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b: create files while fail_loc 0x160d is set on ost1, record the
# last_id reported for sequence 0x100000000, restart ost1 and check that the
# value reported after the restart is smaller. Then run a layout LFSCK on the
# OST, restart ost1 once more and check that last_id equals the recorded one.
1063 check_mount_and_prep
1064 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1066 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1067 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1068 do_facet ost1 $LCTL set_param fail_loc=0x160d
1069 createmany -o $DIR/$tdir/f 64
# lastid1: second ':'-separated field of the last_id line that contains
# 0x100000000, read before ost1 is stopped.
1070 local lastid1=$(do_facet ost1 "lctl get_param -n \
1071 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1072 awk -F: '{ print $2 }')
1074 umount_client $MOUNT
1075 stop ost1 || error "(1) Fail to stop ost1"
# Set the fail_loc named OBD_FAIL_OST_ENOSPC before ost1 is started again.
# NOTE(review): the reason is not stated here; presumably it prevents new
# object creation from advancing last_id -- TODO confirm.
1077 #define OBD_FAIL_OST_ENOSPC 0x215
1078 do_facet ost1 $LCTL set_param fail_loc=0x215
1080 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1081 error "(2) Fail to start ost1"
# Retry up to 60 times until the restarted OST reports a last_id value.
# NOTE(review): "i" and "lastid2" are not declared local in the visible
# lines, and $lastid2 is unquoted in the emptiness test below.
1083 for ((i = 0; i < 60; i++)); do
1084 lastid2=$(do_facet ost1 "lctl get_param -n \
1085 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1086 awk -F: '{ print $2 }')
1087 [ ! -z $lastid2 ] && break;
1091 echo "the on-disk LAST_ID should be smaller than the expected one"
1092 [ $lastid1 -gt $lastid2 ] ||
1093 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1095 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1096 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
# Wait for the OST-side layout LFSCK status to become "completed".
1098 wait_update_facet ost1 "$LCTL get_param -n \
1099 obdfilter.${OST_DEV}.lfsck_layout |
1100 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1102 error "(6) unexpected status"
# Restart ost1 and then compare the reported last_id with lastid1.
1105 stop ost1 || error "(7) Fail to stop ost1"
1107 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1108 error "(8) Fail to start ost1"
1110 echo "the on-disk LAST_ID should have been rebuilt"
1111 wait_update_facet ost1 "$LCTL get_param -n \
1112 obdfilter.${ost1_svc}.last_id | grep 0x100000000 |
1113 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
1114 $LCTL get_param -n obdfilter.${ost1_svc}.last_id
1115 error "(9) expect lastid1 0x100000000:$lastid1"
# Clear the fail_loc on ost1 and shut everything down.
1118 do_facet ost1 $LCTL set_param fail_loc=0
1119 stopall || error "(10) Fail to stopall"
1121 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12: with at least two MDSes, drive LFSCK on all targets through single
# "lctl lfsck_start/lfsck_stop ... -A" commands issued on mds1. For both the
# namespace and the layout component it checks scanning-phase1 after a start
# with -s 1, "stopped" after the stop, and "completed" after a restart with
# -s 0.
# NOTE(review): "exit 0" on the skip path ends the shell that executes it
# rather than only this test; confirm that is intended.
1124 [ $MDSCOUNT -lt 2 ] &&
1125 skip "We need at least 2 MDSes for test_12" && exit 0
1127 check_mount_and_prep
# One directory per MDT index (k - 1), each populated with 100 files.
1128 for k in $(seq $MDSCOUNT); do
1129 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1130 createmany -o $DIR/$tdir/${k}/f 100 ||
1131 error "(0) Fail to create 100 files."
1134 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1135 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1136 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1138 echo "All the LFSCK targets should be in 'scanning-phase1' status."
# Read the "status" field of lfsck_namespace on every MDS.
1139 for k in $(seq $MDSCOUNT); do
1140 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1141 mdd.$(facet_svc mds${k}).lfsck_namespace |
1142 awk '/^status/ { print $2 }')
1143 [ "$STATUS" == "scanning-phase1" ] ||
1144 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1147 echo "Stop namespace LFSCK on all targets by single lctl command."
1148 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1149 error "(4) Fail to stop LFSCK on all devices!"
1151 echo "All the LFSCK targets should be in 'stopped' status."
1152 for k in $(seq $MDSCOUNT); do
1153 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1154 mdd.$(facet_svc mds${k}).lfsck_namespace |
1155 awk '/^status/ { print $2 }')
1156 [ "$STATUS" == "stopped" ] ||
1157 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
1160 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1161 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1162 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1164 echo "All the LFSCK targets should be in 'completed' status."
# Poll each MDS until its namespace LFSCK status reads "completed".
1165 for k in $(seq $MDSCOUNT); do
1166 wait_update_facet mds${k} "$LCTL get_param -n \
1167 mdd.$(facet_svc mds${k}).lfsck_namespace |
1168 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1169 error "(7) MDS${k} is not the expected 'completed'"
# Same start / stop / restart sequence for the layout component.
1172 echo "Start layout LFSCK on all targets by single command (-s 1)."
1173 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1174 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1176 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1177 for k in $(seq $MDSCOUNT); do
1178 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1179 mdd.$(facet_svc mds${k}).lfsck_layout |
1180 awk '/^status/ { print $2 }')
1181 [ "$STATUS" == "scanning-phase1" ] ||
1182 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1185 echo "Stop layout LFSCK on all targets by single lctl command."
1186 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1187 error "(10) Fail to stop LFSCK on all devices!"
1189 echo "All the LFSCK targets should be in 'stopped' status."
1190 for k in $(seq $MDSCOUNT); do
1191 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1192 mdd.$(facet_svc mds${k}).lfsck_layout |
1193 awk '/^status/ { print $2 }')
1194 [ "$STATUS" == "stopped" ] ||
1195 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# For layout LFSCK the OSTs are checked too, via obdfilter.*.lfsck_layout.
1198 for k in $(seq $OSTCOUNT); do
1199 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1200 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1201 awk '/^status/ { print $2 }')
1202 [ "$STATUS" == "stopped" ] ||
1203 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1206 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1207 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1208 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1210 echo "All the LFSCK targets should be in 'completed' status."
1211 for k in $(seq $MDSCOUNT); do
1212 # The LFSCK status query interval is 30 seconds. For the case
1213 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1214 # time to guarantee the status sync up.
1215 wait_update_facet mds${k} "$LCTL get_param -n \
1216 mdd.$(facet_svc mds${k}).lfsck_layout |
1217 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1218 error "(14) MDS${k} is not the expected 'completed'"
1221 run_test 12 "single command to trigger LFSCK on all devices"
# Test 13: create 32 files while fail_loc 0x160f is set on the MDS, run a
# layout LFSCK and require that repaired_others reports exactly 32.
1225 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1226 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1227 echo "MDT-object FID."
1230 check_mount_and_prep
1232 echo "Inject failure stub to simulate bad lmm_oi"
# The fault is armed only around the file creation and cleared right after.
1233 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1234 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1235 createmany -o $DIR/$tdir/f 32
1236 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1238 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1239 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
# Poll the MDS until the layout LFSCK status reads "completed".
1241 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1242 mdd.${MDT_DEV}.lfsck_layout |
1243 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1245 error "(2) unexpected status"
# One repair is expected per file created under the injected fault.
1248 local repaired=$($SHOW_LAYOUT |
1249 awk '/^repaired_others/ { print $2 }')
1250 [ $repaired -eq 32 ] ||
1251 error "(3) Fail to repair crashed lmm_oi: $repaired"
1253 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14: create files while fail_loc 0x1610 is set on ost1 so that listing
# the directory fails, then run layout LFSCK twice. The first run (no -c)
# must count at least 32 dangling references while "ls" still fails; the
# second run (with -c) must leave the directory listable.
1257 echo "The OST-object referenced by the MDT-object should be there;"
1258 echo "otherwise, the LFSCK should re-create the missed OST-object."
1261 check_mount_and_prep
1262 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# NOTE(review): precreated_ost_obj_count is defined elsewhere; judging by its
# name it returns the number of pre-created OST objects -- TODO confirm.
1264 local count=$(precreated_ost_obj_count 0 0)
1266 echo "Inject failure stub to simulate dangling referenced MDT-object"
1267 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1268 do_facet ost1 $LCTL set_param fail_loc=0x1610
# Create count + 32 files while the fault is armed.
1269 createmany -o $DIR/$tdir/f $((count + 32))
1270 do_facet ost1 $LCTL set_param fail_loc=0
1272 # exhaust other pre-created dangling cases
1273 count=$(precreated_ost_obj_count 0 0)
1274 createmany -o $DIR/$tdir/a $count ||
1275 error "(0) Fail to create $count files."
1277 echo "'ls' should fail because of dangling referenced MDT-object"
1278 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
1280 echo "Trigger layout LFSCK to find out dangling reference"
1281 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1283 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1284 mdd.${MDT_DEV}.lfsck_layout |
1285 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1287 error "(3) unexpected status"
# The counter is already expected to be >= 32 after this first run, although
# the listing below is still expected to fail.
1290 local repaired=$($SHOW_LAYOUT |
1291 awk '/^repaired_dangling/ { print $2 }')
1292 [ $repaired -ge 32 ] ||
1293 error "(4) Fail to repair dangling reference: $repaired"
1295 echo "'ls' should fail because it will not repair dangling by default"
1296 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail."
1298 echo "Trigger layout LFSCK to repair dangling reference"
# Second run adds -c; per the echo above this is the run that repairs.
1299 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1301 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1302 mdd.${MDT_DEV}.lfsck_layout |
1303 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1305 error "(7) unexpected status"
1308 repaired=$($SHOW_LAYOUT |
1309 awk '/^repaired_dangling/ { print $2 }')
1310 [ $repaired -ge 32 ] ||
1311 error "(8) Fail to repair dangling reference: $repaired"
1313 echo "'ls' should success after layout LFSCK repairing"
1314 ls -ail $DIR/$tdir > /dev/null || error "(9) ls should success."
1316 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Test 15a: write one file while fail_loc 0x1611 is set on ost1, run a layout
# LFSCK and require repaired_unmatched_pair to be exactly 1.
1320 echo "If the OST-object referenced by the MDT-object back points"
1321 echo "to some non-exist MDT-object, then the LFSCK should repair"
1322 echo "the OST-object to back point to the right MDT-object."
1325 check_mount_and_prep
1326 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1328 echo "Inject failure stub to make the OST-object to back point to"
1329 echo "non-exist MDT-object."
1330 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
# The fault stays armed across the write and the lock cancel, then is cleared.
1332 do_facet ost1 $LCTL set_param fail_loc=0x1611
1333 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1334 cancel_lru_locks osc
1335 do_facet ost1 $LCTL set_param fail_loc=0
1337 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1338 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
# Poll the MDS until the layout LFSCK status reads "completed".
1340 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1341 mdd.${MDT_DEV}.lfsck_layout |
1342 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1344 error "(2) unexpected status"
# Only f0 was written under the fault, hence exactly one repair is expected.
1347 local repaired=$($SHOW_LAYOUT |
1348 awk '/^repaired_unmatched_pair/ { print $2 }')
1349 [ $repaired -eq 1 ] ||
1350 error "(3) Fail to repair unmatched pair: $repaired"
1352 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b: like 15a but with fail_loc 0x1612 and an extra "guard" file that
# is written before the fault is armed. The layout LFSCK must report exactly
# one repaired_unmatched_pair.
1356 echo "If the OST-object referenced by the MDT-object back points"
1357 echo "to other MDT-object that doesn't recognize the OST-object,"
1358 echo "then the LFSCK should repair it to back point to the right"
1359 echo "MDT-object (the first one)."
1362 check_mount_and_prep
1363 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# "guard" is created with no fault injected.
1364 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1365 cancel_lru_locks osc
1367 echo "Inject failure stub to make the OST-object to back point to"
1368 echo "other MDT-object"
1370 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1371 do_facet ost1 $LCTL set_param fail_loc=0x1612
1372 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1373 cancel_lru_locks osc
1374 do_facet ost1 $LCTL set_param fail_loc=0
1376 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1377 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
# Poll the MDS until the layout LFSCK status reads "completed".
1379 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1380 mdd.${MDT_DEV}.lfsck_layout |
1381 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1383 error "(2) unexpected status"
1386 local repaired=$($SHOW_LAYOUT |
1387 awk '/^repaired_unmatched_pair/ { print $2 }')
1388 [ $repaired -eq 1 ] ||
1389 error "(3) Fail to repair unmatched pair: $repaired"
1391 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 16: chown a file while fail_loc 0x1613 is set on the MDS, run a layout
# LFSCK and require repaired_inconsistent_owner to be exactly 1.
1395 echo "If the OST-object's owner information does not match the owner"
1396 echo "information stored in the MDT-object, then the LFSCK trust the"
1397 echo "MDT-object and update the OST-object's owner information."
1400 check_mount_and_prep
1401 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1402 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1403 cancel_lru_locks osc
1405 echo "Inject failure stub to skip OST-object owner changing"
1406 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
# The fault is armed only around the chown to uid 1 / gid 1.
1407 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
1408 chown 1.1 $DIR/$tdir/f0
1409 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1411 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1414 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
# Poll the MDS until the layout LFSCK status reads "completed".
1416 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1417 mdd.${MDT_DEV}.lfsck_layout |
1418 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1420 error "(2) unexpected status"
1423 local repaired=$($SHOW_LAYOUT |
1424 awk '/^repaired_inconsistent_owner/ { print $2 }')
1425 [ $repaired -eq 1 ] ||
1426 error "(3) Fail to repair inconsistent owner: $repaired"
1428 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17: with fail_loc 0x1614 set on the MDS, write a 1 MiB "guard" file and
# create f0; f0 is then expected to show guard's size. After a layout LFSCK
# reports one repaired_multiple_referenced, writing 2 MiB to f0 must leave
# guard at 1048576 bytes.
1432 echo "If more than one MDT-objects reference the same OST-object,"
1433 echo "and the OST-object only recognizes one MDT-object, then the"
1434 echo "LFSCK should create new OST-objects for such non-recognized"
1438 check_mount_and_prep
1439 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1441 echo "Inject failure stub to make two MDT-objects to refernce"
1442 echo "the OST-object"
1444 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1445 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1447 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1448 cancel_lru_locks osc
# "createmany -o .../f 1" produces the file f0 that is checked below.
1450 createmany -o $DIR/$tdir/f 1
1452 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1454 cancel_lru_locks mdc
1455 cancel_lru_locks osc
1457 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
# Field 5 of "ls -l" is the size; nothing was written to f0 itself, so a size
# of 1 MiB is taken as evidence that it shares guard's object.
1458 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1459 [ $size -eq 1048576 ] ||
1460 error "(1) f0 (wrong) size should be 1048576, but got $size"
1462 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1465 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1467 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1468 mdd.${MDT_DEV}.lfsck_layout |
1469 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1471 error "(3) unexpected status"
1474 local repaired=$($SHOW_LAYOUT |
1475 awk '/^repaired_multiple_referenced/ { print $2 }')
1476 [ $repaired -eq 1 ] ||
1477 error "(4) Fail to repair multiple references: $repaired"
1479 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
# Growing f0 to 2 MiB must no longer change the size seen through guard.
1480 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1481 error "(5) Fail to write f0."
1482 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1483 [ $size -eq 1048576 ] ||
1484 error "(6) guard size should be 1048576, but got $size"
1486 run_test 17 "LFSCK can repair multiple references"
# Test 18a: write a1/f1 (and a2/f2 on MDT index 1 when MDSCOUNT >= 2), chown
# the files while fail_loc 0x1615 is set, and check that the sizes no longer
# match. Then run "layout LFSCK -r -o" and check that every MDS and OST
# reaches "completed", that repaired_orphan is 1 on mds1 (2 on mds2), and
# that the file sizes match the saved size again.
1490 echo "The target MDT-object is there, but related stripe information"
1491 echo "is lost or partly lost. The LFSCK should regenerate the missed"
1492 echo "layout EA entries."
1495 check_mount_and_prep
1496 $LFS mkdir -i 0 $DIR/$tdir/a1
1497 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1498 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With "ls -i" the inode number is field 1, so the size is field 6.
1500 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1502 $LFS path2fid $DIR/$tdir/a1/f1
1503 $LFS getstripe $DIR/$tdir/a1/f1
# Second file: directory on MDT index 1, two stripes starting at OST index 1.
1505 if [ $MDSCOUNT -ge 2 ]; then
1506 $LFS mkdir -i 1 $DIR/$tdir/a2
1507 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1508 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1509 $LFS path2fid $DIR/$tdir/a2/f2
1510 $LFS getstripe $DIR/$tdir/a2/f2
1513 cancel_lru_locks osc
1515 echo "Inject failure, to make the MDT-object lost its layout EA"
1516 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
# The chown is the operation performed while the fault is armed.
1517 do_facet mds1 $LCTL set_param fail_loc=0x1615
1518 chown 1.1 $DIR/$tdir/a1/f1
1520 if [ $MDSCOUNT -ge 2 ]; then
1521 do_facet mds2 $LCTL set_param fail_loc=0x1615
1522 chown 1.1 $DIR/$tdir/a2/f2
1528 do_facet mds1 $LCTL set_param fail_loc=0
1529 if [ $MDSCOUNT -ge 2 ]; then
1530 do_facet mds2 $LCTL set_param fail_loc=0
1533 cancel_lru_locks mdc
1534 cancel_lru_locks osc
1536 echo "The file size should be incorrect since layout EA is lost"
1537 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1538 [ "$cur_size" != "$saved_size" ] ||
1539 error "(1) Expect incorrect file1 size"
# f2 is compared against f1's saved size; both were written with the same dd
# parameters (bs=1M count=2).
1541 if [ $MDSCOUNT -ge 2 ]; then
1542 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1543 [ "$cur_size" != "$saved_size" ] ||
1544 error "(2) Expect incorrect file2 size"
1547 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1548 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
1550 for k in $(seq $MDSCOUNT); do
1551 # The LFSCK status query interval is 30 seconds. For the case
1552 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1553 # time to guarantee the status sync up.
1554 wait_update_facet mds${k} "$LCTL get_param -n \
1555 mdd.$(facet_svc mds${k}).lfsck_layout |
1556 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1557 error "(4) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without polling, after all MDSes have completed.
1560 for k in $(seq $OSTCOUNT); do
1561 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1562 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1563 awk '/^status/ { print $2 }')
1564 [ "$cur_status" == "completed" ] ||
1565 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# mds1 must report one repaired orphan (f1 has one stripe).
1568 local repaired=$(do_facet mds1 $LCTL get_param -n \
1569 mdd.$(facet_svc mds1).lfsck_layout |
1570 awk '/^repaired_orphan/ { print $2 }')
1571 [ $repaired -eq 1 ] ||
1572 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
# mds2 must report two (f2 was created with two stripes).
1574 if [ $MDSCOUNT -ge 2 ]; then
1575 repaired=$(do_facet mds2 $LCTL get_param -n \
1576 mdd.$(facet_svc mds2).lfsck_layout |
1577 awk '/^repaired_orphan/ { print $2 }')
1578 [ $repaired -eq 2 ] ||
1579 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
1582 $LFS path2fid $DIR/$tdir/a1/f1
1583 $LFS getstripe $DIR/$tdir/a1/f1
1585 if [ $MDSCOUNT -ge 2 ]; then
1586 $LFS path2fid $DIR/$tdir/a2/f2
1587 $LFS getstripe $DIR/$tdir/a2/f2
1590 echo "The file size should be correct after layout LFSCK scanning"
1591 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1592 [ "$cur_size" == "$saved_size" ] ||
1593 error "(7) Expect file1 size $saved_size, but got $cur_size"
1595 if [ $MDSCOUNT -ge 2 ]; then
1596 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1597 [ "$cur_size" == "$saved_size" ] ||
1598 error "(8) Expect file2 size $saved_size, but got $cur_size"
1601 run_test 18a "Find out orphan OST-object and repair it (1)"
# Test 18b: record the FIDs of a1/f1 (and a2/f2 when MDSCOUNT >= 2), remove
# the files while fail_loc 0x1616 is set, then run "layout LFSCK -r -o". The
# test expects entries named R-<fid> under .lustre/lost+found/MDTxxxx, moves
# them back to their old paths and checks that the sizes match the saved one.
1605 echo "The target MDT-object is lost. The LFSCK should re-create the"
1606 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1607 echo "can move it back to normal namespace manually."
1610 check_mount_and_prep
1611 $LFS mkdir -i 0 $DIR/$tdir/a1
1612 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1613 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With "ls -i" the inode number is field 1, so the size is field 6.
1614 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
# fid1 is used later to build the expected lost+found entry name.
1615 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1617 $LFS getstripe $DIR/$tdir/a1/f1
1619 if [ $MDSCOUNT -ge 2 ]; then
1620 $LFS mkdir -i 1 $DIR/$tdir/a2
1621 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1622 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
# NOTE(review): no "local" declaration for fid2 is visible in these lines.
1623 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1625 $LFS getstripe $DIR/$tdir/a2/f2
1628 cancel_lru_locks osc
1630 echo "Inject failure, to simulate the case of missing the MDT-object"
1631 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
# The rm is the operation performed while the fault is armed.
1632 do_facet mds1 $LCTL set_param fail_loc=0x1616
1633 rm -f $DIR/$tdir/a1/f1
1635 if [ $MDSCOUNT -ge 2 ]; then
1636 do_facet mds2 $LCTL set_param fail_loc=0x1616
1637 rm -f $DIR/$tdir/a2/f2
1643 do_facet mds1 $LCTL set_param fail_loc=0
1644 if [ $MDSCOUNT -ge 2 ]; then
1645 do_facet mds2 $LCTL set_param fail_loc=0
1648 cancel_lru_locks mdc
1649 cancel_lru_locks osc
1651 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1652 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1654 for k in $(seq $MDSCOUNT); do
1655 # The LFSCK status query interval is 30 seconds. For the case
1656 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1657 # time to guarantee the status sync up.
1658 wait_update_facet mds${k} "$LCTL get_param -n \
1659 mdd.$(facet_svc mds${k}).lfsck_layout |
1660 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1661 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without polling, after all MDSes have completed.
1664 for k in $(seq $OSTCOUNT); do
1665 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1666 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1667 awk '/^status/ { print $2 }')
1668 [ "$cur_status" == "completed" ] ||
1669 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Expected orphan repairs: 1 on mds1 (one stripe), 2 on mds2 (two stripes).
1672 local repaired=$(do_facet mds1 $LCTL get_param -n \
1673 mdd.$(facet_svc mds1).lfsck_layout |
1674 awk '/^repaired_orphan/ { print $2 }')
1675 [ $repaired -eq 1 ] ||
1676 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
1678 if [ $MDSCOUNT -ge 2 ]; then
1679 repaired=$(do_facet mds2 $LCTL get_param -n \
1680 mdd.$(facet_svc mds2).lfsck_layout |
1681 awk '/^repaired_orphan/ { print $2 }')
1682 [ $repaired -eq 2 ] ||
1683 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
1686 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
# The recovered entry is looked up as R-<original fid> under the MDT's
# lost+found directory.
1687 mv $MOUNT/.lustre/lost+found/MDT0000/R-${fid1} $DIR/$tdir/a1/f1 ||
1688 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/R-${fid1}"
1690 if [ $MDSCOUNT -ge 2 ]; then
1691 local name=$MOUNT/.lustre/lost+found/MDT0001/R-${fid2}
1692 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
1695 $LFS path2fid $DIR/$tdir/a1/f1
1696 $LFS getstripe $DIR/$tdir/a1/f1
1698 if [ $MDSCOUNT -ge 2 ]; then
1699 $LFS path2fid $DIR/$tdir/a2/f2
1700 $LFS getstripe $DIR/$tdir/a2/f2
1703 echo "The file size should be correct after layout LFSCK scanning"
1704 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1705 [ "$cur_size" == "$saved_size" ] ||
1706 error "(7) Expect file1 size $saved_size, but got $cur_size"
1708 if [ $MDSCOUNT -ge 2 ]; then
1709 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1710 [ "$cur_size" == "$saved_size" ] ||
1711 error "(8) Expect file2 size $saved_size, but got $cur_size"
1714 run_test 18b "Find out orphan OST-object and repair it (2)"
# Test 18c: write the files while fail_loc 0x1617 is set on the OST(s), then
# remove them while fail_loc 0x1616 is set on the MDS(es). After
# "layout LFSCK -r -o" the test expects all repaired orphans to be counted on
# mds1, none on mds2, and N-* entries under lost+found/MDT0000 only.
1718 echo "The target MDT-object is lost, and the OST-object FID is missing."
1719 echo "The LFSCK should re-create the MDT-object with new FID under the "
1720 echo "directory .lustre/lost+found/MDTxxxx."
1723 check_mount_and_prep
1724 $LFS mkdir -i 0 $DIR/$tdir/a1
1725 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1727 echo "Inject failure, to simulate the case of missing parent FID"
1728 #define OBD_FAIL_LFSCK_NOPFID 0x1617
# Unlike the MDS-side fault below, this one is armed on the OST before the
# data is written; no line clearing it on the OSTs is visible here.
1729 do_facet ost1 $LCTL set_param fail_loc=0x1617
1731 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1732 $LFS getstripe $DIR/$tdir/a1/f1
1734 if [ $MDSCOUNT -ge 2 ]; then
1735 $LFS mkdir -i 1 $DIR/$tdir/a2
1736 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1737 do_facet ost2 $LCTL set_param fail_loc=0x1617
1738 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1739 $LFS getstripe $DIR/$tdir/a2/f2
1742 cancel_lru_locks osc
1744 echo "Inject failure, to simulate the case of missing the MDT-object"
1745 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1746 do_facet mds1 $LCTL set_param fail_loc=0x1616
1747 rm -f $DIR/$tdir/a1/f1
1749 if [ $MDSCOUNT -ge 2 ]; then
1750 do_facet mds2 $LCTL set_param fail_loc=0x1616
1751 rm -f $DIR/$tdir/a2/f2
1757 do_facet mds1 $LCTL set_param fail_loc=0
1758 if [ $MDSCOUNT -ge 2 ]; then
1759 do_facet mds2 $LCTL set_param fail_loc=0
1762 cancel_lru_locks mdc
1763 cancel_lru_locks osc
1765 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1766 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1768 for k in $(seq $MDSCOUNT); do
1769 # The LFSCK status query interval is 30 seconds. For the case
1770 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1771 # time to guarantee the status sync up.
1772 wait_update_facet mds${k} "$LCTL get_param -n \
1773 mdd.$(facet_svc mds${k}).lfsck_layout |
1774 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1775 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without polling, after all MDSes have completed.
1778 for k in $(seq $OSTCOUNT); do
1779 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1780 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1781 awk '/^status/ { print $2 }')
1782 [ "$cur_status" == "completed" ] ||
1783 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): "expected" is compared below but its assignment is not among
# the visible lines; presumably it is set inside this if/else depending on
# MDSCOUNT -- TODO confirm.
1786 if [ $MDSCOUNT -ge 2 ]; then
1792 local repaired=$(do_facet mds1 $LCTL get_param -n \
1793 mdd.$(facet_svc mds1).lfsck_layout |
1794 awk '/^repaired_orphan/ { print $2 }')
1795 [ $repaired -eq $expected ] ||
1796 error "(4) Expect $expected fixed on mds1, but got: $repaired"
1798 if [ $MDSCOUNT -ge 2 ]; then
1799 repaired=$(do_facet mds2 $LCTL get_param -n \
1800 mdd.$(facet_svc mds2).lfsck_layout |
1801 awk '/^repaired_orphan/ { print $2 }')
1802 [ $repaired -eq 0 ] ||
1803 error "(5) Expect 0 fixed on mds2, but got: $repaired"
# NOTE(review): the echo below says stubs are expected under MDT0001, but the
# check raises error (6) when any N-* entry is listed there, i.e. it requires
# the directory to hold none. The message and the check disagree.
1806 echo "There should be some stub under .lustre/lost+found/MDT0001/"
1807 ls -ail $MOUNT/.lustre/lost+found/MDT0001/N-* &&
1808 error "(6) .lustre/lost+found/MDT0001/ should be empty"
1810 echo "There should be some stub under .lustre/lost+found/MDT0000/"
1811 ls -ail $MOUNT/.lustre/lost+found/MDT0000/N-* ||
1812 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
1814 run_test 18c "Find out orphan OST-object and repair it (3)"
# Test 18d: create a1/f1 and a1/f2, then chown f2 and remove f1 while
# fail_loc 0x1618 is set, and bring the filesystem up again. f2's size is
# expected to differ from the saved one. "layout LFSCK -r -o -c" is started
# with fail_loc 0x1602 / fail_val=5 armed; once all targets report
# "completed" the test requires one repaired orphan and f2's size restored.
1818 echo "The target MDT-object layout EA slot is occpuied by some new"
1819 echo "created OST-object when repair dangling reference case. Such"
1820 echo "conflict OST-object has never been modified. Then when found"
1821 echo "the orphan OST-object, LFSCK will replace it with the orphan"
1825 check_mount_and_prep
1827 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1828 echo "guard" > $DIR/$tdir/a1/f1
1829 echo "foo" > $DIR/$tdir/a1/f2
# With "ls -i" the inode number is field 1, so the size is field 6.
1830 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1831 $LFS path2fid $DIR/$tdir/a1/f1
1832 $LFS getstripe $DIR/$tdir/a1/f1
1833 $LFS path2fid $DIR/$tdir/a1/f2
1834 $LFS getstripe $DIR/$tdir/a1/f2
1835 cancel_lru_locks osc
1837 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1838 echo "to reference the same OST-object (which is f1's OST-obejct)."
1839 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1840 echo "dangling reference case, but f2's old OST-object is there."
1843 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
# Both the chown of f2 and the removal of f1 happen while the fault is armed.
1844 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1845 chown 1.1 $DIR/$tdir/a1/f2
1846 rm -f $DIR/$tdir/a1/f1
1849 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1851 echo "stopall to cleanup object cache"
1854 setupall > /dev/null
1856 echo "The file size should be incorrect since dangling referenced"
1857 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1858 [ "$cur_size" != "$saved_size" ] ||
1859 error "(1) Expect incorrect file2 size"
# Arm OBD_FAIL_LFSCK_DELAY3 with fail_val=5 before starting LFSCK. Presumably
# this holds the scan so that "scanning-phase2" can be observed below --
# TODO confirm.
1861 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1862 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
1864 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1865 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1867 wait_update_facet mds1 "$LCTL get_param -n \
1868 mdd.$(facet_svc mds1).lfsck_layout |
1869 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1870 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
1872 # LU-3469: before osp_sync() is enabled, wait for a while to guarantee
1873 # that former async repair operations have been executed on the OST(s).
1877 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1879 for k in $(seq $MDSCOUNT); do
1880 # The LFSCK status query interval is 30 seconds. For the case
1881 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1882 # time to guarantee the status sync up.
1883 wait_update_facet mds${k} "$LCTL get_param -n \
1884 mdd.$(facet_svc mds${k}).lfsck_layout |
1885 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1886 error "(3) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without polling, after all MDSes have completed.
1889 for k in $(seq $OSTCOUNT); do
1890 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1891 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1892 awk '/^status/ { print $2 }')
1893 [ "$cur_status" == "completed" ] ||
1894 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
1897 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
1898 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
1899 awk '/^repaired_orphan/ { print $2 }')
1900 [ $repaired -eq 1 ] ||
1901 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
1903 echo "The file size should be correct after layout LFSCK scanning"
1904 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1905 [ "$cur_size" == "$saved_size" ] ||
1906 error "(6) Expect file2 size $saved_size, but got $cur_size"
1908 echo "The LFSCK should find back the original data."
# The content is only printed for the log; just the size above is asserted.
1909 cat $DIR/$tdir/a1/f2
1910 $LFS path2fid $DIR/$tdir/a1/f2
1911 $LFS getstripe $DIR/$tdir/a1/f2
1913 run_test 18d "Find out orphan OST-object and repair it (4)"
1917 echo "The target MDT-object layout EA slot is occpuied by some new"
1918 echo "created OST-object when repair dangling reference case. Such"
1919 echo "conflict OST-object has been modified by others. To keep the"
1920 echo "new data, the LFSCK will create a new file to refernece this"
1921 echo "old orphan OST-object."
1924 check_mount_and_prep
1926 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1927 echo "guard" > $DIR/$tdir/a1/f1
1928 echo "foo" > $DIR/$tdir/a1/f2
1929 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1930 $LFS path2fid $DIR/$tdir/a1/f1
1931 $LFS getstripe $DIR/$tdir/a1/f1
1932 $LFS path2fid $DIR/$tdir/a1/f2
1933 $LFS getstripe $DIR/$tdir/a1/f2
1934 cancel_lru_locks osc
1936 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1937 echo "to reference the same OST-object (which is f1's OST-obejct)."
1938 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1939 echo "dangling reference case, but f2's old OST-object is there."
1942 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1943 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1944 chown 1.1 $DIR/$tdir/a1/f2
1945 rm -f $DIR/$tdir/a1/f1
1948 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1950 echo "stopall to cleanup object cache"
1953 setupall > /dev/null
1955 echo "The file size should be incorrect since dangling referenced"
1956 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1957 [ "$cur_size" != "$saved_size" ] ||
1958 error "(1) Expect incorrect file2 size"
1960 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1961 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
1963 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1964 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1966 wait_update_facet mds1 "$LCTL get_param -n \
1967 mdd.$(facet_svc mds1).lfsck_layout |
1968 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1969 error "(3) MDS1 is not the expected 'scanning-phase2'"
1971 # to guarantee all updates are synced.
1975 echo "Write new data to f2 to modify the new created OST-object."
1976 echo "dummy" >> $DIR/$tdir/a1/f2
1978 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1980 for k in $(seq $MDSCOUNT); do
1981 # The LFSCK status query interval is 30 seconds. In case some
1982 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough
1983 # to guarantee that the status has been synced.
1984 wait_update_facet mds${k} "$LCTL get_param -n \
1985 mdd.$(facet_svc mds${k}).lfsck_layout |
1986 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1987 error "(4) MDS${k} is not the expected 'completed'"
1990 for k in $(seq $OSTCOUNT); do
1991 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1992 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1993 awk '/^status/ { print $2 }')
1994 [ "$cur_status" == "completed" ] ||
1995 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
1998 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
1999 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2000 awk '/^repaired_orphan/ { print $2 }')
2001 [ $repaired -eq 1 ] ||
2002 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2004 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2005 local cname=$(ls $MOUNT/.lustre/lost+found/MDT0000/C-*)
2007 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
2009 echo "The stub file should keep the original f2 data"
2010 cur_size=$(ls -il $cname | awk '{ print $6 }')
2011 [ "$cur_size" == "$saved_size" ] ||
2012 error "(8) Expect file2 size $saved_size, but got $cur_size"
2015 $LFS path2fid $cname
2016 $LFS getstripe $cname
2018 echo "The f2 should contains new data."
2019 cat $DIR/$tdir/a1/f2
2020 $LFS path2fid $DIR/$tdir/a1/f2
2021 $LFS getstripe $DIR/$tdir/a1/f2
2023 run_test 18e "Find out orphan OST-object and repair it (5)"
# Test 19a: with lfsck_verify_pfid enabled on OST0000, a read that carries a
# wrong parent FID is expected to be rejected.
# NOTE(review): the embedded line numbers skip, so some original lines
# (e.g. the function header and closing brace) are not visible here.
2026 check_mount_and_prep
# Single stripe, starting on OST index 0, for everything created below.
2027 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2029 echo "foo" > $DIR/$tdir/a0
2030 echo "guard" > $DIR/$tdir/a1
# presumably drops cached client locks so the later read goes to the OST
# instead of being served from the client cache -- TODO confirm
2031 cancel_lru_locks osc
2033 echo "Inject failure, then client will offer wrong parent FID when read"
# Enable parent-FID verification on the OST.
2034 do_facet ost1 $LCTL set_param -n \
2035 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
# The fail_loc is set with a plain $LCTL (no do_facet), i.e. on the node
# running this script.
2036 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
2037 $LCTL set_param fail_loc=0x1619
2039 echo "Read RPC with wrong parent FID should be denied"
# Inverted check: a successful cat is the failure case.
2040 cat $DIR/$tdir/a0 && error "(3) Read should be denied!"
# Clear the injected failure again.
2041 $LCTL set_param fail_loc=0
2043 run_test 19a "OST-object inconsistency self detect"
# Test 19b: create an OST-object whose back pointer refers to a non-existent
# MDT-object, then check that a normal read repairs the parent FID on the OST
# once lfsck_verify_pfid is enabled.
# NOTE(review): the embedded line numbers skip, so some original lines
# (e.g. the function header and closing brace) are not visible here.
2046 check_mount_and_prep
# Single stripe, starting on OST index 0.
2047 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2049 echo "Inject failure stub to make the OST-object to back point to"
2050 echo "non-exist MDT-object"
# The failure is only active on ost1 while f0 is written.
2052 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
2053 do_facet ost1 $LCTL set_param fail_loc=0x1611
2054 echo "foo" > $DIR/$tdir/f0
2055 cancel_lru_locks osc
2056 do_facet ost1 $LCTL set_param fail_loc=0
2058 echo "Nothing should be fixed since self detect and repair is disabled"
# Baseline: the "repaired" counter reported by lfsck_verify_pfid must be 0.
2059 local repaired=$(do_facet ost1 $LCTL get_param -n \
2060 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2061 awk '/^repaired/ { print $2 }')
2062 [ $repaired -eq 0 ] ||
2063 error "(1) Expected 0 repaired, but got $repaired"
2065 echo "Read RPC with right parent FID should be accepted,"
2066 echo "and cause parent FID on OST to be fixed"
# Enable verification, then read the file; the read itself must succeed.
2068 do_facet ost1 $LCTL set_param -n \
2069 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2070 cat $DIR/$tdir/f0 || error "(2) Read should not be denied!"
# After the read, exactly one repair is expected.
2072 repaired=$(do_facet ost1 $LCTL get_param -n \
2073 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2074 awk '/^repaired/ { print $2 }')
2075 [ $repaired -eq 1 ] ||
2076 error "(3) Expected 1 repaired, but got $repaired"
2078 run_test 19b "OST-object inconsistency self repair"
# Test 20: files lose their MDT-object (and, for some, one OST-object) under
# injected MDS failures. Layout LFSCK is then run and the files it re-creates
# under .lustre/lost+found/MDT0000/ are checked one by one, including those
# whose layout contains a hole.
# NOTE(review): the embedded line numbers skip, so some original lines
# (e.g. else/fi/done and the assignment of bcount) are not visible here;
# bcount is used below without a visible definition -- confirm its value
# against the full file.
2081 [ $OSTCOUNT -lt 2 ] &&
2082 skip "The test needs at least 2 OSTs" && return
2085 echo "The target MDT-object and some of its OST-object are lost."
2086 echo "The LFSCK should find out the left OST-objects and re-create"
2087 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2088 echo "with the partial OST-objects (LOV EA hole)."
2090 echo "New client can access the file with LOV EA hole via normal"
2091 echo "system tools or commands without crash the system."
2093 echo "For old client, even though it cannot access the file with"
2094 echo "LOV EA hole, it should not cause the system crash."
2097 check_mount_and_prep
2098 $LFS mkdir -i 0 $DIR/$tdir/a1
# 3 stripes when more than 2 OSTs exist, otherwise 2; stripe size is 1M,
# i.e. 256 blocks of 4096 bytes per stripe.
2099 if [ $OSTCOUNT -gt 2 ]; then
2100 $LFS setstripe -c 3 -i 0 -s 1M $DIR/$tdir/a1
2103 $LFS setstripe -c 2 -i 0 -s 1M $DIR/$tdir/a1
2107 # 256 blocks on the stripe0.
2108 # 1 block on the stripe1 for 2 OSTs case.
2109 # 256 blocks on the stripe1 for other cases.
2110 # 1 block on the stripe2 if OSTs > 2
2111 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2112 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2113 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# Remember the FIDs: the recovered files are looked up later as R-<fid>.
2115 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2116 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2117 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
2120 $LFS getstripe $DIR/$tdir/a1/f0
2122 $LFS getstripe $DIR/$tdir/a1/f1
2124 $LFS getstripe $DIR/$tdir/a1/f2
# A fourth file is only used when a third stripe exists.
# NOTE(review): fid3 has no visible "local" declaration in this listing.
2126 if [ $OSTCOUNT -gt 2 ]; then
2127 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
2128 fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2130 $LFS getstripe $DIR/$tdir/a1/f3
2133 cancel_lru_locks osc
2135 echo "Inject failure..."
2136 echo "To simulate f0 lost MDT-object"
2137 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2138 do_facet mds1 $LCTL set_param fail_loc=0x1616
2139 rm -f $DIR/$tdir/a1/f0
2141 echo "To simulate f1 lost MDT-object and OST-object0"
2142 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
2143 do_facet mds1 $LCTL set_param fail_loc=0x161a
2144 rm -f $DIR/$tdir/a1/f1
# Same fail_loc; per the messages, fail_val picks which OST-object is lost.
2146 echo "To simulate f2 lost MDT-object and OST-object1"
2147 do_facet mds1 $LCTL set_param fail_val=1
2148 rm -f $DIR/$tdir/a1/f2
2150 if [ $OSTCOUNT -gt 2 ]; then
2151 echo "To simulate f3 lost MDT-object and OST-object2"
2152 do_facet mds1 $LCTL set_param fail_val=2
2153 rm -f $DIR/$tdir/a1/f3
# The client stays unmounted while LFSCK runs; it is mounted again before
# the recovered files are inspected.
2156 umount_client $MOUNT
2159 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2161 echo "Inject failure to slow down the LFSCK on OST0"
2162 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2163 do_facet ost1 $LCTL set_param fail_loc=0x161b
2165 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2166 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2169 do_facet ost1 $LCTL set_param fail_loc=0
# Every MDT must report "completed" for layout LFSCK.
2171 for k in $(seq $MDSCOUNT); do
2172 # The LFSCK status query interval is 30 seconds. In case some
2173 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough
2174 # to guarantee that the status has been synced.
2175 wait_update_facet mds${k} "$LCTL get_param -n \
2176 mdd.$(facet_svc mds${k}).lfsck_layout |
2177 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2178 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are checked once, without waiting.
2181 for k in $(seq $OSTCOUNT); do
2182 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2183 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2184 awk '/^status/ { print $2 }')
2185 [ "$cur_status" == "completed" ] ||
2186 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Number of orphans repaired on mds1: 9 expected with more than 2 OSTs,
# otherwise 4.
2189 local repaired=$(do_facet mds1 $LCTL get_param -n \
2190 mdd.$(facet_svc mds1).lfsck_layout |
2191 awk '/^repaired_orphan/ { print $2 }')
2192 if [ $OSTCOUNT -gt 2 ]; then
2193 [ $repaired -eq 9 ] ||
2194 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2196 [ $repaired -eq 4 ] ||
2197 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
2200 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Bit tested against the pattern printed by "lfs getstripe -L" to tell
# whether the layout has a hole.
2202 LOV_PATTERN_F_HOLE=0x40000000
2205 # R-${fid0} is the old f0
2207 local name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid0}"
2208 echo "Check $name, which is the old f0"
2210 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
# Prefix with 0x so the arithmetic below parses the value as hex.
2212 local pattern=0x$($LFS getstripe -L $name)
2213 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2214 error "(5.2) NOT expect pattern flag hole, but got $pattern"
2216 local stripes=$($LFS getstripe -c $name)
2217 if [ $OSTCOUNT -gt 2 ]; then
2218 [ $stripes -eq 3 ] ||
2219 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2221 [ $stripes -eq 2 ] ||
2222 error "(5.3.2) expect the stripe count is 2, but got $stripes"
2225 local size=$(stat $name | awk '/Size:/ { print $2 }')
2226 [ $size -eq $((4096 * $bcount)) ] ||
2227 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
# f0 lost no OST-object, so all normal operations must work.
2229 cat $name > /dev/null || error "(5.5) cannot read $name"
2231 echo "dummy" >> $name || error "(5.6) cannot write $name"
2233 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2235 touch $name || error "(5.8) cannot touch $name"
2237 rm -f $name || error "(5.9) cannot unlink $name"
2240 # R-${fid1} contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2242 name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid1}"
2243 if [ $OSTCOUNT -gt 2 ]; then
2244 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2246 echo "Check $name, it contains the old f1's stripe1"
2249 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
# Stripe0 is missing here, so the hole flag must be set.
2251 pattern=0x$($LFS getstripe -L $name)
2252 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2253 error "(6.2) expect pattern flag hole, but got $pattern"
2255 stripes=$($LFS getstripe -c $name)
2256 if [ $OSTCOUNT -gt 2 ]; then
2257 [ $stripes -eq 3 ] ||
2258 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2260 [ $stripes -eq 2 ] ||
2261 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2264 size=$(stat $name | awk '/Size:/ { print $2 }')
2265 [ $size -eq $((4096 * $bcount)) ] ||
2266 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
# Inverted check: a plain sequential read has to fail on the hole.
2268 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# Copy block by block, continuing past read errors (conv=noerror) and
# counting the EIO messages; one failure per 4096-byte block of the missing
# 1M stripe gives 256.
2270 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2271 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2274 [ $failures -eq 256 ] ||
2275 error "(6.6) expect 256 IO failures, but get $failures"
2277 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2278 [ $size -eq $((4096 * $bcount)) ] ||
2279 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
# Offset 0 lies in the missing stripe0, so this write must fail.
2281 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2282 error "(6.8) write to the LOV EA hole should fail"
# Block 300 is past the first 1M (256 blocks), i.e. in stripe1, which exists.
2284 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2285 error "(6.9) write to normal stripe should NOT fail"
2287 echo "foo" >> $name && error "(6.10) append write $name should fail"
2289 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2291 touch $name || error "(6.12) cannot touch $name"
2293 rm -f $name || error "(6.13) cannot unlink $name"
2296 # R-${fid2} contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2298 name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid2}"
2299 if [ $OSTCOUNT -gt 2 ]; then
2300 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2302 echo "Check $name, it contains the old f2's stripe0"
2305 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
2307 pattern=0x$($LFS getstripe -L $name)
2308 stripes=$($LFS getstripe -c $name)
2309 size=$(stat $name | awk '/Size:/ { print $2 }')
# With more than 2 OSTs the missing stripe1 sits between two existing
# stripes, so a hole is expected; the 7.x.1 checks cover that case.
2310 if [ $OSTCOUNT -gt 2 ]; then
2311 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2312 error "(7.2.1) expect pattern flag hole, but got $pattern"
2314 [ $stripes -eq 3 ] ||
2315 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2317 [ $size -eq $((4096 * $bcount)) ] ||
2318 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
2320 cat $name > /dev/null &&
2321 error "(7.5.1) normal read $name should fail"
2323 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2324 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2326 [ $failures -eq 256 ] ||
2327 error "(7.6) expect 256 IO failures, but get $failures"
2329 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2330 [ $size -eq $((4096 * $bcount)) ] ||
2331 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
# Here block 300 (stripe1) is the hole and offset 0 (stripe0) is normal.
2333 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2334 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
2336 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2337 error "(7.8.1) write to normal stripe should NOT fail"
2339 echo "foo" >> $name &&
2340 error "(7.8.3) append write $name should fail"
2342 chown $RUNAS_ID:$RUNAS_GID $name ||
2343 error "(7.9.1) cannot chown on $name"
2345 touch $name || error "(7.10.1) cannot touch $name"
# The 7.x.2 checks cover the 2-OST case: only stripe0 is left, so a
# one-stripe file of 256 blocks without a hole is expected.
2347 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2348 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2350 [ $stripes -eq 1 ] ||
2351 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2354 [ $size -eq $((4096 * (256 + 0))) ] ||
2355 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2357 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2359 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2361 chown $RUNAS_ID:$RUNAS_GID $name ||
2362 error "(7.9.2) cannot chown on $name"
2364 touch $name || error "(7.10.2) cannot touch $name"
2367 rm -f $name || error "(7.11) cannot unlink $name"
# f3 only exists when there are more than 2 OSTs.
2369 [ $OSTCOUNT -le 2 ] && return
2372 # R-${fid3} should contain the old f3's stripe0 and stripe1
2374 name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid3}"
2375 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2377 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
2379 pattern=0x$($LFS getstripe -L $name)
2380 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2381 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2383 stripes=$($LFS getstripe -c $name)
2384 # LFSCK does not know the old f3 had 3 stripes.
2385 # It only tries to find as much as possible.
2386 # The stripe count depends on the last stripe's offset.
2387 [ $stripes -eq 2 ] ||
2388 error "(8.3) expect the stripe count is 2, but got $stripes"
2390 size=$(stat $name | awk '/Size:/ { print $2 }')
# Two full stripes of 256 blocks each; nothing is left of stripe2.
2392 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2393 error "(8.4) expect the size $((4096 * 512)), but got $size"
2395 cat $name > /dev/null || error "(8.5) cannot read $name"
2397 echo "dummy" >> $name || error "(8.6) cannot write $name"
2399 chown $RUNAS_ID:$RUNAS_GID $name ||
2400 error "(8.7) cannot chown on $name"
2402 touch $name || error "(8.8) cannot touch $name"
2404 rm -f $name || error "(8.9) cannot unlink $name"
2406 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# Remove the lfsck debug flag that was added near the top of the script;
# a failure here is deliberately ignored.
2408 $LCTL set_param debug=-lfsck > /dev/null || true
2410 # restore MDS/OST size
# The SAVED_* values were captured before the script shrank the sizes and
# capped OSTCOUNT.
2411 MDSSIZE=${SAVED_MDSSIZE}
2412 OSTSIZE=${SAVED_OSTSIZE}
2413 OSTCOUNT=${SAVED_OSTCOUNT}
2415 # cleanup the system at last