3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
# Seed the always-skipped test list from the environment.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
# When SLOW is "no", EXCEPT_SLOW is set to the empty string.
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Default LUSTRE to the parent of the directory that contains this script.
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Source the shared test framework, then the configuration file; CONFIG
# defaults to $LUSTRE/tests/cfg/$NAME.sh when unset or empty.
15 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with status 0 when require_dsh_mds fails.
20 require_dsh_mds || exit 0
# Save the current sizes/count (presumably restored later; not visible here).
22 SAVED_MDSSIZE=${MDSSIZE}
23 SAVED_OSTSIZE=${OSTSIZE}
24 SAVED_OSTCOUNT=${OSTCOUNT}
25 # use a small MDS + OST size to speed up formatting
26 # do not make MDSSIZE/OSTSIZE too small: they affect the default journal size
29 # there is no need for many OSTs; fewer reduce the format/start/stop overhead
30 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
32 # build up a clean test environment.
# NOTE(review): the end of the following && chain (after
# check_and_cleanup_lustre) is not visible in this excerpt.
36 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
37 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
40 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
41 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
# Add tests 11-21 to the skip list when ost1 is older than 2.5.55.
43 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
44 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
46 [ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
47 # bug number for skipped test: LU-4970
48 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 14"
# Add lfsck to the debug mask; a failure here is deliberately ignored.
52 $LCTL set_param debug=+lfsck > /dev/null || true
# Device names of the first MDT and first OST of filesystem $FSNAME.
54 MDT_DEV="${FSNAME}-MDT0000"
55 OST_DEV="${FSNAME}-OST0000"
# Backing device of the MDT; the "mds" prefix is stripped from $SINGLEMDS
# before it is passed to mdsdevname.
56 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# The strings below are command prefixes. The tests expand them unquoted
# (e.g. `$START_NAMESPACE -r`) so that extra options can be appended.
# Start a namespace LFSCK on the MDT.
57 START_NAMESPACE="do_facet $SINGLEMDS \
58 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
# Start a layout LFSCK on the MDT.
59 START_LAYOUT="do_facet $SINGLEMDS \
60 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
# Start a layout LFSCK directly on ost1.
61 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
# Stop whatever LFSCK is running on the MDT.
62 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the namespace LFSCK state of the MDT.
63 SHOW_NAMESPACE="do_facet $SINGLEMDS \
64 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
# Print the layout LFSCK state of the MDT.
65 SHOW_LAYOUT="do_facet $SINGLEMDS \
66 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
# Print the layout LFSCK state of ost1.
67 SHOW_LAYOUT_ON_OST="do_facet ost1 \
68 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Mount options used when (re)starting a target with / without "noscrub".
69 MOUNT_OPTS_SCRUB="-o user_xattr"
70 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
# Fragment of the file-population helper; its header, and the lines that
# assign nfiles, ndirs and igif, are outside this excerpt.
79 echo "preparing... $nfiles * $ndirs files will be created $(date)."
# When igif is non-empty, populate with fail_loc 0x1504 set on the MDS.
80 if [ ! -z $igif ]; then
81 #define OBD_FAIL_FID_IGIF 0x1504
82 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
# Copy the test scripts themselves into the test directory as sample files.
85 cp $LUSTRE/tests/*.sh $DIR/$tdir/
# Create ndirs "d*" and "f*" entries, then nfiles "f*" entries in every d<i>.
86 if [ $ndirs -gt 0 ]; then
87 createmany -d $DIR/$tdir/d $ndirs
88 createmany -m $DIR/$tdir/f $ndirs
89 if [ $nfiles -gt 0 ]; then
90 for ((i = 0; i < $ndirs; i++)); do
91 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
92 /dev/null || error "createmany $nfiles"
95 createmany -d $DIR/$tdir/e $ndirs
# In the igif case also create "dummy", then clear the fault injection.
98 if [ ! -z $igif ]; then
99 touch $DIR/$tdir/dummy
100 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
103 echo "prepared $(date)."
# Test 0 body: drive a namespace LFSCK by hand (start, stop, resume, reset)
# and check the reported status after every step.
# fail_loc 0x1600 with fail_val=3 is set before starting; presumably this
# delays the scan so that 'scanning-phase1' can be observed.
109 #define OBD_FAIL_LFSCK_DELAY1 0x1600
110 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
111 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
113 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
115 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
116 [ "$STATUS" == "scanning-phase1" ] ||
117 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the scan and confirm that the status becomes 'stopped'.
119 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
121 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
122 [ "$STATUS" == "stopped" ] ||
123 error "(6) Expect 'stopped', but got '$STATUS'"
# Start again without -r; the scan must be back in phase 1.
125 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
127 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
128 [ "$STATUS" == "scanning-phase1" ] ||
129 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection and wait for the status to read 'completed'.
131 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
132 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
133 mdd.${MDT_DEV}.lfsck_namespace |
134 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
136 error "(9) unexpected status"
# Nothing was corrupted, so updated_phase1 must be 0.
139 local repaired=$($SHOW_NAMESPACE |
140 awk '/^updated_phase1/ { print $2 }')
141 [ $repaired -eq 0 ] ||
142 error "(10) Expect nothing to be repaired, but got: $repaired"
# Reset with -r and run once more; success_count must grow by exactly one.
144 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
145 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
146 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
147 mdd.${MDT_DEV}.lfsck_namespace |
148 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
150 error "(12) unexpected status"
153 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
154 [ $((scanned1 + 1)) -eq $scanned2 ] ||
155 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
157 echo "stopall, should NOT crash LU-3649"
158 stopall || error "(14) Fail to stopall"
160 run_test 0 "Control LFSCK manually"
# Test 1a body: LFSCK must find and repair a crashed FID-in-dirent.
# Skipped unless the MDT backing filesystem is ldiskfs.
163 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
164 skip "OI Scrub not implemented for ZFS" && return
# Create "dummy" while fail_loc 0x1501 is set on the MDS, then clear it.
168 #define OBD_FAIL_FID_INDIR 0x1501
169 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
170 touch $DIR/$tdir/dummy
172 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh (-r) namespace scan and wait for 'completed'.
174 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
175 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
176 mdd.${MDT_DEV}.lfsck_namespace |
177 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
179 error "(4) unexpected status"
# Read dirent_repaired, falling back to updated_phase1 when it is absent.
182 local repaired=$($SHOW_NAMESPACE |
183 awk '/^dirent_repaired/ { print $2 }')
184 # for interop with old server
185 [ -z "$repaired" ] &&
186 repaired=$($SHOW_NAMESPACE |
187 awk '/^updated_phase1/ { print $2 }')
# Exactly one entry must have been repaired.
189 [ $repaired -eq 1 ] ||
190 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
192 mount_client $MOUNT || error "(6) Fail to start client!"
# Listing the directory must succeed while fail_loc 0x1505 is set.
194 #define OBD_FAIL_FID_LOOKUP 0x1505
195 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
196 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
198 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
200 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
# Test 1b body: LFSCK must find and repair a missing FID-in-LMA.
# Skipped unless the MDT backing filesystem is ldiskfs.
204 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
205 skip "OI Scrub not implemented for ZFS" && return
# Create "dummy" while fail_loc 0x1502 is set on the MDS, then clear it.
209 #define OBD_FAIL_FID_INLMA 0x1502
210 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
211 touch $DIR/$tdir/dummy
213 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# The scan itself runs with fail_loc 0x1506 set.
215 #define OBD_FAIL_FID_NOLMA 0x1506
216 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
217 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
218 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
219 mdd.${MDT_DEV}.lfsck_namespace |
220 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
222 error "(4) unexpected status"
# Read dirent_repaired, falling back to updated_phase1 when it is absent.
225 local repaired=$($SHOW_NAMESPACE |
226 awk '/^dirent_repaired/ { print $2 }')
227 # for interop with old server
228 [ -z "$repaired" ] &&
229 repaired=$($SHOW_NAMESPACE |
230 awk '/^updated_phase1/ { print $2 }')
# Exactly one entry must have been repaired.
232 [ $repaired -eq 1 ] ||
233 error "(5) Fail to repair missed FID-in-LMA: $repaired"
235 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
236 mount_client $MOUNT || error "(6) Fail to start client!"
# stat of the file must succeed while fail_loc 0x1505 is set.
238 #define OBD_FAIL_FID_LOOKUP 0x1505
239 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
240 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
242 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
244 run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
# Test 2a body: LFSCK must find and repair a crashed linkEA entry.
# Create "dummy" while fail_loc 0x1603 is set on the MDS, then clear it.
249 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
250 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
251 touch $DIR/$tdir/dummy
253 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh (-r) namespace scan and wait for 'completed'.
255 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
256 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
257 mdd.${MDT_DEV}.lfsck_namespace |
258 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
260 error "(4) unexpected status"
# Read linkea_repaired, falling back to updated_phase1 when it is absent.
263 local repaired=$($SHOW_NAMESPACE |
264 awk '/^linkea_repaired/ { print $2 }')
265 # for interop with old server
266 [ -z "$repaired" ] &&
267 repaired=$($SHOW_NAMESPACE |
268 awk '/^updated_phase1/ { print $2 }')
# Exactly one entry must have been repaired.
270 [ $repaired -eq 1 ] ||
271 error "(5) Fail to repair crashed linkEA: $repaired"
273 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a single link.
275 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
276 error "(7) Fail to stat $DIR/$tdir/dummy"
# path -> FID -> path must round-trip to the original name.
278 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
279 local dummyname=$($LFS fid2path $DIR $dummyfid)
280 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
281 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
283 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
# Test 2b body: LFSCK must find and remove an invalid linkEA entry.
# Create "dummy" while fail_loc 0x1604 is set on the MDS, then clear it.
289 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
290 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
291 touch $DIR/$tdir/dummy
293 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh (-r) namespace scan and wait for 'completed'.
295 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
296 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
297 mdd.${MDT_DEV}.lfsck_namespace |
298 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
300 error "(4) unexpected status"
# This repair is counted in updated_phase2; exactly one is expected.
303 local repaired=$($SHOW_NAMESPACE |
304 awk '/^updated_phase2/ { print $2 }')
305 [ $repaired -eq 1 ] ||
306 error "(5) Fail to repair crashed linkEA: $repaired"
308 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a single link.
310 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
311 error "(7) Fail to stat $DIR/$tdir/dummy"
# path -> FID -> path must round-trip to the original name.
313 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
314 local dummyname=$($LFS fid2path $DIR $dummyfid)
315 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
316 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
318 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
# Test 2c body: LFSCK must find and remove a repeated linkEA entry.
# Create "dummy" while fail_loc 0x1605 is set on the MDS, then clear it.
324 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
325 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
326 touch $DIR/$tdir/dummy
328 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run a fresh (-r) namespace scan and wait for 'completed'.
330 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
331 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
332 mdd.${MDT_DEV}.lfsck_namespace |
333 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
335 error "(4) unexpected status"
# This repair is counted in updated_phase2; exactly one is expected.
338 local repaired=$($SHOW_NAMESPACE |
339 awk '/^updated_phase2/ { print $2 }')
340 [ $repaired -eq 1 ] ||
341 error "(5) Fail to repair crashed linkEA: $repaired"
343 mount_client $MOUNT || error "(6) Fail to start client!"
# The file must report a single link.
345 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
346 error "(7) Fail to stat $DIR/$tdir/dummy"
# path -> FID -> path must round-trip to the original name.
348 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
349 local dummyname=$($LFS fid2path $DIR $dummyfid)
350 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
351 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
353 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
# Test 4 body: FID-in-dirent must be rebuilt after a file-level MDT
# backup/restore. Skipped unless the MDT backing filesystem is ldiskfs.
357 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
358 skip "OI Scrub not implemented for ZFS" && return
# Unmount the client, stop the MDS, then back up and restore the MDT.
361 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
362 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
364 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
365 echo "start $SINGLEMDS with disabling OI scrub"
366 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
367 error "(2) Fail to start MDS!"
# Start the scan with fail_loc 0x1601 set and wait for flags 'inconsistent'.
369 #define OBD_FAIL_LFSCK_DELAY2 0x1601
370 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
371 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
372 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
373 mdd.${MDT_DEV}.lfsck_namespace |
374 awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || {
376 error "(5) unexpected status"
379 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
380 [ "$STATUS" == "scanning-phase1" ] ||
381 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection and wait for 'completed'.
383 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
384 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
385 mdd.${MDT_DEV}.lfsck_namespace |
386 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
388 error "(7) unexpected status"
# Once completed the flags field must be empty.
# NOTE(review): FLAGS is assigned without `local` here.
391 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
392 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Read dirent_repaired, falling back to updated_phase1 when it is absent.
394 local repaired=$($SHOW_NAMESPACE |
395 awk '/^dirent_repaired/ { print $2 }')
396 # for interop with old server
397 [ -z "$repaired" ] &&
398 repaired=$($SHOW_NAMESPACE |
399 awk '/^updated_phase1/ { print $2 }')
# At least 9 entries are expected to have been repaired.
401 [ $repaired -ge 9 ] ||
402 error "(9) Fail to re-generate FID-in-dirent: $repaired"
404 mount_client $MOUNT || error "(10) Fail to start client!"
# Listing the directory must succeed while fail_loc 0x1505 is set.
406 #define OBD_FAIL_FID_LOOKUP 0x1505
407 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
408 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
409 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
411 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
# Test 5 body: LFSCK must handle IGIF object upgrading.
# Skipped unless the MDT backing filesystem is ldiskfs.
415 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
416 skip "OI Scrub not implemented for ZFS" && return
# Unmount the client, stop the MDS, then back up and restore the MDT
# (mds_backup_restore gets an extra argument "1" here, unlike test 4).
419 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
420 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
422 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
423 echo "start $SINGLEMDS with disabling OI scrub"
424 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
425 error "(2) Fail to start MDS!"
# Start the scan with fail_loc 0x1601 set and wait for flags
# 'inconsistent,upgrade'.
427 #define OBD_FAIL_LFSCK_DELAY2 0x1601
428 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
429 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
430 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
431 mdd.${MDT_DEV}.lfsck_namespace |
432 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || {
434 error "(5) unexpected status"
437 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
438 [ "$STATUS" == "scanning-phase1" ] ||
439 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Clear the fault injection and wait for 'completed'.
441 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
442 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
443 mdd.${MDT_DEV}.lfsck_namespace |
444 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
446 error "(7) unexpected status"
# Once completed the flags field must be empty.
# NOTE(review): FLAGS is assigned without `local` here.
449 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
450 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
# Read dirent_repaired, falling back to updated_phase1 when it is absent.
452 local repaired=$($SHOW_NAMESPACE |
453 awk '/^dirent_repaired/ { print $2 }')
454 # for interop with old server
455 [ -z "$repaired" ] &&
456 repaired=$($SHOW_NAMESPACE |
457 awk '/^updated_phase1/ { print $2 }')
# At least 2 entries are expected to have been repaired.
459 [ $repaired -ge 2 ] ||
460 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
462 mount_client $MOUNT || error "(10) Fail to start client!"
# stat and ls must both succeed while fail_loc 0x1505 is set.
464 #define OBD_FAIL_FID_LOOKUP 0x1505
465 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
466 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
468 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
470 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# path -> FID -> path must round-trip to the original name.
471 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
472 local dummyname=$($LFS fid2path $DIR $dummyfid)
473 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
474 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
476 run_test 5 "LFSCK can handle IGIF object upgrading"
# Test 6a body: after a failed scan, a restarted LFSCK must begin beyond the
# last checkpoint (latest_start_position > last_checkpoint_position).
# NOTE(review): the sleep and the tail of both position pipelines are on
# lines that are not part of this excerpt.
481 #define OBD_FAIL_LFSCK_DELAY1 0x1600
482 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
483 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
485 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
486 [ "$STATUS" == "scanning-phase1" ] ||
487 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
489 # Sleep 3 sec to guarantee at least one object processed by LFSCK
491 # Fail the LFSCK to guarantee there is at least one checkpoint
492 #define OBD_FAIL_LFSCK_FATAL1 0x1608
493 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
494 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
495 mdd.${MDT_DEV}.lfsck_namespace |
496 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
498 error "(4) unexpected status"
501 local POS0=$($SHOW_NAMESPACE |
502 awk '/^last_checkpoint_position/ { print $2 }' |
505 #define OBD_FAIL_LFSCK_DELAY1 0x1600
506 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
507 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
509 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
510 [ "$STATUS" == "scanning-phase1" ] ||
511 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
513 local POS1=$($SHOW_NAMESPACE |
514 awk '/^latest_start_position/ { print $2 }' |
516 [[ $POS0 -lt $POS1 ]] ||
517 error "(7) Expect larger than: $POS0, but got $POS1"
# Clear the fault injection and wait for 'completed'.
519 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
520 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
521 mdd.${MDT_DEV}.lfsck_namespace |
522 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
524 error "(8) unexpected status"
527 run_test 6a "LFSCK resumes from last checkpoint (1)"
# Test 6b body: like 6a, but uses fail_loc 0x1601/0x1609 and compares two
# fields of the position line (awk $2 -> O_POS*, awk $4 -> D_POS*).
# NOTE(review): the sleep, the tail of the O_POS pipelines and the else/fi of
# the final comparison are on lines that are not part of this excerpt.
532 #define OBD_FAIL_LFSCK_DELAY2 0x1601
533 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
534 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
536 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
537 [ "$STATUS" == "scanning-phase1" ] ||
538 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
540 # Sleep 5 sec to guarantee that we are in the directory scanning
542 # Fail the LFSCK to guarantee there is at least one checkpoint
543 #define OBD_FAIL_LFSCK_FATAL2 0x1609
544 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
545 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
546 mdd.${MDT_DEV}.lfsck_namespace |
547 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
549 error "(4) unexpected status"
552 local O_POS0=$($SHOW_NAMESPACE |
553 awk '/^last_checkpoint_position/ { print $2 }' |
556 local D_POS0=$($SHOW_NAMESPACE |
557 awk '/^last_checkpoint_position/ { print $4 }')
559 #define OBD_FAIL_LFSCK_DELAY2 0x1601
560 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
561 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
563 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
564 [ "$STATUS" == "scanning-phase1" ] ||
565 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
567 local O_POS1=$($SHOW_NAMESPACE |
568 awk '/^latest_start_position/ { print $2 }' |
570 local D_POS1=$($SHOW_NAMESPACE |
571 awk '/^latest_start_position/ { print $4 }')
# If either D_POS value is "N/A", compare the O_POS values instead.
573 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
574 [[ $O_POS0 -lt $O_POS1 ]] ||
575 error "(7.1) $O_POS1 is not larger than $O_POS0"
577 [[ $D_POS0 -lt $D_POS1 ]] ||
578 error "(7.2) $D_POS1 is not larger than $D_POS0"
# Clear the fault injection and wait for 'completed'.
581 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
582 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
583 mdd.${MDT_DEV}.lfsck_namespace |
584 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
586 error "(8) unexpected status"
589 run_test 6b "LFSCK resumes from last checkpoint (2)"
# Test 7a body: an LFSCK that was still in phase 1 when the MDS was stopped
# must resume by itself after the MDS is started again.
596 #define OBD_FAIL_LFSCK_DELAY2 0x1601
597 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
598 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
600 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
601 [ "$STATUS" == "scanning-phase1" ] ||
602 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
604 # Sleep 3 sec to guarantee at least one object processed by LFSCK
# Restart the MDS while the scan is still in progress.
606 echo "stop $SINGLEMDS"
607 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
609 echo "start $SINGLEMDS"
610 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
611 error "(5) Fail to start MDS!"
# No lfsck_start is issued here; the status must still reach 'completed'.
613 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
614 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
615 mdd.${MDT_DEV}.lfsck_namespace |
616 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
618 error "(6) unexpected status"
621 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
# Test 7b body: like 7a, but the MDS is restarted while the scan is in
# 'scanning-phase2'.
# Create 20 files while fail_loc 0x1604 is set on the MDS.
627 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
628 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
629 for ((i = 0; i < 20; i++)); do
630 touch $DIR/$tdir/dummy${i}
# Start the scan with fail_loc 0x1602 set and wait for phase 2.
633 #define OBD_FAIL_LFSCK_DELAY3 0x1602
634 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
635 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
636 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
637 mdd.${MDT_DEV}.lfsck_namespace |
638 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
640 error "(4) unexpected status"
# Restart the MDS while the scan is still in progress.
643 echo "stop $SINGLEMDS"
644 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
646 echo "start $SINGLEMDS"
647 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
648 error "(6) Fail to start MDS!"
# No lfsck_start is issued here; the status must still reach 'completed'.
650 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
651 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
652 mdd.${MDT_DEV}.lfsck_namespace |
653 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
655 error "(7) unexpected status"
658 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
# Test 8 body: walk the namespace LFSCK through its states and check each:
# init -> scanning-phase1 -> stopped -> scanning-phase1 -> failed ->
# scanning-phase1 -> crashed -> scanning-phase1 -> paused ->
# scanning-phase2 -> completed.
# NOTE(review): several setup lines after formatall are not in this excerpt.
663 formatall > /dev/null
# A freshly formatted filesystem must report status 'init'.
669 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
670 [ "$STATUS" == "init" ] ||
671 error "(2) Expect 'init', but got '$STATUS'"
# Create one directory with fail_loc 0x1603 set and five files with 0x1604.
673 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
674 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
675 mkdir $DIR/$tdir/crashed
677 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
678 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
679 for ((i = 0; i < 5; i++)); do
680 touch $DIR/$tdir/dummy${i}
683 umount_client $MOUNT || error "(3) Fail to stop client!"
# Start, stop, then start again; expect phase1 / stopped / phase1.
685 #define OBD_FAIL_LFSCK_DELAY2 0x1601
686 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
687 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
689 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
690 [ "$STATUS" == "scanning-phase1" ] ||
691 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
693 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
695 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
696 [ "$STATUS" == "stopped" ] ||
697 error "(7) Expect 'stopped', but got '$STATUS'"
699 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
701 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
702 [ "$STATUS" == "scanning-phase1" ] ||
703 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
# Set fail_loc 0x80001609 and wait for status 'failed'.
705 #define OBD_FAIL_LFSCK_FATAL2 0x1609
706 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
707 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
708 mdd.${MDT_DEV}.lfsck_namespace |
709 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
711 error "(10) unexpected status"
# Start again after the failure; expect phase 1.
714 #define OBD_FAIL_LFSCK_DELAY1 0x1600
715 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
716 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
718 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
719 [ "$STATUS" == "scanning-phase1" ] ||
720 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
# Stop the MDS with fail_loc 0x160a set, start it again with 0x160b set;
# the status must then read 'crashed'.
722 #define OBD_FAIL_LFSCK_CRASH 0x160a
723 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
726 echo "stop $SINGLEMDS"
727 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
729 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
730 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
732 echo "start $SINGLEMDS"
733 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
734 error "(14) Fail to start MDS!"
736 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
737 [ "$STATUS" == "crashed" ] ||
738 error "(15) Expect 'crashed', but got '$STATUS'"
# Start again after the crash; expect phase 1.
740 #define OBD_FAIL_LFSCK_DELAY2 0x1601
741 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
742 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
744 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
745 [ "$STATUS" == "scanning-phase1" ] ||
746 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
# Restart the MDS during the scan with 0x160b set; expect 'paused'.
748 echo "stop $SINGLEMDS"
749 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
751 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
752 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
754 echo "start $SINGLEMDS"
755 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
756 error "(19) Fail to start MDS!"
758 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
759 [ "$STATUS" == "paused" ] ||
760 error "(20) Expect 'paused', but got '$STATUS'"
# Start from the paused state with fail_loc 0x1602 set; wait for phase 2.
762 #define OBD_FAIL_LFSCK_DELAY3 0x1602
763 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
765 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
766 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
767 mdd.${MDT_DEV}.lfsck_namespace |
768 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
770 error "(22) unexpected status"
# During phase 2 the flags must read 'scanned-once,inconsistent'.
773 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
774 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
775 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
# Clear the fault injection, wait for 'completed', then expect empty flags.
777 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
778 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
779 mdd.${MDT_DEV}.lfsck_namespace |
780 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
782 error "(24) unexpected status"
785 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
786 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
788 run_test 8 "LFSCK state machine"
# Test 9a body: check that the phase-1 scan speed follows the configured
# speed limit, first at BASE_SPEED1 and then at BASE_SPEED2.
# NOTE(review): RUN_TIME1, RUN_TIME2, TIME_DIFF and the sleeps between the
# measurements are set on lines that are not part of this excerpt.
# Skip when /proc/cpuinfo lists no "processor : 1" entry.
791 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
792 skip "Testing on UP system, the speed may be inaccurate."
# Start a fresh scan limited to BASE_SPEED1 (-s).
798 local BASE_SPEED1=100
800 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
803 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
804 [ "$STATUS" == "scanning-phase1" ] ||
805 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
807 local SPEED=$($SHOW_NAMESPACE |
808 awk '/^average_speed_phase1/ { print $2 }')
810 # There may be a timing error; normally it should be less than 2 seconds.
811 # We allow another 20% of scheduling error.
813 # MAX_MARGIN = 1.2 = 12 / 10
814 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
815 RUN_TIME1 * 12 / 10))
816 [ $SPEED -lt $MAX_SPEED ] ||
817 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
# Raise the limit to BASE_SPEED2 while the scan is running.
820 local BASE_SPEED2=300
822 do_facet $SINGLEMDS \
823 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
826 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
827 # MIN_MARGIN = 0.8 = 8 / 10
828 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
829 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
830 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
831 # Account for slow ZFS performance - LU-4934
# The fstype check is a string comparison: '-eq' only accepts integers, so
# the old form always failed and never exempted ZFS from the lower bound.
832 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" = "zfs" ] ||
833 error "(5) Got speed $SPEED, expected more than $MIN_SPEED"
835 # MAX_MARGIN = 1.2 = 12 / 10
836 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
837 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
838 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
839 [ $SPEED -lt $MAX_SPEED ] ||
840 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the speed limit to 0 and wait for the scan to complete.
842 do_facet $SINGLEMDS \
843 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
845 wait_update_facet $SINGLEMDS \
846 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
847 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
848 error "(7) Failed to get expected 'completed'"
850 run_test 9a "LFSCK speed control (1)"
# Test 9b body: check that the phase-2 scan speed follows the configured
# speed limit, first at BASE_SPEED1 and then at BASE_SPEED2.
# NOTE(review): BASE_SPEED1, RUN_TIME1, RUN_TIME2, TIME_DIFF and the sleeps
# between the measurements are set on lines not part of this excerpt.
# Skip when /proc/cpuinfo lists no "processor : 1" entry.
853 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
854 skip "Testing on UP system, the speed may be inaccurate."
# Create 50 directories with 50 files each while fail_loc 0x1604 is set.
860 echo "Preparing another 50 * 50 files (with error) at $(date)."
861 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
862 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
863 createmany -d $DIR/$tdir/d 50
864 createmany -m $DIR/$tdir/f 50
865 for ((i = 0; i < 50; i++)); do
866 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
# Run a scan with fail_loc 0x160c set and wait until it reports 'stopped'.
869 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
870 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
871 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
872 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
873 mdd.${MDT_DEV}.lfsck_namespace |
874 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
876 error "(5) unexpected status"
879 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
880 echo "Prepared at $(date)."
# Start again limited to BASE_SPEED1; the scan is expected in phase 2.
884 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
887 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
888 [ "$STATUS" == "scanning-phase2" ] ||
889 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
891 local SPEED=$($SHOW_NAMESPACE |
892 awk '/^average_speed_phase2/ { print $2 }')
893 # There may be a timing error; normally it should be less than 2 seconds.
894 # We allow another 20% of scheduling error.
896 # MAX_MARGIN = 1.2 = 12 / 10
897 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
898 RUN_TIME1 * 12 / 10))
899 [ $SPEED -lt $MAX_SPEED ] ||
900 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
# Raise the limit to BASE_SPEED2 while the scan is running.
903 local BASE_SPEED2=150
905 do_facet $SINGLEMDS \
906 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
909 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
910 # MIN_MARGIN = 0.8 = 8 / 10
911 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
912 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
913 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# The fstype check is a string comparison: '-eq' only accepts integers, so
# the old form always failed and never exempted ZFS from the lower bound.
914 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" = "zfs" ] ||
915 error "(9) Got speed $SPEED, expected more than $MIN_SPEED"
917 # MAX_MARGIN = 1.2 = 12 / 10
918 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
919 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
920 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
921 [ $SPEED -lt $MAX_SPEED ] ||
922 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
# Set the speed limit to 0 and wait for the scan to complete.
924 do_facet $SINGLEMDS \
925 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
926 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
927 mdd.${MDT_DEV}.lfsck_namespace |
928 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
930 error "(11) unexpected status"
933 run_test 9b "LFSCK speed control (2)"
# Test 10 body: ordinary file operations must keep working on a mounted
# client while a speed-limited namespace LFSCK is in phase 1.
# Skipped unless the MDT backing filesystem is ldiskfs.
937 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
938 skip "lookup(..)/linkea on ZFS issue" && return
# Even-numbered entries are created with fail_loc 0x1603 set,
# odd-numbered entries with fail_loc 0x1604 set.
942 echo "Preparing more files with error at $(date)."
943 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
944 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
946 for ((i = 0; i < 1000; i = $((i+2)))); do
947 mkdir -p $DIR/$tdir/d${i}
948 touch $DIR/$tdir/f${i}
949 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
952 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
953 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
955 for ((i = 1; i < 1000; i = $((i+2)))); do
956 mkdir -p $DIR/$tdir/d${i}
957 touch $DIR/$tdir/f${i}
958 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
961 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
962 echo "Prepared at $(date)."
# Add a hard link to f200 (f200 is unlinked again further down).
964 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
967 mount_client $MOUNT || error "(3) Fail to start client!"
# Start a fresh scan limited to speed 100 (-s).
969 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
972 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
973 [ "$STATUS" == "scanning-phase1" ] ||
974 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
# Exercise ls, touch, mkdir, unlink, rm -rf, mv, hard link and symlink.
976 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
978 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
980 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
982 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
984 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
986 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
988 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
990 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
991 error "(14) Fail to softlink!"
# The scan must still be in phase 1 after all of the operations above.
993 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
994 [ "$STATUS" == "scanning-phase1" ] ||
995 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
# Set the speed limit to 0 and wait for the scan to complete.
997 do_facet $SINGLEMDS \
998 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
999 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1000 mdd.${MDT_DEV}.lfsck_namespace |
1001 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1003 error "(16) unexpected status"
1006 run_test 10 "System is available during LFSCK scanning"
# Remove O/<idx>/LAST_ID and O/<idx>/d0/0 from the locally mounted backing
# filesystem of ost<ost>.
# NOTE(review): `ost` and `idx` are assigned on lines not shown in this
# excerpt; the visible caller passes "1 0", presumably ost then idx.
# Returns 1 when the local mount fails and 2 when the unmount fails.
1009 ost_remove_lastid() {
# All commands below run on the OST node through do_facet.
1012 local rcmd="do_facet ost${ost}"
1014 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1016 # step 1: local mount
1017 mount_fstype ost${ost} || return 1
1018 # step 2: remove the specified LAST_ID
# rm -f: a missing file is not treated as an error here.
1019 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
1021 unmount_fstype ost${ost} || return 2
# Test 11a body: a layout LFSCK on ost1 must rebuild a LAST_ID file that was
# removed from disk.
# NOTE(review): the lines that unmount the client and stop ost1 before the
# removal are not part of this excerpt.
1025 check_mount_and_prep
# Create 64 single-stripe files in a directory striped on OST index 0.
1026 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1027 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
1032 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1034 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1035 error "(2) Fail to start ost1"
# Start the layout scan on ost1 with fail_loc 0x160e set.
1037 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1038 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1040 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1041 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
# The flags must first show 'crashed_lastid'.
1043 wait_update_facet ost1 "$LCTL get_param -n \
1044 obdfilter.${OST_DEV}.lfsck_layout |
1045 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1047 error "(5) unexpected status"
# Clear the fault injection and wait for 'completed'.
1050 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1052 wait_update_facet ost1 "$LCTL get_param -n \
1053 obdfilter.${OST_DEV}.lfsck_layout |
1054 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1056 error "(6) unexpected status"
# Once completed the flags field must be empty.
# NOTE(review): FLAGS is assigned without `local` here.
1059 echo "the LAST_ID(s) should have been rebuilt"
1060 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1061 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1063 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b: layout LFSCK on the OST rebuilds an on-disk LAST_ID that lags
# behind the value seen before the OST restart.
1066 check_mount_and_prep
1067 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
# Create 64 files while fail_loc 0x160d is set on ost1, then record the
# last_id value reported for sequence 0x100000000 as lastid1.
1069 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1070 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1071 do_facet ost1 $LCTL set_param fail_loc=0x160d
1072 createmany -o $DIR/$tdir/f 64
1073 local lastid1=$(do_facet ost1 "lctl get_param -n \
1074 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1075 awk -F: '{ print $2 }')
# Restart ost1 with fail_loc 0x215 (OBD_FAIL_OST_ENOSPC) set before the start.
1077 umount_client $MOUNT
1078 stop ost1 || error "(1) Fail to stop ost1"
1080 #define OBD_FAIL_OST_ENOSPC 0x215
1081 do_facet ost1 $LCTL set_param fail_loc=0x215
1083 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1084 error "(2) Fail to start ost1"
# Poll up to 60 iterations until last_id for 0x100000000 is readable again.
# NOTE(review): lastid2 has no visible `local` declaration -- confirm.
1086 for ((i = 0; i < 60; i++)); do
1087 lastid2=$(do_facet ost1 "lctl get_param -n \
1088 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1089 awk -F: '{ print $2 }')
1090 [ ! -z $lastid2 ] && break;
# The value read after the restart must be smaller than the earlier one.
1094 echo "the on-disk LAST_ID should be smaller than the expected one"
1095 [ $lastid1 -gt $lastid2 ] ||
1096 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
# Run layout LFSCK on ost1 and wait for status "completed".
1098 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1099 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1101 wait_update_facet ost1 "$LCTL get_param -n \
1102 obdfilter.${OST_DEV}.lfsck_layout |
1103 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1105 error "(6) unexpected status"
# Restart ost1 once more so last_id is read back after the LFSCK run.
1108 stop ost1 || error "(7) Fail to stop ost1"
1110 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1111 error "(8) Fail to start ost1"
# last_id for 0x100000000 must now equal lastid1; on failure the raw
# last_id parameter is printed before raising the error.
1113 echo "the on-disk LAST_ID should have been rebuilt"
1114 wait_update_facet ost1 "$LCTL get_param -n \
1115 obdfilter.${ost1_svc}.last_id | grep 0x100000000 |
1116 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
1117 $LCTL get_param -n obdfilter.${ost1_svc}.last_id
1118 error "(9) expect lastid1 0x100000000:$lastid1"
# Clear the fail_loc and stop all targets.
1121 do_facet ost1 $LCTL set_param fail_loc=0
1122 stopall || error "(10) Fail to stopall"
1124 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12: one lctl lfsck_start/lfsck_stop command with -A, issued on mds1,
# drives LFSCK on every target. Both the namespace and the layout component
# are exercised with the same start / stop / restart sequence.
# With fewer than 2 MDSes the test is skipped and the script exits with 0.
1127 [ $MDSCOUNT -lt 2 ] &&
1128 skip "We need at least 2 MDSes for test_12" && exit 0
# Create directory ${k} on MDT index k-1 for every MDS, 100 files in each.
1130 check_mount_and_prep
1131 for k in $(seq $MDSCOUNT); do
1132 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1133 createmany -o $DIR/$tdir/${k}/f 100 ||
1134 error "(0) Fail to create 100 files."
# --- namespace LFSCK, started with -s 1 (presumably a speed limit that keeps
# the scan in phase 1 long enough to be observed -- confirm) ---
1137 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1138 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1139 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
# Every MDT must report status "scanning-phase1" in lfsck_namespace.
1141 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1142 for k in $(seq $MDSCOUNT); do
1143 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1144 mdd.$(facet_svc mds${k}).lfsck_namespace |
1145 awk '/^status/ { print $2 }')
1146 [ "$STATUS" == "scanning-phase1" ] ||
1147 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
# Stop everything through mds1 and verify that each MDT reports "stopped".
1150 echo "Stop namespace LFSCK on all targets by single lctl command."
1151 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1152 error "(4) Fail to stop LFSCK on all devices!"
1154 echo "All the LFSCK targets should be in 'stopped' status."
1155 for k in $(seq $MDSCOUNT); do
1156 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1157 mdd.$(facet_svc mds${k}).lfsck_namespace |
1158 awk '/^status/ { print $2 }')
1159 [ "$STATUS" == "stopped" ] ||
1160 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
# Restart with -s 0 and wait for each MDT to reach "completed".
1163 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1164 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1165 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1167 echo "All the LFSCK targets should be in 'completed' status."
1168 for k in $(seq $MDSCOUNT); do
1169 wait_update_facet mds${k} "$LCTL get_param -n \
1170 mdd.$(facet_svc mds${k}).lfsck_namespace |
1171 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1172 error "(7) MDS${k} is not the expected 'completed'"
# --- layout LFSCK: same sequence, status read from lfsck_layout ---
1175 echo "Start layout LFSCK on all targets by single command (-s 1)."
1176 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1177 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1179 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1180 for k in $(seq $MDSCOUNT); do
1181 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1182 mdd.$(facet_svc mds${k}).lfsck_layout |
1183 awk '/^status/ { print $2 }')
1184 [ "$STATUS" == "scanning-phase1" ] ||
1185 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1188 echo "Stop layout LFSCK on all targets by single lctl command."
1189 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1190 error "(10) Fail to stop LFSCK on all devices!"
# For layout LFSCK the "stopped" status is verified on the MDTs first ...
1192 echo "All the LFSCK targets should be in 'stopped' status."
1193 for k in $(seq $MDSCOUNT); do
1194 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1195 mdd.$(facet_svc mds${k}).lfsck_layout |
1196 awk '/^status/ { print $2 }')
1197 [ "$STATUS" == "stopped" ] ||
1198 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# ... and then on every OST through obdfilter.*.lfsck_layout.
1201 for k in $(seq $OSTCOUNT); do
1202 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1203 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1204 awk '/^status/ { print $2 }')
1205 [ "$STATUS" == "stopped" ] ||
1206 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1209 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1210 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1211 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
# Wait for "completed" on each MDT; the wait value 32 is chosen to exceed the
# 30-second status query interval described in the comment below.
1213 echo "All the LFSCK targets should be in 'completed' status."
1214 for k in $(seq $MDSCOUNT); do
1215 # The LFSCK status query internal is 30 seconds. For the case
1216 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1217 # time to guarantee the status sync up.
1218 wait_update_facet mds${k} "$LCTL get_param -n \
1219 mdd.$(facet_svc mds${k}).lfsck_layout |
1220 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1221 error "(14) MDS${k} is not the expected 'completed'"
1224 run_test 12 "single command to trigger LFSCK on all devices"
# Test 13: layout LFSCK repairs lmm_oi values that do not match the
# MDT-object FID (scenario described by the echo text below).
1228 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1229 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1230 echo "MDT-object FID."
1233 check_mount_and_prep
# Create 32 files while fail_loc 0x160f is set on the MDS, then clear it.
1235 echo "Inject failure stub to simulate bad lmm_oi"
1236 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1237 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1238 createmany -o $DIR/$tdir/f 32
1239 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run layout LFSCK and wait until its status reads "completed".
1241 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1242 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1244 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1245 mdd.${MDT_DEV}.lfsck_layout |
1246 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1248 error "(2) unexpected status"
# All 32 files created above must be counted in repaired_others.
1251 local repaired=$($SHOW_LAYOUT |
1252 awk '/^repaired_others/ { print $2 }')
1253 [ $repaired -eq 32 ] ||
1254 error "(3) Fail to repair crashed lmm_oi: $repaired"
1256 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14: layout LFSCK detects MDT-objects whose OST-object is missing
# (dangling reference) and, when started with -c, repairs them so that the
# directory can be listed again.
1260 echo "The OST-object referenced by the MDT-object should be there;"
1261 echo "otherwise, the LFSCK should re-create the missed OST-object."
1264 check_mount_and_prep
1265 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# count comes from precreated_ost_obj_count 0 0 (presumably the number of
# objects already pre-created on OST index 0 -- confirm against the helper).
1267 local count=$(precreated_ost_obj_count 0 0)
# Create count+32 files while fail_loc 0x1610 is set on ost1, then clear it.
1269 echo "Inject failure stub to simulate dangling referenced MDT-object"
1270 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1271 do_facet ost1 $LCTL set_param fail_loc=0x1610
1272 createmany -o $DIR/$tdir/f $((count + 32))
1273 do_facet ost1 $LCTL set_param fail_loc=0
# Full debug logging stays enabled until stop_full_debug_logging at the end.
1275 start_full_debug_logging
1277 # exhaust other pre-created dangling cases
1278 count=$(precreated_ost_obj_count 0 0)
1279 createmany -o $DIR/$tdir/a $count ||
1280 error "(0) Fail to create $count files."
# Listing the directory is expected to fail at this point.
1282 echo "'ls' should fail because of dangling referenced MDT-object"
1283 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
# First pass: started without -c; at least 32 entries must show up in
# repaired_dangling, yet ls must still fail afterwards.
1285 echo "Trigger layout LFSCK to find out dangling reference"
1286 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1288 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1289 mdd.${MDT_DEV}.lfsck_layout |
1290 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1292 error "(3) unexpected status"
1295 local repaired=$($SHOW_LAYOUT |
1296 awk '/^repaired_dangling/ { print $2 }')
1297 [ $repaired -ge 32 ] ||
1298 error "(4) Fail to repair dangling reference: $repaired"
1300 echo "'ls' should fail because it will not repair dangling by default"
1301 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail."
# Second pass: started with -c; afterwards ls must succeed.
1303 echo "Trigger layout LFSCK to repair dangling reference"
1304 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1306 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1307 mdd.${MDT_DEV}.lfsck_layout |
1308 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1310 error "(7) unexpected status"
1313 repaired=$($SHOW_LAYOUT |
1314 awk '/^repaired_dangling/ { print $2 }')
1315 [ $repaired -ge 32 ] ||
1316 error "(8) Fail to repair dangling reference: $repaired"
1318 echo "'ls' should success after layout LFSCK repairing"
1319 ls -ail $DIR/$tdir > /dev/null || error "(9) ls should success."
1320 stop_full_debug_logging
1322 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Test 15a: layout LFSCK repairs an OST-object whose back pointer refers to
# a non-existent MDT-object (scenario described by the echo text below).
1326 echo "If the OST-object referenced by the MDT-object back points"
1327 echo "to some non-exist MDT-object, then the LFSCK should repair"
1328 echo "the OST-object to back point to the right MDT-object."
1331 check_mount_and_prep
1332 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Write 1 MiB to f0 while fail_loc 0x1611 is set on ost1, drop the client's
# osc locks, then clear the fail_loc.
1334 echo "Inject failure stub to make the OST-object to back point to"
1335 echo "non-exist MDT-object."
1336 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
1338 do_facet ost1 $LCTL set_param fail_loc=0x1611
1339 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1340 cancel_lru_locks osc
1341 do_facet ost1 $LCTL set_param fail_loc=0
# Run layout LFSCK and wait until its status reads "completed".
1343 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1344 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1346 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1347 mdd.${MDT_DEV}.lfsck_layout |
1348 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1350 error "(2) unexpected status"
# Exactly one repair must be counted in repaired_unmatched_pair.
1353 local repaired=$($SHOW_LAYOUT |
1354 awk '/^repaired_unmatched_pair/ { print $2 }')
1355 [ $repaired -eq 1 ] ||
1356 error "(3) Fail to repair unmatched pair: $repaired"
1358 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b: layout LFSCK repairs an OST-object whose back pointer refers to
# another MDT-object that does not recognize it (see the echo text below).
1362 echo "If the OST-object referenced by the MDT-object back points"
1363 echo "to other MDT-object that doesn't recognize the OST-object,"
1364 echo "then the LFSCK should repair it to back point to the right"
1365 echo "MDT-object (the first one)."
1368 check_mount_and_prep
1369 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Write the "guard" file first, without any fail_loc set.
1370 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1371 cancel_lru_locks osc
# Write f0 while fail_loc 0x1612 is set on ost1, then clear the fail_loc.
1373 echo "Inject failure stub to make the OST-object to back point to"
1374 echo "other MDT-object"
1376 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1377 do_facet ost1 $LCTL set_param fail_loc=0x1612
1378 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1379 cancel_lru_locks osc
1380 do_facet ost1 $LCTL set_param fail_loc=0
# Run layout LFSCK and wait until its status reads "completed".
1382 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1383 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1385 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1386 mdd.${MDT_DEV}.lfsck_layout |
1387 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1389 error "(2) unexpected status"
# Exactly one repair must be counted in repaired_unmatched_pair.
1392 local repaired=$($SHOW_LAYOUT |
1393 awk '/^repaired_unmatched_pair/ { print $2 }')
1394 [ $repaired -eq 1 ] ||
1395 error "(3) Fail to repair unmatched pair: $repaired"
1397 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 16: layout LFSCK fixes an OST-object whose owner differs from the
# owner recorded on the MDT-object (scenario described by the echo text).
1401 echo "If the OST-object's owner information does not match the owner"
1402 echo "information stored in the MDT-object, then the LFSCK trust the"
1403 echo "MDT-object and update the OST-object's owner information."
1406 check_mount_and_prep
1407 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Create f0 with 1 MiB of data and drop the client's osc locks.
1408 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1409 cancel_lru_locks osc
# chown f0 to 1.1 while fail_loc 0x1613 is set on the MDS, then clear it.
1411 echo "Inject failure stub to skip OST-object owner changing"
1412 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1413 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
1414 chown 1.1 $DIR/$tdir/f0
1415 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Run layout LFSCK and wait until its status reads "completed".
1417 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1420 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1422 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1423 mdd.${MDT_DEV}.lfsck_layout |
1424 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1426 error "(2) unexpected status"
# Exactly one repair must be counted in repaired_inconsistent_owner.
1429 local repaired=$($SHOW_LAYOUT |
1430 awk '/^repaired_inconsistent_owner/ { print $2 }')
1431 [ $repaired -eq 1 ] ||
1432 error "(3) Fail to repair inconsistent owner: $repaired"
1434 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17: layout LFSCK separates two MDT-objects that reference the same
# OST-object (scenario described by the echo text below).
1438 echo "If more than one MDT-objects reference the same OST-object,"
1439 echo "and the OST-object only recognizes one MDT-object, then the"
1440 echo "LFSCK should create new OST-objects for such non-recognized"
1444 check_mount_and_prep
1445 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# With fail_loc 0x1614 set on the MDS: write 1 MiB to "guard", then create f0.
1447 echo "Inject failure stub to make two MDT-objects to refernce"
1448 echo "the OST-object"
1450 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1451 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1453 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1454 cancel_lru_locks osc
1456 createmany -o $DIR/$tdir/f 1
1458 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
# Drop cached mdc/osc locks before reading sizes back.
1460 cancel_lru_locks mdc
1461 cancel_lru_locks osc
# f0 was never written, yet it must report guard's size (1048576 bytes),
# which shows both names resolve to the same OST-object.
1463 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
1464 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1465 [ $size -eq 1048576 ] ||
1466 error "(1) f0 (wrong) size should be 1048576, but got $size"
# Run layout LFSCK and wait until its status reads "completed".
1468 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1471 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1473 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1474 mdd.${MDT_DEV}.lfsck_layout |
1475 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1477 error "(3) unexpected status"
# Exactly one repair must be counted in repaired_multiple_referenced.
1480 local repaired=$($SHOW_LAYOUT |
1481 awk '/^repaired_multiple_referenced/ { print $2 }')
1482 [ $repaired -eq 1 ] ||
1483 error "(4) Fail to repair multiple references: $repaired"
# Writing 2 MiB to f0 must no longer change guard's size.
1485 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
1486 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1487 error "(5) Fail to write f0."
1488 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1489 [ $size -eq 1048576 ] ||
1490 error "(6) guard size should be 1048576, but got $size"
1492 run_test 17 "LFSCK can repair multiple references"
# Test 18a: the MDT-object still exists but its stripe information is lost;
# layout LFSCK started with -o must regenerate the layout EA entries so the
# file sizes are correct again.
1496 echo "The target MDT-object is there, but related stripe information"
1497 echo "is lost or partly lost. The LFSCK should regenerate the missed"
1498 echo "layout EA entries."
1501 check_mount_and_prep
# a1 lives on MDT index 0; f1 is a single-stripe file with 2 MiB of data.
1502 $LFS mkdir -i 0 $DIR/$tdir/a1
1503 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1504 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# Field 6 of `ls -il` output is kept as the reference size for later checks.
1506 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1508 $LFS path2fid $DIR/$tdir/a1/f1
1509 $LFS getstripe $DIR/$tdir/a1/f1
# With a second MDS, also create a2 on MDT index 1 with a 2-stripe file f2.
1511 if [ $MDSCOUNT -ge 2 ]; then
1512 $LFS mkdir -i 1 $DIR/$tdir/a2
1513 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1514 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1515 $LFS path2fid $DIR/$tdir/a2/f2
1516 $LFS getstripe $DIR/$tdir/a2/f2
1519 cancel_lru_locks osc
# chown each file while fail_loc 0x1615 is set on the MDS that owns it.
1521 echo "Inject failure, to make the MDT-object lost its layout EA"
1522 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1523 do_facet mds1 $LCTL set_param fail_loc=0x1615
1524 chown 1.1 $DIR/$tdir/a1/f1
1526 if [ $MDSCOUNT -ge 2 ]; then
1527 do_facet mds2 $LCTL set_param fail_loc=0x1615
1528 chown 1.1 $DIR/$tdir/a2/f2
# Clear the fail_loc on every MDS that had it set.
1534 do_facet mds1 $LCTL set_param fail_loc=0
1535 if [ $MDSCOUNT -ge 2 ]; then
1536 do_facet mds2 $LCTL set_param fail_loc=0
1539 cancel_lru_locks mdc
1540 cancel_lru_locks osc
# Before the repair the reported sizes must differ from saved_size.
1542 echo "The file size should be incorrect since layout EA is lost"
1543 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1544 [ "$cur_size" != "$saved_size" ] ||
1545 error "(1) Expect incorrect file1 size"
1547 if [ $MDSCOUNT -ge 2 ]; then
1548 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1549 [ "$cur_size" != "$saved_size" ] ||
1550 error "(2) Expect incorrect file2 size"
# Start layout LFSCK with -o and wait for "completed" on every MDT; the wait
# value 32 exceeds the 30-second status query interval noted below.
1553 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1554 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
1556 for k in $(seq $MDSCOUNT); do
1557 # The LFSCK status query internal is 30 seconds. For the case
1558 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1559 # time to guarantee the status sync up.
1560 wait_update_facet mds${k} "$LCTL get_param -n \
1561 mdd.$(facet_svc mds${k}).lfsck_layout |
1562 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1563 error "(4) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" as well (checked once, without waiting).
1566 for k in $(seq $OSTCOUNT); do
1567 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1568 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1569 awk '/^status/ { print $2 }')
1570 [ "$cur_status" == "completed" ] ||
1571 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
# repaired_orphan: 1 expected on mds1 (f1, one stripe) and 2 on mds2
# (f2 was created with -c 2).
1574 local repaired=$(do_facet mds1 $LCTL get_param -n \
1575 mdd.$(facet_svc mds1).lfsck_layout |
1576 awk '/^repaired_orphan/ { print $2 }')
1577 [ $repaired -eq 1 ] ||
1578 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
1580 if [ $MDSCOUNT -ge 2 ]; then
1581 repaired=$(do_facet mds2 $LCTL get_param -n \
1582 mdd.$(facet_svc mds2).lfsck_layout |
1583 awk '/^repaired_orphan/ { print $2 }')
1584 [ $repaired -eq 2 ] ||
1585 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
# Print FID and stripe information again after the repair.
1588 $LFS path2fid $DIR/$tdir/a1/f1
1589 $LFS getstripe $DIR/$tdir/a1/f1
1591 if [ $MDSCOUNT -ge 2 ]; then
1592 $LFS path2fid $DIR/$tdir/a2/f2
1593 $LFS getstripe $DIR/$tdir/a2/f2
# After the repair both files must report saved_size again.
1596 echo "The file size should be correct after layout LFSCK scanning"
1597 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1598 [ "$cur_size" == "$saved_size" ] ||
1599 error "(7) Expect file1 size $saved_size, but got $cur_size"
1601 if [ $MDSCOUNT -ge 2 ]; then
1602 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1603 [ "$cur_size" == "$saved_size" ] ||
1604 error "(8) Expect file2 size $saved_size, but got $cur_size"
1607 run_test 18a "Find out orphan OST-object and repair it (1)"
# Test 18b: the MDT-object itself is lost; layout LFSCK started with -o must
# re-create it under .lustre/lost+found/MDTxxxx as <fid>-R-0, from where it
# can be moved back into the namespace with its original size.
1611 echo "The target MDT-object is lost. The LFSCK should re-create the"
1612 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1613 echo "can move it back to normal namespace manually."
1616 check_mount_and_prep
# a1 on MDT index 0 with a single-stripe 2 MiB file f1; remember its size
# (field 6 of `ls -il`) and its FID.
1617 $LFS mkdir -i 0 $DIR/$tdir/a1
1618 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1619 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1620 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1621 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1623 $LFS getstripe $DIR/$tdir/a1/f1
# With a second MDS, also create a2 on MDT index 1 with a 2-stripe file f2.
# NOTE(review): fid2 has no visible `local` declaration -- confirm.
1625 if [ $MDSCOUNT -ge 2 ]; then
1626 $LFS mkdir -i 1 $DIR/$tdir/a2
1627 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1628 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1629 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1631 $LFS getstripe $DIR/$tdir/a2/f2
1634 cancel_lru_locks osc
# Remove each file while fail_loc 0x1616 is set on the MDS that owns it.
1636 echo "Inject failure, to simulate the case of missing the MDT-object"
1637 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1638 do_facet mds1 $LCTL set_param fail_loc=0x1616
1639 rm -f $DIR/$tdir/a1/f1
1641 if [ $MDSCOUNT -ge 2 ]; then
1642 do_facet mds2 $LCTL set_param fail_loc=0x1616
1643 rm -f $DIR/$tdir/a2/f2
# Clear the fail_loc on every MDS that had it set.
1649 do_facet mds1 $LCTL set_param fail_loc=0
1650 if [ $MDSCOUNT -ge 2 ]; then
1651 do_facet mds2 $LCTL set_param fail_loc=0
1654 cancel_lru_locks mdc
1655 cancel_lru_locks osc
# Start layout LFSCK with -o and wait for "completed" on every MDT; the wait
# value 32 exceeds the 30-second status query interval noted below.
1657 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1658 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1660 for k in $(seq $MDSCOUNT); do
1661 # The LFSCK status query internal is 30 seconds. For the case
1662 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1663 # time to guarantee the status sync up.
1664 wait_update_facet mds${k} "$LCTL get_param -n \
1665 mdd.$(facet_svc mds${k}).lfsck_layout |
1666 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1667 error "(2) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" as well (checked once, without waiting).
1670 for k in $(seq $OSTCOUNT); do
1671 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1672 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1673 awk '/^status/ { print $2 }')
1674 [ "$cur_status" == "completed" ] ||
1675 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# repaired_orphan: 1 expected on mds1 and 2 on mds2 (f2 has two stripes).
1678 local repaired=$(do_facet mds1 $LCTL get_param -n \
1679 mdd.$(facet_svc mds1).lfsck_layout |
1680 awk '/^repaired_orphan/ { print $2 }')
1681 [ $repaired -eq 1 ] ||
1682 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
1684 if [ $MDSCOUNT -ge 2 ]; then
1685 repaired=$(do_facet mds2 $LCTL get_param -n \
1686 mdd.$(facet_svc mds2).lfsck_layout |
1687 awk '/^repaired_orphan/ { print $2 }')
1688 [ $repaired -eq 2 ] ||
1689 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
# The re-created objects are looked up as <fid>-R-0 under the lost+found
# directory of the owning MDT and moved back to their original paths.
1692 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
1693 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
1694 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
1696 if [ $MDSCOUNT -ge 2 ]; then
1697 local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0
1698 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
# Print FID and stripe information of the restored files.
1701 $LFS path2fid $DIR/$tdir/a1/f1
1702 $LFS getstripe $DIR/$tdir/a1/f1
1704 if [ $MDSCOUNT -ge 2 ]; then
1705 $LFS path2fid $DIR/$tdir/a2/f2
1706 $LFS getstripe $DIR/$tdir/a2/f2
# The restored files must report saved_size again.
1709 echo "The file size should be correct after layout LFSCK scanning"
1710 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1711 [ "$cur_size" == "$saved_size" ] ||
1712 error "(7) Expect file1 size $saved_size, but got $cur_size"
1714 if [ $MDSCOUNT -ge 2 ]; then
1715 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1716 [ "$cur_size" == "$saved_size" ] ||
1717 error "(8) Expect file2 size $saved_size, but got $cur_size"
1720 run_test 18b "Find out orphan OST-object and repair it (2)"
# Test 18c: both the MDT-object and the parent FID stored on the OST-object
# are missing; layout LFSCK started with -o must re-create the MDT-object
# with a new FID, visible as *-N-0 under .lustre/lost+found/MDT0000.
1724 echo "The target MDT-object is lost, and the OST-object FID is missing."
1725 echo "The LFSCK should re-create the MDT-object with new FID under the "
1726 echo "directory .lustre/lost+found/MDTxxxx."
1729 check_mount_and_prep
1730 $LFS mkdir -i 0 $DIR/$tdir/a1
1731 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
# Write f1 (and, with two MDSes, f2) while fail_loc 0x1617 is set on the OSTs.
1733 echo "Inject failure, to simulate the case of missing parent FID"
1734 #define OBD_FAIL_LFSCK_NOPFID 0x1617
1735 do_facet ost1 $LCTL set_param fail_loc=0x1617
1737 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1738 $LFS getstripe $DIR/$tdir/a1/f1
1740 if [ $MDSCOUNT -ge 2 ]; then
1741 $LFS mkdir -i 1 $DIR/$tdir/a2
1742 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1743 do_facet ost2 $LCTL set_param fail_loc=0x1617
1744 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1745 $LFS getstripe $DIR/$tdir/a2/f2
1748 cancel_lru_locks osc
# Remove each file while fail_loc 0x1616 is set on the MDS that owns it.
1750 echo "Inject failure, to simulate the case of missing the MDT-object"
1751 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1752 do_facet mds1 $LCTL set_param fail_loc=0x1616
1753 rm -f $DIR/$tdir/a1/f1
1755 if [ $MDSCOUNT -ge 2 ]; then
1756 do_facet mds2 $LCTL set_param fail_loc=0x1616
1757 rm -f $DIR/$tdir/a2/f2
# Clear the fail_loc on every MDS that had it set.
1763 do_facet mds1 $LCTL set_param fail_loc=0
1764 if [ $MDSCOUNT -ge 2 ]; then
1765 do_facet mds2 $LCTL set_param fail_loc=0
1768 cancel_lru_locks mdc
1769 cancel_lru_locks osc
# Start layout LFSCK with -o and wait for "completed" on every MDT; the wait
# value 32 exceeds the 30-second status query interval noted below.
1771 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1772 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1774 for k in $(seq $MDSCOUNT); do
1775 # The LFSCK status query internal is 30 seconds. For the case
1776 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1777 # time to guarantee the status sync up.
1778 wait_update_facet mds${k} "$LCTL get_param -n \
1779 mdd.$(facet_svc mds${k}).lfsck_layout |
1780 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1781 error "(2) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" as well (checked once, without waiting).
1784 for k in $(seq $OSTCOUNT); do
1785 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1786 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1787 awk '/^status/ { print $2 }')
1788 [ "$cur_status" == "completed" ] ||
1789 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# repaired_orphan on mds1 is compared against $expected.
# NOTE(review): presumably $expected is chosen according to MDSCOUNT -- confirm.
1792 if [ $MDSCOUNT -ge 2 ]; then
1798 local repaired=$(do_facet mds1 $LCTL get_param -n \
1799 mdd.$(facet_svc mds1).lfsck_layout |
1800 awk '/^repaired_orphan/ { print $2 }')
1801 [ $repaired -eq $expected ] ||
1802 error "(4) Expect $expected fixed on mds1, but got: $repaired"
# mds2 is expected to have repaired nothing, and no *-N-0 entry may exist
# under MDT0001: the `ls ... && error` form fails the test when ls succeeds.
# NOTE(review): the echo text ("There should be some stub under ...MDT0001/")
# contradicts this check and error message (6); the message looks inverted.
1804 if [ $MDSCOUNT -ge 2 ]; then
1805 repaired=$(do_facet mds2 $LCTL get_param -n \
1806 mdd.$(facet_svc mds2).lfsck_layout |
1807 awk '/^repaired_orphan/ { print $2 }')
1808 [ $repaired -eq 0 ] ||
1809 error "(5) Expect 0 fixed on mds2, but got: $repaired"
1812 echo "There should be some stub under .lustre/lost+found/MDT0001/"
1813 ls -ail $MOUNT/.lustre/lost+found/MDT0001/*-N-0 &&
1814 error "(6) .lustre/lost+found/MDT0001/ should be empty"
# At least one *-N-0 entry must exist under MDT0000.
1816 echo "There should be some stub under .lustre/lost+found/MDT0000/"
1817 ls -ail $MOUNT/.lustre/lost+found/MDT0000/*-N-0 ||
1818 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
1820 run_test 18c "Find out orphan OST-object and repair it (3)"
# Test 18d: f2 is turned into a dangling reference while its old OST-object
# still exists. LFSCK run with -o -c is expected to put the orphan object
# back in place, so f2 regains its original size and data.
1824 echo "The target MDT-object layout EA slot is occpuied by some new"
1825 echo "created OST-object when repair dangling reference case. Such"
1826 echo "conflict OST-object has never been modified. Then when found"
1827 echo "the orphan OST-object, LFSCK will replace it with the orphan"
1831 check_mount_and_prep
# Create f1 ("guard") and f2 ("foo") in a1; remember f2's size (field 6 of
# `ls -il`) and print FID/stripe information for both files.
1833 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1834 echo "guard" > $DIR/$tdir/a1/f1
1835 echo "foo" > $DIR/$tdir/a1/f2
1836 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1837 $LFS path2fid $DIR/$tdir/a1/f1
1838 $LFS getstripe $DIR/$tdir/a1/f1
1839 $LFS path2fid $DIR/$tdir/a1/f2
1840 $LFS getstripe $DIR/$tdir/a1/f2
1841 cancel_lru_locks osc
1843 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1844 echo "to reference the same OST-object (which is f1's OST-obejct)."
1845 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1846 echo "dangling reference case, but f2's old OST-object is there."
# With fail_loc 0x1618 set on the MDS: chown f2, then remove f1.
1849 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1850 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1851 chown 1.1 $DIR/$tdir/a1/f2
1852 rm -f $DIR/$tdir/a1/f1
1855 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
# Bring the whole filesystem back up (output discarded).
1857 echo "stopall to cleanup object cache"
1860 setupall > /dev/null
# f2 must now report a size different from saved_size.
1862 echo "The file size should be incorrect since dangling referenced"
1863 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1864 [ "$cur_size" != "$saved_size" ] ||
1865 error "(1) Expect incorrect file2 size"
# Arm fail_loc 0x1602 with fail_val=5 on the MDS before starting LFSCK
# (presumably a delay so that scanning-phase2 can be observed -- confirm).
1867 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1868 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
1870 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1871 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
# mds1 must reach "scanning-phase2" before the fail_loc is cleared.
1873 wait_update_facet mds1 "$LCTL get_param -n \
1874 mdd.$(facet_svc mds1).lfsck_layout |
1875 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1876 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
1878 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
# Wait for "completed" on every MDT; the wait value 32 exceeds the 30-second
# status query interval noted below.
1880 for k in $(seq $MDSCOUNT); do
1881 # The LFSCK status query internal is 30 seconds. For the case
1882 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1883 # time to guarantee the status sync up.
1884 wait_update_facet mds${k} "$LCTL get_param -n \
1885 mdd.$(facet_svc mds${k}).lfsck_layout |
1886 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1887 error "(3) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" as well (checked once, without waiting).
1890 for k in $(seq $OSTCOUNT); do
1891 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1892 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1893 awk '/^status/ { print $2 }')
1894 [ "$cur_status" == "completed" ] ||
1895 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
# Exactly one orphan must be counted in repaired_orphan on the MDS.
1898 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
1899 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
1900 awk '/^repaired_orphan/ { print $2 }')
1901 [ $repaired -eq 1 ] ||
1902 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
# f2 must report saved_size again; its content, FID and layout are printed.
1904 echo "The file size should be correct after layout LFSCK scanning"
1905 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1906 [ "$cur_size" == "$saved_size" ] ||
1907 error "(6) Expect file2 size $saved_size, but got $cur_size"
1909 echo "The LFSCK should find back the original data."
1910 cat $DIR/$tdir/a1/f2
1911 $LFS path2fid $DIR/$tdir/a1/f2
1912 $LFS getstripe $DIR/$tdir/a1/f2
1914 run_test 18d "Find out orphan OST-object and repair it (4)"
1918 echo "The target MDT-object layout EA slot is occpuied by some new"
1919 echo "created OST-object when repair dangling reference case. Such"
1920 echo "conflict OST-object has been modified by others. To keep the"
1921 echo "new data, the LFSCK will create a new file to refernece this"
1922 echo "old orphan OST-object."
1925 check_mount_and_prep
1927 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1928 echo "guard" > $DIR/$tdir/a1/f1
1929 echo "foo" > $DIR/$tdir/a1/f2
1930 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1931 $LFS path2fid $DIR/$tdir/a1/f1
1932 $LFS getstripe $DIR/$tdir/a1/f1
1933 $LFS path2fid $DIR/$tdir/a1/f2
1934 $LFS getstripe $DIR/$tdir/a1/f2
1935 cancel_lru_locks osc
1937 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1938 echo "to reference the same OST-object (which is f1's OST-obejct)."
1939 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1940 echo "dangling reference case, but f2's old OST-object is there."
1943 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1944 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1945 chown 1.1 $DIR/$tdir/a1/f2
1946 rm -f $DIR/$tdir/a1/f1
1949 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1951 echo "stopall to cleanup object cache"
1954 setupall > /dev/null
1956 echo "The file size should be incorrect since dangling referenced"
1957 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1958 [ "$cur_size" != "$saved_size" ] ||
1959 error "(1) Expect incorrect file2 size"
1961 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1962 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
1964 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1965 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1967 wait_update_facet mds1 "$LCTL get_param -n \
1968 mdd.$(facet_svc mds1).lfsck_layout |
1969 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1970 error "(3) MDS1 is not the expected 'scanning-phase2'"
1972 # to guarantee all updates are synced.
1976 echo "Write new data to f2 to modify the new created OST-object."
1977 echo "dummy" >> $DIR/$tdir/a1/f2
1979 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1981 for k in $(seq $MDSCOUNT); do
1982 # The LFSCK status query interval is 30 seconds. In case some
1983 # LFSCK_NOTIFY RPCs fail or get lost, wait long enough
1984 # to guarantee that the status has been synced up.
1985 wait_update_facet mds${k} "$LCTL get_param -n \
1986 mdd.$(facet_svc mds${k}).lfsck_layout |
1987 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1988 error "(4) MDS${k} is not the expected 'completed'"
1991 for k in $(seq $OSTCOUNT); do
1992 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1993 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1994 awk '/^status/ { print $2 }')
1995 [ "$cur_status" == "completed" ] ||
1996 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
1999 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2000 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2001 awk '/^repaired_orphan/ { print $2 }')
2002 [ $repaired -eq 1 ] ||
2003 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2005 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2006 local cname=$(ls $MOUNT/.lustre/lost+found/MDT0000/*-C-0)
2008 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
2010 echo "The stub file should keep the original f2 data"
2011 cur_size=$(ls -il $cname | awk '{ print $6 }')
2012 [ "$cur_size" == "$saved_size" ] ||
2013 error "(8) Expect file2 size $saved_size, but got $cur_size"
2016 $LFS path2fid $cname
2017 $LFS getstripe $cname
2019 echo "The f2 should contains new data."
2020 cat $DIR/$tdir/a1/f2
2021 $LFS path2fid $DIR/$tdir/a1/f2
2022 $LFS getstripe $DIR/$tdir/a1/f2
2024 run_test 18e "Find out orphan OST-object and repair it (5)"
# Test 19a body (the function header and closing brace are not part of this
# listing): a read that carries a wrong parent FID must be refused.
#
# Prepare the test directory, apply "-c 1 -i 0" to it via lfs setstripe,
# and create two small files.
2027 check_mount_and_prep
2028 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2030 echo "foo" > $DIR/$tdir/a0
2031 echo "guard" > $DIR/$tdir/a1
# Drop the osc locks before the read below.
# NOTE(review): presumably so that the later cat cannot be served from
# client cache - confirm.
2032 cancel_lru_locks osc
2034 echo "Inject failure, then client will offer wrong parent FID when read"
# Set lfsck_verify_pfid to 1 on OST0000 (run on the ost1 facet).
2035 do_facet ost1 $LCTL set_param -n \
2036 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
# fail_loc is set with a plain $LCTL (no do_facet), i.e. on the node that
# runs this script.
2037 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
2038 $LCTL set_param fail_loc=0x1619
2040 echo "Read RPC with wrong parent FID should be denied"
# A successful cat is the failure case here; fail_loc is cleared afterwards.
2041 cat $DIR/$tdir/a0 && error "(3) Read should be denied!"
2042 $LCTL set_param fail_loc=0
2044 run_test 19a "OST-object inconsistency self detect"
# Test 19b body (the function header and closing brace are not part of this
# listing): an OST-object whose parent FID is wrong gets repaired when a
# read with the right parent FID arrives and lfsck_verify_pfid is set to 1.
2047 check_mount_and_prep
2048 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2050 echo "Inject failure stub to make the OST-object to back point to"
2051 echo "non-exist MDT-object"
# Create f0 while fail_loc 0x1611 is active on ost1, drop the osc locks,
# then clear the fail_loc again.
2053 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
2054 do_facet ost1 $LCTL set_param fail_loc=0x1611
2055 echo "foo" > $DIR/$tdir/f0
2056 cancel_lru_locks osc
2057 do_facet ost1 $LCTL set_param fail_loc=0
2059 echo "Nothing should be fixed since self detect and repair is disabled"
# The "repaired" counter is parsed from the lfsck_verify_pfid parameter of
# OST0000 (second field of the line that starts with "repaired").
2060 local repaired=$(do_facet ost1 $LCTL get_param -n \
2061 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2062 awk '/^repaired/ { print $2 }')
2063 [ $repaired -eq 0 ] ||
2064 error "(1) Expected 0 repaired, but got $repaired"
2066 echo "Read RPC with right parent FID should be accepted,"
2067 echo "and cause parent FID on OST to be fixed"
# Set lfsck_verify_pfid to 1 on OST0000 and read the file; the read must
# succeed.
2069 do_facet ost1 $LCTL set_param -n \
2070 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2071 cat $DIR/$tdir/f0 || error "(2) Read should not be denied!"
# Re-read the counter: exactly one repair is expected now.
2073 repaired=$(do_facet ost1 $LCTL get_param -n \
2074 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2075 awk '/^repaired/ { print $2 }')
2076 [ $repaired -eq 1 ] ||
2077 error "(3) Expected 1 repaired, but got $repaired"
2079 run_test 19b "OST-object inconsistency self repair"
# Test 20 body (the function header, closing brace and several control-flow
# lines such as else/fi/done are not part of this listing).
#
# Scenario: files lose their MDT-object, and some also lose one of their
# OST-objects. After a layout LFSCK the files are expected to show up
# under $MOUNT/.lustre/lost+found/MDT0000/<fid>-R-0, some of them with the
# LOV_PATTERN_F_HOLE pattern flag, and must be usable (or fail cleanly)
# with ordinary tools.
#
# The test is skipped when fewer than 2 OSTs are configured.
2082 [ $OSTCOUNT -lt 2 ] &&
2083 skip "The test needs at least 2 OSTs" && return
2086 echo "The target MDT-object and some of its OST-object are lost."
2087 echo "The LFSCK should find out the left OST-objects and re-create"
2088 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2089 echo "with the partial OST-objects (LOV EA hole)."
2091 echo "New client can access the file with LOV EA hole via normal"
2092 echo "system tools or commands without crash the system."
2094 echo "For old client, even though it cannot access the file with"
2095 echo "LOV EA hole, it should not cause the system crash."
# Create the working directory and give it a 3-stripe layout when more
# than 2 OSTs exist; the 2-stripe setstripe below is presumably the else
# branch (the else/fi lines are not in this listing).
# NOTE(review): $bcount is used below, but its assignment is not visible
# in this listing.
2098 check_mount_and_prep
2099 $LFS mkdir -i 0 $DIR/$tdir/a1
2100 if [ $OSTCOUNT -gt 2 ]; then
2101 $LFS setstripe -c 3 -i 0 -s 1M $DIR/$tdir/a1
2104 $LFS setstripe -c 2 -i 0 -s 1M $DIR/$tdir/a1
2108 # 256 blocks on the stripe0.
2109 # 1 block on the stripe1 for 2 OSTs case.
2110 # 256 blocks on the stripe1 for other cases.
2111 # 1 block on the stripe2 if OSTs > 2
# Write $bcount 4096-byte blocks of zeroes into f0..f2 and remember each
# file's FID; the FIDs are used later to build the lost+found names.
2112 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2113 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2114 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
2116 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2117 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2118 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
2121 $LFS getstripe $DIR/$tdir/a1/f0
2123 $LFS getstripe $DIR/$tdir/a1/f1
2125 $LFS getstripe $DIR/$tdir/a1/f2
# A fourth file, f3, is only created when more than 2 OSTs exist.
# NOTE(review): no "local fid3" is visible in this listing.
2127 if [ $OSTCOUNT -gt 2 ]; then
2128 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
2129 fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2131 $LFS getstripe $DIR/$tdir/a1/f3
2134 cancel_lru_locks osc
# Fault injection on mds1: each rm below runs with a fail_loc/fail_val
# combination active, as described by the accompanying echo lines.
2136 echo "Inject failure..."
2137 echo "To simulate f0 lost MDT-object"
2138 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2139 do_facet mds1 $LCTL set_param fail_loc=0x1616
2140 rm -f $DIR/$tdir/a1/f0
2142 echo "To simulate f1 lost MDT-object and OST-object0"
2143 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
2144 do_facet mds1 $LCTL set_param fail_loc=0x161a
2145 rm -f $DIR/$tdir/a1/f1
# fail_loc stays at 0x161a; only fail_val changes for f2 and f3.
2147 echo "To simulate f2 lost MDT-object and OST-object1"
2148 do_facet mds1 $LCTL set_param fail_val=1
2149 rm -f $DIR/$tdir/a1/f2
2151 if [ $OSTCOUNT -gt 2 ]; then
2152 echo "To simulate f3 lost MDT-object and OST-object2"
2153 do_facet mds1 $LCTL set_param fail_val=2
2154 rm -f $DIR/$tdir/a1/f3
# Unmount the client and clear the injected failure on mds1.
2157 umount_client $MOUNT
2160 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2162 echo "Inject failure to slow down the LFSCK on OST0"
2163 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2164 do_facet ost1 $LCTL set_param fail_loc=0x161b
2166 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2167 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2170 do_facet ost1 $LCTL set_param fail_loc=0
# Wait for every MDT to report status "completed" for the layout LFSCK.
# NOTE(review): the trailing 32 is presumably the maximum wait in seconds,
# slightly above the 30-second status query interval the comment below
# refers to - confirm against wait_update_facet.
2172 for k in $(seq $MDSCOUNT); do
2173 # The LFSCK status query internal is 30 seconds. For the case
2174 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
2175 # time to guarantee the status sync up.
2176 wait_update_facet mds${k} "$LCTL get_param -n \
2177 mdd.$(facet_svc mds${k}).lfsck_layout |
2178 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2179 error "(2) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" as well (checked once, no waiting).
2182 for k in $(seq $OSTCOUNT); do
2183 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2184 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2185 awk '/^status/ { print $2 }')
2186 [ "$cur_status" == "completed" ] ||
2187 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# repaired_orphan on mds1 must be 9 with more than 2 OSTs; the check for
# 4 is presumably the else branch (else/fi are not in this listing).
2190 local repaired=$(do_facet mds1 $LCTL get_param -n \
2191 mdd.$(facet_svc mds1).lfsck_layout |
2192 awk '/^repaired_orphan/ { print $2 }')
2193 if [ $OSTCOUNT -gt 2 ]; then
2194 [ $repaired -eq 9 ] ||
2195 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2197 [ $repaired -eq 4 ] ||
2198 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
# Remount the client for the lost+found checks.
2201 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Bit mask ANDed with the "lfs getstripe -L" output below. It is assigned
# without "local", so it stays set after this test.
2203 LOV_PATTERN_F_HOLE=0x40000000
# ---- Checks (5.x): old f0 - no hole flag, full stripe count, full size,
# and read/append/chown/touch/unlink must all succeed.
2206 # ${fid0}-R-0 is the old f0
2208 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
2209 echo "Check $name, which is the old f0"
2211 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
# The getstripe -L output is prefixed with 0x so the arithmetic below
# evaluates it as hexadecimal.
2213 local pattern=0x$($LFS getstripe -L $name)
2214 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2215 error "(5.2) NOT expect pattern flag hole, but got $pattern"
2217 local stripes=$($LFS getstripe -c $name)
2218 if [ $OSTCOUNT -gt 2 ]; then
2219 [ $stripes -eq 3 ] ||
2220 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2222 [ $stripes -eq 2 ] ||
2223 error "(5.3.2) expect the stripe count is 2, but got $stripes"
2226 local size=$(stat $name | awk '/Size:/ { print $2 }')
2227 [ $size -eq $((4096 * $bcount)) ] ||
2228 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
2230 cat $name > /dev/null || error "(5.5) cannot read $name"
2232 echo "dummy" >> $name || error "(5.6) cannot write $name"
2234 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2236 touch $name || error "(5.8) cannot touch $name"
2238 rm -f $name || error "(5.9) cannot unlink $name"
# ---- Checks (6.x): old f1 - hole flag expected; a plain read, a write to
# offset 0 and an append must fail, while a write at block 300, chown,
# touch and unlink must succeed.
2241 # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2243 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
2244 if [ $OSTCOUNT -gt 2 ]; then
2245 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2247 echo "Check $name, it contains the old f1's stripe1"
2250 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
2252 pattern=0x$($LFS getstripe -L $name)
2253 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2254 error "(6.2) expect pattern flag hole, but got $pattern"
2256 stripes=$($LFS getstripe -c $name)
2257 if [ $OSTCOUNT -gt 2 ]; then
2258 [ $stripes -eq 3 ] ||
2259 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2261 [ $stripes -eq 2 ] ||
2262 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2265 size=$(stat $name | awk '/Size:/ { print $2 }')
2266 [ $size -eq $((4096 * $bcount)) ] ||
2267 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
2269 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# Copy with conv=sync,noerror so dd continues past read errors, and count
# the "Input/output error" lines it prints; 256 are expected.
2271 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2272 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2275 [ $failures -eq 256 ] ||
2276 error "(6.6) expect 256 IO failures, but get $failures"
2278 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2279 [ $size -eq $((4096 * $bcount)) ] ||
2280 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
2282 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2283 error "(6.8) write to the LOV EA hole should fail"
2285 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2286 error "(6.9) write to normal stripe should NOT fail"
2288 echo "foo" >> $name && error "(6.10) append write $name should fail"
2290 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2292 touch $name || error "(6.12) cannot touch $name"
2294 rm -f $name || error "(6.13) cannot unlink $name"
# ---- Checks (7.x): old f2. With more than 2 OSTs (7.x.1 checks) a hole is
# expected; the 7.x.2 checks (no hole, 1 stripe, 256 blocks) are presumably
# the else branch for the 2-OST case (else/fi are not in this listing).
2297 # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2299 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
2300 if [ $OSTCOUNT -gt 2 ]; then
2301 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2303 echo "Check $name, it contains the old f2's stripe0"
2306 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
2308 pattern=0x$($LFS getstripe -L $name)
2309 stripes=$($LFS getstripe -c $name)
2310 size=$(stat $name | awk '/Size:/ { print $2 }')
2311 if [ $OSTCOUNT -gt 2 ]; then
2312 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2313 error "(7.2.1) expect pattern flag hole, but got $pattern"
2315 [ $stripes -eq 3 ] ||
2316 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2318 [ $size -eq $((4096 * $bcount)) ] ||
2319 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
2321 cat $name > /dev/null &&
2322 error "(7.5.1) normal read $name should fail"
2324 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2325 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2327 [ $failures -eq 256 ] ||
2328 error "(7.6) expect 256 IO failures, but get $failures"
2330 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2331 [ $size -eq $((4096 * $bcount)) ] ||
2332 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
# Here the write at block 300 must fail and the write at offset 0 must
# succeed - the opposite of the f1 checks (6.8)/(6.9).
2334 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2335 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
2337 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2338 error "(7.8.1) write to normal stripe should NOT fail"
2340 echo "foo" >> $name &&
2341 error "(7.8.3) append write $name should fail"
2343 chown $RUNAS_ID:$RUNAS_GID $name ||
2344 error "(7.9.1) cannot chown on $name"
2346 touch $name || error "(7.10.1) cannot touch $name"
2348 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2349 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2351 [ $stripes -eq 1 ] ||
2352 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2355 [ $size -eq $((4096 * (256 + 0))) ] ||
2356 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2358 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2360 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2362 chown $RUNAS_ID:$RUNAS_GID $name ||
2363 error "(7.9.2) cannot chown on $name"
2365 touch $name || error "(7.10.2) cannot touch $name"
2368 rm -f $name || error "(7.11) cannot unlink $name"
# f3 only exists with more than 2 OSTs; otherwise the test ends here.
2370 [ $OSTCOUNT -le 2 ] && return
# ---- Checks (8.x): old f3 - no hole flag, 2 stripes, 512 blocks, and
# read/append/chown/touch/unlink must all succeed.
2373 # ${fid3}-R-0 should contains the old f3's stripe0 and stripe1
2375 name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
2376 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2378 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
2380 pattern=0x$($LFS getstripe -L $name)
2381 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2382 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2384 stripes=$($LFS getstripe -c $name)
2385 # LFSCK does not know the old f3 had 3 stripes.
2386 # It only tries to find as much as possible.
2387 # The stripe count depends on the last stripe's offset.
2388 [ $stripes -eq 2 ] ||
2389 error "(8.3) expect the stripe count is 2, but got $stripes"
2391 size=$(stat $name | awk '/Size:/ { print $2 }')
2393 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2394 error "(8.4) expect the size $((4096 * 512)), but got $size"
2396 cat $name > /dev/null || error "(8.5) cannot read $name"
2398 echo "dummy" >> $name || error "(8.6) cannot write $name"
2400 chown $RUNAS_ID:$RUNAS_GID $name ||
2401 error "(8.7) cannot chown on $name"
2403 touch $name || error "(8.8) cannot touch $name"
2405 rm -f $name || error "(8.9) cannot unlink $name"
2407 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# Test 21 body (the function header and closing brace are not part of this
# listing): start LFSCK on MDT0000 without naming a component type, check
# that both the namespace and the layout component report
# 'scanning-phase1', then stop LFSCK again.
#
# Skip when the MDS is older than 2.5.59. The skip leaves the test with
# "return" (as the skip in test 20 does) rather than "exit", so that only
# this test function is abandoned instead of the shell executing it.
2410 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] &&
2411 skip "ignore the test if MDS is older than 2.5.59" && return 0
# Populate the test directory with 100 files for the scan to visit.
2413 check_mount_and_prep
2414 createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
2416 echo "Start all LFSCK components by default (-s 1)"
# No "-t <type>" is passed, unlike START_NAMESPACE/START_LAYOUT.
# NOTE(review): "-s 1" presumably throttles the scan so that it is still in
# phase 1 when the status is sampled right below - confirm.
2417 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -s 1 -r ||
2418 error "Fail to start LFSCK"
2420 echo "namespace LFSCK should be in 'scanning-phase1' status"
# Second field of the line starting with "status" in the component's output.
2421 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
2422 [ "$STATUS" == "scanning-phase1" ] ||
2423 error "Expect namespace 'scanning-phase1', but got '$STATUS'"
2425 echo "layout LFSCK should be in 'scanning-phase1' status"
2426 STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
2427 [ "$STATUS" == "scanning-phase1" ] ||
2428 error "Expect layout 'scanning-phase1', but got '$STATUS'"
2430 echo "Stop all LFSCK components by default"
2431 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 ||
2432 error "Fail to stop LFSCK"
2434 run_test 21 "run all LFSCK components by default"
# End-of-script teardown.
# Remove the lfsck flag from the debug mask again (it was added with
# debug=+lfsck near the top of the script); a failure here is ignored.
2436 $LCTL set_param debug=-lfsck > /dev/null || true
# Put back the values that were stashed in SAVED_MDSSIZE, SAVED_OSTSIZE and
# SAVED_OSTCOUNT at the top of the script before the test-specific sizes
# and OST count were applied.
2438 # restore MDS/OST size
2439 MDSSIZE=${SAVED_MDSSIZE}
2440 OSTSIZE=${SAVED_OSTSIZE}
2441 OSTCOUNT=${SAVED_OSTCOUNT}
2443 # cleanup the system at last