3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
20 require_dsh_mds || exit 0
22 MCREATE=${MCREATE:-mcreate}
23 SAVED_MDSSIZE=${MDSSIZE}
24 SAVED_OSTSIZE=${OSTSIZE}
25 SAVED_OSTCOUNT=${OSTCOUNT}
26 # Use small MDS + OST sizes to speed up formatting.
27 # Do not make MDSSIZE/OSTSIZE too small, though: they affect the default journal size.
30 # There is no need for too many OSTs; cap the count to reduce the format/start/stop overhead.
31 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
33 # build up a clean test environment.
37 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
38 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
41 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
42 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
44 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
45 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19"
47 [ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
48 # bug number for skipped test: LU-4970
49 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 14"
53 $LCTL set_param debug=+lfsck > /dev/null || true
# Device names and command templates shared by the tests below.
# The START_*/STOP_*/SHOW_* values are command strings: the tests expand
# them unquoted (e.g. "$START_NAMESPACE -r") so that extra lctl options can
# be appended at the call site.

# Target names of the first MDT and the first OST of the file system.
55 MDT_DEV="${FSNAME}-MDT0000"
56 OST_DEV="${FSNAME}-OST0000"
# mdsdevname is called with $SINGLEMDS after every "mds" substring has been
# removed (presumably leaving the MDS index -- TODO confirm).
57 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
# Start a namespace / layout LFSCK for ${MDT_DEV}, run on $SINGLEMDS.
58 START_NAMESPACE="do_facet $SINGLEMDS \
59 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
60 START_LAYOUT="do_facet $SINGLEMDS \
61 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
# Start a layout LFSCK for ${OST_DEV}, run on ost1.
62 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
# Stop the LFSCK running for ${MDT_DEV}.
63 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the lfsck_namespace / lfsck_layout parameter of the MDT; the tests
# pipe this output through awk to pick single fields such as "status".
64 SHOW_NAMESPACE="do_facet $SINGLEMDS \
65 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
66 SHOW_LAYOUT="do_facet $SINGLEMDS \
67 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
# Print the lfsck_layout parameter of the OST.
68 SHOW_LAYOUT_ON_OST="do_facet ost1 \
69 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
# Mount options handed to "start": the second variant adds "noscrub"
# (the tests announce it as starting "with disabling OI scrub").
70 MOUNT_OPTS_SCRUB="-o user_xattr"
71 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
80 echo "preparing... $nfiles * $ndirs files will be created $(date)."
81 if [ ! -z $igif ]; then
82 #define OBD_FAIL_FID_IGIF 0x1504
83 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
86 cp $LUSTRE/tests/*.sh $DIR/$tdir/
87 if [ $ndirs -gt 0 ]; then
88 createmany -d $DIR/$tdir/d $ndirs
89 createmany -m $DIR/$tdir/f $ndirs
90 if [ $nfiles -gt 0 ]; then
91 for ((i = 0; i < $ndirs; i++)); do
92 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
93 /dev/null || error "createmany $nfiles"
96 createmany -d $DIR/$tdir/e $ndirs
99 if [ ! -z $igif ]; then
100 touch $DIR/$tdir/dummy
101 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
104 echo "prepared $(date)."
110 #define OBD_FAIL_LFSCK_DELAY1 0x1600
111 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
112 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
114 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
116 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
117 [ "$STATUS" == "scanning-phase1" ] ||
118 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
120 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
122 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
123 [ "$STATUS" == "stopped" ] ||
124 error "(6) Expect 'stopped', but got '$STATUS'"
126 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
128 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
129 [ "$STATUS" == "scanning-phase1" ] ||
130 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
132 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
133 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
134 mdd.${MDT_DEV}.lfsck_namespace |
135 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
137 error "(9) unexpected status"
140 local repaired=$($SHOW_NAMESPACE |
141 awk '/^updated_phase1/ { print $2 }')
142 [ $repaired -eq 0 ] ||
143 error "(10) Expect nothing to be repaired, but got: $repaired"
145 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
146 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
147 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
148 mdd.${MDT_DEV}.lfsck_namespace |
149 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
151 error "(12) unexpected status"
154 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
155 [ $((scanned1 + 1)) -eq $scanned2 ] ||
156 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
158 echo "stopall, should NOT crash LU-3649"
159 stopall || error "(14) Fail to stopall"
161 run_test 0 "Control LFSCK manually"
164 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
165 skip "OI Scrub not implemented for ZFS" && return
169 #define OBD_FAIL_FID_INDIR 0x1501
170 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
171 touch $DIR/$tdir/dummy
173 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
175 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
176 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
177 mdd.${MDT_DEV}.lfsck_namespace |
178 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
180 error "(4) unexpected status"
183 local repaired=$($SHOW_NAMESPACE |
184 awk '/^dirent_repaired/ { print $2 }')
185 # for interop with old server
186 [ -z "$repaired" ] &&
187 repaired=$($SHOW_NAMESPACE |
188 awk '/^updated_phase1/ { print $2 }')
190 [ $repaired -eq 1 ] ||
191 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
193 mount_client $MOUNT || error "(6) Fail to start client!"
195 #define OBD_FAIL_FID_LOOKUP 0x1505
196 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
197 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
199 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
201 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
205 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
206 skip "OI Scrub not implemented for ZFS" && return
210 #define OBD_FAIL_FID_INLMA 0x1502
211 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
212 touch $DIR/$tdir/dummy
214 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
216 #define OBD_FAIL_FID_NOLMA 0x1506
217 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
218 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
219 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
220 mdd.${MDT_DEV}.lfsck_namespace |
221 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
223 error "(4) unexpected status"
226 local repaired=$($SHOW_NAMESPACE |
227 awk '/^dirent_repaired/ { print $2 }')
228 # for interop with old server
229 [ -z "$repaired" ] &&
230 repaired=$($SHOW_NAMESPACE |
231 awk '/^updated_phase1/ { print $2 }')
233 [ $repaired -eq 1 ] ||
234 error "(5) Fail to repair missed FID-in-LMA: $repaired"
236 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
237 mount_client $MOUNT || error "(6) Fail to start client!"
239 #define OBD_FAIL_FID_LOOKUP 0x1505
240 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
241 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
243 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
245 run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
250 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
251 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
252 touch $DIR/$tdir/dummy
254 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
256 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
257 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
258 mdd.${MDT_DEV}.lfsck_namespace |
259 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
261 error "(4) unexpected status"
264 local repaired=$($SHOW_NAMESPACE |
265 awk '/^linkea_repaired/ { print $2 }')
266 # for interop with old server
267 [ -z "$repaired" ] &&
268 repaired=$($SHOW_NAMESPACE |
269 awk '/^updated_phase1/ { print $2 }')
271 [ $repaired -eq 1 ] ||
272 error "(5) Fail to repair crashed linkEA: $repaired"
274 mount_client $MOUNT || error "(6) Fail to start client!"
276 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
277 error "(7) Fail to stat $DIR/$tdir/dummy"
279 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
280 local dummyname=$($LFS fid2path $DIR $dummyfid)
281 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
282 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
284 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
290 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
291 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
292 touch $DIR/$tdir/dummy
294 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
296 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
297 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
298 mdd.${MDT_DEV}.lfsck_namespace |
299 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
301 error "(4) unexpected status"
304 local repaired=$($SHOW_NAMESPACE |
305 awk '/^updated_phase2/ { print $2 }')
306 [ $repaired -eq 1 ] ||
307 error "(5) Fail to repair crashed linkEA: $repaired"
309 mount_client $MOUNT || error "(6) Fail to start client!"
311 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
312 error "(7) Fail to stat $DIR/$tdir/dummy"
314 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
315 local dummyname=$($LFS fid2path $DIR $dummyfid)
316 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
317 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
319 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
325 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
326 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
327 touch $DIR/$tdir/dummy
329 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
331 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
332 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
333 mdd.${MDT_DEV}.lfsck_namespace |
334 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
336 error "(4) unexpected status"
339 local repaired=$($SHOW_NAMESPACE |
340 awk '/^updated_phase2/ { print $2 }')
341 [ $repaired -eq 1 ] ||
342 error "(5) Fail to repair crashed linkEA: $repaired"
344 mount_client $MOUNT || error "(6) Fail to start client!"
346 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
347 error "(7) Fail to stat $DIR/$tdir/dummy"
349 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
350 local dummyname=$($LFS fid2path $DIR $dummyfid)
351 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
352 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
354 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
358 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
359 skip "OI Scrub not implemented for ZFS" && return
362 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
363 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
365 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
366 echo "start $SINGLEMDS with disabling OI scrub"
367 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
368 error "(2) Fail to start MDS!"
370 #define OBD_FAIL_LFSCK_DELAY2 0x1601
371 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
372 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
373 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
374 mdd.${MDT_DEV}.lfsck_namespace |
375 awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || {
377 error "(5) unexpected status"
380 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
381 [ "$STATUS" == "scanning-phase1" ] ||
382 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
384 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
385 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
386 mdd.${MDT_DEV}.lfsck_namespace |
387 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
389 error "(7) unexpected status"
# Once the scan has completed the flags field must be empty. FLAGS is
# declared local (like STATUS and repaired around it) so the value does not
# leak into the global scope and into later tests.
392 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
393 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
395 local repaired=$($SHOW_NAMESPACE |
396 awk '/^dirent_repaired/ { print $2 }')
397 # for interop with old server
398 [ -z "$repaired" ] &&
399 repaired=$($SHOW_NAMESPACE |
400 awk '/^updated_phase1/ { print $2 }')
402 [ $repaired -ge 9 ] ||
403 error "(9) Fail to re-generate FID-in-dirent: $repaired"
405 mount_client $MOUNT || error "(10) Fail to start client!"
407 #define OBD_FAIL_FID_LOOKUP 0x1505
408 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
409 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
410 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
412 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
416 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
417 skip "OI Scrub not implemented for ZFS" && return
420 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
421 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
423 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
424 echo "start $SINGLEMDS with disabling OI scrub"
425 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
426 error "(2) Fail to start MDS!"
428 #define OBD_FAIL_LFSCK_DELAY2 0x1601
429 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
430 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
431 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
432 mdd.${MDT_DEV}.lfsck_namespace |
433 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || {
435 error "(5) unexpected status"
438 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
439 [ "$STATUS" == "scanning-phase1" ] ||
440 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
442 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
443 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
444 mdd.${MDT_DEV}.lfsck_namespace |
445 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
447 error "(7) unexpected status"
# Once the scan has completed the flags field must be empty. FLAGS is
# declared local (like STATUS and repaired around it) so the value does not
# leak into the global scope and into later tests.
450 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
451 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
453 local repaired=$($SHOW_NAMESPACE |
454 awk '/^dirent_repaired/ { print $2 }')
455 # for interop with old server
456 [ -z "$repaired" ] &&
457 repaired=$($SHOW_NAMESPACE |
458 awk '/^updated_phase1/ { print $2 }')
460 [ $repaired -ge 2 ] ||
461 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
463 mount_client $MOUNT || error "(10) Fail to start client!"
465 #define OBD_FAIL_FID_LOOKUP 0x1505
466 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
467 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
469 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
471 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
472 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
473 local dummyname=$($LFS fid2path $DIR $dummyfid)
474 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
475 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
477 run_test 5 "LFSCK can handle IGIF object upgrading"
482 #define OBD_FAIL_LFSCK_DELAY1 0x1600
483 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
484 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
486 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
487 [ "$STATUS" == "scanning-phase1" ] ||
488 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
490 # Sleep 3 sec to guarantee at least one object is processed by LFSCK
492 # Fail the LFSCK to guarantee there is at least one checkpoint
493 #define OBD_FAIL_LFSCK_FATAL1 0x1608
494 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
495 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
496 mdd.${MDT_DEV}.lfsck_namespace |
497 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
499 error "(4) unexpected status"
502 local POS0=$($SHOW_NAMESPACE |
503 awk '/^last_checkpoint_position/ { print $2 }' |
506 #define OBD_FAIL_LFSCK_DELAY1 0x1600
507 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
508 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
510 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
511 [ "$STATUS" == "scanning-phase1" ] ||
512 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
514 local POS1=$($SHOW_NAMESPACE |
515 awk '/^latest_start_position/ { print $2 }' |
517 [ $POS0 -lt $POS1 ] ||
518 error "(7) Expect larger than: $POS0, but got $POS1"
520 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
521 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
522 mdd.${MDT_DEV}.lfsck_namespace |
523 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
525 error "(8) unexpected status"
528 run_test 6a "LFSCK resumes from last checkpoint (1)"
533 #define OBD_FAIL_LFSCK_DELAY2 0x1601
534 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
535 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
537 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
538 [ "$STATUS" == "scanning-phase1" ] ||
539 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
541 # Sleep 5 sec to guarantee that we are in the directory scanning
543 # Fail the LFSCK to guarantee there is at least one checkpoint
544 #define OBD_FAIL_LFSCK_FATAL2 0x1609
545 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
546 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
547 mdd.${MDT_DEV}.lfsck_namespace |
548 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
550 error "(4) unexpected status"
553 local O_POS0=$($SHOW_NAMESPACE |
554 awk '/^last_checkpoint_position/ { print $2 }' |
557 local D_POS0=$($SHOW_NAMESPACE |
558 awk '/^last_checkpoint_position/ { print $4 }')
560 #define OBD_FAIL_LFSCK_DELAY2 0x1601
561 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
562 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
564 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
565 [ "$STATUS" == "scanning-phase1" ] ||
566 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
568 local O_POS1=$($SHOW_NAMESPACE |
569 awk '/^latest_start_position/ { print $2 }' |
571 local D_POS1=$($SHOW_NAMESPACE |
572 awk '/^latest_start_position/ { print $4 }')
574 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
575 [ $O_POS0 -lt $O_POS1 ] ||
576 error "(7.1) $O_POS1 is not larger than $O_POS0"
578 [ $D_POS0 -lt $D_POS1 ] ||
579 error "(7.2) $D_POS1 is not larger than $D_POS0"
582 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
583 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
584 mdd.${MDT_DEV}.lfsck_namespace |
585 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
587 error "(8) unexpected status"
590 run_test 6b "LFSCK resumes from last checkpoint (2)"
597 #define OBD_FAIL_LFSCK_DELAY2 0x1601
598 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
599 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
601 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
602 [ "$STATUS" == "scanning-phase1" ] ||
603 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
605 # Sleep 3 sec to guarantee at least one object is processed by LFSCK
607 echo "stop $SINGLEMDS"
608 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
610 echo "start $SINGLEMDS"
611 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
612 error "(5) Fail to start MDS!"
614 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
615 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
616 mdd.${MDT_DEV}.lfsck_namespace |
617 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
619 error "(6) unexpected status"
622 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
628 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
629 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
630 for ((i = 0; i < 20; i++)); do
631 touch $DIR/$tdir/dummy${i}
634 #define OBD_FAIL_LFSCK_DELAY3 0x1602
635 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
636 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
637 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
638 mdd.${MDT_DEV}.lfsck_namespace |
639 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
641 error "(4) unexpected status"
644 echo "stop $SINGLEMDS"
645 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
647 echo "start $SINGLEMDS"
648 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
649 error "(6) Fail to start MDS!"
651 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
652 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
653 mdd.${MDT_DEV}.lfsck_namespace |
654 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
656 error "(7) unexpected status"
659 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
664 formatall > /dev/null
670 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
671 [ "$STATUS" == "init" ] ||
672 error "(2) Expect 'init', but got '$STATUS'"
674 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
675 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
676 mkdir $DIR/$tdir/crashed
678 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
679 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
680 for ((i = 0; i < 5; i++)); do
681 touch $DIR/$tdir/dummy${i}
684 umount_client $MOUNT || error "(3) Fail to stop client!"
686 #define OBD_FAIL_LFSCK_DELAY2 0x1601
687 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
688 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
690 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
691 [ "$STATUS" == "scanning-phase1" ] ||
692 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
694 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
696 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
697 [ "$STATUS" == "stopped" ] ||
698 error "(7) Expect 'stopped', but got '$STATUS'"
700 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
702 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
703 [ "$STATUS" == "scanning-phase1" ] ||
704 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
706 #define OBD_FAIL_LFSCK_FATAL2 0x1609
707 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
708 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
709 mdd.${MDT_DEV}.lfsck_namespace |
710 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
712 error "(10) unexpected status"
715 #define OBD_FAIL_LFSCK_DELAY1 0x1600
716 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
717 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
719 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
720 [ "$STATUS" == "scanning-phase1" ] ||
721 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
723 #define OBD_FAIL_LFSCK_CRASH 0x160a
724 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
727 echo "stop $SINGLEMDS"
728 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
730 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
731 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
733 echo "start $SINGLEMDS"
734 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
735 error "(14) Fail to start MDS!"
737 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
738 [ "$STATUS" == "crashed" ] ||
739 error "(15) Expect 'crashed', but got '$STATUS'"
741 #define OBD_FAIL_LFSCK_DELAY2 0x1601
742 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
743 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
745 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
746 [ "$STATUS" == "scanning-phase1" ] ||
747 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
749 echo "stop $SINGLEMDS"
750 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
752 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
753 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
755 echo "start $SINGLEMDS"
756 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
757 error "(19) Fail to start MDS!"
759 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
760 [ "$STATUS" == "paused" ] ||
761 error "(20) Expect 'paused', but got '$STATUS'"
763 #define OBD_FAIL_LFSCK_DELAY3 0x1602
764 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
766 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
767 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
768 mdd.${MDT_DEV}.lfsck_namespace |
769 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
771 error "(22) unexpected status"
774 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
775 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
776 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
778 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
779 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
780 mdd.${MDT_DEV}.lfsck_namespace |
781 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
783 error "(24) unexpected status"
786 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
787 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
789 run_test 8 "LFSCK state machine"
792 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
793 skip "Testing on UP system, the speed may be inaccurate."
799 local BASE_SPEED1=100
801 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
804 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
805 [ "$STATUS" == "scanning-phase1" ] ||
806 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
808 local SPEED=$($SHOW_NAMESPACE |
809 awk '/^average_speed_phase1/ { print $2 }')
811 # There may be some timing error; normally it is less than 2 seconds.
812 # We allow another 20% for scheduling error.
814 # MAX_MARGIN = 1.2 = 12 / 10
815 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
816 RUN_TIME1 * 12 / 10))
817 [ $SPEED -lt $MAX_SPEED ] ||
818 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
821 local BASE_SPEED2=300
823 do_facet $SINGLEMDS \
824 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
827 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
828 # MIN_MARGIN = 0.8 = 8 / 10
829 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
830 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
831 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
832 # Account for slow ZFS performance - LU-4934: the minimum-speed check is waived on ZFS.
# The fstype is compared as a string; "-eq" only accepts integers, so using
# it here made "[" fail and the waiver never took effect.
833 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" = zfs ] ||
834 error "(5) Got speed $SPEED, expected more than $MIN_SPEED"
836 # MAX_MARGIN = 1.2 = 12 / 10
837 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
838 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
839 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
840 [ $SPEED -lt $MAX_SPEED ] ||
841 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
843 do_facet $SINGLEMDS \
844 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
846 wait_update_facet $SINGLEMDS \
847 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
848 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
849 error "(7) Failed to get expected 'completed'"
851 run_test 9a "LFSCK speed control (1)"
854 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
855 skip "Testing on UP system, the speed may be inaccurate."
861 echo "Preparing another 50 * 50 files (with error) at $(date)."
862 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
863 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
864 createmany -d $DIR/$tdir/d 50
865 createmany -m $DIR/$tdir/f 50
866 for ((i = 0; i < 50; i++)); do
867 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
870 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
871 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
872 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
873 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
874 mdd.${MDT_DEV}.lfsck_namespace |
875 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
877 error "(5) unexpected status"
880 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
881 echo "Prepared at $(date)."
885 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
888 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
889 [ "$STATUS" == "scanning-phase2" ] ||
890 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
892 local SPEED=$($SHOW_NAMESPACE |
893 awk '/^average_speed_phase2/ { print $2 }')
894 # There may be some timing error; normally it is less than 2 seconds.
895 # We allow another 20% for scheduling error.
897 # MAX_MARGIN = 1.2 = 12 / 10
898 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
899 RUN_TIME1 * 12 / 10))
900 [ $SPEED -lt $MAX_SPEED ] ||
901 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
904 local BASE_SPEED2=150
906 do_facet $SINGLEMDS \
907 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
910 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
911 # MIN_MARGIN = 0.8 = 8 / 10
912 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
913 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
914 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# The minimum-speed check is waived on ZFS. The fstype is compared as a
# string; "-eq" only accepts integers, so using it here made "[" fail and
# the waiver never took effect.
915 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" = zfs ] ||
916 error "(9) Got speed $SPEED, expected more than $MIN_SPEED"
918 # MAX_MARGIN = 1.2 = 12 / 10
919 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
920 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
921 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
922 [ $SPEED -lt $MAX_SPEED ] ||
923 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
925 do_facet $SINGLEMDS \
926 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
927 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
928 mdd.${MDT_DEV}.lfsck_namespace |
929 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
931 error "(11) unexpected status"
934 run_test 9b "LFSCK speed control (2)"
938 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
939 skip "lookup(..)/linkea on ZFS issue" && return
943 echo "Preparing more files with error at $(date)."
944 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
945 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
947 for ((i = 0; i < 1000; i = $((i+2)))); do
948 mkdir -p $DIR/$tdir/d${i}
949 touch $DIR/$tdir/f${i}
950 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
953 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
954 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
956 for ((i = 1; i < 1000; i = $((i+2)))); do
957 mkdir -p $DIR/$tdir/d${i}
958 touch $DIR/$tdir/f${i}
959 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
962 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
963 echo "Prepared at $(date)."
965 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
968 mount_client $MOUNT || error "(3) Fail to start client!"
970 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
973 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
974 [ "$STATUS" == "scanning-phase1" ] ||
975 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
977 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
979 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
981 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
983 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
985 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
987 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
989 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
991 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
992 error "(14) Fail to softlink!"
994 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
995 [ "$STATUS" == "scanning-phase1" ] ||
996 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
998 do_facet $SINGLEMDS \
999 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1000 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1001 mdd.${MDT_DEV}.lfsck_namespace |
1002 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1004 error "(16) unexpected status"
1007 run_test 10 "System is available during LFSCK scanning"
# Remove the LAST_ID and d0/0 entries under O/${idx} on the backing file
# system of ost${ost}.
# NOTE(review): the assignments of ${ost} and ${idx} are not visible in this
# excerpt -- presumably they are taken from the first and second argument
# (the caller passes "1 0"); confirm.
# Returns 1 if mount_fstype fails and 2 if unmount_fstype fails.
1010 ost_remove_lastid() {
# Prefix used to run the removal on the OST facet.
1013 local rcmd="do_facet ost${ost}"
1015 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1017 # step 1: local mount
1018 mount_fstype ost${ost} || return 1
1019 # step 2: remove the specified LAST_ID
1020 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
# step 3: unmount again
1022 unmount_fstype ost${ost} || return 2
# Body of the test registered by the "run_test 11a" line at the end of this
# block. It creates 64 single-stripe files on OST index 0, removes LAST_ID on
# ost1, restarts ost1, and checks that a layout LFSCK started on the OST first
# reports the "crashed_lastid" flag and then completes with empty flags.
# NOTE(review): this listing omits some original lines (function header,
# closing braces, and the steps between file creation and LAST_ID removal).
1026 check_mount_and_prep
1027 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1028 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
# Arguments 1 and 0 select ost1 and O/0 inside ost_remove_lastid.
1033 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1035 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1036 error "(2) Fail to start ost1"
# Failure injection on ost1 with fail_val=3.
# NOTE(review): presumably delays the LFSCK so the flag below can be observed
# before the scan finishes -- confirm against the fail_loc definition.
1038 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1039 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1041 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1042 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
# Poll the "flags" field of lfsck_layout on ost1 until it reads
# "crashed_lastid"; the trailing 60 is presumably a timeout in seconds.
1044 wait_update_facet ost1 "$LCTL get_param -n \
1045 obdfilter.${OST_DEV}.lfsck_layout |
1046 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1048 error "(5) unexpected status"
# Clear the failure injection, then wait for the "status" field to read
# "completed".
1051 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1053 wait_update_facet ost1 "$LCTL get_param -n \
1054 obdfilter.${OST_DEV}.lfsck_layout |
1055 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1057 error "(6) unexpected status"
1060 echo "the LAST_ID(s) should have been rebuilt"
# After completion the flags field must be empty.
# NOTE(review): FLAGS is not declared local in the lines shown.
1061 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1062 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1064 run_test 11a "LFSCK can rebuild lost last_id"
# Body of the test registered by the "run_test 11b" line at the end of this
# block. With fail_loc=0x160d set on ost1 it creates 64 files and records the
# last_id reported for sequence 0x100000000 (lastid1). After restarting ost1
# it reads the value again (lastid2) and expects lastid1 > lastid2. It then
# runs a layout LFSCK on the OST, restarts ost1 once more, and expects the
# reported last_id to equal lastid1.
# NOTE(review): this listing omits some original lines (function header,
# loop/brace closers).
1067 check_mount_and_prep
1068 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1070 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1071 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1072 do_facet ost1 $LCTL set_param fail_loc=0x160d
1073 createmany -o $DIR/$tdir/f 64
# last_id prints "<seq>:<id>" pairs; keep the id of the 0x100000000 line.
1074 local lastid1=$(do_facet ost1 "lctl get_param -n \
1075 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1076 awk -F: '{ print $2 }')
1078 umount_client $MOUNT
1079 stop ost1 || error "(1) Fail to stop ost1"
# NOTE(review): ENOSPC injection is set before the restart; presumably it
# keeps new object creation from advancing last_id -- confirm.
1081 #define OBD_FAIL_OST_ENOSPC 0x215
1082 do_facet ost1 $LCTL set_param fail_loc=0x215
1084 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1085 error "(2) Fail to start ost1"
# Retry up to 60 times until the parameter yields a non-empty id.
# NOTE(review): lastid2 is not declared local in the lines shown, and
# $lastid2 is unquoted inside the [ ] test below.
1087 for ((i = 0; i < 60; i++)); do
1088 lastid2=$(do_facet ost1 "lctl get_param -n \
1089 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1090 awk -F: '{ print $2 }')
1091 [ ! -z $lastid2 ] && break;
1095 echo "the on-disk LAST_ID should be smaller than the expected one"
# Integer comparison: the value read after the restart must be lower than
# the one recorded before it.
1096 [ $lastid1 -gt $lastid2 ] ||
1097 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1099 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1100 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1102 wait_update_facet ost1 "$LCTL get_param -n \
1103 obdfilter.${OST_DEV}.lfsck_layout |
1104 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1106 error "(6) unexpected status"
# Restart ost1 so last_id is read again after the LFSCK run.
1109 stop ost1 || error "(7) Fail to stop ost1"
1111 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1112 error "(8) Fail to start ost1"
1114 echo "the on-disk LAST_ID should have been rebuilt"
# Poll until the id for 0x100000000 equals lastid1.
1115 wait_update_facet ost1 "$LCTL get_param -n \
1116 obdfilter.${ost1_svc}.last_id | grep 0x100000000 |
1117 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
# NOTE(review): this diagnostic runs $LCTL directly, not through
# "do_facet ost1" like the other reads -- confirm that is intended.
1118 $LCTL get_param -n obdfilter.${ost1_svc}.last_id
1119 error "(9) expect lastid1 0x100000000:$lastid1"
# Clear the failure injection and shut everything down.
1122 do_facet ost1 $LCTL set_param fail_loc=0
1123 stopall || error "(10) Fail to stopall"
1125 run_test 11b "LFSCK can rebuild crashed last_id"
# Body of the test registered by the "run_test 12" line at the end of this
# block. It needs at least two MDSes. It creates one directory per MDT with
# 100 files each, then for namespace LFSCK and again for layout LFSCK:
# starts it on all targets from mds1 with -A and "-s 1", checks each target
# is in "scanning-phase1", stops it with -A, checks "stopped", and restarts
# it with "-s 0" expecting "completed".
# NOTE(review): this listing omits some original lines (function header and
# the "done" loop closers).
# NOTE(review): "exit 0" after skip leaves the current shell instead of
# returning from the test body -- confirm this is intended.
1128 [ $MDSCOUNT -lt 2 ] &&
1129 skip "We need at least 2 MDSes for test_12" && exit 0
1131 check_mount_and_prep
# Directory ${k} is created with "-i $((k - 1))", i.e. on MDT index k-1.
1132 for k in $(seq $MDSCOUNT); do
1133 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1134 createmany -o $DIR/$tdir/${k}/f 100 ||
1135 error "(0) Fail to create 100 files."
# NOTE(review): "-s 1" presumably throttles the scan so it is still in
# phase 1 when the status is sampled below -- confirm.
1138 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1139 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1140 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1142 echo "All the LFSCK targets should be in 'scanning-phase1' status."
# One-shot read of the status field on every MDS (no polling).
1143 for k in $(seq $MDSCOUNT); do
1144 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1145 mdd.$(facet_svc mds${k}).lfsck_namespace |
1146 awk '/^status/ { print $2 }')
1147 [ "$STATUS" == "scanning-phase1" ] ||
1148 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1151 echo "Stop namespace LFSCK on all targets by single lctl command."
1152 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1153 error "(4) Fail to stop LFSCK on all devices!"
1155 echo "All the LFSCK targets should be in 'stopped' status."
1156 for k in $(seq $MDSCOUNT); do
1157 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1158 mdd.$(facet_svc mds${k}).lfsck_namespace |
1159 awk '/^status/ { print $2 }')
1160 [ "$STATUS" == "stopped" ] ||
1161 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
1164 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1165 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1166 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1168 echo "All the LFSCK targets should be in 'completed' status."
# Here the status is polled (wait_update_facet) rather than read once.
1169 for k in $(seq $MDSCOUNT); do
1170 wait_update_facet mds${k} "$LCTL get_param -n \
1171 mdd.$(facet_svc mds${k}).lfsck_namespace |
1172 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1173 error "(7) MDS${k} is not the expected 'completed'"
# Same start / check / stop / check / restart sequence for layout LFSCK.
1176 echo "Start layout LFSCK on all targets by single command (-s 1)."
1177 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1178 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1180 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1181 for k in $(seq $MDSCOUNT); do
1182 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1183 mdd.$(facet_svc mds${k}).lfsck_layout |
1184 awk '/^status/ { print $2 }')
1185 [ "$STATUS" == "scanning-phase1" ] ||
1186 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1189 echo "Stop layout LFSCK on all targets by single lctl command."
1190 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1191 error "(10) Fail to stop LFSCK on all devices!"
1193 echo "All the LFSCK targets should be in 'stopped' status."
1194 for k in $(seq $MDSCOUNT); do
1195 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1196 mdd.$(facet_svc mds${k}).lfsck_layout |
1197 awk '/^status/ { print $2 }')
1198 [ "$STATUS" == "stopped" ] ||
1199 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# For layout LFSCK the OSTs are checked too, via obdfilter.*.lfsck_layout.
1202 for k in $(seq $OSTCOUNT); do
1203 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1204 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1205 awk '/^status/ { print $2 }')
1206 [ "$STATUS" == "stopped" ] ||
1207 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1210 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1211 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1212 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1214 echo "All the LFSCK targets should be in 'completed' status."
1215 for k in $(seq $MDSCOUNT); do
# NOTE(review): "internal" in the next comment presumably means "interval";
# the 32 passed to wait_update_facet below is just above that 30 seconds.
1216 # The LFSCK status query internal is 30 seconds. For the case
1217 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1218 # time to guarantee the status sync up.
1219 wait_update_facet mds${k} "$LCTL get_param -n \
1220 mdd.$(facet_svc mds${k}).lfsck_layout |
1221 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1222 error "(14) MDS${k} is not the expected 'completed'"
1225 run_test 12 "single command to trigger LFSCK on all devices"
# Body of the test registered by the "run_test 13" line at the end of this
# block. It creates 32 files while fail_loc=0x160f is set on the MDS, runs a
# layout LFSCK, and expects the "repaired_others" counter to be exactly 32.
# NOTE(review): this listing omits some original lines (function header,
# closing braces).
1229 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1230 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1231 echo "MDT-object FID."
1234 check_mount_and_prep
1236 echo "Inject failure stub to simulate bad lmm_oi"
# The injection is active only while the 32 files are created.
1237 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1238 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1239 createmany -o $DIR/$tdir/f 32
1240 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1242 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1243 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
# Poll the status field until it reads "completed".
1245 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1246 mdd.${MDT_DEV}.lfsck_layout |
1247 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1249 error "(2) unexpected status"
# One repair is expected per file created under the injection.
1252 local repaired=$($SHOW_LAYOUT |
1253 awk '/^repaired_others/ { print $2 }')
1254 [ $repaired -eq 32 ] ||
1255 error "(3) Fail to repair crashed lmm_oi: $repaired"
1257 run_test 13 "LFSCK can repair crashed lmm_oi"
# Body of the test registered by the "run_test 14" line at the end of this
# block. It creates files while fail_loc=0x1610 is set on ost1, checks that
# "ls" fails, runs layout LFSCK once without -c (counter "repaired_dangling"
# >= 32 but "ls" must still fail) and once with -c (counter >= 32 and "ls"
# must succeed).
# NOTE(review): this listing omits some original lines (function header,
# closing braces).
1261 echo "The OST-object referenced by the MDT-object should be there;"
1262 echo "otherwise, the LFSCK should re-create the missed OST-object."
1265 check_mount_and_prep
1266 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Count returned by the precreated_ost_obj_count helper for arguments "0 0".
1268 local count=$(precreated_ost_obj_count 0 0)
1270 echo "Inject failure stub to simulate dangling referenced MDT-object"
# Create count + 32 files under the injection; the checks below require at
# least 32 dangling repairs.
1271 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1272 do_facet ost1 $LCTL set_param fail_loc=0x1610
1273 createmany -o $DIR/$tdir/f $((count + 32))
1274 do_facet ost1 $LCTL set_param fail_loc=0
1276 # exhaust other pre-created dangling cases
1277 count=$(precreated_ost_obj_count 0 0)
1278 createmany -o $DIR/$tdir/a $count ||
1279 error "(0) Fail to create $count files."
1281 echo "'ls' should fail because of dangling referenced MDT-object"
# Inverted check: success of ls is the error case.
1282 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
1284 echo "Trigger layout LFSCK to find out dangling reference"
1285 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1287 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1288 mdd.${MDT_DEV}.lfsck_layout |
1289 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1291 error "(3) unexpected status"
1294 local repaired=$($SHOW_LAYOUT |
1295 awk '/^repaired_dangling/ { print $2 }')
1296 [ $repaired -ge 32 ] ||
1297 error "(4) Fail to repair dangling reference: $repaired"
1299 echo "'ls' should fail because it will not repair dangling by default"
1300 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail."
1302 echo "Trigger layout LFSCK to repair dangling reference"
# Second run adds -c; per the echo above, dangling references are not
# repaired without it.
1303 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1305 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1306 mdd.${MDT_DEV}.lfsck_layout |
1307 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1309 error "(7) unexpected status"
1312 repaired=$($SHOW_LAYOUT |
1313 awk '/^repaired_dangling/ { print $2 }')
1314 [ $repaired -ge 32 ] ||
1315 error "(8) Fail to repair dangling reference: $repaired"
1317 echo "'ls' should success after layout LFSCK repairing"
1318 ls -ail $DIR/$tdir > /dev/null || error "(9) ls should success."
1320 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Body of the test registered by the "run_test 15a" line at the end of this
# block. It writes one 1 MiB file while fail_loc=0x1611 is set on ost1, runs
# a layout LFSCK, and expects "repaired_unmatched_pair" to be exactly 1.
# NOTE(review): this listing omits some original lines (function header,
# closing braces).
1324 echo "If the OST-object referenced by the MDT-object back points"
1325 echo "to some non-exist MDT-object, then the LFSCK should repair"
1326 echo "the OST-object to back point to the right MDT-object."
1329 check_mount_and_prep
1330 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1332 echo "Inject failure stub to make the OST-object to back point to"
1333 echo "non-exist MDT-object."
1334 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
# The injection is active only around the write of f0; osc locks are
# cancelled before it is cleared.
1336 do_facet ost1 $LCTL set_param fail_loc=0x1611
1337 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1338 cancel_lru_locks osc
1339 do_facet ost1 $LCTL set_param fail_loc=0
1341 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1342 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1344 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1345 mdd.${MDT_DEV}.lfsck_layout |
1346 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1348 error "(2) unexpected status"
# Exactly one repair is expected, for the single file written above.
1351 local repaired=$($SHOW_LAYOUT |
1352 awk '/^repaired_unmatched_pair/ { print $2 }')
1353 [ $repaired -eq 1 ] ||
1354 error "(3) Fail to repair unmatched pair: $repaired"
1356 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Body of the test registered by the "run_test 15b" line at the end of this
# block. It writes a "guard" file normally, then writes f0 while
# fail_loc=0x1612 is set on ost1, runs a layout LFSCK, and expects
# "repaired_unmatched_pair" to be exactly 1.
# NOTE(review): this listing omits some original lines (function header,
# closing braces).
1360 echo "If the OST-object referenced by the MDT-object back points"
1361 echo "to other MDT-object that doesn't recognize the OST-object,"
1362 echo "then the LFSCK should repair it to back point to the right"
1363 echo "MDT-object (the first one)."
1366 check_mount_and_prep
1367 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# The guard file is written before any injection is set.
1368 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1369 cancel_lru_locks osc
1371 echo "Inject failure stub to make the OST-object to back point to"
1372 echo "other MDT-object"
1374 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1375 do_facet ost1 $LCTL set_param fail_loc=0x1612
1376 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1377 cancel_lru_locks osc
1378 do_facet ost1 $LCTL set_param fail_loc=0
1380 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1381 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1383 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1384 mdd.${MDT_DEV}.lfsck_layout |
1385 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1387 error "(2) unexpected status"
# Only f0 was written under the injection, so one repair is expected.
1390 local repaired=$($SHOW_LAYOUT |
1391 awk '/^repaired_unmatched_pair/ { print $2 }')
1392 [ $repaired -eq 1 ] ||
1393 error "(3) Fail to repair unmatched pair: $repaired"
1395 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Body of the test registered by the "run_test 16" line at the end of this
# block. It writes f0, runs "chown 1.1" on it while fail_loc=0x1613 is set
# on the MDS, runs a layout LFSCK, and expects
# "repaired_inconsistent_owner" to be exactly 1.
# NOTE(review): this listing omits some original lines (function header,
# closing braces).
1399 echo "If the OST-object's owner information does not match the owner"
1400 echo "information stored in the MDT-object, then the LFSCK trust the"
1401 echo "MDT-object and update the OST-object's owner information."
1404 check_mount_and_prep
1405 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1406 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1407 cancel_lru_locks osc
1409 echo "Inject failure stub to skip OST-object owner changing"
# The injection wraps only the chown; "1.1" is the owner.group form.
1410 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1411 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
1412 chown 1.1 $DIR/$tdir/f0
1413 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1415 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1418 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1420 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1421 mdd.${MDT_DEV}.lfsck_layout |
1422 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1424 error "(2) unexpected status"
# One repair is expected, for the single chown'ed file.
1427 local repaired=$($SHOW_LAYOUT |
1428 awk '/^repaired_inconsistent_owner/ { print $2 }')
1429 [ $repaired -eq 1 ] ||
1430 error "(3) Fail to repair inconsistent owner: $repaired"
1432 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Body of the test registered by the "run_test 17" line at the end of this
# block. With fail_loc=0x1614 set on the MDS it writes a 1 MiB "guard" file
# and creates f0, checks that f0 reports guard's size (1048576), runs a
# layout LFSCK expecting "repaired_multiple_referenced" == 1, then writes
# 2 MiB to f0 and checks guard is still 1048576 bytes.
# NOTE(review): this listing omits some original lines (function header,
# closing braces, and the last echo line of the introduction).
1436 echo "If more than one MDT-objects reference the same OST-object,"
1437 echo "and the OST-object only recognizes one MDT-object, then the"
1438 echo "LFSCK should create new OST-objects for such non-recognized"
1442 check_mount_and_prep
1443 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1445 echo "Inject failure stub to make two MDT-objects to refernce"
1446 echo "the OST-object"
1448 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1449 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1451 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1452 cancel_lru_locks osc
# "createmany -o .../f 1" creates the single file f0 used below.
1454 createmany -o $DIR/$tdir/f 1
1456 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1458 cancel_lru_locks mdc
1459 cancel_lru_locks osc
1461 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
# Field 5 of "ls -l" is the size; f0 was never written, so a size of 1 MiB
# shows it is reading guard's object.
1462 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1463 [ $size -eq 1048576 ] ||
1464 error "(1) f0 (wrong) size should be 1048576, but got $size"
1466 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1469 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1471 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1472 mdd.${MDT_DEV}.lfsck_layout |
1473 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1475 error "(3) unexpected status"
1478 local repaired=$($SHOW_LAYOUT |
1479 awk '/^repaired_multiple_referenced/ { print $2 }')
1480 [ $repaired -eq 1 ] ||
1481 error "(4) Fail to repair multiple references: $repaired"
1483 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
# Writing 2 MiB to f0 must not change guard's size once the two files no
# longer share an object.
1484 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1485 error "(5) Fail to write f0."
1486 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1487 [ $size -eq 1048576 ] ||
1488 error "(6) guard size should be 1048576, but got $size"
1490 run_test 17 "LFSCK can repair multiple references"
# Body of the test registered by the "run_test 18a" line at the end of this
# block. It writes a1/f1 on MDT0 (and a2/f2 on MDT1 when MDSCOUNT >= 2),
# runs "chown 1.1" on them while fail_loc=0x1615 is set, checks that the
# reported sizes changed, runs "layout LFSCK -r -o", and then expects
# "repaired_orphan" to be 1 on mds1 (2 on mds2) and the sizes to be back
# to the saved value.
# NOTE(review): this listing omits some original lines (function header,
# "fi"/"done" closers and a few steps between the numbered lines).
1494 echo "The target MDT-object is there, but related stripe information"
1495 echo "is lost or partly lost. The LFSCK should regenerate the missed"
1496 echo "layout EA entries."
1499 check_mount_and_prep
1500 $LFS mkdir -i 0 $DIR/$tdir/a1
1501 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1502 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With "ls -i" the inode number is the first column, so the size is field 6.
1504 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
# The path2fid/getstripe calls in this test only print information to the
# log; their output is not checked.
1506 $LFS path2fid $DIR/$tdir/a1/f1
1507 $LFS getstripe $DIR/$tdir/a1/f1
# Second file on MDT index 1 with two stripes, same 2 MiB of data.
1509 if [ $MDSCOUNT -ge 2 ]; then
1510 $LFS mkdir -i 1 $DIR/$tdir/a2
1511 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1512 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1513 $LFS path2fid $DIR/$tdir/a2/f2
1514 $LFS getstripe $DIR/$tdir/a2/f2
1517 cancel_lru_locks osc
1519 echo "Inject failure, to make the MDT-object lost its layout EA"
# The chown is the operation performed while the injection is set.
1520 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1521 do_facet mds1 $LCTL set_param fail_loc=0x1615
1522 chown 1.1 $DIR/$tdir/a1/f1
1524 if [ $MDSCOUNT -ge 2 ]; then
1525 do_facet mds2 $LCTL set_param fail_loc=0x1615
1526 chown 1.1 $DIR/$tdir/a2/f2
1532 do_facet mds1 $LCTL set_param fail_loc=0
1533 if [ $MDSCOUNT -ge 2 ]; then
1534 do_facet mds2 $LCTL set_param fail_loc=0
1537 cancel_lru_locks mdc
1538 cancel_lru_locks osc
1540 echo "The file size should be incorrect since layout EA is lost"
# The size must differ from saved_size for the injection to have worked.
1541 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1542 [ "$cur_size" != "$saved_size" ] ||
1543 error "(1) Expect incorrect file1 size"
# f2 is compared against f1's saved size; both were written with the same
# dd parameters.
1545 if [ $MDSCOUNT -ge 2 ]; then
1546 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1547 [ "$cur_size" != "$saved_size" ] ||
1548 error "(2) Expect incorrect file2 size"
1551 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1552 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
1554 for k in $(seq $MDSCOUNT); do
# NOTE(review): "internal" in the next comment presumably means "interval";
# the 32 passed to wait_update_facet below is just above that 30 seconds.
1555 # The LFSCK status query internal is 30 seconds. For the case
1556 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1557 # time to guarantee the status sync up.
1558 wait_update_facet mds${k} "$LCTL get_param -n \
1559 mdd.$(facet_svc mds${k}).lfsck_layout |
1560 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1561 error "(4) MDS${k} is not the expected 'completed'"
# OST status is read once (no polling) after all MDSes report completed.
1564 for k in $(seq $OSTCOUNT); do
1565 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1566 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1567 awk '/^status/ { print $2 }')
1568 [ "$cur_status" == "completed" ] ||
1569 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
1572 local repaired=$(do_facet mds1 $LCTL get_param -n \
1573 mdd.$(facet_svc mds1).lfsck_layout |
1574 awk '/^repaired_orphan/ { print $2 }')
1575 [ $repaired -eq 1 ] ||
1576 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
# NOTE(review): 2 is expected on mds2, presumably because f2 has two
# stripes (setstripe -c 2) -- confirm.
1578 if [ $MDSCOUNT -ge 2 ]; then
1579 repaired=$(do_facet mds2 $LCTL get_param -n \
1580 mdd.$(facet_svc mds2).lfsck_layout |
1581 awk '/^repaired_orphan/ { print $2 }')
1582 [ $repaired -eq 2 ] ||
1583 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
1586 $LFS path2fid $DIR/$tdir/a1/f1
1587 $LFS getstripe $DIR/$tdir/a1/f1
1589 if [ $MDSCOUNT -ge 2 ]; then
1590 $LFS path2fid $DIR/$tdir/a2/f2
1591 $LFS getstripe $DIR/$tdir/a2/f2
1594 echo "The file size should be correct after layout LFSCK scanning"
1595 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1596 [ "$cur_size" == "$saved_size" ] ||
1597 error "(7) Expect file1 size $saved_size, but got $cur_size"
1599 if [ $MDSCOUNT -ge 2 ]; then
1600 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1601 [ "$cur_size" == "$saved_size" ] ||
1602 error "(8) Expect file2 size $saved_size, but got $cur_size"
1605 run_test 18a "Find out orphan OST-object and repair it (1)"
# Body of the test registered by the "run_test 18b" line at the end of this
# block. It writes a1/f1 (and a2/f2 when MDSCOUNT >= 2), records their FIDs,
# removes them while fail_loc=0x1616 is set, runs "layout LFSCK -r -o",
# expects "repaired_orphan" to be 1 on mds1 (2 on mds2), then moves
# .lustre/lost+found/MDTxxxx/R-<fid> back to the original paths and checks
# the sizes equal the saved value.
# NOTE(review): this listing omits some original lines (function header,
# "fi"/"done" closers and a few steps between the numbered lines).
1609 echo "The target MDT-object is lost. The LFSCK should re-create the"
1610 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1611 echo "can move it back to normal namespace manually."
1614 check_mount_and_prep
1615 $LFS mkdir -i 0 $DIR/$tdir/a1
1616 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1617 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# With "ls -i" the inode number is the first column, so the size is field 6.
1618 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
# The FID is kept because the lost+found entry is looked up as R-${fid1}.
1619 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1621 $LFS getstripe $DIR/$tdir/a1/f1
# NOTE(review): fid2 is assigned below without "local" in the lines shown.
1623 if [ $MDSCOUNT -ge 2 ]; then
1624 $LFS mkdir -i 1 $DIR/$tdir/a2
1625 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1626 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1627 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1629 $LFS getstripe $DIR/$tdir/a2/f2
1632 cancel_lru_locks osc
1634 echo "Inject failure, to simulate the case of missing the MDT-object"
# The rm is the operation performed while the injection is set.
1635 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1636 do_facet mds1 $LCTL set_param fail_loc=0x1616
1637 rm -f $DIR/$tdir/a1/f1
1639 if [ $MDSCOUNT -ge 2 ]; then
1640 do_facet mds2 $LCTL set_param fail_loc=0x1616
1641 rm -f $DIR/$tdir/a2/f2
1647 do_facet mds1 $LCTL set_param fail_loc=0
1648 if [ $MDSCOUNT -ge 2 ]; then
1649 do_facet mds2 $LCTL set_param fail_loc=0
1652 cancel_lru_locks mdc
1653 cancel_lru_locks osc
1655 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1656 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1658 for k in $(seq $MDSCOUNT); do
# NOTE(review): "internal" in the next comment presumably means "interval";
# the 32 passed to wait_update_facet below is just above that 30 seconds.
1659 # The LFSCK status query internal is 30 seconds. For the case
1660 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1661 # time to guarantee the status sync up.
1662 wait_update_facet mds${k} "$LCTL get_param -n \
1663 mdd.$(facet_svc mds${k}).lfsck_layout |
1664 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1665 error "(2) MDS${k} is not the expected 'completed'"
# OST status is read once (no polling) after all MDSes report completed.
1668 for k in $(seq $OSTCOUNT); do
1669 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1670 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1671 awk '/^status/ { print $2 }')
1672 [ "$cur_status" == "completed" ] ||
1673 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
1676 local repaired=$(do_facet mds1 $LCTL get_param -n \
1677 mdd.$(facet_svc mds1).lfsck_layout |
1678 awk '/^repaired_orphan/ { print $2 }')
1679 [ $repaired -eq 1 ] ||
1680 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
# NOTE(review): 2 is expected on mds2, presumably because f2 has two
# stripes (setstripe -c 2) -- confirm.
1682 if [ $MDSCOUNT -ge 2 ]; then
1683 repaired=$(do_facet mds2 $LCTL get_param -n \
1684 mdd.$(facet_svc mds2).lfsck_layout |
1685 awk '/^repaired_orphan/ { print $2 }')
1686 [ $repaired -eq 2 ] ||
1687 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
1690 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
# The recovered entries are looked up by name "R-<original FID>".
1691 mv $MOUNT/.lustre/lost+found/MDT0000/R-${fid1} $DIR/$tdir/a1/f1 ||
1692 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/R-${fid1}"
1694 if [ $MDSCOUNT -ge 2 ]; then
1695 local name=$MOUNT/.lustre/lost+found/MDT0001/R-${fid2}
1696 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
1699 $LFS path2fid $DIR/$tdir/a1/f1
1700 $LFS getstripe $DIR/$tdir/a1/f1
1702 if [ $MDSCOUNT -ge 2 ]; then
1703 $LFS path2fid $DIR/$tdir/a2/f2
1704 $LFS getstripe $DIR/$tdir/a2/f2
1707 echo "The file size should be correct after layout LFSCK scanning"
1708 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1709 [ "$cur_size" == "$saved_size" ] ||
1710 error "(7) Expect file1 size $saved_size, but got $cur_size"
1712 if [ $MDSCOUNT -ge 2 ]; then
1713 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1714 [ "$cur_size" == "$saved_size" ] ||
1715 error "(8) Expect file2 size $saved_size, but got $cur_size"
1718 run_test 18b "Find out orphan OST-object and repair it (2)"
# Body of the test registered by the "run_test 18c" line at the end of this
# block. It writes a1/f1 (and a2/f2 when MDSCOUNT >= 2) while fail_loc=0x1617
# is set on the OSTs, removes the files while fail_loc=0x1616 is set on the
# MDSes, runs "layout LFSCK -r -o", and checks the "repaired_orphan"
# counters plus the presence of N-* entries under .lustre/lost+found.
# NOTE(review): this listing omits some original lines (function header,
# "fi"/"done" closers and the assignment of $expected).
1722 echo "The target MDT-object is lost, and the OST-object FID is missing."
1723 echo "The LFSCK should re-create the MDT-object with new FID under the "
1724 echo "directory .lustre/lost+found/MDTxxxx."
1727 check_mount_and_prep
1728 $LFS mkdir -i 0 $DIR/$tdir/a1
1729 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1731 echo "Inject failure, to simulate the case of missing parent FID"
# The injection is set on ost1 before f1 is written; it is not cleared in
# the lines shown here.
1732 #define OBD_FAIL_LFSCK_NOPFID 0x1617
1733 do_facet ost1 $LCTL set_param fail_loc=0x1617
1735 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1736 $LFS getstripe $DIR/$tdir/a1/f1
# Same injection on ost2 before writing the two-stripe file f2.
1738 if [ $MDSCOUNT -ge 2 ]; then
1739 $LFS mkdir -i 1 $DIR/$tdir/a2
1740 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1741 do_facet ost2 $LCTL set_param fail_loc=0x1617
1742 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1743 $LFS getstripe $DIR/$tdir/a2/f2
1746 cancel_lru_locks osc
1748 echo "Inject failure, to simulate the case of missing the MDT-object"
# The rm is the operation performed while the injection is set.
1749 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1750 do_facet mds1 $LCTL set_param fail_loc=0x1616
1751 rm -f $DIR/$tdir/a1/f1
1753 if [ $MDSCOUNT -ge 2 ]; then
1754 do_facet mds2 $LCTL set_param fail_loc=0x1616
1755 rm -f $DIR/$tdir/a2/f2
1761 do_facet mds1 $LCTL set_param fail_loc=0
1762 if [ $MDSCOUNT -ge 2 ]; then
1763 do_facet mds2 $LCTL set_param fail_loc=0
1766 cancel_lru_locks mdc
1767 cancel_lru_locks osc
1769 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1770 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1772 for k in $(seq $MDSCOUNT); do
# NOTE(review): "internal" in the next comment presumably means "interval";
# the 32 passed to wait_update_facet below is just above that 30 seconds.
1773 # The LFSCK status query internal is 30 seconds. For the case
1774 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1775 # time to guarantee the status sync up.
1776 wait_update_facet mds${k} "$LCTL get_param -n \
1777 mdd.$(facet_svc mds${k}).lfsck_layout |
1778 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1779 error "(2) MDS${k} is not the expected 'completed'"
# OST status is read once (no polling) after all MDSes report completed.
1782 for k in $(seq $OSTCOUNT); do
1783 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1784 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1785 awk '/^status/ { print $2 }')
1786 [ "$cur_status" == "completed" ] ||
1787 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): the body of this "if" (where $expected is assigned) is not
# part of this listing.
1790 if [ $MDSCOUNT -ge 2 ]; then
1796 local repaired=$(do_facet mds1 $LCTL get_param -n \
1797 mdd.$(facet_svc mds1).lfsck_layout |
1798 awk '/^repaired_orphan/ { print $2 }')
1799 [ $repaired -eq $expected ] ||
1800 error "(4) Expect $expected fixed on mds1, but got: $repaired"
# Unlike tests 18a/18b, mds2 is expected to report 0 repaired orphans.
1802 if [ $MDSCOUNT -ge 2 ]; then
1803 repaired=$(do_facet mds2 $LCTL get_param -n \
1804 mdd.$(facet_svc mds2).lfsck_layout |
1805 awk '/^repaired_orphan/ { print $2 }')
1806 [ $repaired -eq 0 ] ||
1807 error "(5) Expect 0 fixed on mds2, but got: $repaired"
# NOTE(review): the echo text below says "should be some stub", but the
# check that follows treats a successful ls of MDT0001/N-* as an error
# ("should be empty") -- the message looks inconsistent with the check.
1810 echo "There should be some stub under .lustre/lost+found/MDT0001/"
1811 ls -ail $MOUNT/.lustre/lost+found/MDT0001/N-* &&
1812 error "(6) .lustre/lost+found/MDT0001/ should be empty"
# For MDT0000 the ls must succeed, i.e. at least one N-* entry exists.
1814 echo "There should be some stub under .lustre/lost+found/MDT0000/"
1815 ls -ail $MOUNT/.lustre/lost+found/MDT0000/N-* ||
1816 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
1818 run_test 18c "Find out orphan OST-object and repair it (3)"
# Body of the test registered by the "run_test 18d" line at the end of this
# block. It writes a1/f1 ("guard") and a1/f2 ("foo"), runs "chown 1.1" on f2
# and removes f1 while fail_loc=0x1618 is set, restarts the filesystem,
# checks that f2's size is no longer the saved one, runs
# "layout LFSCK -r -o -c" (held in phase 2 by fail_loc=0x1602 until it is
# cleared), and expects "repaired_orphan" == 1 and f2's size to equal the
# saved value again.
# NOTE(review): this listing omits some original lines (function header,
# "done" closers, part of the introduction, and the stopall step announced
# by the echo before setupall).
1822 echo "The target MDT-object layout EA slot is occpuied by some new"
1823 echo "created OST-object when repair dangling reference case. Such"
1824 echo "conflict OST-object has never been modified. Then when found"
1825 echo "the orphan OST-object, LFSCK will replace it with the orphan"
1829 check_mount_and_prep
1831 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1832 echo "guard" > $DIR/$tdir/a1/f1
1833 echo "foo" > $DIR/$tdir/a1/f2
# With "ls -i" the inode number is the first column, so the size is field 6.
1834 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
# Informational output only; not checked.
1835 $LFS path2fid $DIR/$tdir/a1/f1
1836 $LFS getstripe $DIR/$tdir/a1/f1
1837 $LFS path2fid $DIR/$tdir/a1/f2
1838 $LFS getstripe $DIR/$tdir/a1/f2
1839 cancel_lru_locks osc
1841 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1842 echo "to reference the same OST-object (which is f1's OST-obejct)."
1843 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1844 echo "dangling reference case, but f2's old OST-object is there."
# Both the chown of f2 and the removal of f1 happen under the injection.
1847 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1848 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1849 chown 1.1 $DIR/$tdir/a1/f2
1850 rm -f $DIR/$tdir/a1/f1
1853 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1855 echo "stopall to cleanup object cache"
1858 setupall > /dev/null
1860 echo "The file size should be incorrect since dangling referenced"
1861 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1862 [ "$cur_size" != "$saved_size" ] ||
1863 error "(1) Expect incorrect file2 size"
# NOTE(review): fail_val=5 with OBD_FAIL_LFSCK_DELAY3 presumably holds the
# scan long enough for "scanning-phase2" to be observed below -- confirm.
1865 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1866 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
1868 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1869 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1871 wait_update_facet mds1 "$LCTL get_param -n \
1872 mdd.$(facet_svc mds1).lfsck_layout |
1873 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1874 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
# Clear the delay so the scan can finish.
1876 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1878 for k in $(seq $MDSCOUNT); do
# NOTE(review): "internal" in the next comment presumably means "interval";
# the 32 passed to wait_update_facet below is just above that 30 seconds.
1879 # The LFSCK status query internal is 30 seconds. For the case
1880 # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
1881 # time to guarantee the status sync up.
1882 wait_update_facet mds${k} "$LCTL get_param -n \
1883 mdd.$(facet_svc mds${k}).lfsck_layout |
1884 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1885 error "(3) MDS${k} is not the expected 'completed'"
# OST status is read once (no polling) after all MDSes report completed.
1888 for k in $(seq $OSTCOUNT); do
1889 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1890 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1891 awk '/^status/ { print $2 }')
1892 [ "$cur_status" == "completed" ] ||
1893 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
1896 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
1897 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
1898 awk '/^repaired_orphan/ { print $2 }')
1899 [ $repaired -eq 1 ] ||
1900 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
1902 echo "The file size should be correct after layout LFSCK scanning"
1903 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1904 [ "$cur_size" == "$saved_size" ] ||
1905 error "(6) Expect file2 size $saved_size, but got $cur_size"
1907 echo "The LFSCK should find back the original data."
# The content of f2 is printed for the log; it is not compared to "foo".
1908 cat $DIR/$tdir/a1/f2
1909 $LFS path2fid $DIR/$tdir/a1/f2
1910 $LFS getstripe $DIR/$tdir/a1/f2
1912 run_test 18d "Find out orphan OST-object and repair it (4)"
1916 echo "The target MDT-object layout EA slot is occpuied by some new"
1917 echo "created OST-object when repair dangling reference case. Such"
1918 echo "conflict OST-object has been modified by others. To keep the"
1919 echo "new data, the LFSCK will create a new file to refernece this"
1920 echo "old orphan OST-object."
1923 check_mount_and_prep
1925 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1926 echo "guard" > $DIR/$tdir/a1/f1
1927 echo "foo" > $DIR/$tdir/a1/f2
1928 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1929 $LFS path2fid $DIR/$tdir/a1/f1
1930 $LFS getstripe $DIR/$tdir/a1/f1
1931 $LFS path2fid $DIR/$tdir/a1/f2
1932 $LFS getstripe $DIR/$tdir/a1/f2
1933 cancel_lru_locks osc
1935 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1936 echo "to reference the same OST-object (which is f1's OST-obejct)."
1937 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1938 echo "dangling reference case, but f2's old OST-object is there."
1941 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1942 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1943 chown 1.1 $DIR/$tdir/a1/f2
1944 rm -f $DIR/$tdir/a1/f1
1947 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1949 echo "stopall to cleanup object cache"
1952 setupall > /dev/null
1954 echo "The file size should be incorrect since dangling referenced"
1955 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1956 [ "$cur_size" != "$saved_size" ] ||
1957 error "(1) Expect incorrect file2 size"
1959 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1960 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
1962 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1963 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1965 wait_update_facet mds1 "$LCTL get_param -n \
1966 mdd.$(facet_svc mds1).lfsck_layout |
1967 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1968 error "(3) MDS1 is not the expected 'scanning-phase2'"
1970 # to guarantee all updates are synced.
1974 echo "Write new data to f2 to modify the new created OST-object."
1975 echo "dummy" >> $DIR/$tdir/a1/f2
1977 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1979 for k in $(seq $MDSCOUNT); do
1980 # The LFSCK status query interval is 30 seconds. For the case
1981 # where some LFSCK_NOTIFY RPCs fail or are lost, we wait long
1982 # enough to guarantee that the status has synced up.
1983 wait_update_facet mds${k} "$LCTL get_param -n \
1984 mdd.$(facet_svc mds${k}).lfsck_layout |
1985 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1986 error "(4) MDS${k} is not the expected 'completed'"
1989 for k in $(seq $OSTCOUNT); do
1990 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1991 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1992 awk '/^status/ { print $2 }')
1993 [ "$cur_status" == "completed" ] ||
1994 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
1997 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
1998 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
1999 awk '/^repaired_orphan/ { print $2 }')
2000 [ $repaired -eq 1 ] ||
2001 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2003 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2004 local cname=$(ls $MOUNT/.lustre/lost+found/MDT0000/C-*)
2006 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
2008 echo "The stub file should keep the original f2 data"
2009 cur_size=$(ls -il $cname | awk '{ print $6 }')
2010 [ "$cur_size" == "$saved_size" ] ||
2011 error "(8) Expect file2 size $saved_size, but got $cur_size"
2014 $LFS path2fid $cname
2015 $LFS getstripe $cname
2017 echo "The f2 should contains new data."
2018 cat $DIR/$tdir/a1/f2
2019 $LFS path2fid $DIR/$tdir/a1/f2
2020 $LFS getstripe $DIR/$tdir/a1/f2
2022 run_test 18e "Find out orphan OST-object and repair it (5)"
# test_19a: an OST with lfsck_verify_pfid enabled must refuse a client read
# that carries a wrong parent FID.
#
# Globals read: DIR, tdir, FSNAME, LFS, LCTL
# Helpers used (from the test framework): check_mount_and_prep,
#   cancel_lru_locks, do_facet, error
# Result: calls error() when a file cannot be created or when the read is
#   unexpectedly allowed; otherwise returns the status of the final
#   fail_loc reset.
test_19a() {
	check_mount_and_prep
	$LFS setstripe -c 1 -i 0 "$DIR/$tdir"

	# The files must really exist: if a0 could not be created, the "cat"
	# below would fail for the wrong reason and the test would pass
	# without ever exercising the parent FID check.
	echo "foo" > "$DIR/$tdir/a0" || error "(1) Fail to create a0"
	echo "guard" > "$DIR/$tdir/a1" || error "(2) Fail to create a1"
	# Drop cached locks so that the read below has to go to the OST.
	cancel_lru_locks osc

	echo "Inject failure, then client will offer wrong parent FID when read"
	do_facet ost1 $LCTL set_param -n \
		obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
	#define OBD_FAIL_LFSCK_INVALID_PFID	0x1619
	$LCTL set_param fail_loc=0x1619

	echo "Read RPC with wrong parent FID should be denied"
	if cat "$DIR/$tdir/a0"; then
		# Disarm the client-side failure injection *before* reporting:
		# error() may terminate the test, and a fail_loc left armed
		# would leak into whatever runs next.
		$LCTL set_param fail_loc=0
		error "(3) Read should be denied!"
	fi
	$LCTL set_param fail_loc=0
}
run_test 19a "OST-object inconsistency self detect"
# test_19b: an OST-object that points back to a non-existent MDT-object is
# left alone while lfsck_verify_pfid is not enabled by this test, and is
# counted as repaired once verification is enabled and the file is read
# with the right parent FID.
#
# Globals read: DIR, tdir, FSNAME, LFS, LCTL
# Helpers used (from the test framework): check_mount_and_prep,
#   cancel_lru_locks, do_facet, error
# Result: calls error() with a numbered message on any failed expectation.
test_19b() {
	local rc=0
	local repaired
	local pfid_param="obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid"

	check_mount_and_prep
	$LFS setstripe -c 1 -i 0 "$DIR/$tdir"

	echo "Inject failure stub to make the OST-object to back point to"
	echo "non-exist MDT-object"

	#define OBD_FAIL_LFSCK_UNMATCHED_PAIR1	0x1611
	do_facet ost1 $LCTL set_param fail_loc=0x1611
	echo "foo" > "$DIR/$tdir/f0" || rc=$?
	cancel_lru_locks osc
	do_facet ost1 $LCTL set_param fail_loc=0
	# Report a failed create only after the OST fail_loc is disarmed, so
	# the injection cannot outlive the test; without this check the
	# problem would surface later as a misleading "(2) Read should not
	# be denied!".
	[ "$rc" -eq 0 ] || error "(0) Fail to create f0"

	echo "Nothing should be fixed since self detect and repair is disabled"
	repaired=$(do_facet ost1 $LCTL get_param -n $pfid_param |
		   awk '/^repaired/ { print $2 }')
	# Quoted so that an empty or multi-word value fails the comparison
	# cleanly instead of producing a "[" syntax error.
	[ "$repaired" -eq 0 ] ||
		error "(1) Expected 0 repaired, but got $repaired"

	echo "Read RPC with right parent FID should be accepted,"
	echo "and cause parent FID on OST to be fixed"

	do_facet ost1 $LCTL set_param -n $pfid_param 1
	cat "$DIR/$tdir/f0" || error "(2) Read should not be denied!"

	repaired=$(do_facet ost1 $LCTL get_param -n $pfid_param |
		   awk '/^repaired/ { print $2 }')
	[ "$repaired" -eq 1 ] ||
		error "(3) Expected 1 repaired, but got $repaired"
}
run_test 19b "OST-object inconsistency self repair"
# test_20: files whose MDT-object (and, for some of them, one OST-object)
# has been lost must be re-created by layout LFSCK as
# $MOUNT/.lustre/lost+found/MDT0000/R-<fid>; a stripe that is gone is
# represented by a LOV EA hole (pattern flag LOV_PATTERN_F_HOLE), and the
# re-created files are then exercised from the client (getstripe, stat,
# read, write, chown, touch, unlink).
#
# NOTE(review): some lines of this function are not visible in this copy
# (e.g. the function header, the bcount assignment, the else/fi/done
# keywords). The comments below describe only the visible statements.
#
# The test is skipped when fewer than 2 OSTs are configured.
2080 [ $OSTCOUNT -lt 2 ] &&
2081 skip "The test needs at least 2 OSTs" && return
2084 echo "The target MDT-object and some of its OST-object are lost."
2085 echo "The LFSCK should find out the left OST-objects and re-create"
2086 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2087 echo "with the partial OST-objects (LOV EA hole)."
2089 echo "New client can access the file with LOV EA hole via normal"
2090 echo "system tools or commands without crash the system."
2092 echo "For old client, even though it cannot access the file with"
2093 echo "LOV EA hole, it should not cause the system crash."
# Create a1 on MDT index 0 and stripe it 3-wide when more than 2 OSTs are
# available (a 2-wide setstripe with the same 1M stripe size follows).
2096 check_mount_and_prep
2097 $LFS mkdir -i 0 $DIR/$tdir/a1
2098 if [ $OSTCOUNT -gt 2 ]; then
2099 $LFS setstripe -c 3 -i 0 -s 1M $DIR/$tdir/a1
2102 $LFS setstripe -c 2 -i 0 -s 1M $DIR/$tdir/a1
# NOTE(review): $bcount is used below but its assignment is not visible
# here; the block layout described next is what the later size checks
# (4096 * $bcount) rely on.
2106 # 256 blocks on the stripe0.
2107 # 1 block on the stripe1 for 2 OSTs case.
2108 # 256 blocks on the stripe1 for other cases.
2109 # 1 block on the stripe2 if OSTs > 2
2110 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2111 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2112 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# Remember the FIDs now: the re-created files are looked up later by the
# name R-${fidN} under lost+found.
2114 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2115 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2116 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
2119 $LFS getstripe $DIR/$tdir/a1/f0
2121 $LFS getstripe $DIR/$tdir/a1/f1
2123 $LFS getstripe $DIR/$tdir/a1/f2
# f3 is only created when there are more than 2 OSTs.
# NOTE(review): no "local" declaration for fid3 is visible in this copy.
2125 if [ $OSTCOUNT -gt 2 ]; then
2126 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
2127 fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2129 $LFS getstripe $DIR/$tdir/a1/f3
2132 cancel_lru_locks osc
# Each "rm -f" below runs with a failure injected on mds1. fail_loc selects
# the kind of loss and fail_val changes between the f1, f2 and f3 removals,
# matching the "OST-object0/1/2" wording of the echo messages.
2134 echo "Inject failure..."
2135 echo "To simulate f0 lost MDT-object"
2136 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2137 do_facet mds1 $LCTL set_param fail_loc=0x1616
2138 rm -f $DIR/$tdir/a1/f0
2140 echo "To simulate f1 lost MDT-object and OST-object0"
2141 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
2142 do_facet mds1 $LCTL set_param fail_loc=0x161a
2143 rm -f $DIR/$tdir/a1/f1
2145 echo "To simulate f2 lost MDT-object and OST-object1"
2146 do_facet mds1 $LCTL set_param fail_val=1
2147 rm -f $DIR/$tdir/a1/f2
2149 if [ $OSTCOUNT -gt 2 ]; then
2150 echo "To simulate f3 lost MDT-object and OST-object2"
2151 do_facet mds1 $LCTL set_param fail_val=2
2152 rm -f $DIR/$tdir/a1/f3
# The client is unmounted here and mounted again at step (5.0), after the
# LFSCK results have been checked on the servers.
2155 umount_client $MOUNT
# Disarm the MDS failure injection before LFSCK is started.
2158 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
2160 echo "Inject failure to slow down the LFSCK on OST0"
2161 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2162 do_facet ost1 $LCTL set_param fail_loc=0x161b
2164 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
2165 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
2168 do_facet ost1 $LCTL set_param fail_loc=0
# Wait (up to 32 seconds per MDS) for every MDT to report "completed".
2170 for k in $(seq $MDSCOUNT); do
2171 # The LFSCK status query interval is 30 seconds. For the case
2172 # where some LFSCK_NOTIFY RPCs fail or are lost, we wait long
2173 # enough to guarantee that the status has synced up.
2174 wait_update_facet mds${k} "$LCTL get_param -n \
2175 mdd.$(facet_svc mds${k}).lfsck_layout |
2176 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2177 error "(2) MDS${k} is not the expected 'completed'"
# Every OST must report "completed" as well (checked once, no waiting).
2180 for k in $(seq $OSTCOUNT); do
2181 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2182 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2183 awk '/^status/ { print $2 }')
2184 [ "$cur_status" == "completed" ] ||
2185 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# repaired_orphan on mds1: 9 is expected with more than 2 OSTs (4.1) and
# 4 otherwise (4.2).
2188 local repaired=$(do_facet mds1 $LCTL get_param -n \
2189 mdd.$(facet_svc mds1).lfsck_layout |
2190 awk '/^repaired_orphan/ { print $2 }')
2191 if [ $OSTCOUNT -gt 2 ]; then
2192 [ $repaired -eq 9 ] ||
2193 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2195 [ $repaired -eq 4 ] ||
2196 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
2199 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Bit that is tested in the "lfs getstripe -L" pattern value to tell
# whether a layout contains a LOV EA hole. It is assigned without "local".
2201 LOV_PATTERN_F_HOLE=0x40000000
#
# Step 5: f0 lost only its MDT-object, so the re-created file must have no
# hole, the full stripe count and the full size, and must be readable and
# writable.
2204 # R-${fid0} is the old f0
2206 local name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid0}"
2207 echo "Check $name, which is the old f0"
2209 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
2211 local pattern=0x$($LFS getstripe -L $name)
2212 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2213 error "(5.2) NOT expect pattern flag hole, but got $pattern"
2215 local stripes=$($LFS getstripe -c $name)
2216 if [ $OSTCOUNT -gt 2 ]; then
2217 [ $stripes -eq 3 ] ||
2218 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2220 [ $stripes -eq 2 ] ||
2221 error "(5.3.2) expect the stripe count is 2, but got $stripes"
2224 local size=$(stat $name | awk '/Size:/ { print $2 }')
2225 [ $size -eq $((4096 * $bcount)) ] ||
2226 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
2228 cat $name > /dev/null || error "(5.5) cannot read $name"
2230 echo "dummy" >> $name || error "(5.6) cannot write $name"
2232 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2234 touch $name || error "(5.8) cannot touch $name"
2236 rm -f $name || error "(5.9) cannot unlink $name"
#
# Step 6: f1 lost stripe0, so the re-created file must carry the hole flag.
# A plain read, a write into the hole and an append must fail, while a write
# at seek=300 (outside the hole), chown, touch and unlink must succeed.
2239 # R-${fid1} contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2241 name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid1}"
2242 if [ $OSTCOUNT -gt 2 ]; then
2243 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2245 echo "Check $name, it contains the old f1's stripe1"
2248 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
2250 pattern=0x$($LFS getstripe -L $name)
2251 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2252 error "(6.2) expect pattern flag hole, but got $pattern"
2254 stripes=$($LFS getstripe -c $name)
2255 if [ $OSTCOUNT -gt 2 ]; then
2256 [ $stripes -eq 3 ] ||
2257 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2259 [ $stripes -eq 2 ] ||
2260 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2263 size=$(stat $name | awk '/Size:/ { print $2 }')
2264 [ $size -eq $((4096 * $bcount)) ] ||
2265 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
2267 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# dd with conv=sync,noerror continues past read errors; the number of
# "Input/output error" lines it prints is counted and must be 256, and the
# dumped copy must still have the full size.
2269 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2270 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2273 [ $failures -eq 256 ] ||
2274 error "(6.6) expect 256 IO failures, but get $failures"
2276 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2277 [ $size -eq $((4096 * $bcount)) ] ||
2278 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
2280 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2281 error "(6.8) write to the LOV EA hole should fail"
2283 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2284 error "(6.9) write to normal stripe should NOT fail"
2286 echo "foo" >> $name && error "(6.10) append write $name should fail"
2288 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2290 touch $name || error "(6.12) cannot touch $name"
2292 rm -f $name || error "(6.13) cannot unlink $name"
#
# Step 7: f2 lost stripe1. With more than 2 OSTs the hole is in the middle
# of the layout (checks 7.x.1); otherwise the file is expected to have a
# single stripe and no hole (checks 7.x.2).
2295 # R-${fid2} contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2297 name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid2}"
2298 if [ $OSTCOUNT -gt 2 ]; then
2299 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2301 echo "Check $name, it contains the old f2's stripe0"
2304 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
2306 pattern=0x$($LFS getstripe -L $name)
2307 stripes=$($LFS getstripe -c $name)
2308 size=$(stat $name | awk '/Size:/ { print $2 }')
2309 if [ $OSTCOUNT -gt 2 ]; then
2310 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2311 error "(7.2.1) expect pattern flag hole, but got $pattern"
2313 [ $stripes -eq 3 ] ||
2314 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2316 [ $size -eq $((4096 * $bcount)) ] ||
2317 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
2319 cat $name > /dev/null &&
2320 error "(7.5.1) normal read $name should fail"
2322 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2323 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2325 [ $failures -eq 256 ] ||
2326 error "(7.6) expect 256 IO failures, but get $failures"
2328 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2329 [ $size -eq $((4096 * $bcount)) ] ||
2330 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
# Here the hole is reached at seek=300, while the first block is on a
# surviving stripe (the reverse of step 6).
2332 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2333 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
2335 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2336 error "(7.8.1) write to normal stripe should NOT fail"
2338 echo "foo" >> $name &&
2339 error "(7.8.3) append write $name should fail"
2341 chown $RUNAS_ID:$RUNAS_GID $name ||
2342 error "(7.9.1) cannot chown on $name"
2344 touch $name || error "(7.10.1) cannot touch $name"
2346 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2347 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2349 [ $stripes -eq 1 ] ||
2350 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2353 [ $size -eq $((4096 * (256 + 0))) ] ||
2354 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2356 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2358 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2360 chown $RUNAS_ID:$RUNAS_GID $name ||
2361 error "(7.9.2) cannot chown on $name"
2363 touch $name || error "(7.10.2) cannot touch $name"
2366 rm -f $name || error "(7.11) cannot unlink $name"
# f3 only exists when there are more than 2 OSTs; nothing is left to check
# otherwise.
2368 [ $OSTCOUNT -le 2 ] && return
#
# Step 8: f3 lost its last stripe, so the re-created file is expected to
# look like a complete 2-stripe file with no hole.
2371 # R-${fid3} should contain the old f3's stripe0 and stripe1
2373 name="$MOUNT/.lustre/lost+found/MDT0000/R-${fid3}"
2374 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2376 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
2378 pattern=0x$($LFS getstripe -L $name)
2379 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2380 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2382 stripes=$($LFS getstripe -c $name)
2383 # LFSCK does not know the old f3 had 3 stripes.
2384 # It only tries to find as much as possible.
2385 # The stripe count depends on the last stripe's offset.
2386 [ $stripes -eq 2 ] ||
2387 error "(8.3) expect the stripe count is 2, but got $stripes"
2389 size=$(stat $name | awk '/Size:/ { print $2 }')
2391 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2392 error "(8.4) expect the size $((4096 * 512)), but got $size"
2394 cat $name > /dev/null || error "(8.5) cannot read $name"
2396 echo "dummy" >> $name || error "(8.6) cannot write $name"
2398 chown $RUNAS_ID:$RUNAS_GID $name ||
2399 error "(8.7) cannot chown on $name"
2401 touch $name || error "(8.8) cannot touch $name"
2403 rm -f $name || error "(8.9) cannot unlink $name"
2405 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# Switch the lfsck debug flag back off. This is best-effort: a failure here
# must not abort the script.
$LCTL set_param debug=-lfsck > /dev/null || :

# Put back the MDS/OST sizing and OST count that were saved at the top of
# the script, before they were adjusted for this test run.
MDSSIZE="$SAVED_MDSSIZE"
OSTSIZE="$SAVED_OSTSIZE"
OSTCOUNT="$SAVED_OSTCOUNT"
2414 # cleanup the system at last