3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_LFSCK_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
20 require_dsh_mds || exit 0
22 MCREATE=${MCREATE:-mcreate}
23 SAVED_MDSSIZE=${MDSSIZE}
24 SAVED_OSTSIZE=${OSTSIZE}
25 SAVED_OSTCOUNT=${OSTCOUNT}
26 # use small MDS + OST size to speed formatting time
27 # do not make MDSSIZE/OSTSIZE too small; they affect the default journal size
30 # no need for many OSTs; cap the count to reduce format/start/stop overhead
31 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
33 # build up a clean test environment.
37 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
38 skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
41 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.90) ]] &&
42 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
44 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
45 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19"
47 [ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
48 # bug number for skipped test: LU-4970
49 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 14"
53 $LCTL set_param debug=+lfsck > /dev/null || true
55 MDT_DEV="${FSNAME}-MDT0000"
56 OST_DEV="${FSNAME}-OST0000"
57 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
58 START_NAMESPACE="do_facet $SINGLEMDS \
59 $LCTL lfsck_start -M ${MDT_DEV} -t namespace"
60 START_LAYOUT="do_facet $SINGLEMDS \
61 $LCTL lfsck_start -M ${MDT_DEV} -t layout"
62 START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout"
63 STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
64 SHOW_NAMESPACE="do_facet $SINGLEMDS \
65 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace"
66 SHOW_LAYOUT="do_facet $SINGLEMDS \
67 $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout"
68 SHOW_LAYOUT_ON_OST="do_facet ost1 \
69 $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
70 MOUNT_OPTS_SCRUB="-o user_xattr"
71 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
80 echo "preparing... $nfiles * $ndirs files will be created $(date)."
81 if [ ! -z $igif ]; then
82 #define OBD_FAIL_FID_IGIF 0x1504
83 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
86 cp $LUSTRE/tests/*.sh $DIR/$tdir/
87 if [ $ndirs -gt 0 ]; then
88 createmany -d $DIR/$tdir/d $ndirs
89 createmany -m $DIR/$tdir/f $ndirs
90 if [ $nfiles -gt 0 ]; then
91 for ((i = 0; i < $ndirs; i++)); do
92 createmany -m $DIR/$tdir/d${i}/f $nfiles > \
93 /dev/null || error "createmany $nfiles"
96 createmany -d $DIR/$tdir/e $ndirs
99 if [ ! -z $igif ]; then
100 touch $DIR/$tdir/dummy
101 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
104 echo "prepared $(date)."
110 #define OBD_FAIL_LFSCK_DELAY1 0x1600
111 do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
112 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
114 $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
116 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
117 [ "$STATUS" == "scanning-phase1" ] ||
118 error "(4) Expect 'scanning-phase1', but got '$STATUS'"
120 $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
122 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
123 [ "$STATUS" == "stopped" ] ||
124 error "(6) Expect 'stopped', but got '$STATUS'"
126 $START_NAMESPACE || error "(7) Fail to start LFSCK for namespace!"
128 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
129 [ "$STATUS" == "scanning-phase1" ] ||
130 error "(8) Expect 'scanning-phase1', but got '$STATUS'"
132 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
133 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
134 mdd.${MDT_DEV}.lfsck_namespace |
135 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
137 error "(9) unexpected status"
140 local repaired=$($SHOW_NAMESPACE |
141 awk '/^updated_phase1/ { print $2 }')
142 [ $repaired -eq 0 ] ||
143 error "(10) Expect nothing to be repaired, but got: $repaired"
145 local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
146 $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
147 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
148 mdd.${MDT_DEV}.lfsck_namespace |
149 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
151 error "(12) unexpected status"
154 local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
155 [ $((scanned1 + 1)) -eq $scanned2 ] ||
156 error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
158 echo "stopall, should NOT crash LU-3649"
159 stopall || error "(14) Fail to stopall"
161 run_test 0 "Control LFSCK manually"
164 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
165 skip "OI Scrub not implemented for ZFS" && return
169 #define OBD_FAIL_FID_INDIR 0x1501
170 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
171 touch $DIR/$tdir/dummy
173 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
175 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
176 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
177 mdd.${MDT_DEV}.lfsck_namespace |
178 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
180 error "(4) unexpected status"
183 local repaired=$($SHOW_NAMESPACE |
184 awk '/^dirent_repaired/ { print $2 }')
185 # for interop with old server
186 [ -z "$repaired" ] &&
187 repaired=$($SHOW_NAMESPACE |
188 awk '/^updated_phase1/ { print $2 }')
190 [ $repaired -eq 1 ] ||
191 error "(5) Fail to repair crashed FID-in-dirent: $repaired"
193 mount_client $MOUNT || error "(6) Fail to start client!"
195 #define OBD_FAIL_FID_LOOKUP 0x1505
196 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
197 ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
199 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
201 run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
205 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
206 skip "OI Scrub not implemented for ZFS" && return
210 #define OBD_FAIL_FID_INLMA 0x1502
211 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
212 touch $DIR/$tdir/dummy
214 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
216 #define OBD_FAIL_FID_NOLMA 0x1506
217 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
218 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
219 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
220 mdd.${MDT_DEV}.lfsck_namespace |
221 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
223 error "(4) unexpected status"
226 local repaired=$($SHOW_NAMESPACE |
227 awk '/^dirent_repaired/ { print $2 }')
228 # for interop with old server
229 [ -z "$repaired" ] &&
230 repaired=$($SHOW_NAMESPACE |
231 awk '/^updated_phase1/ { print $2 }')
233 [ $repaired -eq 1 ] ||
234 error "(5) Fail to repair missed FID-in-LMA: $repaired"
236 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
237 mount_client $MOUNT || error "(6) Fail to start client!"
239 #define OBD_FAIL_FID_LOOKUP 0x1505
240 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
241 stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
243 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
245 run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
250 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
251 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
252 touch $DIR/$tdir/dummy
254 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
256 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
257 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
258 mdd.${MDT_DEV}.lfsck_namespace |
259 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
261 error "(4) unexpected status"
264 local repaired=$($SHOW_NAMESPACE |
265 awk '/^linkea_repaired/ { print $2 }')
266 # for interop with old server
267 [ -z "$repaired" ] &&
268 repaired=$($SHOW_NAMESPACE |
269 awk '/^updated_phase1/ { print $2 }')
271 [ $repaired -eq 1 ] ||
272 error "(5) Fail to repair crashed linkEA: $repaired"
274 mount_client $MOUNT || error "(6) Fail to start client!"
276 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
277 error "(7) Fail to stat $DIR/$tdir/dummy"
279 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
280 local dummyname=$($LFS fid2path $DIR $dummyfid)
281 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
282 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
284 run_test 2a "LFSCK can find out and repair crashed linkEA entry"
290 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
291 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
292 touch $DIR/$tdir/dummy
294 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
296 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
297 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
298 mdd.${MDT_DEV}.lfsck_namespace |
299 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
301 error "(4) unexpected status"
304 local repaired=$($SHOW_NAMESPACE |
305 awk '/^updated_phase2/ { print $2 }')
306 [ $repaired -eq 1 ] ||
307 error "(5) Fail to repair crashed linkEA: $repaired"
309 mount_client $MOUNT || error "(6) Fail to start client!"
311 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
312 error "(7) Fail to stat $DIR/$tdir/dummy"
314 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
315 local dummyname=$($LFS fid2path $DIR $dummyfid)
316 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
317 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
319 run_test 2b "LFSCK can find out and remove invalid linkEA entry"
325 #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605
326 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
327 touch $DIR/$tdir/dummy
329 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
331 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
332 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
333 mdd.${MDT_DEV}.lfsck_namespace |
334 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
336 error "(4) unexpected status"
339 local repaired=$($SHOW_NAMESPACE |
340 awk '/^updated_phase2/ { print $2 }')
341 [ $repaired -eq 1 ] ||
342 error "(5) Fail to repair crashed linkEA: $repaired"
344 mount_client $MOUNT || error "(6) Fail to start client!"
346 stat $DIR/$tdir/dummy | grep "Links: 1" > /dev/null ||
347 error "(7) Fail to stat $DIR/$tdir/dummy"
349 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
350 local dummyname=$($LFS fid2path $DIR $dummyfid)
351 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
352 error "(8) Fail to repair linkEA: $dummyfid $dummyname"
354 run_test 2c "LFSCK can find out and remove repeated linkEA entry"
358 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
359 skip "OI Scrub not implemented for ZFS" && return
362 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
363 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
365 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
366 echo "start $SINGLEMDS with disabling OI scrub"
367 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
368 error "(2) Fail to start MDS!"
370 #define OBD_FAIL_LFSCK_DELAY2 0x1601
371 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
372 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
373 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
374 mdd.${MDT_DEV}.lfsck_namespace |
375 awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || {
377 error "(5) unexpected status"
380 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
381 [ "$STATUS" == "scanning-phase1" ] ||
382 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
384 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
385 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
386 mdd.${MDT_DEV}.lfsck_namespace |
387 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
389 error "(7) unexpected status"
# Keep FLAGS function-local so it does not leak into later tests.
392 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
393 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
395 local repaired=$($SHOW_NAMESPACE |
396 awk '/^dirent_repaired/ { print $2 }')
397 # for interop with old server
398 [ -z "$repaired" ] &&
399 repaired=$($SHOW_NAMESPACE |
400 awk '/^updated_phase1/ { print $2 }')
402 [ $repaired -ge 9 ] ||
403 error "(9) Fail to re-generate FID-in-dirent: $repaired"
405 mount_client $MOUNT || error "(10) Fail to start client!"
407 #define OBD_FAIL_FID_LOOKUP 0x1505
408 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
409 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
410 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
412 run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
416 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
417 skip "OI Scrub not implemented for ZFS" && return
420 cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
421 stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
423 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
424 echo "start $SINGLEMDS with disabling OI scrub"
425 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
426 error "(2) Fail to start MDS!"
428 #define OBD_FAIL_LFSCK_DELAY2 0x1601
429 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
430 $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
431 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
432 mdd.${MDT_DEV}.lfsck_namespace |
433 awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || {
435 error "(5) unexpected status"
438 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
439 [ "$STATUS" == "scanning-phase1" ] ||
440 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
442 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
443 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
444 mdd.${MDT_DEV}.lfsck_namespace |
445 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
447 error "(7) unexpected status"
# Keep FLAGS function-local so it does not leak into later tests.
450 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
451 [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
453 local repaired=$($SHOW_NAMESPACE |
454 awk '/^dirent_repaired/ { print $2 }')
455 # for interop with old server
456 [ -z "$repaired" ] &&
457 repaired=$($SHOW_NAMESPACE |
458 awk '/^updated_phase1/ { print $2 }')
460 [ $repaired -ge 2 ] ||
461 error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
463 mount_client $MOUNT || error "(10) Fail to start client!"
465 #define OBD_FAIL_FID_LOOKUP 0x1505
466 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
467 stat $DIR/$tdir/dummy > /dev/null || error "(11) no FID-in-LMA."
469 ls $DIR/$tdir/ > /dev/null || error "(12) no FID-in-dirent."
471 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
472 local dummyfid=$($LFS path2fid $DIR/$tdir/dummy)
473 local dummyname=$($LFS fid2path $DIR $dummyfid)
474 [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
475 error "(13) Fail to generate linkEA: $dummyfid $dummyname"
477 run_test 5 "LFSCK can handle IGIF object upgrading"
482 #define OBD_FAIL_LFSCK_DELAY1 0x1600
483 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
484 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
486 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
487 [ "$STATUS" == "scanning-phase1" ] ||
488 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
490 # Sleep 3 sec to guarantee at least one object processed by LFSCK
492 # Fail the LFSCK to guarantee there is at least one checkpoint
493 #define OBD_FAIL_LFSCK_FATAL1 0x1608
494 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
495 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
496 mdd.${MDT_DEV}.lfsck_namespace |
497 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
499 error "(4) unexpected status"
502 local POS0=$($SHOW_NAMESPACE |
503 awk '/^last_checkpoint_position/ { print $2 }' |
506 #define OBD_FAIL_LFSCK_DELAY1 0x1600
507 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
508 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
510 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
511 [ "$STATUS" == "scanning-phase1" ] ||
512 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
514 local POS1=$($SHOW_NAMESPACE |
515 awk '/^latest_start_position/ { print $2 }' |
517 [ $POS0 -lt $POS1 ] ||
518 error "(7) Expect larger than: $POS0, but got $POS1"
520 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
521 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
522 mdd.${MDT_DEV}.lfsck_namespace |
523 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
525 error "(8) unexpected status"
528 run_test 6a "LFSCK resumes from last checkpoint (1)"
533 #define OBD_FAIL_LFSCK_DELAY2 0x1601
534 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
535 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
537 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
538 [ "$STATUS" == "scanning-phase1" ] ||
539 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
541 # Sleep 5 sec to guarantee that we are in the directory scanning
543 # Fail the LFSCK to guarantee there is at least one checkpoint
544 #define OBD_FAIL_LFSCK_FATAL2 0x1609
545 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
546 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
547 mdd.${MDT_DEV}.lfsck_namespace |
548 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
550 error "(4) unexpected status"
553 local O_POS0=$($SHOW_NAMESPACE |
554 awk '/^last_checkpoint_position/ { print $2 }' |
557 local D_POS0=$($SHOW_NAMESPACE |
558 awk '/^last_checkpoint_position/ { print $4 }')
560 #define OBD_FAIL_LFSCK_DELAY2 0x1601
561 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
562 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
564 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
565 [ "$STATUS" == "scanning-phase1" ] ||
566 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
568 local O_POS1=$($SHOW_NAMESPACE |
569 awk '/^latest_start_position/ { print $2 }' |
571 local D_POS1=$($SHOW_NAMESPACE |
572 awk '/^latest_start_position/ { print $4 }')
574 if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
575 [ $O_POS0 -lt $O_POS1 ] ||
576 error "(7.1) $O_POS1 is not larger than $O_POS0"
578 [ $D_POS0 -lt $D_POS1 ] ||
579 error "(7.2) $D_POS1 is not larger than $D_POS0"
582 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
583 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
584 mdd.${MDT_DEV}.lfsck_namespace |
585 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
587 error "(8) unexpected status"
590 run_test 6b "LFSCK resumes from last checkpoint (2)"
597 #define OBD_FAIL_LFSCK_DELAY2 0x1601
598 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
599 $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
601 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
602 [ "$STATUS" == "scanning-phase1" ] ||
603 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
605 # Sleep 3 sec to guarantee at least one object processed by LFSCK
607 echo "stop $SINGLEMDS"
608 stop $SINGLEMDS > /dev/null || error "(4) Fail to stop MDS!"
610 echo "start $SINGLEMDS"
611 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
612 error "(5) Fail to start MDS!"
614 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
615 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
616 mdd.${MDT_DEV}.lfsck_namespace |
617 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
619 error "(6) unexpected status"
622 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
628 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
629 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
630 for ((i = 0; i < 20; i++)); do
631 touch $DIR/$tdir/dummy${i}
634 #define OBD_FAIL_LFSCK_DELAY3 0x1602
635 do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
636 $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
637 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
638 mdd.${MDT_DEV}.lfsck_namespace |
639 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
641 error "(4) unexpected status"
644 echo "stop $SINGLEMDS"
645 stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
647 echo "start $SINGLEMDS"
648 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
649 error "(6) Fail to start MDS!"
651 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
652 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
653 mdd.${MDT_DEV}.lfsck_namespace |
654 awk '/^status/ { print \\\$2 }'" "completed" 30 || {
656 error "(7) unexpected status"
659 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
664 formatall > /dev/null
670 local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
671 [ "$STATUS" == "init" ] ||
672 error "(2) Expect 'init', but got '$STATUS'"
674 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
675 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
676 mkdir $DIR/$tdir/crashed
678 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
679 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
680 for ((i = 0; i < 5; i++)); do
681 touch $DIR/$tdir/dummy${i}
684 umount_client $MOUNT || error "(3) Fail to stop client!"
686 #define OBD_FAIL_LFSCK_DELAY2 0x1601
687 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
688 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
690 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
691 [ "$STATUS" == "scanning-phase1" ] ||
692 error "(5) Expect 'scanning-phase1', but got '$STATUS'"
694 $STOP_LFSCK || error "(6) Fail to stop LFSCK!"
696 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
697 [ "$STATUS" == "stopped" ] ||
698 error "(7) Expect 'stopped', but got '$STATUS'"
700 $START_NAMESPACE || error "(8) Fail to start LFSCK for namespace!"
702 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
703 [ "$STATUS" == "scanning-phase1" ] ||
704 error "(9) Expect 'scanning-phase1', but got '$STATUS'"
706 #define OBD_FAIL_LFSCK_FATAL2 0x1609
707 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
708 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
709 mdd.${MDT_DEV}.lfsck_namespace |
710 awk '/^status/ { print \\\$2 }'" "failed" 6 || {
712 error "(10) unexpected status"
715 #define OBD_FAIL_LFSCK_DELAY1 0x1600
716 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
717 $START_NAMESPACE || error "(11) Fail to start LFSCK for namespace!"
719 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
720 [ "$STATUS" == "scanning-phase1" ] ||
721 error "(12) Expect 'scanning-phase1', but got '$STATUS'"
723 #define OBD_FAIL_LFSCK_CRASH 0x160a
724 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160a
727 echo "stop $SINGLEMDS"
728 stop $SINGLEMDS > /dev/null || error "(13) Fail to stop MDS!"
730 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
731 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
733 echo "start $SINGLEMDS"
734 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
735 error "(14) Fail to start MDS!"
737 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
738 [ "$STATUS" == "crashed" ] ||
739 error "(15) Expect 'crashed', but got '$STATUS'"
741 #define OBD_FAIL_LFSCK_DELAY2 0x1601
742 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
743 $START_NAMESPACE || error "(16) Fail to start LFSCK for namespace!"
745 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
746 [ "$STATUS" == "scanning-phase1" ] ||
747 error "(17) Expect 'scanning-phase1', but got '$STATUS'"
749 echo "stop $SINGLEMDS"
750 stop $SINGLEMDS > /dev/null || error "(18) Fail to stop MDS!"
752 #define OBD_FAIL_LFSCK_NO_AUTO 0x160b
753 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160b
755 echo "start $SINGLEMDS"
756 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
757 error "(19) Fail to start MDS!"
759 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
760 [ "$STATUS" == "paused" ] ||
761 error "(20) Expect 'paused', but got '$STATUS'"
763 #define OBD_FAIL_LFSCK_DELAY3 0x1602
764 do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
766 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
767 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
768 mdd.${MDT_DEV}.lfsck_namespace |
769 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
771 error "(22) unexpected status"
774 local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
775 [ "$FLAGS" == "scanned-once,inconsistent" ] ||
776 error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
778 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
779 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
780 mdd.${MDT_DEV}.lfsck_namespace |
781 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
783 error "(24) unexpected status"
786 FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
787 [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
789 run_test 8 "LFSCK state machine"
792 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
793 skip "Testing on UP system, the speed may be inaccurate."
799 local BASE_SPEED1=100
801 $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
804 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
805 [ "$STATUS" == "scanning-phase1" ] ||
806 error "(3) Expect 'scanning-phase1', but got '$STATUS'"
808 local SPEED=$($SHOW_NAMESPACE |
809 awk '/^average_speed_phase1/ { print $2 }')
811 # There may be time error, normally it should be less than 2 seconds.
812 # We allow another 20% schedule error.
814 # MAX_MARGIN = 1.2 = 12 / 10
815 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
816 RUN_TIME1 * 12 / 10))
817 [ $SPEED -lt $MAX_SPEED ] ||
818 error "(4) Got speed $SPEED, expected less than $MAX_SPEED"
821 local BASE_SPEED2=300
823 do_facet $SINGLEMDS \
824 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
827 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase1/ { print $2 }')
828 # MIN_MARGIN = 0.8 = 8 / 10
829 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
830 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
831 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
832 # Account for slow ZFS performance - LU-4934 (string compare, not -eq)
# "-eq" is an integer test: on a fstype string it fails with "integer
# expression expected", so the ZFS waiver below never took effect.
833 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" = "zfs" ] ||
834 error "(5) Got speed $SPEED, expected more than $MIN_SPEED"
836 # MAX_MARGIN = 1.2 = 12 / 10
837 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
838 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
839 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
840 [ $SPEED -lt $MAX_SPEED ] ||
841 error "(6) Got speed $SPEED, expected less than $MAX_SPEED"
843 do_facet $SINGLEMDS \
844 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
846 wait_update_facet $SINGLEMDS \
847 "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
848 awk '/^status/ { print \\\$2 }'" "completed" 30 ||
849 error "(7) Failed to get expected 'completed'"
851 run_test 9a "LFSCK speed control (1)"
854 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
855 skip "Testing on UP system, the speed may be inaccurate."
861 echo "Preparing another 50 * 50 files (with error) at $(date)."
862 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
863 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
864 createmany -d $DIR/$tdir/d 50
865 createmany -m $DIR/$tdir/f 50
866 for ((i = 0; i < 50; i++)); do
867 createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
870 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c
871 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
872 $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
873 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
874 mdd.${MDT_DEV}.lfsck_namespace |
875 awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
877 error "(5) unexpected status"
880 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
881 echo "Prepared at $(date)."
885 $START_NAMESPACE -s $BASE_SPEED1 || error "(6) Fail to start LFSCK!"
888 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
889 [ "$STATUS" == "scanning-phase2" ] ||
890 error "(7) Expect 'scanning-phase2', but got '$STATUS'"
892 local SPEED=$($SHOW_NAMESPACE |
893 awk '/^average_speed_phase2/ { print $2 }')
894 # There may be time error, normally it should be less than 2 seconds.
895 # We allow another 20% schedule error.
897 # MAX_MARGIN = 1.2 = 12 / 10
898 local MAX_SPEED=$((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) / \
899 RUN_TIME1 * 12 / 10))
900 [ $SPEED -lt $MAX_SPEED ] ||
901 error "(8) Got speed $SPEED, expected less than $MAX_SPEED"
904 local BASE_SPEED2=150
906 do_facet $SINGLEMDS \
907 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit $BASE_SPEED2
910 SPEED=$($SHOW_NAMESPACE | awk '/^average_speed_phase2/ { print $2 }')
911 # MIN_MARGIN = 0.8 = 8 / 10
912 local MIN_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
913 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
914 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
# The minimum-speed check is waived on ZFS. The fstype is a string, so it
# must be compared with "=", not the integer operator "-eq" (which always
# failed with "integer expression expected").
915 [ $SPEED -gt $MIN_SPEED ] || [ "$(facet_fstype $SINGLEMDS)" = "zfs" ] ||
916 error "(9) Got speed $SPEED, expected more than $MIN_SPEED"
918 # MAX_MARGIN = 1.2 = 12 / 10
919 MAX_SPEED=$(((BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
920 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
921 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
922 [ $SPEED -lt $MAX_SPEED ] ||
923 error "(10) Got speed $SPEED, expected less than $MAX_SPEED"
925 do_facet $SINGLEMDS \
926 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
927 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
928 mdd.${MDT_DEV}.lfsck_namespace |
929 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
931 error "(11) unexpected status"
934 run_test 9b "LFSCK speed control (2)"
938 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
939 skip "lookup(..)/linkea on ZFS issue" && return
943 echo "Preparing more files with error at $(date)."
944 #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603
945 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
947 for ((i = 0; i < 1000; i = $((i+2)))); do
948 mkdir -p $DIR/$tdir/d${i}
949 touch $DIR/$tdir/f${i}
950 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
953 #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604
954 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
956 for ((i = 1; i < 1000; i = $((i+2)))); do
957 mkdir -p $DIR/$tdir/d${i}
958 touch $DIR/$tdir/f${i}
959 createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
962 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
963 echo "Prepared at $(date)."
965 ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
968 mount_client $MOUNT || error "(3) Fail to start client!"
970 $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
973 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
974 [ "$STATUS" == "scanning-phase1" ] ||
975 error "(6) Expect 'scanning-phase1', but got '$STATUS'"
977 ls -ailR $MOUNT > /dev/null || error "(7) Fail to ls!"
979 touch $DIR/$tdir/d198/a0 || error "(8) Fail to touch!"
981 mkdir $DIR/$tdir/d199/a1 || error "(9) Fail to mkdir!"
983 unlink $DIR/$tdir/f200 || error "(10) Fail to unlink!"
985 rm -rf $DIR/$tdir/d201 || error "(11) Fail to rmdir!"
987 mv $DIR/$tdir/f202 $DIR/$tdir/d203/ || error "(12) Fail to rename!"
989 ln $DIR/$tdir/f204 $DIR/$tdir/d205/a3 || error "(13) Fail to hardlink!"
991 ln -s $DIR/$tdir/d206 $DIR/$tdir/d207/a4 ||
992 error "(14) Fail to softlink!"
994 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
995 [ "$STATUS" == "scanning-phase1" ] ||
996 error "(15) Expect 'scanning-phase1', but got '$STATUS'"
998 do_facet $SINGLEMDS \
999 $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
1000 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1001 mdd.${MDT_DEV}.lfsck_namespace |
1002 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1004 error "(16) unexpected status"
1007 run_test 10 "System is available during LFSCK scanning"
1010 ost_remove_lastid() {
1013 local rcmd="do_facet ost${ost}"
1015 echo "remove LAST_ID on ost${ost}: idx=${idx}"
1017 # step 1: local mount
1018 mount_fstype ost${ost} || return 1
1019 # step 2: remove the specified LAST_ID
1020 ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0}
1022 unmount_fstype ost${ost} || return 2
# Test 11a ("LFSCK can rebuild lost last_id").
# Creates 64 single-stripe files on OST index 0, removes LAST_ID on ost1 via
# ost_remove_lastid, restarts ost1 and runs layout LFSCK on the OST. The
# lfsck_layout "flags" line must first show "crashed_lastid" and must be
# empty once the status reaches "completed".
# NOTE(review): the function header/footer and some short statements are not
# visible in this view; only the visible statements are described.
1026 check_mount_and_prep
1027 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1028 createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
1033 ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID"
1035 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
1036 error "(2) Fail to start ost1"
# Arm the delay fail_loc before starting LFSCK; presumably this keeps the
# scan from finishing before the flags can be sampled -- confirm.
1038 #define OBD_FAIL_LFSCK_DELAY4 0x160e
1039 do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e
1041 echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)"
1042 $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!"
# Wait for the "flags" field to report crashed_lastid (last argument 60 is
# presumably the maximum wait in seconds).
1044 wait_update_facet ost1 "$LCTL get_param -n \
1045 obdfilter.${OST_DEV}.lfsck_layout |
1046 awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || {
1048 error "(5) unexpected status"
# Clear the fail_loc so the scan can run to completion.
1051 do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0
1053 wait_update_facet ost1 "$LCTL get_param -n \
1054 obdfilter.${OST_DEV}.lfsck_layout |
1055 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1057 error "(6) unexpected status"
1060 echo "the LAST_ID(s) should have been rebuilt"
# After completion the flags field is expected to be empty again.
1061 FLAGS=$($SHOW_LAYOUT_ON_OST | awk '/^flags/ { print $2 }')
1062 [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'"
1064 run_test 11a "LFSCK can rebuild lost last_id"
# Test 11b ("LFSCK can rebuild crashed last_id").
# With fail_loc 0x160d set on ost1, 64 files are created and the reported
# last_id (the entry matching 0x100000000) is saved as lastid1. After ost1 is
# restarted, the value it reports (lastid2) must be smaller than lastid1.
# Layout LFSCK is then run on the OST, ost1 is restarted once more, and the
# reported last_id must become equal to lastid1.
# NOTE(review): the function header/footer and some short statements are not
# visible in this view; only the visible statements are described.
1067 check_mount_and_prep
1068 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1070 echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk"
1071 #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d
1072 do_facet ost1 $LCTL set_param fail_loc=0x160d
1073 createmany -o $DIR/$tdir/f 64
# Value after the ':' on the last_id line that matches 0x100000000.
1074 local lastid1=$(do_facet ost1 "lctl get_param -n \
1075 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1076 awk -F: '{ print $2 }')
1078 umount_client $MOUNT
1079 stop ost1 || error "(1) Fail to stop ost1"
1081 #define OBD_FAIL_OST_ENOSPC 0x215
1082 do_facet ost1 $LCTL set_param fail_loc=0x215
1084 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1085 error "(2) Fail to start ost1"
# Poll (at most 60 iterations) until the restarted OST reports a last_id.
# NOTE(review): no `local` for i / lastid2 is visible here -- confirm whether
# they are declared on a line outside this view.
1087 for ((i = 0; i < 60; i++)); do
1088 lastid2=$(do_facet ost1 "lctl get_param -n \
1089 obdfilter.${ost1_svc}.last_id" | grep 0x100000000 |
1090 awk -F: '{ print $2 }')
1091 [ ! -z $lastid2 ] && break;
1095 echo "the on-disk LAST_ID should be smaller than the expected one"
1096 [ $lastid1 -gt $lastid2 ] ||
1097 error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
1099 echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
1100 $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
1102 wait_update_facet ost1 "$LCTL get_param -n \
1103 obdfilter.${OST_DEV}.lfsck_layout |
1104 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1106 error "(6) unexpected status"
# Restart ost1 so that the value read back below comes from a fresh start.
1109 stop ost1 || error "(7) Fail to stop ost1"
1111 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1112 error "(8) Fail to start ost1"
1114 echo "the on-disk LAST_ID should have been rebuilt"
1115 wait_update_facet ost1 "$LCTL get_param -n \
1116 obdfilter.${ost1_svc}.last_id | grep 0x100000000 |
1117 awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
1118 $LCTL get_param -n obdfilter.${ost1_svc}.last_id
1119 error "(9) expect lastid1 0x100000000:$lastid1"
# Clean up: clear the fail_loc and stop everything.
1122 do_facet ost1 $LCTL set_param fail_loc=0
1123 stopall || error "(10) Fail to stopall"
1125 run_test 11b "LFSCK can rebuild crashed last_id"
# Test 12 ("single command to trigger LFSCK on all devices").
# Requires at least 2 MDSes. For both namespace and layout LFSCK it starts
# the scan from mds1 with -A and a speed limit (-s 1), checks that every MDS
# reports "scanning-phase1", stops it with a single lfsck_stop -A, checks for
# "stopped" (for layout also on every OST), then restarts without a speed
# limit (-s 0) and waits for "completed" on every MDS.
# NOTE(review): the function header/footer and the loop terminators are not
# visible in this view; only the visible statements are described.
# NOTE(review): `exit 0` leaves the current shell rather than just this test;
# confirm whether `return` was intended.
1128 [ $MDSCOUNT -lt 2 ] &&
1129 skip "We need at least 2 MDSes for test_12" && exit 0
1131 check_mount_and_prep
# One directory per MDT (index k-1), each populated with 100 files.
1132 for k in $(seq $MDSCOUNT); do
1133 $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k}
1134 createmany -o $DIR/$tdir/${k}/f 100 ||
1135 error "(0) Fail to create 100 files."
1138 echo "Start namespace LFSCK on all targets by single command (-s 1)."
1139 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1140 -s 1 -r || error "(2) Fail to start LFSCK on all devices!"
1142 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1143 for k in $(seq $MDSCOUNT); do
1144 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1145 mdd.$(facet_svc mds${k}).lfsck_namespace |
1146 awk '/^status/ { print $2 }')
1147 [ "$STATUS" == "scanning-phase1" ] ||
1148 error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1151 echo "Stop namespace LFSCK on all targets by single lctl command."
1152 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1153 error "(4) Fail to stop LFSCK on all devices!"
1155 echo "All the LFSCK targets should be in 'stopped' status."
1156 for k in $(seq $MDSCOUNT); do
1157 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1158 mdd.$(facet_svc mds${k}).lfsck_namespace |
1159 awk '/^status/ { print $2 }')
1160 [ "$STATUS" == "stopped" ] ||
1161 error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
1164 echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
1165 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
1166 -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
1168 echo "All the LFSCK targets should be in 'completed' status."
1169 for k in $(seq $MDSCOUNT); do
1170 wait_update_facet mds${k} "$LCTL get_param -n \
1171 mdd.$(facet_svc mds${k}).lfsck_namespace |
1172 awk '/^status/ { print \\\$2 }'" "completed" 8 ||
1173 error "(7) MDS${k} is not the expected 'completed'"
# Same start / stop / restart sequence again, this time for layout LFSCK.
1176 echo "Start layout LFSCK on all targets by single command (-s 1)."
1177 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1178 -s 1 -r || error "(8) Fail to start LFSCK on all devices!"
1180 echo "All the LFSCK targets should be in 'scanning-phase1' status."
1181 for k in $(seq $MDSCOUNT); do
1182 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1183 mdd.$(facet_svc mds${k}).lfsck_layout |
1184 awk '/^status/ { print $2 }')
1185 [ "$STATUS" == "scanning-phase1" ] ||
1186 error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
1189 echo "Stop layout LFSCK on all targets by single lctl command."
1190 do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
1191 error "(10) Fail to stop LFSCK on all devices!"
1193 echo "All the LFSCK targets should be in 'stopped' status."
1194 for k in $(seq $MDSCOUNT); do
1195 local STATUS=$(do_facet mds${k} $LCTL get_param -n \
1196 mdd.$(facet_svc mds${k}).lfsck_layout |
1197 awk '/^status/ { print $2 }')
1198 [ "$STATUS" == "stopped" ] ||
1199 error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
# For layout LFSCK the OSTs are checked for the 'stopped' status as well.
1202 for k in $(seq $OSTCOUNT); do
1203 local STATUS=$(do_facet ost${k} $LCTL get_param -n \
1204 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1205 awk '/^status/ { print $2 }')
1206 [ "$STATUS" == "stopped" ] ||
1207 error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
1210 echo "Re-start layout LFSCK on all targets by single command (-s 0)."
1211 do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
1212 -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
1214 echo "All the LFSCK targets should be in 'completed' status."
1215 for k in $(seq $MDSCOUNT); do
1216 # The LFSCK status query interval is 30 seconds. In case some
1217 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
1218 # guarantee that the status has been synced up.
1219 wait_update_facet mds${k} "$LCTL get_param -n \
1220 mdd.$(facet_svc mds${k}).lfsck_layout |
1221 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1222 error "(14) MDS${k} is not the expected 'completed'"
1225 run_test 12 "single command to trigger LFSCK on all devices"
# Test 13 ("LFSCK can repair crashed lmm_oi").
# Creates 32 files while fail_loc 0x160f is set on the MDS, runs layout
# LFSCK, waits for "completed" and expects the repaired_others counter in
# lfsck_layout to be exactly 32.
# NOTE(review): the function header/footer and some short statements are not
# visible in this view; only the visible statements are described.
1229 echo "The lmm_oi in layout EA should be consistent with the MDT-object"
1230 echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the"
1231 echo "MDT-object FID."
1234 check_mount_and_prep
1236 echo "Inject failure stub to simulate bad lmm_oi"
1237 #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f
1238 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f
1239 createmany -o $DIR/$tdir/f 32
1240 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1242 echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them"
1243 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1245 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1246 mdd.${MDT_DEV}.lfsck_layout |
1247 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1249 error "(2) unexpected status"
# One repair is expected for each of the 32 files created above.
1252 local repaired=$($SHOW_LAYOUT |
1253 awk '/^repaired_others/ { print $2 }')
1254 [ $repaired -eq 32 ] ||
1255 error "(3) Fail to repair crashed lmm_oi: $repaired"
1257 run_test 13 "LFSCK can repair crashed lmm_oi"
# Test 14 ("LFSCK can repair MDT-object with dangling reference").
# With fail_loc 0x1610 set on ost1, creates (precreated count + 32) files,
# then creates further files to use up the remaining pre-created objects.
# `ls` on the directory must fail. A first layout LFSCK run (-r) must report
# repaired_dangling >= 32 while `ls` still fails; a second run with -c must
# report >= 32 again, after which `ls` must succeed.
# NOTE(review): the function header/footer and some short statements are not
# visible in this view; only the visible statements are described.
1261 echo "The OST-object referenced by the MDT-object should be there;"
1262 echo "otherwise, the LFSCK should re-create the missed OST-object."
1265 check_mount_and_prep
1266 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1268 local count=$(precreated_ost_obj_count 0 0)
1270 echo "Inject failure stub to simulate dangling referenced MDT-object"
1271 #define OBD_FAIL_LFSCK_DANGLING 0x1610
1272 do_facet ost1 $LCTL set_param fail_loc=0x1610
1273 createmany -o $DIR/$tdir/f $((count + 32))
1274 do_facet ost1 $LCTL set_param fail_loc=0
1276 start_full_debug_logging
1278 # exhaust other pre-created dangling cases
1279 count=$(precreated_ost_obj_count 0 0)
1280 createmany -o $DIR/$tdir/a $count ||
1281 error "(0) Fail to create $count files."
1283 echo "'ls' should fail because of dangling referenced MDT-object"
1284 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
# First pass: started without -c.
1286 echo "Trigger layout LFSCK to find out dangling reference"
1287 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1289 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1290 mdd.${MDT_DEV}.lfsck_layout |
1291 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1293 error "(3) unexpected status"
1296 local repaired=$($SHOW_LAYOUT |
1297 awk '/^repaired_dangling/ { print $2 }')
1298 [ $repaired -ge 32 ] ||
1299 error "(4) Fail to repair dangling reference: $repaired"
1301 echo "'ls' should fail because it will not repair dangling by default"
1302 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail."
# Second pass: started with -c, after which `ls` is expected to work.
1304 echo "Trigger layout LFSCK to repair dangling reference"
1305 $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
1307 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1308 mdd.${MDT_DEV}.lfsck_layout |
1309 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1311 error "(7) unexpected status"
1314 repaired=$($SHOW_LAYOUT |
1315 awk '/^repaired_dangling/ { print $2 }')
1316 [ $repaired -ge 32 ] ||
1317 error "(8) Fail to repair dangling reference: $repaired"
1319 echo "'ls' should success after layout LFSCK repairing"
1320 ls -ail $DIR/$tdir > /dev/null || error "(9) ls should success."
1321 stop_full_debug_logging
1323 run_test 14 "LFSCK can repair MDT-object with dangling reference"
# Test 15a ("LFSCK can repair unmatched MDT-object/OST-object pairs (1)").
# Writes 1 MiB to f0 while fail_loc 0x1611 is set on ost1, runs layout
# LFSCK, waits for "completed" and expects repaired_unmatched_pair == 1.
# NOTE(review): the function header/footer and some short statements are not
# visible in this view; only the visible statements are described.
1327 echo "If the OST-object referenced by the MDT-object back points"
1328 echo "to some non-exist MDT-object, then the LFSCK should repair"
1329 echo "the OST-object to back point to the right MDT-object."
1332 check_mount_and_prep
1333 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1335 echo "Inject failure stub to make the OST-object to back point to"
1336 echo "non-exist MDT-object."
1337 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
1339 do_facet ost1 $LCTL set_param fail_loc=0x1611
1340 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
# The osc locks are dropped while the fail_loc is still armed.
1341 cancel_lru_locks osc
1342 do_facet ost1 $LCTL set_param fail_loc=0
1344 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1345 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1347 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1348 mdd.${MDT_DEV}.lfsck_layout |
1349 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1351 error "(2) unexpected status"
# Exactly one file (f0) was written under the fail_loc.
1354 local repaired=$($SHOW_LAYOUT |
1355 awk '/^repaired_unmatched_pair/ { print $2 }')
1356 [ $repaired -eq 1 ] ||
1357 error "(3) Fail to repair unmatched pair: $repaired"
1359 run_test 15a "LFSCK can repair unmatched MDT-object/OST-object pairs (1)"
# Test 15b ("LFSCK can repair unmatched MDT-object/OST-object pairs (2)").
# Writes a "guard" file normally, then writes f0 while fail_loc 0x1612 is
# set on ost1, runs layout LFSCK, waits for "completed" and expects
# repaired_unmatched_pair == 1.
# NOTE(review): the function header/footer and some short statements are not
# visible in this view; only the visible statements are described.
1363 echo "If the OST-object referenced by the MDT-object back points"
1364 echo "to other MDT-object that doesn't recognize the OST-object,"
1365 echo "then the LFSCK should repair it to back point to the right"
1366 echo "MDT-object (the first one)."
1369 check_mount_and_prep
1370 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# "guard" is created before the fail_loc is armed.
1371 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1372 cancel_lru_locks osc
1374 echo "Inject failure stub to make the OST-object to back point to"
1375 echo "other MDT-object"
1377 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
1378 do_facet ost1 $LCTL set_param fail_loc=0x1612
1379 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1380 cancel_lru_locks osc
1381 do_facet ost1 $LCTL set_param fail_loc=0
1383 echo "Trigger layout LFSCK to find out unmatched pairs and fix them"
1384 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1386 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1387 mdd.${MDT_DEV}.lfsck_layout |
1388 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1390 error "(2) unexpected status"
# Only f0 was written under the fail_loc, so one repair is expected.
1393 local repaired=$($SHOW_LAYOUT |
1394 awk '/^repaired_unmatched_pair/ { print $2 }')
1395 [ $repaired -eq 1 ] ||
1396 error "(3) Fail to repair unmatched pair: $repaired"
1398 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
# Test 16 ("LFSCK can repair inconsistent MDT-object/OST-object owner").
# Writes f0, then changes its owner with `chown 1.1` while fail_loc 0x1613
# is set on the MDS, runs layout LFSCK, waits for "completed" and expects
# repaired_inconsistent_owner == 1.
# NOTE(review): the function header/footer and some short statements are not
# visible in this view; only the visible statements are described.
1402 echo "If the OST-object's owner information does not match the owner"
1403 echo "information stored in the MDT-object, then the LFSCK trust the"
1404 echo "MDT-object and update the OST-object's owner information."
1407 check_mount_and_prep
1408 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1409 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
1410 cancel_lru_locks osc
1412 echo "Inject failure stub to skip OST-object owner changing"
1413 #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613
1414 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
# The ownership change is made only while the fail_loc is armed.
1415 chown 1.1 $DIR/$tdir/f0
1416 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1418 echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
1421 $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!"
1423 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1424 mdd.${MDT_DEV}.lfsck_layout |
1425 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1427 error "(2) unexpected status"
1430 local repaired=$($SHOW_LAYOUT |
1431 awk '/^repaired_inconsistent_owner/ { print $2 }')
1432 [ $repaired -eq 1 ] ||
1433 error "(3) Fail to repair inconsistent owner: $repaired"
1435 run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
# Test 17 ("LFSCK can repair multiple references").
# With fail_loc 0x1614 set on the MDS, writes a 1 MiB "guard" file and
# creates f0; f0 must then show guard's size (1048576). After layout LFSCK
# reports repaired_multiple_referenced == 1, writing 2 MiB to f0 must leave
# the size of guard at 1048576.
# NOTE(review): the function header/footer and some short statements are not
# visible in this view; only the visible statements are described.
1439 echo "If more than one MDT-objects reference the same OST-object,"
1440 echo "and the OST-object only recognizes one MDT-object, then the"
1441 echo "LFSCK should create new OST-objects for such non-recognized"
1445 check_mount_and_prep
1446 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1448 echo "Inject failure stub to make two MDT-objects to refernce"
1449 echo "the OST-object"
1451 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614
1452 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614
1454 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
1455 cancel_lru_locks osc
1457 createmany -o $DIR/$tdir/f 1
1459 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
1461 cancel_lru_locks mdc
1462 cancel_lru_locks osc
1464 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
# f0 was never written to, yet it is expected to report guard's 1 MiB size.
1465 local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
1466 [ $size -eq 1048576 ] ||
1467 error "(1) f0 (wrong) size should be 1048576, but got $size"
1469 echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
1472 $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
1474 wait_update_facet $SINGLEMDS "$LCTL get_param -n \
1475 mdd.${MDT_DEV}.lfsck_layout |
1476 awk '/^status/ { print \\\$2 }'" "completed" 6 || {
1478 error "(3) unexpected status"
1481 local repaired=$($SHOW_LAYOUT |
1482 awk '/^repaired_multiple_referenced/ { print $2 }')
1483 [ $repaired -eq 1 ] ||
1484 error "(4) Fail to repair multiple references: $repaired"
1486 echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
# Growing f0 to 2 MiB must no longer affect the size reported for guard.
1487 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
1488 error "(5) Fail to write f0."
1489 size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
1490 [ $size -eq 1048576 ] ||
1491 error "(6) guard size should be 1048576, but got $size"
1493 run_test 17 "LFSCK can repair multiple references"
# Test 18a ("Find out orphan OST-object and repair it (1)").
# Writes a1/f1 (MDT index 0, 1 stripe) and, when MDSCOUNT >= 2, a2/f2 (MDT
# index 1, 2 stripes). With fail_loc 0x1615 set on the MDS(es) the files are
# chown'ed, after which their reported size must differ from the saved one.
# Layout LFSCK is started with -r -o; every MDS and OST must reach
# "completed", repaired_orphan must be 1 on mds1 (and 2 on mds2), and the
# file sizes must match the saved size again.
# NOTE(review): the function header/footer, the `fi`/`done` terminators and
# some short statements are not visible in this view.
1497 echo "The target MDT-object is there, but related stripe information"
1498 echo "is lost or partly lost. The LFSCK should regenerate the missed"
1499 echo "layout EA entries."
1502 check_mount_and_prep
1503 $LFS mkdir -i 0 $DIR/$tdir/a1
1504 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1505 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
# Field 6 of `ls -il` output is the size column (inode number is field 1).
1507 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1509 $LFS path2fid $DIR/$tdir/a1/f1
1510 $LFS getstripe $DIR/$tdir/a1/f1
1512 if [ $MDSCOUNT -ge 2 ]; then
1513 $LFS mkdir -i 1 $DIR/$tdir/a2
1514 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1515 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1516 $LFS path2fid $DIR/$tdir/a2/f2
1517 $LFS getstripe $DIR/$tdir/a2/f2
1520 cancel_lru_locks osc
1522 echo "Inject failure, to make the MDT-object lost its layout EA"
1523 #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615
1524 do_facet mds1 $LCTL set_param fail_loc=0x1615
1525 chown 1.1 $DIR/$tdir/a1/f1
1527 if [ $MDSCOUNT -ge 2 ]; then
1528 do_facet mds2 $LCTL set_param fail_loc=0x1615
1529 chown 1.1 $DIR/$tdir/a2/f2
1535 do_facet mds1 $LCTL set_param fail_loc=0
1536 if [ $MDSCOUNT -ge 2 ]; then
1537 do_facet mds2 $LCTL set_param fail_loc=0
1540 cancel_lru_locks mdc
1541 cancel_lru_locks osc
1543 echo "The file size should be incorrect since layout EA is lost"
1544 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1545 [ "$cur_size" != "$saved_size" ] ||
1546 error "(1) Expect incorrect file1 size"
1548 if [ $MDSCOUNT -ge 2 ]; then
1549 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1550 [ "$cur_size" != "$saved_size" ] ||
1551 error "(2) Expect incorrect file2 size"
1554 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1555 $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
1557 for k in $(seq $MDSCOUNT); do
1558 # The LFSCK status query interval is 30 seconds. In case some
1559 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
1560 # guarantee that the status has been synced up.
1561 wait_update_facet mds${k} "$LCTL get_param -n \
1562 mdd.$(facet_svc mds${k}).lfsck_layout |
1563 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1564 error "(4) MDS${k} is not the expected 'completed'"
# The OSTs are sampled once, without waiting.
1567 for k in $(seq $OSTCOUNT); do
1568 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1569 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1570 awk '/^status/ { print $2 }')
1571 [ "$cur_status" == "completed" ] ||
1572 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
1575 local repaired=$(do_facet mds1 $LCTL get_param -n \
1576 mdd.$(facet_svc mds1).lfsck_layout |
1577 awk '/^repaired_orphan/ { print $2 }')
1578 [ $repaired -eq 1 ] ||
1579 error "(6.1) Expect 1 fixed on mds1, but got: $repaired"
# f2 was created with two stripes (-c 2), hence two repairs on mds2.
1581 if [ $MDSCOUNT -ge 2 ]; then
1582 repaired=$(do_facet mds2 $LCTL get_param -n \
1583 mdd.$(facet_svc mds2).lfsck_layout |
1584 awk '/^repaired_orphan/ { print $2 }')
1585 [ $repaired -eq 2 ] ||
1586 error "(6.2) Expect 2 fixed on mds2, but got: $repaired"
1589 $LFS path2fid $DIR/$tdir/a1/f1
1590 $LFS getstripe $DIR/$tdir/a1/f1
1592 if [ $MDSCOUNT -ge 2 ]; then
1593 $LFS path2fid $DIR/$tdir/a2/f2
1594 $LFS getstripe $DIR/$tdir/a2/f2
1597 echo "The file size should be correct after layout LFSCK scanning"
1598 cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1599 [ "$cur_size" == "$saved_size" ] ||
1600 error "(7) Expect file1 size $saved_size, but got $cur_size"
1602 if [ $MDSCOUNT -ge 2 ]; then
1603 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1604 [ "$cur_size" == "$saved_size" ] ||
1605 error "(8) Expect file2 size $saved_size, but got $cur_size"
1608 run_test 18a "Find out orphan OST-object and repair it (1)"
# Test 18b ("Find out orphan OST-object and repair it (2)").
# Writes a1/f1 (and a2/f2 when MDSCOUNT >= 2), records their FIDs, then
# removes the files while fail_loc 0x1616 is set on the MDS(es). Layout LFSCK
# (-r -o) must complete on every MDS and OST with repaired_orphan 1 on mds1
# (2 on mds2). The entries named <fid>-R-0 under .lustre/lost+found/MDTxxxx
# are then moved back to their original paths and their sizes are compared
# with the saved size.
# NOTE(review): the function header/footer, the `fi`/`done` terminators and
# some short statements are not visible in this view.
1612 echo "The target MDT-object is lost. The LFSCK should re-create the"
1613 echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should"
1614 echo "can move it back to normal namespace manually."
1617 check_mount_and_prep
1618 $LFS mkdir -i 0 $DIR/$tdir/a1
1619 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1620 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1621 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
# The FID is saved because the lost+found entry name is built from it below.
1622 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
1624 $LFS getstripe $DIR/$tdir/a1/f1
1626 if [ $MDSCOUNT -ge 2 ]; then
1627 $LFS mkdir -i 1 $DIR/$tdir/a2
1628 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1629 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
# NOTE(review): no `local` for fid2 is visible here -- confirm whether it is
# declared on a line outside this view.
1630 fid2=$($LFS path2fid $DIR/$tdir/a2/f2)
1632 $LFS getstripe $DIR/$tdir/a2/f2
1635 cancel_lru_locks osc
1637 echo "Inject failure, to simulate the case of missing the MDT-object"
1638 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1639 do_facet mds1 $LCTL set_param fail_loc=0x1616
1640 rm -f $DIR/$tdir/a1/f1
1642 if [ $MDSCOUNT -ge 2 ]; then
1643 do_facet mds2 $LCTL set_param fail_loc=0x1616
1644 rm -f $DIR/$tdir/a2/f2
1650 do_facet mds1 $LCTL set_param fail_loc=0
1651 if [ $MDSCOUNT -ge 2 ]; then
1652 do_facet mds2 $LCTL set_param fail_loc=0
1655 cancel_lru_locks mdc
1656 cancel_lru_locks osc
1658 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1659 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1661 for k in $(seq $MDSCOUNT); do
1662 # The LFSCK status query interval is 30 seconds. In case some
1663 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
1664 # guarantee that the status has been synced up.
1665 wait_update_facet mds${k} "$LCTL get_param -n \
1666 mdd.$(facet_svc mds${k}).lfsck_layout |
1667 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1668 error "(2) MDS${k} is not the expected 'completed'"
1671 for k in $(seq $OSTCOUNT); do
1672 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1673 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1674 awk '/^status/ { print $2 }')
1675 [ "$cur_status" == "completed" ] ||
1676 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
1679 local repaired=$(do_facet mds1 $LCTL get_param -n \
1680 mdd.$(facet_svc mds1).lfsck_layout |
1681 awk '/^repaired_orphan/ { print $2 }')
1682 [ $repaired -eq 1 ] ||
1683 error "(4.1) Expect 1 fixed on mds1, but got: $repaired"
1685 if [ $MDSCOUNT -ge 2 ]; then
1686 repaired=$(do_facet mds2 $LCTL get_param -n \
1687 mdd.$(facet_svc mds2).lfsck_layout |
1688 awk '/^repaired_orphan/ { print $2 }')
1689 [ $repaired -eq 2 ] ||
1690 error "(4.2) Expect 2 fixed on mds2, but got: $repaired"
1693 echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
1694 mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
1695 error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
1697 if [ $MDSCOUNT -ge 2 ]; then
1698 local name=$MOUNT/.lustre/lost+found/MDT0001/${fid2}-R-0
1699 mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name"
1702 $LFS path2fid $DIR/$tdir/a1/f1
1703 $LFS getstripe $DIR/$tdir/a1/f1
1705 if [ $MDSCOUNT -ge 2 ]; then
1706 $LFS path2fid $DIR/$tdir/a2/f2
1707 $LFS getstripe $DIR/$tdir/a2/f2
1710 echo "The file size should be correct after layout LFSCK scanning"
1711 local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }')
1712 [ "$cur_size" == "$saved_size" ] ||
1713 error "(7) Expect file1 size $saved_size, but got $cur_size"
1715 if [ $MDSCOUNT -ge 2 ]; then
1716 cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }')
1717 [ "$cur_size" == "$saved_size" ] ||
1718 error "(8) Expect file2 size $saved_size, but got $cur_size"
1721 run_test 18b "Find out orphan OST-object and repair it (2)"
# Test 18c ("Find out orphan OST-object and repair it (3)").
# Writes a1/f1 (and a2/f2 when MDSCOUNT >= 2) while fail_loc 0x1617 is set on
# the OST(s), then removes the files while fail_loc 0x1616 is set on the
# MDS(es). Layout LFSCK (-r -o) must complete on every MDS and OST;
# repaired_orphan on mds1 must equal $expected and must be 0 on mds2. Entries
# matching *-N-0 must exist under lost+found/MDT0000 and must not exist under
# lost+found/MDT0001.
# NOTE(review): the function header/footer, the `fi`/`done` terminators and
# the assignment of $expected are not visible in this view.
1725 echo "The target MDT-object is lost, and the OST-object FID is missing."
1726 echo "The LFSCK should re-create the MDT-object with new FID under the "
1727 echo "directory .lustre/lost+found/MDTxxxx."
1730 check_mount_and_prep
1731 $LFS mkdir -i 0 $DIR/$tdir/a1
1732 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1734 echo "Inject failure, to simulate the case of missing parent FID"
1735 #define OBD_FAIL_LFSCK_NOPFID 0x1617
1736 do_facet ost1 $LCTL set_param fail_loc=0x1617
1738 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2
1739 $LFS getstripe $DIR/$tdir/a1/f1
1741 if [ $MDSCOUNT -ge 2 ]; then
1742 $LFS mkdir -i 1 $DIR/$tdir/a2
1743 $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2
1744 do_facet ost2 $LCTL set_param fail_loc=0x1617
1745 dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2
1746 $LFS getstripe $DIR/$tdir/a2/f2
1749 cancel_lru_locks osc
1751 echo "Inject failure, to simulate the case of missing the MDT-object"
1752 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
1753 do_facet mds1 $LCTL set_param fail_loc=0x1616
1754 rm -f $DIR/$tdir/a1/f1
1756 if [ $MDSCOUNT -ge 2 ]; then
1757 do_facet mds2 $LCTL set_param fail_loc=0x1616
1758 rm -f $DIR/$tdir/a2/f2
1764 do_facet mds1 $LCTL set_param fail_loc=0
1765 if [ $MDSCOUNT -ge 2 ]; then
1766 do_facet mds2 $LCTL set_param fail_loc=0
1769 cancel_lru_locks mdc
1770 cancel_lru_locks osc
1772 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1773 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
1775 for k in $(seq $MDSCOUNT); do
1776 # The LFSCK status query interval is 30 seconds. In case some
1777 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
1778 # guarantee that the status has been synced up.
1779 wait_update_facet mds${k} "$LCTL get_param -n \
1780 mdd.$(facet_svc mds${k}).lfsck_layout |
1781 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1782 error "(2) MDS${k} is not the expected 'completed'"
1785 for k in $(seq $OSTCOUNT); do
1786 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1787 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1788 awk '/^status/ { print $2 }')
1789 [ "$cur_status" == "completed" ] ||
1790 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# NOTE(review): the body of this `if` (presumably where $expected is chosen
# according to MDSCOUNT) is not visible in this view -- confirm.
1793 if [ $MDSCOUNT -ge 2 ]; then
1799 local repaired=$(do_facet mds1 $LCTL get_param -n \
1800 mdd.$(facet_svc mds1).lfsck_layout |
1801 awk '/^repaired_orphan/ { print $2 }')
1802 [ $repaired -eq $expected ] ||
1803 error "(4) Expect $expected fixed on mds1, but got: $repaired"
1805 if [ $MDSCOUNT -ge 2 ]; then
1806 repaired=$(do_facet mds2 $LCTL get_param -n \
1807 mdd.$(facet_svc mds2).lfsck_layout |
1808 awk '/^repaired_orphan/ { print $2 }')
1809 [ $repaired -eq 0 ] ||
1810 error "(5) Expect 0 fixed on mds2, but got: $repaired"
# NOTE(review): the echo below announces that stubs should exist under
# MDT0001, but the check that follows fails the test when any *-N-0 entry is
# found there (the error text says the directory "should be empty"). The
# echoed message looks wrong -- confirm.
1813 echo "There should be some stub under .lustre/lost+found/MDT0001/"
1814 ls -ail $MOUNT/.lustre/lost+found/MDT0001/*-N-0 &&
1815 error "(6) .lustre/lost+found/MDT0001/ should be empty"
1817 echo "There should be some stub under .lustre/lost+found/MDT0000/"
1818 ls -ail $MOUNT/.lustre/lost+found/MDT0000/*-N-0 ||
1819 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
1821 run_test 18c "Find out orphan OST-object and repair it (3)"
# Test 18d ("Find out orphan OST-object and repair it (4)").
# Creates a1/f1 ("guard") and a1/f2 ("foo") and saves f2's size. With
# fail_loc 0x1618 set on the MDS, f2 is chown'ed and f1 is removed; after
# setupall, f2's size must differ from the saved one. Layout LFSCK is started
# with -r -o -c while fail_loc 0x1602 (fail_val=5) is set; once mds1 reports
# "scanning-phase2" the fail_loc is cleared. Every MDS and OST must reach
# "completed", repaired_orphan must be 1, and f2 must have its saved size.
# NOTE(review): the function header/footer, the `done` terminators and some
# short statements (e.g. the stopall announced by the echo) are not visible
# in this view.
1825 echo "The target MDT-object layout EA slot is occpuied by some new"
1826 echo "created OST-object when repair dangling reference case. Such"
1827 echo "conflict OST-object has never been modified. Then when found"
1828 echo "the orphan OST-object, LFSCK will replace it with the orphan"
1832 check_mount_and_prep
1834 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1835 echo "guard" > $DIR/$tdir/a1/f1
1836 echo "foo" > $DIR/$tdir/a1/f2
1837 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1838 $LFS path2fid $DIR/$tdir/a1/f1
1839 $LFS getstripe $DIR/$tdir/a1/f1
1840 $LFS path2fid $DIR/$tdir/a1/f2
1841 $LFS getstripe $DIR/$tdir/a1/f2
1842 cancel_lru_locks osc
1844 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1845 echo "to reference the same OST-object (which is f1's OST-obejct)."
1846 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1847 echo "dangling reference case, but f2's old OST-object is there."
1850 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1851 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1852 chown 1.1 $DIR/$tdir/a1/f2
1853 rm -f $DIR/$tdir/a1/f1
1856 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1858 echo "stopall to cleanup object cache"
1861 setupall > /dev/null
1863 echo "The file size should be incorrect since dangling referenced"
1864 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1865 [ "$cur_size" != "$saved_size" ] ||
1866 error "(1) Expect incorrect file2 size"
# The delay fail_loc is armed before LFSCK starts and is cleared only after
# mds1 has been observed in scanning-phase2.
1868 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1869 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
1871 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1872 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1874 wait_update_facet mds1 "$LCTL get_param -n \
1875 mdd.$(facet_svc mds1).lfsck_layout |
1876 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1877 error "(3.0) MDS1 is not the expected 'scanning-phase2'"
1879 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1881 for k in $(seq $MDSCOUNT); do
1882 # The LFSCK status query interval is 30 seconds. In case some
1883 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
1884 # guarantee that the status has been synced up.
1885 wait_update_facet mds${k} "$LCTL get_param -n \
1886 mdd.$(facet_svc mds${k}).lfsck_layout |
1887 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1888 error "(3) MDS${k} is not the expected 'completed'"
1891 for k in $(seq $OSTCOUNT); do
1892 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1893 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1894 awk '/^status/ { print $2 }')
1895 [ "$cur_status" == "completed" ] ||
1896 error "(4) OST${k} Expect 'completed', but got '$cur_status'"
1899 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
1900 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
1901 awk '/^repaired_orphan/ { print $2 }')
1902 [ $repaired -eq 1 ] ||
1903 error "(5) Expect 1 orphan has been fixed, but got: $repaired"
1905 echo "The file size should be correct after layout LFSCK scanning"
1906 cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1907 [ "$cur_size" == "$saved_size" ] ||
1908 error "(6) Expect file2 size $saved_size, but got $cur_size"
# The content is only printed for the log; it is not compared with "foo".
1910 echo "The LFSCK should find back the original data."
1911 cat $DIR/$tdir/a1/f2
1912 $LFS path2fid $DIR/$tdir/a1/f2
1913 $LFS getstripe $DIR/$tdir/a1/f2
1915 run_test 18d "Find out orphan OST-object and repair it (4)"
1919 echo "The target MDT-object layout EA slot is occpuied by some new"
1920 echo "created OST-object when repair dangling reference case. Such"
1921 echo "conflict OST-object has been modified by others. To keep the"
1922 echo "new data, the LFSCK will create a new file to refernece this"
1923 echo "old orphan OST-object."
1926 check_mount_and_prep
1928 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1
1929 echo "guard" > $DIR/$tdir/a1/f1
1930 echo "foo" > $DIR/$tdir/a1/f2
1931 local saved_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1932 $LFS path2fid $DIR/$tdir/a1/f1
1933 $LFS getstripe $DIR/$tdir/a1/f1
1934 $LFS path2fid $DIR/$tdir/a1/f2
1935 $LFS getstripe $DIR/$tdir/a1/f2
1936 cancel_lru_locks osc
1938 echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2"
1939 echo "to reference the same OST-object (which is f1's OST-obejct)."
1940 echo "Then drop $DIR/$tdir/a1/f1 and its OST-object, so f2 becomes"
1941 echo "dangling reference case, but f2's old OST-object is there."
1944 #define OBD_FAIL_LFSCK_CHANGE_STRIPE 0x1618
1945 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1618
1946 chown 1.1 $DIR/$tdir/a1/f2
1947 rm -f $DIR/$tdir/a1/f1
1950 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
1952 echo "stopall to cleanup object cache"
1955 setupall > /dev/null
1957 echo "The file size should be incorrect since dangling referenced"
1958 local cur_size=$(ls -il $DIR/$tdir/a1/f2 | awk '{ print $6 }')
1959 [ "$cur_size" != "$saved_size" ] ||
1960 error "(1) Expect incorrect file2 size"
1962 #define OBD_FAIL_LFSCK_DELAY3 0x1602
1963 do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
1965 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
1966 $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
1968 wait_update_facet mds1 "$LCTL get_param -n \
1969 mdd.$(facet_svc mds1).lfsck_layout |
1970 awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
1971 error "(3) MDS1 is not the expected 'scanning-phase2'"
1973 # to guarantee all updates are synced.
1977 echo "Write new data to f2 to modify the new created OST-object."
1978 echo "dummy" >> $DIR/$tdir/a1/f2
1980 do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
1982 for k in $(seq $MDSCOUNT); do
1983 # The LFSCK status query interval is 30 seconds. In case some
1984 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
1985 # guarantee that the status has been synced up.
1986 wait_update_facet mds${k} "$LCTL get_param -n \
1987 mdd.$(facet_svc mds${k}).lfsck_layout |
1988 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
1989 error "(4) MDS${k} is not the expected 'completed'"
1992 for k in $(seq $OSTCOUNT); do
1993 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
1994 obdfilter.$(facet_svc ost${k}).lfsck_layout |
1995 awk '/^status/ { print $2 }')
1996 [ "$cur_status" == "completed" ] ||
1997 error "(5) OST${k} Expect 'completed', but got '$cur_status'"
2000 local repaired=$(do_facet $SINGLEMDS $LCTL get_param -n \
2001 mdd.$(facet_svc $SINGLEMDS).lfsck_layout |
2002 awk '/^repaired_orphan/ { print $2 }')
2003 [ $repaired -eq 1 ] ||
2004 error "(6) Expect 1 orphan has been fixed, but got: $repaired"
2006 echo "There should be stub file under .lustre/lost+found/MDT0000/"
2007 local cname=$(ls $MOUNT/.lustre/lost+found/MDT0000/*-C-0)
2009 error "(7) .lustre/lost+found/MDT0000/ should not be empty"
2011 echo "The stub file should keep the original f2 data"
2012 cur_size=$(ls -il $cname | awk '{ print $6 }')
2013 [ "$cur_size" == "$saved_size" ] ||
2014 error "(8) Expect file2 size $saved_size, but got $cur_size"
2017 $LFS path2fid $cname
2018 $LFS getstripe $cname
2020 echo "The f2 should contains new data."
2021 cat $DIR/$tdir/a1/f2
2022 $LFS path2fid $DIR/$tdir/a1/f2
2023 $LFS getstripe $DIR/$tdir/a1/f2
# Hand test 18e and its description to run_test (defined outside this file
# section; presumably provided by the sourced test-framework.sh -- confirm).
2025 run_test 18e "Find out orphan OST-object and repair it (5)"
# test_19a: verify that a read whose RPC carries a wrong parent FID is denied
# once lfsck_verify_pfid has been switched on for OST0000.
# NOTE(review): the "test_19a() {" header and the closing "}" are not visible
# in this listing; the block is delimited by the run_test line at its end.
2028 check_mount_and_prep
# Default layout for the test directory: -c 1 -i 0 (one stripe, starting at
# OST index 0), so the files below are expected to live on OST0000.
2029 $LFS setstripe -c 1 -i 0 $DIR/$tdir
# Create the file that will be read (a0) plus a second "guard" file (a1).
2031 echo "foo" > $DIR/$tdir/a0
2032 echo "guard" > $DIR/$tdir/a1
# Drop the client's osc locks (presumably so that the later cat has to send
# a real read RPC to the OST instead of using cached data -- TODO confirm).
2033 cancel_lru_locks osc
2035 echo "Inject failure, then client will offer wrong parent FID when read"
# Enable parent-FID verification on OST0000 by writing 1 to the parameter.
2036 do_facet ost1 $LCTL set_param -n \
2037 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2038 #define OBD_FAIL_LFSCK_INVALID_PFID 0x1619
# Unlike the OST setting above, fail_loc is set with a plain $LCTL call (no
# do_facet), i.e. on the node that runs this script.
2039 $LCTL set_param fail_loc=0x1619
2041 echo "Read RPC with wrong parent FID should be denied"
# A successful read is the failure case here, hence "&& error".
2042 cat $DIR/$tdir/a0 && error "(3) Read should be denied!"
# Clear the injected failure again.
2043 $LCTL set_param fail_loc=0
# Hand test 19a and its description to run_test.
2045 run_test 19a "OST-object inconsistency self detect"
# test_19b: create an OST-object whose back pointer references a non-existent
# MDT-object, then check that the "repaired" counter reported by
# lfsck_verify_pfid stays at 0 while verification is off and becomes 1 after
# verification is enabled and the file is read.
# NOTE(review): the "test_19b() {" header and the closing "}" are not visible
# in this listing; the block is delimited by the run_test line at its end.
2048 check_mount_and_prep
# Default layout for the test directory: -c 1 -i 0 (one stripe, starting at
# OST index 0).
2049 $LFS setstripe -c 1 -i 0 $DIR/$tdir
2051 echo "Inject failure stub to make the OST-object to back point to"
2052 echo "non-exist MDT-object"
2054 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
# The failure is active on ost1 only while f0 is written and the client's
# osc locks are cancelled; it is cleared right afterwards.
2055 do_facet ost1 $LCTL set_param fail_loc=0x1611
2056 echo "foo" > $DIR/$tdir/f0
2057 cancel_lru_locks osc
2058 do_facet ost1 $LCTL set_param fail_loc=0
2060 echo "Nothing should be fixed since self detect and repair is disabled"
# Read the second field of the line starting with "repaired" from the
# lfsck_verify_pfid parameter of OST0000.
2061 local repaired=$(do_facet ost1 $LCTL get_param -n \
2062 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2063 awk '/^repaired/ { print $2 }')
2064 [ $repaired -eq 0 ] ||
2065 error "(1) Expected 0 repaired, but got $repaired"
2067 echo "Read RPC with right parent FID should be accepted,"
2068 echo "and cause parent FID on OST to be fixed"
# Enable parent-FID verification on OST0000, then read the file; this time
# the read must succeed.
2070 do_facet ost1 $LCTL set_param -n \
2071 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid 1
2072 cat $DIR/$tdir/f0 || error "(2) Read should not be denied!"
# Re-read the counter: exactly one repair is expected after the read.
2074 repaired=$(do_facet ost1 $LCTL get_param -n \
2075 obdfilter.${FSNAME}-OST0000.lfsck_verify_pfid |
2076 awk '/^repaired/ { print $2 }')
2077 [ $repaired -eq 1 ] ||
2078 error "(3) Expected 1 repaired, but got $repaired"
# Hand test 19b and its description to run_test.
2080 run_test 19b "OST-object inconsistency self repair"
# test_20, part 1: precondition check, description banner and creation of the
# striped test files f0..f2 (plus f3 when more than two OSTs are available).
# NOTE(review): the "test_20() {" header is not visible in this listing, and
# neither are the "else"/"fi" lines of the if-statements below.
2083 [ $OSTCOUNT -lt 2 ] &&
2084 skip "The test needs at least 2 OSTs" && return
2087 echo "The target MDT-object and some of its OST-object are lost."
2088 echo "The LFSCK should find out the left OST-objects and re-create"
2089 echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
2090 echo "with the partial OST-objects (LOV EA hole)."
2092 echo "New client can access the file with LOV EA hole via normal"
2093 echo "system tools or commands without crash the system."
2095 echo "For old client, even though it cannot access the file with"
2096 echo "LOV EA hole, it should not cause the system crash."
2099 check_mount_and_prep
# Create the working directory with "lfs mkdir -i 0" (MDT index 0; the
# recovered files are later looked up under lost+found/MDT0000).
2100 $LFS mkdir -i 0 $DIR/$tdir/a1
# Three stripes when more than two OSTs exist, otherwise two stripes; in both
# cases a 1M stripe size starting at OST index 0.
# NOTE(review): the second setstripe presumably sits in an "else" branch that
# is missing here, and $bcount (used by the dd calls below) is not assigned in
# any visible line. The block-count comments below suggest 513 blocks for the
# 3-stripe case and 257 for the 2-stripe case -- TODO confirm.
2101 if [ $OSTCOUNT -gt 2 ]; then
2102 $LFS setstripe -c 3 -i 0 -s 1M $DIR/$tdir/a1
2105 $LFS setstripe -c 2 -i 0 -s 1M $DIR/$tdir/a1
# Intended data distribution in 4096-byte blocks (1M stripe = 256 blocks):
2109 # 256 blocks on the stripe0.
2110 # 1 block on the stripe1 for 2 OSTs case.
2111 # 256 blocks on the stripe1 for other cases.
2112 # 1 block on the stripe2 if OSTs > 2
2113 dd if=/dev/zero of=$DIR/$tdir/a1/f0 bs=4096 count=$bcount
2114 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=4096 count=$bcount
2115 dd if=/dev/zero of=$DIR/$tdir/a1/f2 bs=4096 count=$bcount
# Remember the FIDs: the recovered files are later looked up by the name
# "<fid>-R-0" under .lustre/lost+found/MDT0000/.
2117 local fid0=$($LFS path2fid $DIR/$tdir/a1/f0)
2118 local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
2119 local fid2=$($LFS path2fid $DIR/$tdir/a1/f2)
# Print the layouts (output is only informational; it is not checked).
2122 $LFS getstripe $DIR/$tdir/a1/f0
2124 $LFS getstripe $DIR/$tdir/a1/f1
2126 $LFS getstripe $DIR/$tdir/a1/f2
# A fourth file is only used when there are more than two OSTs.
# NOTE(review): fid3 is assigned without "local" in the visible lines.
2128 if [ $OSTCOUNT -gt 2 ]; then
2129 dd if=/dev/zero of=$DIR/$tdir/a1/f3 bs=4096 count=$bcount
2130 fid3=$($LFS path2fid $DIR/$tdir/a1/f3)
2132 $LFS getstripe $DIR/$tdir/a1/f3
2135 cancel_lru_locks osc
# test_20, part 2: remove the files while failure injection is active on mds1
# so that, per the echo messages, each file loses its MDT-object and (for
# f1..f3) one selected OST-object.
2137 echo "Inject failure..."
2138 echo "To simulate f0 lost MDT-object"
2139 #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
2140 do_facet mds1 $LCTL set_param fail_loc=0x1616
2141 rm -f $DIR/$tdir/a1/f0
2143 echo "To simulate f1 lost MDT-object and OST-object0"
2144 #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
2145 do_facet mds1 $LCTL set_param fail_loc=0x161a
2146 rm -f $DIR/$tdir/a1/f1
# fail_loc stays at 0x161a; only fail_val changes. Judging by the echo
# messages, fail_val is the index of the OST-object that gets lost (no
# fail_val is set for f1 above, which is described as losing OST-object0).
2148 echo "To simulate f2 lost MDT-object and OST-object1"
2149 do_facet mds1 $LCTL set_param fail_val=1
2150 rm -f $DIR/$tdir/a1/f2
# NOTE(review): the closing "fi" of this if-statement is not visible.
2152 if [ $OSTCOUNT -gt 2 ]; then
2153 echo "To simulate f3 lost MDT-object and OST-object2"
2154 do_facet mds1 $LCTL set_param fail_val=2
2155 rm -f $DIR/$tdir/a1/f3
# Unmount the client (it is mounted again before the recovered files are
# inspected) and reset the failure injection on mds1.
2158 umount_client $MOUNT
2161 do_facet mds1 $LCTL set_param fail_loc=0 fail_val=0
# test_20, part 3: start the layout LFSCK, wait for it to complete on every
# MDT, check the status on every OST and verify the repaired_orphan counter.
2163 echo "Inject failure to slow down the LFSCK on OST0"
2164 #define OBD_FAIL_LFSCK_DELAY5 0x161b
2165 do_facet ost1 $LCTL set_param fail_loc=0x161b
2167 echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
# $START_LAYOUT is defined at the top of the file as
# "do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t layout".
2168 $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
# Remove the slow-down again once the LFSCK has been started.
2171 do_facet ost1 $LCTL set_param fail_loc=0
# NOTE(review): the closing "done" of both for-loops is not visible.
2173 for k in $(seq $MDSCOUNT); do
2174 # The LFSCK status query interval is 30 seconds. In case some
2175 # LFSCK_NOTIFY RPCs fail or are lost, wait long enough to
2176 # guarantee that the status has been synced up.
2177 wait_update_facet mds${k} "$LCTL get_param -n \
2178 mdd.$(facet_svc mds${k}).lfsck_layout |
2179 awk '/^status/ { print \\\$2 }'" "completed" 32 ||
2180 error "(2) MDS${k} is not the expected 'completed'"
# The OSTs are not waited for: their status is read once and must already be
# "completed".
2183 for k in $(seq $OSTCOUNT); do
2184 local cur_status=$(do_facet ost${k} $LCTL get_param -n \
2185 obdfilter.$(facet_svc ost${k}).lfsck_layout |
2186 awk '/^status/ { print $2 }')
2187 [ "$cur_status" == "completed" ] ||
2188 error "(3) OST${k} Expect 'completed', but got '$cur_status'"
# Number of repaired orphans reported by mds1.
2191 local repaired=$(do_facet mds1 $LCTL get_param -n \
2192 mdd.$(facet_svc mds1).lfsck_layout |
2193 awk '/^repaired_orphan/ { print $2 }')
# Expected: 9 with more than two OSTs, otherwise 4. Presumably one repair per
# surviving OST-object (3+2+2+2 for f0..f3, or 2+1+1 for f0..f2) -- TODO
# confirm against the LFSCK accounting.
# NOTE(review): the "else"/"fi" lines of this if-statement are not visible.
2194 if [ $OSTCOUNT -gt 2 ]; then
2195 [ $repaired -eq 9 ] ||
2196 error "(4.1) Expect 9 fixed on mds1, but got: $repaired"
2198 [ $repaired -eq 4 ] ||
2199 error "(4.2) Expect 4 fixed on mds1, but got: $repaired"
# test_20, part 4: remount the client and verify the file recovered for f0,
# which lost only its MDT-object: no hole flag, full stripe count, full size,
# and read/append/chown/touch/unlink must all succeed.
2202 mount_client $MOUNT || error "(5.0) Fail to start client!"
# Bit mask tested against the pattern value printed by "lfs getstripe -L".
# NOTE(review): assigned without "local", so it stays set after the test.
2204 LOV_PATTERN_F_HOLE=0x40000000
2207 # ${fid0}-R-0 is the old f0
2209 local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
2210 echo "Check $name, which is the old f0"
2212 $LFS getstripe -v $name || error "(5.1) cannot getstripe on $name"
# "0x" is prepended to the getstripe output so that the arithmetic expansion
# below parses it as hexadecimal (this assumes the tool prints bare hex
# digits -- TODO confirm).
2214 local pattern=0x$($LFS getstripe -L $name)
2215 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2216 error "(5.2) NOT expect pattern flag hole, but got $pattern"
# The stripe count must match the layout chosen at creation time.
# NOTE(review): the "else"/"fi" lines of this if-statement are not visible.
2218 local stripes=$($LFS getstripe -c $name)
2219 if [ $OSTCOUNT -gt 2 ]; then
2220 [ $stripes -eq 3 ] ||
2221 error "(5.3.1) expect the stripe count is 3, but got $stripes"
2223 [ $stripes -eq 2 ] ||
2224 error "(5.3.2) expect the stripe count is 2, but got $stripes"
# The size must equal what dd wrote: $bcount blocks of 4096 bytes.
2227 local size=$(stat $name | awk '/Size:/ { print $2 }')
2228 [ $size -eq $((4096 * $bcount)) ] ||
2229 error "(5.4) expect the size $((4096 * $bcount)), but got $size"
# Ordinary file operations on the recovered file must all work.
2231 cat $name > /dev/null || error "(5.5) cannot read $name"
2233 echo "dummy" >> $name || error "(5.6) cannot write $name"
2235 chown $RUNAS_ID:$RUNAS_GID $name || error "(5.7) cannot chown on $name"
2237 touch $name || error "(5.8) cannot touch $name"
2239 rm -f $name || error "(5.9) cannot unlink $name"
# test_20, part 5: verify the file recovered for f1, which lost stripe0. The
# layout must carry the hole flag, I/O that touches the hole must fail and
# I/O on the surviving stripe(s) must work.
# Relies on fid1, bcount, name, pattern, stripes, size and
# LOV_PATTERN_F_HOLE set earlier in test_20.
2242 # ${fid1}-R-0 contains the old f1's stripe1 (and stripe2 if OSTs > 2)
2244 name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
# NOTE(review): the "else"/"fi" lines of the if-statements in this part are
# not visible in this listing.
2245 if [ $OSTCOUNT -gt 2 ]; then
2246 echo "Check $name, it contains the old f1's stripe1 and stripe2"
2248 echo "Check $name, it contains the old f1's stripe1"
2251 $LFS getstripe -v $name || error "(6.1) cannot getstripe on $name"
# The hole flag must be set in the pattern this time.
2253 pattern=0x$($LFS getstripe -L $name)
2254 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2255 error "(6.2) expect pattern flag hole, but got $pattern"
# Stripe count and size are expected to be the same as for the original file.
2257 stripes=$($LFS getstripe -c $name)
2258 if [ $OSTCOUNT -gt 2 ]; then
2259 [ $stripes -eq 3 ] ||
2260 error "(6.3.1) expect the stripe count is 3, but got $stripes"
2262 [ $stripes -eq 2 ] ||
2263 error "(6.3.2) expect the stripe count is 2, but got $stripes"
2266 size=$(stat $name | awk '/Size:/ { print $2 }')
2267 [ $size -eq $((4096 * $bcount)) ] ||
2268 error "(6.4) expect the size $((4096 * $bcount)), but got $size"
# A plain sequential read hits the hole and must fail.
2270 cat $name > /dev/null && error "(6.5) normal read $name should fail"
# Copy with conv=sync,noerror so that dd keeps going after read errors, and
# count the "Input/output error" messages it prints.
2272 local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2273 bs=4096 2>&1 | grep "Input/output error" | wc -l)
# 256 failing 4096-byte blocks = one 1M stripe, i.e. the lost stripe0.
2276 [ $failures -eq 256 ] ||
2277 error "(6.6) expect 256 IO failures, but get $failures"
# The dump must still have the full file size.
2279 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2280 [ $size -eq $((4096 * $bcount)) ] ||
2281 error "(6.7) expect the size $((4096 * $bcount)), but got $size"
# Block 0 belongs to the lost stripe0: writing there must fail.
2283 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
2284 error "(6.8) write to the LOV EA hole should fail"
# Block 300 lies in the second 1M chunk (blocks 256-511), i.e. on stripe1,
# which survived: writing there must succeed.
2286 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
2287 error "(6.9) write to normal stripe should NOT fail"
# Appending is expected to fail, while chown/touch/unlink must succeed.
2289 echo "foo" >> $name && error "(6.10) append write $name should fail"
2291 chown $RUNAS_ID:$RUNAS_GID $name || error "(6.11) cannot chown on $name"
2293 touch $name || error "(6.12) cannot touch $name"
2295 rm -f $name || error "(6.13) cannot unlink $name"
# test_20, part 6: verify the file recovered for f2, which lost stripe1.
# With more than two OSTs the lost stripe is a hole in the middle of the
# layout; with two OSTs the checks expect a plain 1-stripe file without the
# hole flag.
# Relies on fid2, bcount, name, pattern, stripes, size, failures and
# LOV_PATTERN_F_HOLE set earlier in test_20.
2298 # ${fid2}-R-0 it contains the old f2's stripe0 (and stripe2 if OSTs > 2)
2300 name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
# NOTE(review): the "else"/"fi" lines of the if-statements in this part are
# not visible in this listing.
2301 if [ $OSTCOUNT -gt 2 ]; then
2302 echo "Check $name, it contains the old f2's stripe0 and stripe2"
2304 echo "Check $name, it contains the old f2's stripe0"
2307 $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
# Collect pattern, stripe count and size once; they are judged per case below.
2309 pattern=0x$($LFS getstripe -L $name)
2310 stripes=$($LFS getstripe -c $name)
2311 size=$(stat $name | awk '/Size:/ { print $2 }')
# Case "more than two OSTs": checks labelled (7.x.1), (7.6), (7.7), (7.8.0),
# (7.8.1) and (7.8.3).
2312 if [ $OSTCOUNT -gt 2 ]; then
2313 [[ $((pattern & LOV_PATTERN_F_HOLE)) -ne 0 ]] ||
2314 error "(7.2.1) expect pattern flag hole, but got $pattern"
2316 [ $stripes -eq 3 ] ||
2317 error "(7.3.1) expect the stripe count is 3, but got $stripes"
2319 [ $size -eq $((4096 * $bcount)) ] ||
2320 error "(7.4.1) expect size $((4096 * $bcount)), but got $size"
# A plain sequential read hits the hole and must fail.
2322 cat $name > /dev/null &&
2323 error "(7.5.1) normal read $name should fail"
# dd with conv=sync,noerror continues after read errors; count the
# "Input/output error" messages (256 blocks = the lost 1M stripe).
2325 failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
2326 bs=4096 2>&1 | grep "Input/output error" | wc -l)
2328 [ $failures -eq 256 ] ||
2329 error "(7.6) expect 256 IO failures, but get $failures"
2331 size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
2332 [ $size -eq $((4096 * $bcount)) ] ||
2333 error "(7.7) expect the size $((4096 * $bcount)), but got $size"
# Block 300 lies in the second 1M chunk, i.e. on the lost stripe1: the write
# must fail.
2335 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
2336 seek=300 && error "(7.8.0) write to the LOV EA hole should fail"
# Block 0 lies on stripe0, which survived: the write must succeed.
2338 dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
2339 error "(7.8.1) write to normal stripe should NOT fail"
2341 echo "foo" >> $name &&
2342 error "(7.8.3) append write $name should fail"
2344 chown $RUNAS_ID:$RUNAS_GID $name ||
2345 error "(7.9.1) cannot chown on $name"
2347 touch $name || error "(7.10.1) cannot touch $name"
# Case "two OSTs" (presumably the "else" branch, judging by the (7.x.2)
# labels): the lost stripe1 was the last stripe, and the checks expect a
# 1-stripe file without hole flag that holds only the 256 blocks of stripe0.
2349 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2350 error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
2352 [ $stripes -eq 1 ] ||
2353 error "(7.3.2) expect the stripe count is 1, but got $stripes"
2356 [ $size -eq $((4096 * (256 + 0))) ] ||
2357 error "(7.4.2) expect the size $((4096 * 256)), but got $size"
2359 cat $name > /dev/null || error "(7.5.2) cannot read $name"
2361 echo "dummy" >> $name || error "(7.8.2) cannot write $name"
2363 chown $RUNAS_ID:$RUNAS_GID $name ||
2364 error "(7.9.2) cannot chown on $name"
2366 touch $name || error "(7.10.2) cannot touch $name"
# Common to both cases: the recovered file can be unlinked.
2369 rm -f $name || error "(7.11) cannot unlink $name"
# test_20, part 7: verify the file recovered for f3, which lost stripe2 (the
# last one). f3 is only created when more than two OSTs exist, so the test
# ends here otherwise.
# Relies on fid3, name, pattern, stripes, size and LOV_PATTERN_F_HOLE set
# earlier in test_20.
# NOTE(review): the closing "}" of test_20 is not visible in this listing.
2371 [ $OSTCOUNT -le 2 ] && return
2374 # ${fid3}-R-0 should contain the old f3's stripe0 and stripe1
2376 name="$MOUNT/.lustre/lost+found/MDT0000/${fid3}-R-0"
2377 echo "Check $name, which contains the old f3's stripe0 and stripe1"
2379 $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
# No hole flag is expected for this file.
2381 pattern=0x$($LFS getstripe -L $name)
2382 [[ $((pattern & LOV_PATTERN_F_HOLE)) -eq 0 ]] ||
2383 error "(8.2) NOT expect pattern flag hole, but got $pattern"
2385 stripes=$($LFS getstripe -c $name)
2386 # LFSCK does not know the old f3 had 3 stripes.
2387 # It only tries to find as much as possible.
2388 # The stripe count depends on the last stripe's offset.
2389 [ $stripes -eq 2 ] ||
2390 error "(8.3) expect the stripe count is 2, but got $stripes"
# Expected size: the two surviving full stripes, 256 + 256 blocks of 4096
# bytes; the data that was on the lost stripe2 does not count.
2392 size=$(stat $name | awk '/Size:/ { print $2 }')
2394 [ $size -eq $((4096 * (256 + 256 + 0))) ] ||
2395 error "(8.4) expect the size $((4096 * 512)), but got $size"
# Ordinary file operations on the recovered file must all work.
2397 cat $name > /dev/null || error "(8.5) cannot read $name"
2399 echo "dummy" >> $name || error "(8.6) cannot write $name"
2401 chown $RUNAS_ID:$RUNAS_GID $name ||
2402 error "(8.7) cannot chown on $name"
2404 touch $name || error "(8.8) cannot touch $name"
2406 rm -f $name || error "(8.9) cannot unlink $name"
# Hand test 20 and its description to run_test.
2408 run_test 20 "Handle the orphan with dummy LOV EA slot properly"
# Script epilogue: undo the global changes made at the top of the file.
# Remove the "lfsck" debug flag that was added with debug=+lfsck; failures
# are deliberately ignored ("|| true").
2410 $LCTL set_param debug=-lfsck > /dev/null || true
# Put back the values saved in SAVED_MDSSIZE / SAVED_OSTSIZE /
# SAVED_OSTCOUNT before the tests ran.
2412 # restore MDS/OST size
2413 MDSSIZE=${SAVED_MDSSIZE}
2414 OSTSIZE=${SAVED_OSTSIZE}
2415 OSTCOUNT=${SAVED_OSTCOUNT}
2417 # cleanup the system at last