umount_client $MOUNT
stop ost1 || error "(1) Fail to stop ost1"
+ # stop MDS to forget last precreated object
+ echo "stop $SINGLEMDS"
+ stop $SINGLEMDS > /dev/null || error "(11) Fail to stop MDS!"
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+ echo "start $SINGLEMDS"
+ start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+ error "(12) Fail to start MDS!"
+
#define OBD_FAIL_OST_ENOSPC 0x215
do_facet ost1 $LCTL set_param fail_loc=0x215
}
run_test 11b "LFSCK can rebuild crashed last_id"
-test_12() {
+test_12a() {
[ $MDSCOUNT -lt 2 ] &&
- skip "We need at least 2 MDSes for test_12" && return
+ skip "We need at least 2 MDSes for test_12a" && return
check_mount_and_prep
for k in $(seq $MDSCOUNT); do
stop_full_debug_logging
}
-run_test 12 "single command to trigger LFSCK on all devices"
+run_test 12a "single command to trigger LFSCK on all devices"
+
+# test_12b: start LFSCK from mds1 without naming a device via '-M', and
+# wait until the namespace and layout LFSCK are reported as 'completed'.
+# Then, if mds1 lists more than one "mdt" device, check that starting
+# layout LFSCK with neither '-M' nor '-A' is rejected.
+test_12b() {
+	check_mount_and_prep
+
+	echo "Start LFSCK without '-M' specified."
+	do_facet mds1 $LCTL lfsck_start -A -r ||
+		error "(0) Fail to start LFSCK without '-M'"
+
+	wait_all_targets_blocked namespace completed 1
+	wait_all_targets_blocked layout completed 2
+
+	# Number of 'lctl dl' entries on mds1 whose third column matches
+	# "mdt". Declared separately so that 'local' does not hide the
+	# exit status of the pipeline.
+	local count
+	count=$(do_facet mds1 $LCTL dl |
+		awk '{ print $3 }' | grep -c mdt)
+	if [ "$count" -gt 1 ]; then
+		echo
+		echo "Start layout LFSCK on the node with multiple targets,"
+		echo "but not specify '-M'/'-A' option. Should get failure."
+		echo
+		# Success here is the failure case: with several targets
+		# on the node the command is expected to be refused.
+		if do_facet mds1 $LCTL lfsck_start -t layout -r; then
+			error "(3) Start layout LFSCK should fail"
+		fi
+	fi
+}
+run_test 12b "auto detect Lustre device"
test_13() {
echo "#####"
[ "$cur_size" != "$saved_size" ] ||
error "(1) Expect incorrect file2 size"
- #define OBD_FAIL_LFSCK_DELAY3 0x1602
- do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602
-
echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
$START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!"
- wait_update_facet mds1 "$LCTL get_param -n \
- mdd.$(facet_svc mds1).lfsck_layout |
- awk '/^status/ { print \\\$2 }'" "scanning-phase2" $LTIME ||
- error "(3.0) MDS1 is not the expected 'scanning-phase2'"
-
- do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
-
for k in $(seq $MDSCOUNT); do
# The LFSCK status query internal is 30 seconds. For the case
# of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
}
run_test 18f "Skip the failed OST(s) when handle orphan OST-objects"
+test_18g() {
+ echo "#####"
+ echo "The target MDT-object is lost, but related OI mapping is there"
+ echo "The LFSCK should recreate the lost MDT-object without affected"
+ echo "by the stale OI mapping."
+ echo "#####"
+
+ check_mount_and_prep
+ $LFS mkdir -i 0 $DIR/$tdir/a1
+ $LFS setstripe -c -1 -i 0 -S 1M $DIR/$tdir/a1
+ dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=$OSTCOUNT
+ local fid1=$($LFS path2fid $DIR/$tdir/a1/f1)
+ echo ${fid1}
+ $LFS getstripe $DIR/$tdir/a1/f1
+ cancel_lru_locks osc
+
+ echo "Inject failure to simulate lost MDT-object but keep OI mapping"
+ #define OBD_FAIL_LFSCK_LOST_MDTOBJ2 0x162e
+ do_facet mds1 $LCTL set_param fail_loc=0x162e
+ rm -f $DIR/$tdir/a1/f1
+
+ do_facet mds1 $LCTL set_param fail_loc=0
+ cancel_lru_locks mdc
+ cancel_lru_locks osc
+
+ echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
+ $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!"
+
+ for k in $(seq $MDSCOUNT); do
+ # The LFSCK status query internal is 30 seconds. For the case
+ # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
+ # time to guarantee the status sync up.
+ wait_update_facet mds${k} "$LCTL get_param -n \
+ mdd.$(facet_svc mds${k}).lfsck_layout |
+ awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
+ error "(2) MDS${k} is not the expected 'completed'"
+ done
+
+ for k in $(seq $OSTCOUNT); do
+ local cur_status=$(do_facet ost${k} $LCTL get_param -n \
+ obdfilter.$(facet_svc ost${k}).lfsck_layout |
+ awk '/^status/ { print $2 }')
+ [ "$cur_status" == "completed" ] ||
+ error "(3) OST${k} Expect 'completed', but got '$cur_status'"
+ done
+
+ local repaired=$(do_facet mds1 $LCTL get_param -n \
+ mdd.$(facet_svc mds1).lfsck_layout |
+ awk '/^repaired_orphan/ { print $2 }')
+ [ $repaired -eq $OSTCOUNT ] ||
+ error "(4) Expect $OSTCOUNT fixed, but got: $repaired"
+
+ echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace"
+ mv $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0 $DIR/$tdir/a1/f1 ||
+ error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
+
+ $LFS path2fid $DIR/$tdir/a1/f1
+ $LFS getstripe $DIR/$tdir/a1/f1
+}
+run_test 18g "Find out orphan OST-object and repair it (7)"
+
$LCTL set_param debug=-cache > /dev/null
test_19a() {
check_mount_and_prep
+ [[ -d $MOUNT/.lustre/lost+found/MDT0000 ]] || {
+ # Trigger LFSCK first so that it creates
+ # .lustre/lost+found/MDTxxxx in advance; this avoids
+ # reusing the local object for the dangling name
+ # entry. See LU-7429.
+ $START_NAMESPACE -r ||
+ error "(0) Fail to start LFSCK for namespace"
+
+ wait_all_targets_blocked namespace completed 0.1
+ }
+
$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
+ $LFS path2fid $DIR/$tdir/d0
+
echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
+ $LFS path2fid $DIR/$tdir/d0/f0
+
echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"
+ $LFS path2fid $DIR/$tdir/d0/f1
local OID=$($LFS path2fid $DIR/$tdir/d0/f1 | awk -F':' '{print $2}')
OID=$(printf %d $OID)
error "(3.1) Fail to unlink $DIR/$tdir/d0/f0"
echo "dummy" > $DIR/$tdir/d0/f0 ||
error "(3.2) Fail to touch on MDT0"
+ $LFS path2fid $DIR/$tdir/d0/f0
fi
echo "Inject failure stub on MDT0 to simulate dangling name entry"
echo "LFSCK cannot replace it."
echo "#####"
+ start_full_debug_logging
+
check_mount_and_prep
+ [[ -d $MOUNT/.lustre/lost+found/MDT0000 ]] || {
+ # Trigger LFSCK first so that it creates
+ # .lustre/lost+found/MDTxxxx in advance; this avoids
+ # reusing the local object for the dangling name
+ # entry. See LU-7429.
+ $START_NAMESPACE -r ||
+ error "(0) Fail to start LFSCK for namespace"
+
+ wait_all_targets_blocked namespace completed 0.1
+ }
+
$LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
+ $LFS path2fid $DIR/$tdir/d0
+
echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
+ $LFS path2fid $DIR/$tdir/d0/f0
+
echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"
+ $LFS path2fid $DIR/$tdir/d0/f1
local OID=$($LFS path2fid $DIR/$tdir/d0/f1 | awk -F':' '{print $2}')
OID=$(printf %d $OID)
error "(3.1) Fail to unlink $DIR/$tdir/d0/f0"
echo "dummy" > $DIR/$tdir/d0/f0 ||
error "(3.2) Fail to touch on MDT0"
+ $LFS path2fid $DIR/$tdir/d0/f0
fi
echo "Inject failure stub on MDT0 to simulate dangling name entry"
error "(10) unexpected status"
}
+ stop_full_debug_logging
+
local repaired=$($SHOW_NAMESPACE |
awk '/^dangling_repaired/ { print $2 }')
[ $repaired -eq 1 ] ||
echo "Inject failure stub on MDT0 to simulate the case that"
echo "foo's hard links exceed the object's linkEA limitation."
- #define OBD_FAIL_LFSCK_LINKEA_OVERFLOW 0x1627
- do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1627
ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/h2 ||
error "(4) Fail to hard link to $DIR/$tdir/d0/foo"
wait_all_targets_blocked namespace completed 8
- do_facet $SINGLEMDS $LCTL set_param fail_loc=0
local repaired=$($SHOW_NAMESPACE |
awk '/^nlinks_repaired/ { print $2 }')
[ $repaired -eq 0 ] ||
[ $count2 -eq 2 ] ||
error "(11) Repaired something unexpectedly: $count2"
}
-run_test 29c "Not verify nlink attr if hark links exceed linkEA limitation"
+# Disable test_29c temporarily; it will be re-enabled in a subsequent patch.
+#run_test 29c "Not verify nlink attr if hard links exceed linkEA limitation"
test_30() {
[ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&