+run_test 20a "Handle the orphan with dummy LOV EA slot properly"
+
+test_20b() {
+ [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return
+
+ echo "#####"
+ echo "The target MDT-object and some of its OST-object are lost."
+ echo "The LFSCK should find out the left OST-objects and re-create"
+ echo "the MDT-object under the direcotry .lustre/lost+found/MDTxxxx/"
+ echo "with the partial OST-objects (LOV EA hole)."
+
+ echo "New client can access the file with LOV EA hole via normal"
+ echo "system tools or commands without crash the system - PFL case."
+ echo "#####"
+
+ check_mount_and_prep
+
+ $LFS setstripe -E 2M -S 1M -c 2 -E -1 -S 1M -c 2 $DIR/$tdir/f0 ||
+ error "(0) Fail to create PFL file $DIR/$tdir/f0"
+ $LFS setstripe -E 2M -S 1M -c 2 -E -1 -S 1M -c 2 $DIR/$tdir/f1 ||
+ error "(1) Fail to create PFL file $DIR/$tdir/f1"
+ $LFS setstripe -E 2M -S 1M -c 2 -E -1 -S 1M -c 2 $DIR/$tdir/f2 ||
+ error "(2) Fail to create PFL file $DIR/$tdir/f2"
+
+ local bcount=$((256 * 3 + 1))
+
+ dd if=/dev/zero of=$DIR/$tdir/f0 bs=4096 count=$bcount
+ dd if=/dev/zero of=$DIR/$tdir/f1 bs=4096 count=$bcount
+ dd if=/dev/zero of=$DIR/$tdir/f2 bs=4096 count=$bcount
+
+ local fid0=$($LFS path2fid $DIR/$tdir/f0)
+ local fid1=$($LFS path2fid $DIR/$tdir/f1)
+ local fid2=$($LFS path2fid $DIR/$tdir/f2)
+
+ echo ${fid0}
+ $LFS getstripe $DIR/$tdir/f0
+ echo ${fid1}
+ $LFS getstripe $DIR/$tdir/f1
+ echo ${fid2}
+ $LFS getstripe $DIR/$tdir/f2
+
+ cancel_lru_locks mdc
+ cancel_lru_locks osc
+
+ echo "Inject failure..."
+ echo "To simulate f0 lost MDT-object"
+ #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1616
+ rm -f $DIR/$tdir/f0
+
+ echo "To simulate the case of f1 lost MDT-object and "
+ echo "the first OST-object in each PFL component"
+ #define OBD_FAIL_LFSCK_LOST_SPEOBJ 0x161a
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161a
+ rm -f $DIR/$tdir/f1
+
+ echo "To simulate the case of f2 lost MDT-object and "
+ echo "the second OST-object in each PFL component"
+ do_facet $SINGLEMDS $LCTL set_param fail_val=1
+ rm -f $DIR/$tdir/f2
+
+ sync
+ sleep 2
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+
+ echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
+ $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!"
+
+ for k in $(seq $MDSCOUNT); do
+ # The LFSCK status query internal is 30 seconds. For the case
+ # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
+ # time to guarantee the status sync up.
+ wait_update_facet mds${k} "$LCTL get_param -n \
+ mdd.$(facet_svc mds${k}).lfsck_layout |
+ awk '/^status/ { print \\\$2 }'" "completed" 32 ||
+ error "(4) MDS${k} is not the expected 'completed'"
+ done
+
+ for k in $(seq $OSTCOUNT); do
+ local cur_status=$(do_facet ost${k} $LCTL get_param -n \
+ obdfilter.$(facet_svc ost${k}).lfsck_layout |
+ awk '/^status/ { print $2 }')
+ [ "$cur_status" == "completed" ] ||
+ error "(5) OST${k} Expect 'completed', but got '$cur_status'"
+ done
+
+ local repaired=$(do_facet mds1 $LCTL get_param -n \
+ mdd.$(facet_svc mds1).lfsck_layout |
+ awk '/^repaired_orphan/ { print $2 }')
+ [ $repaired -eq 8 ] ||
+ error "(6) Expect 8 fixed on mds1, but got: $repaired"
+
+ #
+ # ${fid0}-R-0 is the old f0
+ #
+ local name="$MOUNT/.lustre/lost+found/MDT0000/${fid0}-R-0"
+ echo "Check $name, which is the old f0"
+
+ $LFS getstripe -v $name || error "(7.1) cannot getstripe on $name"
+
+ local pattern=$($LFS getstripe -L -I1 $name)
+ [[ "$pattern" = "$PATTERN_WITHOUT_HOLE" ]] ||
+ error "(7.2.1) NOT expect pattern flag hole, but got $pattern"
+
+ pattern=$($LFS getstripe -L -I2 $name)
+ [[ "$pattern" = "$PATTERN_WITHOUT_HOLE" ]] ||
+ error "(7.2.2) NOT expect pattern flag hole, but got $pattern"
+
+ local stripes=$($LFS getstripe -c -I1 $name)
+ [ $stripes -eq 2 ] ||
+ error "(7.3.1) expect 2 stripes, but got $stripes"
+
+ stripes=$($LFS getstripe -c -I2 $name)
+ [ $stripes -eq 2 ] ||
+ error "(7.3.2) expect 2 stripes, but got $stripes"
+
+ local e_start=$($LFS getstripe -I1 $name |
+ awk '/lcme_extent.e_start:/ { print $2 }')
+ [ $e_start -eq 0 ] ||
+ error "(7.4.1) expect the COMP1 start at 0, got $e_start"
+
+ local e_end=$($LFS getstripe -I1 $name |
+ awk '/lcme_extent.e_end:/ { print $2 }')
+ [ $e_end -eq 2097152 ] ||
+ error "(7.4.2) expect the COMP1 end at 2097152, got $e_end"
+
+ e_start=$($LFS getstripe -I2 $name |
+ awk '/lcme_extent.e_start:/ { print $2 }')
+ [ $e_start -eq 2097152 ] ||
+ error "(7.5.1) expect the COMP2 start at 2097152, got $e_start"
+
+ e_end=$($LFS getstripe -I2 $name |
+ awk '/lcme_extent.e_end:/ { print $2 }')
+ [ "$e_end" = "EOF" ] ||
+ error "(7.5.2) expect the COMP2 end at (EOF), got $e_end"
+
+ local size=$(stat $name | awk '/Size:/ { print $2 }')
+ [ $size -eq $((4096 * $bcount)) ] ||
+ error "(7.6) expect the size $((4096 * $bcount)), but got $size"
+
+ cat $name > /dev/null || error "(7.7) cannot read $name"
+
+ echo "dummy" >> $name || error "(7.8) cannot write $name"
+
+ chown $RUNAS_ID:$RUNAS_GID $name || error "(7.9) cannot chown on $name"
+
+ touch $name || error "(7.10) cannot touch $name"
+
+ rm -f $name || error "(7.11) cannot unlink $name"
+
+ #
+ # ${fid1}-R-0 contains the old f1's second stripe in each COMP
+ #
+ name="$MOUNT/.lustre/lost+found/MDT0000/${fid1}-R-0"
+ echo "Check $name, it contains f1's second OST-object in each COMP"
+
+ $LFS getstripe -v $name || error "(8.1) cannot getstripe on $name"
+
+ pattern=$($LFS getstripe -L -I1 $name)
+ [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
+ error "(8.2.1) expect pattern flag hole, but got $pattern"
+
+ pattern=$($LFS getstripe -L -I2 $name)
+ [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
+ error "(8.2.2) expect pattern flag hole, but got $pattern"
+
+ stripes=$($LFS getstripe -c -I1 $name)
+ [ $stripes -eq 2 ] ||
+ error "(8.3.2) expect 2 stripes, but got $stripes"
+
+ stripes=$($LFS getstripe -c -I2 $name)
+ [ $stripes -eq 2 ] ||
+ error "(8.3.2) expect 2 stripes, but got $stripes"
+
+ e_start=$($LFS getstripe -I1 $name |
+ awk '/lcme_extent.e_start:/ { print $2 }')
+ [ $e_start -eq 0 ] ||
+ error "(8.4.1) expect the COMP1 start at 0, got $e_start"
+
+ e_end=$($LFS getstripe -I1 $name |
+ awk '/lcme_extent.e_end:/ { print $2 }')
+ [ $e_end -eq 2097152 ] ||
+ error "(8.4.2) expect the COMP1 end at 2097152, got $e_end"
+
+ e_start=$($LFS getstripe -I2 $name |
+ awk '/lcme_extent.e_start:/ { print $2 }')
+ [ $e_start -eq 2097152 ] ||
+ error "(8.5.1) expect the COMP2 start at 2097152, got $e_start"
+
+ e_end=$($LFS getstripe -I2 $name |
+ awk '/lcme_extent.e_end:/ { print $2 }')
+ [ "$e_end" = "EOF" ] ||
+ error "(8.5.2) expect the COMP2 end at (EOF), got $e_end"
+
+ size=$(stat $name | awk '/Size:/ { print $2 }')
+ [ $size -eq $((4096 * $bcount)) ] ||
+ error "(8.6) expect the size $((4096 * $bcount)), but got $size"
+
+ cat $name > /dev/null && error "(8.7) normal read $name should fail"
+
+ local failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
+ bs=4096 2>&1 | grep "Input/output error" | wc -l)
+
+ # The first stripe in each COMP was lost
+ [ $failures -eq 512 ] ||
+ error "(8.8) expect 512 IO failures, but get $failures"
+
+ size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
+ [ $size -eq $((4096 * $bcount)) ] ||
+ error "(8.9) expect the size $((4096 * $bcount)), but got $size"
+
+ dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 &&
+ error "(8.10) write to the LOV EA hole should fail"
+
+ dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 seek=300 ||
+ error "(8.11) write to normal stripe should NOT fail"
+
+ echo "foo" >> $name && error "(8.12) append write $name should fail"
+
+ chown $RUNAS_ID:$RUNAS_GID $name || error "(8.13) cannot chown on $name"
+
+ touch $name || error "(8.14) cannot touch $name"
+
+ rm -f $name || error "(8.15) cannot unlink $name"
+
+ #
+ # ${fid2}-R-0 contains the old f2's first stripe in each COMP
+ #
+ name="$MOUNT/.lustre/lost+found/MDT0000/${fid2}-R-0"
+ echo "Check $name, it contains f2's first stripe in each COMP"
+
+ $LFS getstripe -v $name || error "(9.1) cannot getstripe on $name"
+
+ pattern=$($LFS getstripe -L -I1 $name)
+ [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
+ error "(9.2.1) expect pattern flag hole, but got $pattern"
+
+ pattern=$($LFS getstripe -L -I2 $name)
+ [[ "$pattern" = "$PATTERN_WITH_HOLE" ]] ||
+ error "(9.2.2) expect pattern flag hole, but got $pattern"
+
+ stripes=$($LFS getstripe -c -I1 $name)
+ [ $stripes -eq 2 ] ||
+ error "(9.3.2) expect 2 stripes, but got $stripes"
+
+ stripes=$($LFS getstripe -c -I2 $name)
+ [ $stripes -eq 2 ] ||
+ error "(9.3.2) expect 2 stripes, but got $stripes"
+
+ e_start=$($LFS getstripe -I1 $name |
+ awk '/lcme_extent.e_start:/ { print $2 }')
+ [ $e_start -eq 0 ] ||
+ error "(9.4.1) expect the COMP1 start at 0, got $e_start"
+
+ e_end=$($LFS getstripe -I1 $name |
+ awk '/lcme_extent.e_end:/ { print $2 }')
+ [ $e_end -eq 2097152 ] ||
+ error "(9.4.2) expect the COMP1 end at 2097152, got $e_end"
+
+ e_start=$($LFS getstripe -I2 $name |
+ awk '/lcme_extent.e_start:/ { print $2 }')
+ [ $e_start -eq 2097152 ] ||
+ error "(9.5.1) expect the COMP2 start at 2097152, got $e_start"
+
+ e_end=$($LFS getstripe -I2 $name |
+ awk '/lcme_extent.e_end:/ { print $2 }')
+ [ "$e_end" = "EOF" ] ||
+ error "(9.5.2) expect the COMP2 end at (EOF), got $e_end"
+
+ size=$(stat $name | awk '/Size:/ { print $2 }')
+ # The second stripe in COMP was lost, so we do not know there
+ # have ever been some data before. 'stat' will regard it as
+ # no data on the lost stripe.
+ bcount=$((256 * 3))
+ [ $size -eq $((4096 * $bcount)) ] ||
+ error "(9.6) expect size $((4096 * $bcount)), but got $size"
+
+ cat $name > /dev/null &&
+ error "(9.7) normal read $name should fail"
+
+ failures=$(dd if=$name of=$DIR/$tdir/dump conv=sync,noerror \
+ bs=4096 2>&1 | grep "Input/output error" | wc -l)
+ [ $failures -eq 512 ] ||
+ error "(9.8) expect 256 IO failures, but get $failures"
+
+ size=$(stat $DIR/$tdir/dump | awk '/Size:/ { print $2 }')
+ # The second stripe in COMP was lost, so we do not know there
+ # have ever been some data before. Since 'dd' skip failure,
+ # it will regard the lost stripe contains data.
+ bcount=$((256 * 4))
+ [ $size -eq $((4096 * $bcount)) ] ||
+ error "(9.9) expect the size $((4096 * $bcount)), but got $size"
+
+ dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 \
+ seek=300 && error "(9.10) write to the LOV EA hole should fail"
+
+ dd if=/dev/zero of=$name conv=sync,notrunc bs=4096 count=1 ||
+ error "(9.11) write to normal stripe should NOT fail"
+
+ echo "foo" >> $name &&
+ error "(9.12) append write $name should fail"
+
+ chown $RUNAS_ID:$RUNAS_GID $name ||
+ error "(9.13) cannot chown on $name"
+
+ touch $name || error "(9.14) cannot touch $name"
+
+ rm -f $name || error "(7.15) cannot unlink $name"
+}
+run_test 20b "Handle the orphan with dummy LOV EA slot properly - PFL case"