sub_io->ci_lock_no_expand = io->ci_lock_no_expand;
sub_io->ci_ndelay = io->ci_ndelay;
sub_io->ci_layout_version = io->ci_layout_version;
+ sub_io->ci_tried_all_mirrors = io->ci_tried_all_mirrors;
result = cl_io_sub_init(sub->sub_env, sub_io, io->ci_type, sub_obj);
found = true;
break;
}
- }
-
+ } /* each component of the mirror */
if (found) {
index = (index + i) % comp->lo_mirror_count;
break;
}
- }
+ } /* each mirror */
+
if (i == comp->lo_mirror_count) {
CERROR(DFID": failed to find a component covering "
"I/O region at %llu\n",
* of this client has been partitioned. We should relinquish CPU for
* a while before trying again.
*/
- ++io->ci_ndelay_tried;
- if (io->ci_ndelay && io->ci_ndelay_tried >= comp->lo_mirror_count) {
+ if (io->ci_ndelay && io->ci_ndelay_tried > 0 &&
+ (io->ci_ndelay_tried % comp->lo_mirror_count == 0)) {
set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(cfs_time_seconds(1)); /* 10ms */
+ schedule_timeout(cfs_time_seconds(1) / 100); /* 10ms */
if (signal_pending(current))
RETURN(-EINTR);
- /* reset retry counter */
- io->ci_ndelay_tried = 1;
+ /**
+ * we'd set ci_tried_all_mirrors to turn off fast mirror
+ * switching for read after we've tried all mirrors several
+ * rounds.
+ */
+ io->ci_tried_all_mirrors = io->ci_ndelay_tried %
+ (comp->lo_mirror_count * 4) == 0;
}
+ ++io->ci_ndelay_tried;
CDEBUG(D_VFSTRACE, "use %sdelayed RPC state for this IO\n",
io->ci_ndelay ? "non-" : "");
case CIT_READ:
case CIT_WRITE: {
io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent);
+ io->ci_tried_all_mirrors = parent->ci_tried_all_mirrors;
if (cl_io_is_append(parent)) {
io->u.ci_wr.wr_append = 1;
} else {
}
EXPORT_SYMBOL(osc_io_commit_async);
+/**
+ * Tell whether an import is currently in a state that counts as unhealthy.
+ *
+ * \param[in] imp	import to inspect
+ *
+ * \retval true		the import is flagged invalid or deactivated, or its
+ *			state is neither LUSTRE_IMP_FULL nor LUSTRE_IMP_IDLE
+ * \retval false	otherwise
+ */
+static bool osc_import_not_healthy(struct obd_import *imp)
+{
+	/* either flag alone is enough, whatever the connection state is */
+	if (imp->imp_invalid || imp->imp_deactive)
+		return true;
+
+	return imp->imp_state != LUSTRE_IMP_FULL &&
+	       imp->imp_state != LUSTRE_IMP_IDLE;
+}
+
int osc_io_iter_init(const struct lu_env *env, const struct cl_io_slice *ios)
{
struct osc_object *osc = cl2osc(ios->cis_obj);
struct obd_import *imp = osc_cli(osc)->cl_import;
struct osc_io *oio = osc_env_io(env);
int rc = -EIO;
+ ENTRY;
spin_lock(&imp->imp_lock);
- if (likely(!imp->imp_invalid)) {
+ /**
+ * check whether this OSC device is available for non-delay read,
+ * fast switching mirror if we haven't tried all mirrors.
+ */
+ if (ios->cis_io->ci_type == CIT_READ && ios->cis_io->ci_ndelay &&
+ !ios->cis_io->ci_tried_all_mirrors && osc_import_not_healthy(imp)) {
+ rc = -EWOULDBLOCK;
+ } else if (likely(!imp->imp_invalid)) {
atomic_inc(&osc->oo_nr_ios);
oio->oi_is_active = 1;
rc = 0;
if (cfs_capable(CFS_CAP_SYS_RESOURCE))
oio->oi_cap_sys_resource = 1;
- return rc;
+ RETURN(rc);
}
EXPORT_SYMBOL(osc_io_iter_init);
verify_comp_attr_with_parent pool $tf $comp_id
}
+verify_flr_state()
+{
+ local tf=$1
+ local expected_state=$2
+
+ local state=$($LFS getstripe -v $tf | awk '/lcm_flags/{ print $2 }')
+ [ $expected_state = $state ] ||
+ error "expected: $expected_state, actual $state"
+}
+
# command line test cases
test_0a() {
local td=$DIR/$tdir
}
run_test 32 "data should be mirrored to newly created mirror"
-test_33() {
+test_33a() {
[[ $OSTCOUNT -lt 2 ]] && skip "need >= 2 OSTs" && return
rm -f $DIR/$tfile $DIR/$tfile-2
start_osts 2
}
-run_test 33 "read can choose available mirror to read"
+run_test 33a "read can choose available mirror to read"
+
+test_33b() {
+ [[ $OSTCOUNT -lt 2 ]] && skip "need >= 2 OSTs" && return
+
+ rm -f $DIR/$tfile
+
+ stack_trap "rm -f $DIR/$tfile" EXIT
+
+ # create a file with two mirrors on OST0000 and OST0001
+ $LFS setstripe -N -Eeof -o0 -N -Eeof -o1 $DIR/$tfile
+
+ # make sure that $tfile has two mirrors
+ [ $($LFS getstripe -N $DIR/$tfile) -eq 2 ] ||
+ { $LFS getstripe $DIR/$tfile; error "expected count 2"; }
+
+ # write 50M
+ dd if=/dev/urandom of=$DIR/$tfile bs=2M count=25 ||
+ error "write failed for $DIR/$tfile"
+ $LFS mirror resync $DIR/$tfile || error "resync failed for $DIR/$tfile"
+ verify_flr_state $DIR/$tfile "ro"
+ drop_client_cache
+
+ ls -l $DIR/$tfile
+
+ # read file - all OSTs are available
+ echo "reading file (data can be provided by any ost)... "
+ local t1=$SECONDS
+ time cat $DIR/$tfile > /dev/null || error "read all"
+ local t2=$SECONDS
+ ra=$((t2 - t1))
+
+ # read file again with ost1 {OST0000} failed
+ stop_osts 1
+ drop_client_cache
+ echo "reading file (data should be provided by ost2)..."
+ t1=$SECONDS
+ time cat $DIR/$tfile > /dev/null || error "read ost2"
+ t2=$SECONDS
+ r1=$((t2 - t1))
+
+ # remount ost1
+ start_osts 1
+
+ # read file again with ost2 {OST0001} failed
+ stop_osts 2
+ drop_client_cache
+
+ echo "reading file (data should be provided by ost1)..."
+ t1=$SECONDS
+ time cat $DIR/$tfile > /dev/null || error "read ost1"
+ t2=$SECONDS
+ r2=$((t2 - t1))
+
+ # remount ost2
+ start_osts 2
+
+ [ $((r1 * 100)) -gt $((ra * 105)) -a $r1 -gt $((ra + 2)) ] &&
+ error "read mirror too slow without ost1, from $ra to $r1"
+ [ $((r2 * 100)) -gt $((ra * 105)) -a $r2 -gt $((ra + 2)) ] &&
+ error "read mirror too slow without ost2, from $ra to $r2"
+
+ wait_osc_import_ready client ost2
+}
+run_test 33b "avoid reading from unhealthy mirror"
+
+test_33c() {
+ [[ $OSTCOUNT -lt 3 ]] && skip "need >= 3 OSTs" && return
+
+ rm -f $DIR/$tfile
+
+ stack_trap "rm -f $DIR/$tfile" EXIT
+
+ # create a file with two mirrors
+ # mirror1: {OST0000, OST0001}
+ # mirror2: {OST0001, OST0002}
+ $LFS setstripe -N -Eeof -c2 -o0,1 -N -Eeof -c2 -o1,2 $DIR/$tfile
+
+ # make sure that $tfile has two mirrors
+ [ $($LFS getstripe -N $DIR/$tfile) -eq 2 ] ||
+ { $LFS getstripe $DIR/$tfile; error "expected count 2"; }
+
+ # write 50M
+ dd if=/dev/urandom of=$DIR/$tfile bs=2M count=25 ||
+ error "write failed for $DIR/$tfile"
+ $LFS mirror resync $DIR/$tfile || error "resync failed for $DIR/$tfile"
+ verify_flr_state $DIR/$tfile "ro"
+ drop_client_cache
+
+ ls -l $DIR/$tfile
+
+ # read file - all OSTs are available
+ echo "reading file (data can be provided by any ost)... "
+ time cat $DIR/$tfile > /dev/null || error "read all"
+
+ # read file again with ost2 (OST0001) failed
+ stop_osts 2
+ drop_client_cache
+
+ echo "reading file (data should be provided by ost1 and ost3)..."
+ time cat $DIR/$tfile > /dev/null || error "read ost1 & ost3"
+
+ # remount ost2
+ start_osts 2
+
+ wait_osc_import_ready client ost2
+}
+run_test 33c "keep reading among unhealthy mirrors"
test_34a() {
[[ $OSTCOUNT -lt 4 ]] && skip "need >= 4 OSTs" && return
}
run_test 37 "mirror I/O API verification"
-verify_flr_state()
-{
- local tf=$1
- local expected_state=$2
-
- local state=$($LFS getstripe -v $tf | awk '/lcm_flags/{ print $2 }')
- [ $expected_state = $state ] ||
- error "expected: $expected_state, actual $state"
-}
-
test_38() {
local tf=$DIR/$tfile
local ref=$DIR/${tfile}-ref