From 03b988a9a30ead5b360caef88ef130284869c27c Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Wed, 10 Oct 2012 12:32:38 +0400 Subject: [PATCH] LU-2093 lod: fall back to RR allocation when QoS fails lod_alloc_qos() checks whether there are enough OSPs to satisfy the request by checking OSP state with dt_statfs(), then it tries to reserve objects on some of them. During the reservation the state of an OSP can change (due to a broken connection, for example), so the QoS code might find fewer ready OSPs than required. This is a valid situation and LOD should fall back to RR allocation. sanity/116b added to verify this: dt_statfs() still reports that the OSPs are good, but no actual object can be created on the OSP with index 1. Signed-off-by: Alex Zhuravlev Change-Id: Iae6916f998070960eb47c71f2bc1e48adb2ac080 Reviewed-on: http://review.whamcloud.com/4241 Tested-by: Hudson Tested-by: Maloo Tested-by: Oleg Drokin Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/lod/lod_qos.c | 43 +++++++++++++++++++++++++++++++------------ lustre/osp/osp_object.c | 6 ++++++ lustre/tests/sanity.sh | 18 ++++++++++++++++-- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 28e31e4..2a70c09 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -699,6 +699,10 @@ static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo, if (rc) GOTO(out, rc); + rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr); + if (rc) + GOTO(out, rc); + if (--lqr->lqr_start_count <= 0) { lqr->lqr_start_idx = cfs_rand() % osts->op_count; lqr->lqr_start_count = @@ -1030,6 +1034,10 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo, if (rc) GOTO(out, rc); + rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr); + if (rc) + GOTO(out, rc); + good_osts = 0; /* Find all the OSTs that are valid stripe candidates */ for (i = 0; i < osts->op_count; i++) { @@ -1140,27 +1148,42 @@ static int lod_alloc_qos(const struct lu_env *env, 
struct lod_object *lo, */ if (lod_qos_is_ost_used(env, idx, nfound)) continue; + lod_qos_ost_in_use(env, nfound, idx); o = lod_qos_declare_object_on(env, m, idx, th); if (IS_ERR(o)) { - CERROR("can't declare new object on #%u: %d\n", - idx, (int) PTR_ERR(o)); + QOS_DEBUG("can't declare object on #%u: %d\n", + idx, (int) PTR_ERR(o)); continue; } - lod_qos_ost_in_use(env, nfound, idx); lo->ldo_stripe[nfound++] = o; lod_qos_used(m, osts, idx, &total_weight); rc = 0; break; } + } - /* should never satisfy below condition */ - if (rc) { - CERROR("Didn't find any OSTs?\n"); - break; + if (unlikely(nfound != stripe_cnt)) { + /* + * when the decision to use weighted algorithm was made + * we had enough appropriate OSPs, but this state can + * change anytime (no space on OST, broken connection, etc) + * so it's possible OSP won't be able to provide us with + * an object due to just changed state + */ + LCONSOLE_INFO("wanted %d, found %d\n", stripe_cnt, nfound); + for (i = 0; i < nfound; i++) { + LASSERT(lo->ldo_stripe[i]); + lu_object_put(env, &lo->ldo_stripe[i]->do_lu); + lo->ldo_stripe[i] = NULL; } + + /* makes sense to rebalance next time */ + m->lod_qos.lq_dirty = 1; + m->lod_qos.lq_same_space = 0; + + rc = -EAGAIN; } - LASSERT(nfound == stripe_cnt); out: cfs_up_write(&m->lod_qos.lq_rw_sem); @@ -1376,10 +1399,6 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, GOTO(out, rc = -ENOMEM); lo->ldo_stripes_allocated = lo->ldo_stripenr; - rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr); - if (rc) - GOTO(out, rc); - lod_getref(d); /* XXX: support for non-0 files w/o objects */ if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) { diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c index 61a7588..d2bcde5 100644 --- a/lustre/osp/osp_object.c +++ b/lustre/osp/osp_object.c @@ -171,6 +171,12 @@ static int osp_declare_object_create(const struct lu_env *env, ENTRY; + /* should happen to non-0 OSP only so that at least one object + * has 
been already declared in the scenario and LOD should + * cleanup that */ + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_CREATE_FAIL) && d->opd_index == 1) + RETURN(-ENOSPC); + LASSERT(d->opd_last_used_file); fid = lu_object_fid(&dt->do_lu); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d9c0afa..27970e0 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -5963,7 +5963,7 @@ free_min_max () { echo Max free space: OST $MAXI: $MAXV } -test_116() { +test_116a() { # was previously test_116() [ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2 OSTs" && return echo -n "Free space priority " @@ -6046,7 +6046,21 @@ test_116() { rm -rf $DIR/$tdir } -run_test 116 "stripe QOS: free space balance ===================" +run_test 116a "stripe QOS: free space balance ===================" + +test_116b() { # LU-2093 +#define OBD_FAIL_MDS_OSC_CREATE_FAIL 0x147 + local old_rr + old_rr=$(do_facet $SINGLEMDS lctl get_param -n lov.*mdtlov*.qos_threshold_rr) + do_facet $SINGLEMDS lctl set_param lov.*mdtlov*.qos_threshold_rr 0 + mkdir -p $DIR/$tdir + do_facet $SINGLEMDS lctl set_param fail_loc=0x147 + createmany -o $DIR/$tdir/f- 20 || error "can't create" + do_facet $SINGLEMDS lctl set_param fail_loc=0 + rm -rf $DIR/$tdir + do_facet $SINGLEMDS lctl set_param lov.*mdtlov*.qos_threshold_rr $old_rr +} +run_test 116b "QoS shouldn't LBUG if not enough OSTs found on the 2nd pass" test_117() # bug 10891 { -- 1.8.3.1