Whamcloud - gitweb
LU-2093 lod: fall back to RR allocation when QoS fails
authorAlex Zhuravlev <alexey.zhuravlev@intel.com>
Wed, 10 Oct 2012 08:32:38 +0000 (12:32 +0400)
committerOleg Drokin <green@whamcloud.com>
Sat, 13 Oct 2012 23:23:17 +0000 (19:23 -0400)
lod_alloc_qos() first checks whether there are enough OSPs to satisfy
the request by querying OSP state with dt_statfs(), then it tries to
reserve objects on some of them. During the reservation the state of
an OSP can change (due to a broken connection, for example), so the
QoS code might find fewer ready OSPs than required. This is a valid
situation, and LOD should fall back to RR allocation.

sanity/116b is added to verify this: dt_statfs() still reports that
the OSPs are good, but no object can actually be created on the OSP
with index 1.

Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Change-Id: Iae6916f998070960eb47c71f2bc1e48adb2ac080
Reviewed-on: http://review.whamcloud.com/4241
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Tested-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lod/lod_qos.c
lustre/osp/osp_object.c
lustre/tests/sanity.sh

index 28e31e4..2a70c09 100644 (file)
@@ -699,6 +699,10 @@ static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo,
        if (rc)
                GOTO(out, rc);
 
+       rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
+       if (rc)
+               GOTO(out, rc);
+
        if (--lqr->lqr_start_count <= 0) {
                lqr->lqr_start_idx = cfs_rand() % osts->op_count;
                lqr->lqr_start_count =
@@ -1030,6 +1034,10 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
        if (rc)
                GOTO(out, rc);
 
+       rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
+       if (rc)
+               GOTO(out, rc);
+
        good_osts = 0;
        /* Find all the OSTs that are valid stripe candidates */
        for (i = 0; i < osts->op_count; i++) {
@@ -1140,27 +1148,42 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                         */
                        if (lod_qos_is_ost_used(env, idx, nfound))
                                continue;
+                       lod_qos_ost_in_use(env, nfound, idx);
 
                        o = lod_qos_declare_object_on(env, m, idx, th);
                        if (IS_ERR(o)) {
-                               CERROR("can't declare new object on #%u: %d\n",
-                                      idx, (int) PTR_ERR(o));
+                               QOS_DEBUG("can't declare object on #%u: %d\n",
+                                         idx, (int) PTR_ERR(o));
                                continue;
                        }
-                       lod_qos_ost_in_use(env, nfound, idx);
                        lo->ldo_stripe[nfound++] = o;
                        lod_qos_used(m, osts, idx, &total_weight);
                        rc = 0;
                        break;
                }
+       }
 
-               /* should never satisfy below condition */
-               if (rc) {
-                       CERROR("Didn't find any OSTs?\n");
-                       break;
+       if (unlikely(nfound != stripe_cnt)) {
+               /*
+                * when the decision to use weighted algorithm was made
+                * we had enough appropriate OSPs, but this state can
+                * change anytime (no space on OST, broken connection, etc)
+                * so it's possible OSP won't be able to provide us with
+                * an object due to just changed state
+                */
+               LCONSOLE_INFO("wanted %d, found %d\n", stripe_cnt, nfound);
+               for (i = 0; i < nfound; i++) {
+                       LASSERT(lo->ldo_stripe[i]);
+                       lu_object_put(env, &lo->ldo_stripe[i]->do_lu);
+                       lo->ldo_stripe[i] = NULL;
                }
+
+               /* makes sense to rebalance next time */
+               m->lod_qos.lq_dirty = 1;
+               m->lod_qos.lq_same_space = 0;
+
+               rc = -EAGAIN;
        }
-       LASSERT(nfound == stripe_cnt);
 
 out:
        cfs_up_write(&m->lod_qos.lq_rw_sem);
@@ -1376,10 +1399,6 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                        GOTO(out, rc = -ENOMEM);
                lo->ldo_stripes_allocated = lo->ldo_stripenr;
 
-               rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
-               if (rc)
-                       GOTO(out, rc);
-
                lod_getref(d);
                /* XXX: support for non-0 files w/o objects */
                if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) {
index 61a7588..d2bcde5 100644 (file)
@@ -171,6 +171,12 @@ static int osp_declare_object_create(const struct lu_env *env,
 
        ENTRY;
 
+       /* should happen to non-0 OSP only so that at least one object
+        * has been already declared in the scenario and LOD should
+        * cleanup that */
+       if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_CREATE_FAIL) && d->opd_index == 1)
+               RETURN(-ENOSPC);
+
        LASSERT(d->opd_last_used_file);
        fid = lu_object_fid(&dt->do_lu);
 
index d9c0afa..27970e0 100644 (file)
@@ -5963,7 +5963,7 @@ free_min_max () {
        echo Max free space: OST $MAXI: $MAXV
 }
 
-test_116() {
+test_116a() { # was previously test_116()
        [ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2 OSTs" && return
 
        echo -n "Free space priority "
@@ -6046,7 +6046,21 @@ test_116() {
 
        rm -rf $DIR/$tdir
 }
-run_test 116 "stripe QOS: free space balance ==================="
+run_test 116a "stripe QOS: free space balance ==================="
+
+test_116b() { # LU-2093
+#define OBD_FAIL_MDS_OSC_CREATE_FAIL     0x147
+       local old_rr
+       old_rr=$(do_facet $SINGLEMDS lctl get_param -n lov.*mdtlov*.qos_threshold_rr)
+       do_facet $SINGLEMDS lctl set_param lov.*mdtlov*.qos_threshold_rr 0
+       mkdir -p $DIR/$tdir
+       do_facet $SINGLEMDS lctl set_param fail_loc=0x147
+       createmany -o $DIR/$tdir/f- 20 || error "can't create"
+       do_facet $SINGLEMDS lctl set_param fail_loc=0
+       rm -rf $DIR/$tdir
+       do_facet $SINGLEMDS lctl set_param lov.*mdtlov*.qos_threshold_rr $old_rr
+}
+run_test 116b "QoS shouldn't LBUG if not enough OSTs found on the 2nd pass"
 
 test_117() # bug 10891
 {