Whamcloud - gitweb
LU-977 lod: Patch to protect lqr_start_idx 36/14636/6
author Rahul Deshmukh <rahul.deshmukh@seagate.com>
Mon, 18 May 2015 05:21:53 +0000 (10:51 +0530)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 8 Jul 2015 17:19:49 +0000 (17:19 +0000)
Protect lqr_start_idx to avoid an imbalance when allocating objects
on OSTs with the round-robin algorithm

Signed-off-by: Rahul Deshmukh <rahul.deshmukh@seagate.com>
Change-Id: I689ba1c4e8c9224cc67badba7fd6cf45e64dd7b6
Seagate-bug-id: MRP-2471
Reviewed-on: http://review.whamcloud.com/14636
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_pool.c
lustre/lod/lod_qos.c

index 6f1bff1..cd9f3a1 100644 (file)
 #define LOV_OFFSET_DEFAULT             ((__u16)-1)
 
 struct lod_qos_rr {
+       spinlock_t               lqr_alloc;     /* guards start_idx, offset_idx, start_count */
        __u32                    lqr_start_idx; /* start index of new inode */
-       __u32                    lqr_offset_idx; /* aliasing for start_idx */
-       int                      lqr_start_count; /* reseed counter */
+       __u32                    lqr_offset_idx;/* aliasing for start_idx */
+       int                      lqr_start_count;/* reseed counter */
        struct ost_pool          lqr_pool;      /* round-robin optimized list */
        unsigned long            lqr_dirty:1;   /* recalc round-robin list */
 };
@@ -474,6 +475,7 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                        struct thandle *th);
 int qos_add_tgt(struct lod_device*, struct lod_tgt_desc *);
 int qos_del_tgt(struct lod_device *, struct lod_tgt_desc *);
+void lod_qos_rr_init(struct lod_qos_rr *lqr);
 
 /* lproc_lod.c */
 int lod_procfs_init(struct lod_device *lod);
index 63b5018..0ae2f5d 100644 (file)
@@ -1314,6 +1314,7 @@ int lod_pools_init(struct lod_device *lod, struct lustre_cfg *lcfg)
        rc = lod_ost_pool_init(&lod->lod_pool_info, 0);
        if (rc)
                GOTO(out_hash, rc);
+       lod_qos_rr_init(&lod->lod_qos.lq_rr);
        rc = lod_ost_pool_init(&lod->lod_qos.lq_rr.lqr_pool, 0);
        if (rc)
                GOTO(out_pool_info, rc);
index 5caef4c..da5389c 100644 (file)
@@ -661,7 +661,7 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
        if (rc)
                GOTO(out_err, rc);
 
-       memset(&new_pool->pool_rr, 0, sizeof(new_pool->pool_rr));
+       lod_qos_rr_init(&new_pool->pool_rr);
        rc = lod_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
        if (rc)
                GOTO(out_free_pool_obds, rc);
index d447d31..1cd3452 100644 (file)
@@ -525,6 +525,13 @@ static int lod_qos_used(struct lod_device *lod, struct ost_pool *osts,
        RETURN(0);
 }
 
+void lod_qos_rr_init(struct lod_qos_rr *lqr) /* initialise the lock and the dirty flag only */
+{      /* NOTE(review): no other field is cleared here; lod_pool_new() used to memset() pool_rr - confirm callers pass zeroed memory */
+       spin_lock_init(&lqr->lqr_alloc); /* lock held by lod_alloc_rr() while it updates the rr indices */
+       lqr->lqr_dirty = 1; /* bit documented in the struct as "recalc round-robin list" */
+}
+
+
 #define LOV_QOS_EMPTY ((__u32)-1)
 
 /**
@@ -825,6 +832,74 @@ static int lod_qos_is_ost_used(const struct lu_env *env, int ost, __u32 stripes)
        return 0;
 }
 
+static int lod_check_and_reserve_ost(const struct lu_env *env,
+                                    struct lod_device *m,
+                                    struct obd_statfs *sfs, __u32 ost_idx, /* sfs: statfs buffer filled here; ost_idx: candidate OST */
+                                    __u32 speed, __u32 *s_idx, /* speed: retry pass, selects which checks apply; s_idx: in/out next stripe slot */
+                                    struct dt_object **stripe, /* stripe[*s_idx] receives the declared object */
+                                    struct thandle *th)
+{      /* Check one OST and, if acceptable, declare an object on it. Returns 0 on success AND on a skip; only an advanced *s_idx tells them apart. */
+       struct dt_object   *o;
+       __u32 stripe_idx = *s_idx;
+       int rc;
+
+       rc = lod_statfs_and_check(env, m, ost_idx, sfs);
+       if (rc) {
+               /* statfs/check failed: skip this OSP and return its error */
+               goto out_return;
+       }
+
+       /*
+        * skip devices that lod_qos_dev_is_full() reports full (rc stays 0)
+        */
+       if (lod_qos_dev_is_full(sfs)) {
+               QOS_DEBUG("#%d is full\n", ost_idx);
+               goto out_return;
+       }
+
+       /*
+        * On the first pass only (speed == 0), skip OSPs whose statfs
+        * reports no precreated objects (os_fprecreated == 0)
+        */
+       if (sfs->os_fprecreated == 0 && speed == 0) {
+               QOS_DEBUG("#%d: precreation is empty\n", ost_idx);
+               goto out_return;
+       }
+
+       /*
+        * try another OSP if this one is degraded, unless speed >= 2
+        */
+       if (sfs->os_state & OS_STATE_DEGRADED && speed < 2) {
+               QOS_DEBUG("#%d: degraded\n", ost_idx);
+               goto out_return;
+       }
+
+       /*
+        * do not put >1 objects on a single OST (checked only if speed != 0)
+        */
+       if (speed && lod_qos_is_ost_used(env, ost_idx, stripe_idx))
+               goto out_return;
+
+       o = lod_qos_declare_object_on(env, m, ost_idx, th);
+       if (IS_ERR(o)) {
+               CDEBUG(D_OTHER, "can't declare new object on #%u: %d\n",
+                      ost_idx, (int) PTR_ERR(o));
+               rc = PTR_ERR(o); /* declaration failed: hand the error back to the caller */
+               goto out_return;
+       }
+
+       /*
+        * Object declared (reserved): store it and advance the caller's index
+        */
+       lod_qos_ost_in_use(env, stripe_idx, ost_idx);
+       stripe[stripe_idx] = o;
+       stripe_idx++;
+       *s_idx = stripe_idx; /* written back only on success */
+
+out_return:
+       return rc;
+}
+
 /**
  * Allocate a striping using round-robin algorithm.
  *
@@ -858,7 +933,6 @@ static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo,
        struct pool_desc  *pool = NULL;
        struct ost_pool   *osts;
        struct lod_qos_rr *lqr;
-       struct dt_object  *o;
        unsigned int       i, array_idx;
        int                rc;
        __u32              ost_start_idx_temp;
@@ -889,6 +963,8 @@ static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo,
        if (rc)
                GOTO(out, rc);
 
+       down_read(&m->lod_qos.lq_rw_sem);
+       spin_lock(&lqr->lqr_alloc);
        if (--lqr->lqr_start_count <= 0) {
                lqr->lqr_start_idx = cfs_rand() % osts->op_count;
                lqr->lqr_start_count =
@@ -903,22 +979,19 @@ static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo,
                if (stripe_cnt > 1 && (osts->op_count % stripe_cnt) != 1)
                        ++lqr->lqr_offset_idx;
        }
-       down_read(&m->lod_qos.lq_rw_sem);
        ost_start_idx_temp = lqr->lqr_start_idx;
 
 repeat_find:
-       array_idx = (lqr->lqr_start_idx + lqr->lqr_offset_idx) %
-                       osts->op_count;
 
        QOS_DEBUG("pool '%s' want %d startidx %d startcnt %d offset %d "
-                 "active %d count %d arrayidx %d\n",
+                 "active %d count %d\n",
                  lo->ldo_pool ? lo->ldo_pool : "",
                  stripe_cnt, lqr->lqr_start_idx, lqr->lqr_start_count,
-                 lqr->lqr_offset_idx, osts->op_count, osts->op_count,
-                 array_idx);
+                 lqr->lqr_offset_idx, osts->op_count, osts->op_count);
 
-       for (i = 0; i < osts->op_count && stripe_idx < lo->ldo_stripenr;
-            i++, array_idx = (array_idx + 1) % osts->op_count) {
+       for (i = 0; i < osts->op_count && stripe_idx < lo->ldo_stripenr; i++) {
+               array_idx = (lqr->lqr_start_idx + lqr->lqr_offset_idx) %
+                               osts->op_count;
                ++lqr->lqr_start_idx;
                ost_idx = lqr->lqr_pool.op_array[array_idx];
 
@@ -935,58 +1008,10 @@ repeat_find:
                if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && ost_idx == 0)
                        continue;
 
-               rc = lod_statfs_and_check(env, m, ost_idx, sfs);
-               if (rc) {
-                       /* this OSP doesn't feel well */
-                       continue;
-               }
-
-               /*
-                * skip full devices
-                */
-               if (lod_qos_dev_is_full(sfs)) {
-                       QOS_DEBUG("#%d is full\n", ost_idx);
-                       continue;
-               }
-
-               /*
-                * We expect number of precreated objects in f_ffree at
-                * the first iteration, skip OSPs with no objects ready
-                */
-               if (sfs->os_fprecreated == 0 && speed == 0) {
-                       QOS_DEBUG("#%d: precreation is empty\n", ost_idx);
-                       continue;
-               }
-
-               /*
-                * try to use another OSP if this one is degraded
-                */
-               if (sfs->os_state & OS_STATE_DEGRADED && speed < 2) {
-                       QOS_DEBUG("#%d: degraded\n", ost_idx);
-                       continue;
-               }
-
-               /*
-                * do not put >1 objects on a single OST
-                */
-               if (speed && lod_qos_is_ost_used(env, ost_idx, stripe_idx))
-                       continue;
-
-               o = lod_qos_declare_object_on(env, m, ost_idx, th);
-               if (IS_ERR(o)) {
-                       CDEBUG(D_OTHER, "can't declare new object on #%u: %d\n",
-                              ost_idx, (int) PTR_ERR(o));
-                       rc = PTR_ERR(o);
-                       continue;
-               }
-
-               /*
-                * We've successfully declared (reserved) an object
-                */
-               lod_qos_ost_in_use(env, stripe_idx, ost_idx);
-               stripe[stripe_idx] = o;
-               stripe_idx++;
-
+               spin_unlock(&lqr->lqr_alloc);
+               rc = lod_check_and_reserve_ost(env, m, sfs, ost_idx, speed,
+                                              &stripe_idx, stripe, th);
+               spin_lock(&lqr->lqr_alloc);
        }
        if ((speed < 2) && (stripe_idx < stripe_cnt_min)) {
                /* Try again, allowing slower OSCs */
@@ -995,6 +1020,7 @@ repeat_find:
                goto repeat_find;
        }
 
+       spin_unlock(&lqr->lqr_alloc);
        up_read(&m->lod_qos.lq_rw_sem);
 
        if (stripe_idx) {