Whamcloud - gitweb
LU-13073 osp: don't block waiting for new objects 74/40274/42
authorAlex Zhuravlev <bzzz@whamcloud.com>
Fri, 16 Oct 2020 16:09:04 +0000 (19:09 +0300)
committerOleg Drokin <green@whamcloud.com>
Wed, 10 Mar 2021 08:03:52 +0000 (08:03 +0000)
If an OST is down, a few threads trying to get already-precreated
objects may get stuck. Even worse, all QoS-based allocations are
then serialized by a single semaphore, even those that would not
try to allocate on the failed OST.

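The patch introduces a flag in the allocation hint (dah_can_block)
which is passed to OSP. The QoS code then first tries to allocate
objects in a non-blocking manner, and only retries with blocking
allowed if not enough objects could be allocated that way.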

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I38e66d7569aefecf800dbc32f1049ac87853439e
Reviewed-on: https://review.whamcloud.com/40274
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Yingjin Qian <qian@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/dt_object.h
lustre/lod/lod_internal.h
lustre/lod/lod_qos.c
lustre/osp/osp_internal.h
lustre/osp/osp_object.c
lustre/osp/osp_precreate.c
lustre/tests/sanity.sh

index 5cc8b26..5d3878a 100644 (file)
@@ -391,6 +391,7 @@ struct dt_allocation_hint {
        int                     dah_eadata_len;
        __u32                   dah_mode;
        int                     dah_append_stripes;
+       bool                    dah_can_block;
        char                    *dah_append_pool;
 };
 
index 68d890d..3ab3e3f 100644 (file)
@@ -393,6 +393,7 @@ struct lod_thread_info {
        /* object allocation avoid guide info */
        struct lod_avoid_guide          lti_avoid;
        union lmv_mds_md                lti_lmv;
+       struct dt_allocation_hint       lti_ah;
 };
 
 extern const struct lu_device_operations lod_lu_ops;
index f4a7783..40376a0 100644 (file)
@@ -370,8 +370,10 @@ static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd,
 static struct dt_object *lod_qos_declare_object_on(const struct lu_env *env,
                                                   struct lod_device *d,
                                                   __u32 ost_idx,
+                                                  bool can_block,
                                                   struct thandle *th)
 {
+       struct dt_allocation_hint *ah = &lod_env_info(env)->lti_ah;
        struct lod_tgt_desc *ost;
        struct lu_object *o, *n;
        struct lu_device *nd;
@@ -405,7 +407,8 @@ static struct dt_object *lod_qos_declare_object_on(const struct lu_env *env,
 
        dt = container_of(n, struct dt_object, do_lu);
 
-       rc = lod_sub_declare_create(env, dt, NULL, NULL, NULL, th);
+       ah->dah_can_block = can_block;
+       rc = lod_sub_declare_create(env, dt, NULL, ah, NULL, th);
        if (rc < 0) {
                CDEBUG(D_OTHER, "can't declare creation on #%u: %d\n",
                       ost_idx, rc);
@@ -683,7 +686,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
                        RETURN(rc);
        }
 
-       o = lod_qos_declare_object_on(env, lod, ost_idx, th);
+       o = lod_qos_declare_object_on(env, lod, ost_idx, true, th);
        if (IS_ERR(o)) {
                CDEBUG(D_OTHER, "can't declare new object on #%u: %d\n",
                       ost_idx, (int) PTR_ERR(o));
@@ -1162,7 +1165,7 @@ static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
                if (rc < 0) /* this OSP doesn't feel well */
                        break;
 
-               o = lod_qos_declare_object_on(env, m, ost_idx, th);
+               o = lod_qos_declare_object_on(env, m, ost_idx, true, th);
                if (IS_ERR(o)) {
                        rc = PTR_ERR(o);
                        CDEBUG(D_OTHER,
@@ -1321,7 +1324,7 @@ repeat_find:
                if (i && !tgt->ltd_statfs.os_fprecreated && !speed)
                        continue;
 
-               o = lod_qos_declare_object_on(env, m, ost_idx, th);
+               o = lod_qos_declare_object_on(env, m, ost_idx, true, th);
                if (IS_ERR(o)) {
                        CDEBUG(D_OTHER, "can't declare new object on #%u: %d\n",
                               ost_idx, (int) PTR_ERR(o));
@@ -1423,6 +1426,7 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
        __u32 nfound, good_osts, stripe_count, stripe_count_min;
        bool overstriped = false;
        int stripes_per_ost = 1;
+       bool slow = false;
        int rc = 0;
        ENTRY;
 
@@ -1528,6 +1532,7 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                 */
                for (i = 0; i < osts->op_count; i++) {
                        __u32 idx = osts->op_array[i];
+                       struct lod_tgt_desc *ost = OST_TGT(lod, idx);
 
                        if (lod_should_avoid_ost(lo, lag, idx))
                                continue;
@@ -1563,7 +1568,7 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                                        continue;
                        }
 
-                       o = lod_qos_declare_object_on(env, lod, idx, th);
+                       o = lod_qos_declare_object_on(env, lod, idx, slow, th);
                        if (IS_ERR(o)) {
                                QOS_DEBUG("can't declare object on #%u: %d\n",
                                          idx, (int) PTR_ERR(o));
@@ -1580,6 +1585,13 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                        break;
                }
 
+               if (rc && !slow && nfound < stripe_count) {
+                       /* couldn't allocate using precreated objects
+                        * so try to wait for new precreations */
+                       slow = true;
+                       rc = 0;
+               }
+
                if (rc) {
                        /* no OST found on this iteration, give up */
                        break;
index 7a4418e..6d66ceb 100644 (file)
@@ -850,7 +850,8 @@ extern const struct dt_index_operations osp_md_index_ops;
 
 /* osp_precreate.c */
 int osp_init_precreate(struct osp_device *d);
-int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d);
+int osp_precreate_reserve(const struct lu_env *env,
+                         struct osp_device *d, bool can_block);
 __u64 osp_precreate_get_id(struct osp_device *d);
 int osp_precreate_get_fid(const struct lu_env *env, struct osp_device *d,
                          struct lu_fid *fid);
index 494cc4c..77bd82f 100644 (file)
@@ -1457,7 +1457,7 @@ static int osp_declare_create(const struct lu_env *env, struct dt_object *dt,
         * in declaration we need to reserve object so that we don't block
         * awaiting precreation RPC to complete
         */
-       rc = osp_precreate_reserve(env, d);
+       rc = osp_precreate_reserve(env, d, !hint || hint->dah_can_block);
        /*
         * we also need to declare update to local "last used id" file for
         * recovery if object isn't used for a reason, we need to release
index 6a0a435..c25bbe9 100644 (file)
@@ -1412,7 +1412,8 @@ static int osp_precreate_ready_condition(const struct lu_env *env,
  * \retval             -EAGAIN try later, slow precreation in progress
  * \retval             -EIO when no access to OST
  */
-int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
+int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d,
+                         bool can_block)
 {
        time64_t expire = ktime_get_seconds() + obd_timeout;
        int precreated, rc, synced = 0;
@@ -1500,6 +1501,12 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
                        break;
                }
 
+               if (!can_block) {
+                       LASSERT(d->opd_pre);
+                       rc = -ENOBUFS;
+                       break;
+               }
+
                if (wait_event_idle_timeout(
                            d->opd_pre_user_waitq,
                            osp_precreate_ready_condition(env, d),
index f23237a..d49d244 100755 (executable)
@@ -1908,6 +1908,57 @@ test_27o() {
 }
 run_test 27o "create file with all full OSTs (should error)"
 
+function create_and_checktime() {
+       local fname=$1
+       local loops=$2
+       local i
+
+       for ((i=0; i < $loops; i++)); do
+               local start=$SECONDS
+               multiop $fname-$i Oc
+               ((SECONDS-start < TIMEOUT)) ||
+                       error "creation took " $((SECONDS-$start)) && return 1
+       done
+}
+
+# LU-13073: with QoS allocation forced and ost1 stopped, run seven concurrent
+# create_and_checktime jobs (loop count 100 each) in directories striped over
+# OSTCOUNT-1 OSTs; create_and_checktime reports an error if a create takes
+# TIMEOUT seconds or longer.
+test_27oo() {
+       local mdts=$(comma_list $(mdts_nodes))
+
+       [ $MDS1_VERSION -lt $(version_code 2.13.57) ] &&
+               skip "Need MDS version at least 2.13.57"
+
+       local f0=$DIR/${tfile}-0
+       local f1=$DIR/${tfile}-1
+
+       wait_delete_completed
+
+       # refill precreated objects by creating a file on OST index 0
+       $LFS setstripe -i0 -c1 $f0
+
+       # NOTE(review): "saved" is not declared local and is read from mds1
+       # only, but restored on every node in $mdts - confirm this is intended
+       saved=$(do_facet mds1 $LCTL get_param -n lov.*0000*.qos_threshold_rr)
+       # force QoS allocation policy
+       do_nodes $mdts $LCTL set_param lov.*.qos_threshold_rr=0%
+       # restore the saved threshold when the test exits
+       stack_trap "do_nodes $mdts $LCTL set_param \
+               lov.*.qos_threshold_rr=$saved" EXIT
+       # presumably waits for cached OST state to age out so the new
+       # threshold is in effect - TODO confirm against sleep_maxage
+       sleep_maxage
+
+       # one OST is unavailable, but it still has a few objects precreated
+       stop ost1
+       # on exit: bring ost1 back and remove the test directories
+       # NOTE(review): $f1 is never created in this test while $f0 is not
+       # removed here - confirm whether $f0 was meant
+       stack_trap "start ost1 $(ostdevname 1) $OST_MOUNT_OPTS; \
+               rm -rf $f1 $DIR/$tdir*" EXIT
+
+       # NOTE(review): loop variable "i" is not declared local
+       # seven directories, each striped over all OSTs but one
+       for ((i=0; i < 7; i++)); do
+               mkdir $DIR/$tdir$i || error "can't create dir"
+               $LFS setstripe -c$((OSTCOUNT-1)) $DIR/$tdir$i ||
+                       error "can't set striping"
+       done
+       # one background creator per directory, all running concurrently
+       for ((i=0; i < 7; i++)); do
+               create_and_checktime $DIR/$tdir$i/$tfile 100 &
+       done
+       # wait for all background creators; their exit statuses are not
+       # inspected here
+       wait
+}
+run_test 27oo "don't let few threads to reserve too many objects"
+
 test_27p() {
        [[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs"
        [ $PARALLEL == "yes" ] && skip "skip parallel run"