Whamcloud - gitweb
LU-16623 lod: handle object allocation consistently 50/50250/13
author: Andreas Dilger <adilger@whamcloud.com>
Wed, 8 Mar 2023 23:40:21 +0000 (16:40 -0700)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 14 Jun 2023 21:39:57 +0000 (21:39 +0000)
Consistently handle the various OS_STATFS_* flags that indicate
an OST or MDT is full or otherwise marked ineligible for use.

Fix lod_statfs_check() so it skips MDTs with OS_STATFS_ENOINO
for allocating dir stripes instead of only checking OST targets.

In the LOD code, ltd_active=0 indicates that the device is not
usable for new object allocations for a variety of reasons. That
includes out of space or inodes, read-only, max_create_count=0,
or disconnected export, not *only* that the OSP is disconnected
from the OST as with imp_deactive.  Targets marked ltd_active=0
will not be counted in ld_active_tgt_count, so these OSTs will
not count toward stripe_count for stripe_count=-1 files.

Set flags = LOD_USES_DEFAULT_STRIPE in lod_qos_prep_create() for
stripe_count = -1 layouts and pass it to lod_stripe_count_min()
to avoid requiring *all* OSTs when free space is imbalanced or OSTs
are not available, and instead accept allocations on 3/4 of OSTs.
It looks like this functionality was missed when object allocations
transitioned from the LOV to LOD module.  Put the LOV_USES_* into
an enum and rename to LOD_USES_* for consistency with current code.

Apply the lod.*.max_stripe_count limits to PFL components as well
as plain file layouts in lod_comp_entry_stripe_count().

Rename ltd_connecting to ltd_discon, since there is no guarantee
that this target is actually *connecting*, only that it is currently
disconnected.  Use ltd_discon in places that checked ltd_active to
decide if the OSP was disconnected from the OST; a target shouldn't
be skipped there just because the OST is full or has creates disabled.

Fixes: 7b124fef76 ("LU-4277 lod: handle os_state as a flag, check READONLY")
Fixes: 5b147e47de ("LU-11115 lod: skip max_create_count=0 OST in QoS and RR algorithms")
Fixes: c7f2e70a27 ("LU-1303 lod: QoS allocation policy")
Fixes: c1d0a355a6 ("LU-12624 lod: alloc dir stripes by QoS")
Fixes: 3c9580931d ("LU-9162 lod: option to set max stripe count per filesystem")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Sergey Cheremencev <scherementsev@ddn.com>
Change-Id: Ifb9443fe6c80b4d7f82b442060db7ac8423ebbe5
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50250
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
lustre/include/lu_object.h
lustre/lod/lod_dev.c
lustre/lod/lod_internal.h
lustre/lod/lod_object.c
lustre/lod/lod_pool.c
lustre/lod/lod_qos.c
lustre/osp/osp_dev.c
lustre/tests/sanity-flr.sh
lustre/tests/sanity-pfl.sh

index fe03fd2..7075042 100644 (file)
@@ -1669,11 +1669,11 @@ struct lu_tgt_desc {
        struct lu_tgt_qos  ltd_qos; /* qos info per target */
        struct obd_statfs  ltd_statfs;
        time64_t           ltd_statfs_age;
-       unsigned long      ltd_active:1,/* is this target up for requests */
-                          ltd_activate:1,/* should target be activated */
+       unsigned long      ltd_active:1,/* is target available for requests */
+                          ltd_activate:1,/* should LOV target be connected */
                           ltd_reap:1,  /* should this target be deleted */
                           ltd_got_update_log:1, /* Already got update log */
-                          ltd_connecting:1; /* target is connecting */
+                          ltd_discon:1; /* LOD target disconnected from OST */
 };
 
 static inline __u64 tgt_statfs_bavail(struct lu_tgt_desc *tgt)
index 0cd2ea3..647dbcf 100644 (file)
@@ -1104,8 +1104,7 @@ static int lod_process_config(const struct lu_env *env,
                                rc = lod_sub_prep_llog(env, lod,
                                                       sub_tgt->ltd_tgt,
                                                       sub_tgt->ltd_index);
-                               if (rc == 0)
-                                       sub_tgt->ltd_active = 1;
+                               sub_tgt->ltd_active = !rc;
                        } else {
                                lod_sub_fini_llog(env, sub_tgt->ltd_tgt,
                                                  NULL);
@@ -1865,7 +1864,7 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev)
 
        lod_getref(&lod->lod_ost_descs);
        lod_foreach_ost(lod, tgt) {
-               if (!tgt->ltd_active)
+               if (tgt->ltd_discon)
                        continue;
                rc = dt_sync(env, tgt->ltd_tgt);
                if (rc) {
@@ -1885,7 +1884,7 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev)
 
        lod_getref(&lod->lod_mdt_descs);
        lod_foreach_mdt(lod, tgt) {
-               if (!tgt->ltd_active)
+               if (tgt->ltd_discon)
                        continue;
                rc = dt_sync(env, tgt->ltd_tgt);
                if (rc) {
@@ -2493,7 +2492,7 @@ static int lod_obd_set_info_async(const struct lu_env *env,
        d = lu2lod_dev(obd->obd_lu_dev);
        lod_getref(&d->lod_ost_descs);
        lod_foreach_ost(d, tgt) {
-               if (!tgt->ltd_active)
+               if (tgt->ltd_discon)
                        continue;
 
                rc2 = obd_set_info_async(env, tgt->ltd_exp, keylen, key,
@@ -2505,8 +2504,9 @@ static int lod_obd_set_info_async(const struct lu_env *env,
 
        lod_getref(&d->lod_mdt_descs);
        lod_foreach_mdt(d, tgt) {
-               if (!tgt->ltd_active)
+               if (tgt->ltd_discon)
                        continue;
+
                rc2 = obd_set_info_async(env, tgt->ltd_exp, keylen, key,
                                         vallen, val, set);
                if (rc2 != 0 && rc == 0)
index 8e41dd4..e158a25 100644 (file)
 #include <obd.h>
 #include <dt_object.h>
 
-#define LOV_USES_ASSIGNED_STRIPE        0
-#define LOV_USES_DEFAULT_STRIPE         1
+enum lod_uses_hint {
+       LOD_USES_ASSIGNED_STRIPE = 0,
+       LOD_USES_DEFAULT_STRIPE,
+};
 
 /* Special values to remove LOV EA from disk */
 #define LOVEA_DELETE_VALUES(size, count, offset, pool)                 \
@@ -719,8 +721,12 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                        int comp_idx, __u64 reserve);
 __u16 lod_comp_entry_stripe_count(struct lod_object *lo,
                                  int comp_idx, bool is_dir);
+__u16 lod_get_stripe_count_plain(struct lod_device *lod, struct lod_object *lo,
+                                __u16 stripe_count, bool overstriping,
+                                enum lod_uses_hint *flags);
 __u16 lod_get_stripe_count(struct lod_device *lod, struct lod_object *lo,
-                          int comp_idx, __u16 stripe_count, bool overstriping);
+                          int comp_idx, __u16 stripe_count, bool overstriping,
+                          enum lod_uses_hint *flags);
 void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod,
                           struct lu_tgt_descs *ltd);
 
index bf1982d..8404932 100644 (file)
@@ -2641,11 +2641,12 @@ static int lod_replace_parent_fid(const struct lu_env *env,
        RETURN(rc);
 }
 
-__u16 lod_comp_entry_stripe_count(struct lod_object *lo,
-                                 int comp_idx, bool is_dir)
+__u16 lod_comp_entry_stripe_count(struct lod_object *lo, int comp_idx,
+                                 bool is_dir)
 {
        struct lod_device *lod = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
        struct lod_layout_component *entry;
+       enum lod_uses_hint flags = LOD_USES_ASSIGNED_STRIPE;
 
        if (is_dir)
                return  0;
@@ -2653,13 +2654,16 @@ __u16 lod_comp_entry_stripe_count(struct lod_object *lo,
        entry = &lo->ldo_comp_entries[comp_idx];
        if (lod_comp_inited(entry))
                return entry->llc_stripe_count;
-       else if ((__u16)-1 == entry->llc_stripe_count)
-               return lod->lod_ost_count;
-       else
-               return lod_get_stripe_count(lod, lo, comp_idx,
-                                           entry->llc_stripe_count,
-                                           entry->llc_pattern &
-                                           LOV_PATTERN_OVERSTRIPING);
+       if (entry->llc_stripe_count == (__u16)-1)
+               return lod_get_stripe_count_plain(lod, lo,
+                                                 entry->llc_stripe_count,
+                                                 entry->llc_pattern &
+                                                     LOV_PATTERN_OVERSTRIPING,
+                                                 &flags);
+
+       return lod_get_stripe_count(lod, lo, comp_idx, entry->llc_stripe_count,
+                                entry->llc_pattern & LOV_PATTERN_OVERSTRIPING,
+                                &flags);
 }
 
 static int lod_comp_md_size(struct lod_object *lo, bool is_dir)
@@ -7668,12 +7672,9 @@ static inline int lod_check_ost_avail(const struct lu_env *env,
        }
 
        ost = OST_TGT(lod, idx);
-       if (ost->ltd_statfs.os_state &
-               (OS_STATFS_READONLY | OS_STATFS_ENOSPC | OS_STATFS_ENOINO |
-                OS_STATFS_NOPRECREATE) ||
-           ost->ltd_active == 0) {
-               CDEBUG(D_LAYOUT, DFID ": mirror %d OST%d unavail, rc = %d\n",
-                      PFID(lod_object_fid(lo)), index, idx, rc);
+       if (ost->ltd_active == 0) {
+               CDEBUG(D_LAYOUT, DFID ": mirror %d OST%d unavail\n",
+                      PFID(lod_object_fid(lo)), index, idx);
                return 0;
        }
 
index a2e7df4..3f05f3f 100644 (file)
@@ -794,7 +794,7 @@ void lod_spill_target_refresh(const struct lu_env *env, struct lod_device *lod,
                if (!test_bit(idx, lod->lod_ost_bitmap))
                        continue;
                tgt = OST_TGT(lod, idx);
-               if (tgt->ltd_active == 0)
+               if (!tgt->ltd_active)
                        continue;
                sfs = &tgt->ltd_statfs;
 
index 978e81c..c68a843 100644 (file)
 #define TGT_BAVAIL(i) (OST_TGT(lod,i)->ltd_statfs.os_bavail * \
                       OST_TGT(lod,i)->ltd_statfs.os_bsize)
 
+/* check whether a target is available for new object allocation */
 static inline int lod_statfs_check(struct lu_tgt_descs *ltd,
                                   struct lu_tgt_desc *tgt)
 {
        struct obd_statfs *sfs = &tgt->ltd_statfs;
 
-       if (((sfs->os_state & OS_STATFS_ENOSPC) ||
-           (!ltd->ltd_is_mdt && sfs->os_state & OS_STATFS_ENOINO &&
-            sfs->os_fprecreated == 0)))
+       if (sfs->os_state & OS_STATFS_ENOSPC ||
+           (sfs->os_state & OS_STATFS_ENOINO &&
+            /* OST allocation allowed while precreated objects available */
+            (ltd->ltd_is_mdt || sfs->os_fprecreated == 0)))
                return -ENOSPC;
 
        /* If the OST is readonly then we can't allocate objects there */
        if (sfs->os_state & OS_STATFS_READONLY)
                return -EROFS;
 
-       /* object precreation is skipped on the OST with max_create_count=0 */
-       if (!ltd->ltd_is_mdt && sfs->os_state & OS_STATFS_NOPRECREATE)
+       /* object precreation is skipped on targets with max_create_count=0 */
+       if (sfs->os_state & OS_STATFS_NOPRECREATE)
                return -ENOBUFS;
 
        return 0;
@@ -112,27 +114,25 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
        info.os_enable_pre = 1;
        rc = dt_statfs_info(env, tgt->ltd_tgt, &tgt->ltd_statfs, &info);
        if (rc && rc != -ENOTCONN)
-               CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc);
+               CERROR("%s: statfs error: rc = %d\n", lod2obd(d)->obd_name, rc);
 
-       if (!rc) {
+       if (!rc)
                rc = lod_statfs_check(ltd, tgt);
-               if (rc == -ENOSPC)
-                       return rc;
-       }
 
+       /* reserving space shouldn't be enough to mark an OST inactive */
        if (reserve &&
            (reserve + (info.os_reserved_mb_low << 20) >
             tgt->ltd_statfs.os_bavail * tgt->ltd_statfs.os_bsize))
                return -ENOSPC;
 
        /* check whether device has changed state (active, inactive) */
-       if (rc != 0 && tgt->ltd_active) {
+       if (rc && tgt->ltd_active) {
                /* turned inactive? */
                spin_lock(&d->lod_lock);
                if (tgt->ltd_active) {
                        tgt->ltd_active = 0;
                        if (rc == -ENOTCONN)
-                               tgt->ltd_connecting = 1;
+                               tgt->ltd_discon = 1;
 
                        LASSERT(desc->ld_active_tgt_count > 0);
                        desc->ld_active_tgt_count--;
@@ -141,15 +141,15 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
                               tgt->ltd_exp->exp_obd->obd_name);
                }
                spin_unlock(&d->lod_lock);
-       } else if (rc == 0 && tgt->ltd_active == 0) {
+       } else if (rc == 0 && !tgt->ltd_active) {
                /* turned active? */
+               spin_lock(&d->lod_lock);
                LASSERTF(desc->ld_active_tgt_count < desc->ld_tgt_count,
                         "active tgt count %d, tgt nr %d\n",
                         desc->ld_active_tgt_count, desc->ld_tgt_count);
-               spin_lock(&d->lod_lock);
-               if (tgt->ltd_active == 0) {
+               if (!tgt->ltd_active) {
                        tgt->ltd_active = 1;
-                       tgt->ltd_connecting = 0;
+                       tgt->ltd_discon = 0;
                        desc->ld_active_tgt_count++;
                        set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
                        CDEBUG(D_CONFIG, "%s: turns active\n",
@@ -168,20 +168,6 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
        RETURN(rc);
 }
 
-static int lod_is_tgt_usable(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
-{
-       int rc;
-
-       rc = lod_statfs_check(ltd, tgt);
-       if (rc)
-               return rc;
-
-       if (!tgt->ltd_active)
-               return -ENOTCONN;
-
-       return 0;
-}
-
 /**
  * Maintain per-target statfs data.
  *
@@ -424,17 +410,18 @@ out:
 /**
  * Calculate a minimum acceptable stripe count.
  *
- * Return an acceptable stripe count depending on flag LOV_USES_DEFAULT_STRIPE:
- * all stripes or 3/4 of stripes.
+ * Return an acceptable stripe count depending on flag LOD_USES_DEFAULT_STRIPE:
+ * all stripes or 3/4 of stripes.  The code is written this way to avoid
+ * returning 0 for stripe_count < 4, like "stripe_count * 3 / 4" would do.
  *
  * \param[in] stripe_count     number of stripes requested
- * \param[in] flags            0 or LOV_USES_DEFAULT_STRIPE
+ * \param[in] flags            0 or LOD_USES_DEFAULT_STRIPE
  *
  * \retval                     acceptable stripecount
  */
-static int min_stripe_count(__u32 stripe_count, int flags)
+static int lod_stripe_count_min(__u32 stripe_count, enum lod_uses_hint flags)
 {
-       return (flags & LOV_USES_DEFAULT_STRIPE ?
+       return (flags & LOD_USES_DEFAULT_STRIPE ?
                stripe_count - (stripe_count / 4) : stripe_count);
 }
 
@@ -716,7 +703,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
  * all the internal structures (statfs cache, array of available OSTs sorted
  * with regard to OSS, etc). The number of stripes required is taken from the
  * object (must be prepared by the caller), but can change if the flag
- * LOV_USES_DEFAULT_STRIPE is supplied. The caller should ensure nobody else
+ * LOD_USES_DEFAULT_STRIPE is supplied. The caller should ensure nobody else
  * is trying to create a striping on the object in parallel. All the internal
  * structures (like pools, etc) are protected and no additional locking is
  * required. The function succeeds even if a single stripe is allocated. To save
@@ -727,7 +714,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
  * \param[in] lo               LOD object
  * \param[out] stripe          striping created
  * \param[out] ost_indices     ost indices of striping created
- * \param[in] flags            allocation flags (0 or LOV_USES_DEFAULT_STRIPE)
+ * \param[in] flags            allocation flags (0 or LOD_USES_DEFAULT_STRIPE)
  * \param[in] th               transaction handle
  * \param[in] comp_idx         index of ldo_comp_entries
  *
@@ -737,8 +724,8 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
  */
 static int lod_ost_alloc_rr(const struct lu_env *env, struct lod_object *lo,
                            struct dt_object **stripe, __u32 *ost_indices,
-                           int flags, struct thandle *th, int comp_idx,
-                           __u64 reserve)
+                           enum lod_uses_hint flags, struct thandle *th,
+                           int comp_idx, __u64 reserve)
 {
        struct lod_layout_component *lod_comp;
        struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
@@ -756,7 +743,7 @@ static int lod_ost_alloc_rr(const struct lu_env *env, struct lod_object *lo,
        LASSERT(lo->ldo_comp_cnt > comp_idx && lo->ldo_comp_entries != NULL);
        lod_comp = &lo->ldo_comp_entries[comp_idx];
        stripe_count = lod_comp->llc_stripe_count;
-       stripe_count_min = min_stripe_count(stripe_count, flags);
+       stripe_count_min = lod_stripe_count_min(stripe_count, flags);
 
        if (lod_comp->llc_pool != NULL)
                pool = lod_find_pool(m, lod_comp->llc_pool);
@@ -843,7 +830,7 @@ repeat_find:
                                               ost_indices, th, &overstriped,
                                               reserve);
 
-               if (rc != 0 && OST_TGT(m, ost_idx)->ltd_connecting)
+               if (rc != 0 && OST_TGT(m, ost_idx)->ltd_discon)
                        ost_connecting = 1;
        }
        if ((speed < 2) && (stripe_idx < stripe_count_min)) {
@@ -1020,13 +1007,14 @@ repeat_find:
                if (lod_qos_is_tgt_used(env, mdt_idx, stripe_idx))
                        continue;
 
-               rc = lod_is_tgt_usable(ltd, mdt);
-               if (rc) {
-                       if (mdt->ltd_connecting)
-                               tgt_connecting = 1;
+               if (mdt->ltd_discon) {
+                       tgt_connecting = 1;
                        continue;
                }
 
+               if (lod_statfs_check(ltd, mdt))
+                       continue;
+
                /* try to use another OSP if this one is degraded */
                if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED &&
                    !use_degraded) {
@@ -1048,7 +1036,7 @@ repeat_find:
                        QOS_DEBUG("can't alloc stripe on #%u: %d\n",
                                  mdt->ltd_index, (int) PTR_ERR(dto));
 
-                       if (mdt->ltd_connecting)
+                       if (mdt->ltd_discon)
                                tgt_connecting = 1;
                        continue;
                }
@@ -1217,8 +1205,8 @@ static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
 static int lod_ost_alloc_specific(const struct lu_env *env,
                                  struct lod_object *lo,
                                  struct dt_object **stripe, __u32 *ost_indices,
-                                 int flags, struct thandle *th, int comp_idx,
-                                 __u64 reserve)
+                                 enum lod_uses_hint flags, struct thandle *th,
+                                 int comp_idx, __u64 reserve)
 {
        struct lod_layout_component *lod_comp;
        struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
@@ -1497,7 +1485,7 @@ out:
  * configuration (# of stripes, offset, pool) is taken from the object and
  * is prepared by the caller.
  *
- * If LOV_USES_DEFAULT_STRIPE is not passed and prepared configuration can't
+ * If LOD_USES_DEFAULT_STRIPE is not passed and prepared configuration can't
  * be met due to too few OSTs, then allocation fails. If the flag is passed
  * fewer than 3/4 of the requested number of stripes can be allocated, then
  * allocation fails.
@@ -1514,7 +1502,7 @@ out:
  * \param[in] lo               LOD object
  * \param[out] stripe          striping created
  * \param[out] ost_indices     ost indices of striping created
- * \param[in] flags            0 or LOV_USES_DEFAULT_STRIPE
+ * \param[in] flags            0 or LOD_USES_DEFAULT_STRIPE
  * \param[in] th               transaction handle
  * \param[in] comp_idx         index of ldo_comp_entries
  *
@@ -1525,8 +1513,8 @@ out:
  */
 static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                             struct dt_object **stripe, __u32 *ost_indices,
-                            int flags, struct thandle *th, int comp_idx,
-                            __u64 reserve)
+                            enum lod_uses_hint flags, struct thandle *th,
+                            int comp_idx, __u64 reserve)
 {
        struct lod_layout_component *lod_comp;
        struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
@@ -1551,7 +1539,7 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
        LASSERT(lo->ldo_comp_cnt > comp_idx && lo->ldo_comp_entries != NULL);
        lod_comp = &lo->ldo_comp_entries[comp_idx];
        stripe_count = lod_comp->llc_stripe_count;
-       stripe_count_min = min_stripe_count(stripe_count, flags);
+       stripe_count_min = lod_stripe_count_min(stripe_count, flags);
        if (stripe_count_min < 1)
                RETURN(-EINVAL);
 
@@ -1695,11 +1683,13 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
 
                        QOS_DEBUG("stripe=%d to idx=%d\n", nfound, idx);
                        /*
-                        * do not put >1 objects on a single OST, except for
-                        * overstriping
+                        * In case of QOS it makes sense to check components
+                        * only for FLR and if current component doesn't support
+                        * overstriping.
                         */
-                       if ((lod_comp_is_ost_used(env, lo, idx)) &&
-                           !(lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING))
+                       if (lo->ldo_mirror_count > 1 &&
+                           !(lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
+                           && lod_comp_is_ost_used(env, lo, idx))
                                continue;
 
                        if (lod_qos_is_tgt_used(env, idx, nfound)) {
@@ -1740,7 +1730,7 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                }
        }
 
-       if (unlikely(nfound != stripe_count)) {
+       if (unlikely(nfound < stripe_count_min)) {
                /*
                 * when the decision to use weighted algorithm was made
                 * we had enough appropriate OSPs, but this state can
@@ -1880,8 +1870,7 @@ int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                mdt = LTD_TGT(ltd, pool->op_array[i]);
                mdt->ltd_qos.ltq_usable = 0;
 
-               rc = lod_is_tgt_usable(ltd, mdt);
-               if (rc)
+               if (mdt->ltd_discon || lod_statfs_check(ltd, mdt))
                        continue;
 
                if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED)
@@ -2014,28 +2003,45 @@ unlock:
  *
  * \retval             the maximum usable stripe count
  */
+__u16 lod_get_stripe_count_plain(struct lod_device *lod, struct lod_object *lo,
+                                __u16 stripe_count, bool overstriping,
+                                enum lod_uses_hint *flags)
+{
+       struct lov_desc *lov_desc = &lod->lod_ost_descs.ltd_lov_desc;
+
+       if (!stripe_count)
+               stripe_count = lov_desc->ld_default_stripe_count;
+
+       /* Overstriping allows more stripes than targets */
+       if (stripe_count > lov_desc->ld_active_tgt_count && !overstriping) {
+               *flags |= LOD_USES_DEFAULT_STRIPE;
+               if (stripe_count == LOV_ALL_STRIPES && lod->lod_max_stripecount)
+                       stripe_count = lod->lod_max_stripecount;
+               else
+                       stripe_count = lov_desc->ld_active_tgt_count;
+       }
+       if (!stripe_count)
+               stripe_count = 1;
+
+       if (overstriping && stripe_count > LOV_MAX_STRIPE_COUNT)
+               stripe_count = LOV_MAX_STRIPE_COUNT;
+
+       return stripe_count;
+}
+
 __u16 lod_get_stripe_count(struct lod_device *lod, struct lod_object *lo,
-                          int comp_idx, __u16 stripe_count, bool overstriping)
+                          int comp_idx, __u16 stripe_count, bool overstriping,
+                          enum lod_uses_hint *flags)
 {
        __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD;
        /* max stripe count is based on OSD ea size */
        unsigned int easize = lod->lod_osd_max_easize;
        int i;
+
        ENTRY;
 
-       if (stripe_count == (__u16)(-1) && lod->lod_max_stripecount)
-               stripe_count = lod->lod_max_stripecount;
-       if (!stripe_count)
-               stripe_count =
-                       lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_count;
-       if (!stripe_count)
-               stripe_count = 1;
-       /* Overstriping allows more stripes than targets */
-       if (stripe_count >
-               lod->lod_ost_descs.ltd_lov_desc.ld_active_tgt_count &&
-           !overstriping)
-               stripe_count =
-                       lod->lod_ost_descs.ltd_lov_desc.ld_active_tgt_count;
+       stripe_count = lod_get_stripe_count_plain(lod, lo, stripe_count,
+                                                 overstriping, flags);
 
        if (lo->ldo_is_composite) {
                struct lod_layout_component *lod_comp;
@@ -2666,13 +2672,13 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                        int comp_idx, __u64 reserve)
 {
        struct lod_layout_component *lod_comp;
-       struct lod_device      *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
-       int                     stripe_len;
-       int                     flag = LOV_USES_ASSIGNED_STRIPE;
-       int                     i, rc = 0;
+       struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
        struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
        struct dt_object **stripe = NULL;
        __u32 *ost_indices = NULL;
+       enum lod_uses_hint flags = LOD_USES_ASSIGNED_STRIPE;
+       int stripe_len;
+       int i, rc = 0;
        ENTRY;
 
        LASSERT(lo);
@@ -2704,7 +2710,8 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                stripe_len = lod_get_stripe_count(d, lo, comp_idx,
                                                  lod_comp->llc_stripe_count,
                                                  lod_comp->llc_pattern &
-                                                 LOV_PATTERN_OVERSTRIPING);
+                                                 LOV_PATTERN_OVERSTRIPING,
+                                                 &flags);
 
                if (stripe_len == 0)
                        GOTO(out, rc = -ERANGE);
@@ -2740,14 +2747,14 @@ repeat:
                        lod_collect_avoidance(lo, lag, comp_idx);
 
                        rc = lod_ost_alloc_qos(env, lo, stripe, ost_indices,
-                                              flag, th, comp_idx, reserve);
+                                              flags, th, comp_idx, reserve);
                        if (rc == -EAGAIN)
                                rc = lod_ost_alloc_rr(env, lo, stripe,
-                                                     ost_indices, flag, th,
+                                                     ost_indices, flags, th,
                                                      comp_idx, reserve);
                } else {
                        rc = lod_ost_alloc_specific(env, lo, stripe,
-                                                   ost_indices, flag, th,
+                                                   ost_indices, flags, th,
                                                    comp_idx, reserve);
                }
 put_ldts:
index 9d5b758..d310403 100644 (file)
@@ -807,12 +807,12 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
                info->os_reserved_mb_high = d->opd_reserved_mb_high;
        }
 
-       CDEBUG(D_OTHER, "%s: %llu blocks, %llu free, %llu avail, "
-              "%u bsize, %u reserved mb low, %u reserved mb high, "
-              "%llu files, %llu free files\n", d->opd_obd->obd_name,
+       CDEBUG(D_OTHER,
+              "%s: blocks=%llu, bfree=%llu, bavail=%llu, bsize=%u, reserved_mb_low=%u, reserved_mb_high=%u, files=%llu, ffree=%llu, state=%x\n",
+              d->opd_obd->obd_name,
               sfs->os_blocks, sfs->os_bfree, sfs->os_bavail, sfs->os_bsize,
               d->opd_reserved_mb_low, d->opd_reserved_mb_high,
-              sfs->os_files, sfs->os_ffree);
+              sfs->os_files, sfs->os_ffree, sfs->os_state);
 
        if (d->opd_pre == NULL || (info && !info->os_enable_pre))
                RETURN(0);
index e0c4730..1ba3dd5 100644 (file)
@@ -3599,10 +3599,10 @@ test_202() {
        ids=($($LFS getstripe $tf | awk '/lcme_id/{print $2}' | tr '\n' ' '))
        verify_comp_attr stripe-count $tf ${ids[0]} 1
 
-       $LFS setstripe --component-add -E 2M -c -1 $tf
+       $LFS setstripe --component-add -E 2M -c $OSTCOUNT $tf
        ids=($($LFS getstripe $tf | awk '/lcme_id/{print $2}' | tr '\n' ' '))
        verify_comp_attr stripe-count $tf ${ids[0]} 1
-       verify_comp_attr stripe-count $tf ${ids[1]} -1
+       verify_comp_attr stripe-count $tf ${ids[1]} $OSTCOUNT
 
        dd if=/dev/zero of=$tf bs=1M count=2
        ids=($($LFS getstripe $tf | awk '/lcme_id/{print $2}' | tr '\n' ' '))
index 5000d52..5ab45c4 100644 (file)
@@ -824,8 +824,8 @@ test_15() {
        $LFS setstripe -E 1M -S 1M -E 10M -E eof $parent/f1 || error "create f1"
        $LFS setstripe -E 4M -E 20M -E eof $parent/f2 || error "create f2"
        test_mkdir $parent/subdir
-       $LFS setstripe -E 6M -S 1M -c1 -E 30M -c4 -E eof -c -1 $parent/subdir ||
-               error "setstripe to subdir"
+       $LFS setstripe -E 6M -S 1M -c1 -E 30M -c4 -E eof -c $OSTCOUNT \
+               $parent/subdir || error "setstripe to subdir"
        $LFS setstripe -E 8M -E eof $parent/subdir/f3 || error "create f3"
        $LFS setstripe -c 1 $parent/subdir/f4 || error "create f4"