Whamcloud - gitweb
LU-4665 utils: lfs setstripe to specify OSTs
[fs/lustre-release.git] / lustre / lod / lod_qos.c
index 1ac8b28..065ade0 100644 (file)
@@ -1022,6 +1022,107 @@ out:
 }
 
 /**
+ * Allocate a specific striping layout on a user defined set of OSTs.
+ *
+ * Allocates new striping using the OST indices listed in the lmm_objects
+ * array of the lov_user_md passed to this function. Allocation starts at the
+ * list entry matching the object's stripe offset and wraps around the list,
+ * so the user's order is kept. The number of stripes and the stripe offset
+ * are taken from the object. The first OST that is missing, repeated, fails
+ * the statfs check or can't have an object declared aborts the allocation;
+ * stripes declared so far stay in \a stripe and it's the caller's
+ * responsibility to release them. No concurrent allocation is allowed on
+ * the same objects.
+ *
+ * \param[in] env      execution environment for this thread
+ * \param[in] lo       LOD object
+ * \param[out] stripe  striping created
+ * \param[in] lum      stripe md to specify list of OSTs
+ * \param[in] th       transaction handle
+ *
+ * \retval 0           on success
+ * \retval -ENODEV     OST index is not set in the LOD OST bitmap
+ * \retval -EINVAL     start index not in the list, or an OST listed twice
+ * \retval negative    negated errno on error
+ */
+static int lod_alloc_ost_list(const struct lu_env *env,
+                             struct lod_object *lo, struct dt_object **stripe,
+                             struct lov_user_md *lum, struct thandle *th)
+{
+       struct lod_device       *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+       struct obd_statfs       *sfs = &lod_env_info(env)->lti_osfs;
+       struct dt_object        *o;
+       struct lov_user_md_v3   *v3;
+       unsigned int            array_idx = 0;
+       int                     stripe_count = 0;
+       int                     i;
+       int                     rc;
+       ENTRY;
+
+       /* only layouts listing specific OSTs are handled here */
+       LASSERT(lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC);
+       lustre_print_user_md(D_OTHER, lum, __func__);
+
+       rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
+       if (rc < 0)
+               RETURN(rc);
+
+       v3 = (struct lov_user_md_v3 *)lum; /* NOTE(review): lmm_objects assumed to hold ldo_stripenr entries - confirm */
+       for (i = 0; i < lo->ldo_stripenr; i++) { /* find the list slot of the start OST */
+               if (v3->lmm_objects[i].l_ost_idx == lo->ldo_def_stripe_offset) {
+                       array_idx = i;
+                       break;
+               }
+       }
+       if (i == lo->ldo_stripenr) { /* loop ran to the end: no entry matched */
+               CDEBUG(D_OTHER,
+                      "%s: start index %d not in the specified list of OSTs\n",
+                      lod2obd(m)->obd_name, lo->ldo_def_stripe_offset);
+               RETURN(-EINVAL);
+       }
+
+       for (i = 0; i < lo->ldo_stripenr;
+            i++, array_idx = (array_idx + 1) % lo->ldo_stripenr) { /* wrap around the list */
+               __u32 ost_idx = v3->lmm_objects[array_idx].l_ost_idx;
+
+               if (!cfs_bitmap_check(m->lod_ost_bitmap, ost_idx)) {
+                       rc = -ENODEV;
+                       break;
+               }
+
+               /*
+                * an OST listed twice is an error: one object per OST at most
+                */
+               if (lod_qos_is_ost_used(env, ost_idx, stripe_count)) {
+                       rc = -EINVAL;
+                       break;
+               }
+
+               rc = lod_statfs_and_check(env, m, ost_idx, sfs);
+               if (rc < 0) /* statfs check failed, give up on the whole list */
+                       break;
+
+               o = lod_qos_declare_object_on(env, m, ost_idx, th);
+               if (IS_ERR(o)) {
+                       rc = PTR_ERR(o);
+                       CDEBUG(D_OTHER,
+                              "%s: can't declare new object on #%u: %d\n",
+                              lod2obd(m)->obd_name, ost_idx, rc);
+                       break;
+               }
+
+               /*
+                * object declared (reserved): mark OST used, record the stripe
+                */
+               lod_qos_ost_in_use(env, stripe_count, ost_idx);
+               stripe[stripe_count] = o;
+               stripe_count++;
+       }
+
+       RETURN(rc);
+}
+
+/**
  * Allocate a striping on a predefined set of OSTs.
  *
  * Allocates new striping starting from OST provided lo->ldo_def_stripe_offset.
@@ -1562,97 +1663,122 @@ static int lod_qos_parse_config(const struct lu_env *env,
        struct lod_device     *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
        struct lov_user_md_v1 *v1 = NULL;
        struct lov_user_md_v3 *v3 = NULL;
-       struct pool_desc      *pool;
+       char                  *pool_name = NULL;
        __u32                  magic;
        int                    rc;
+       unsigned int           size;
        ENTRY;
 
        if (buf == NULL || buf->lb_buf == NULL || buf->lb_len == 0)
                RETURN(0);
 
+       v3 = buf->lb_buf;
        v1 = buf->lb_buf;
        magic = v1->lmm_magic;
 
-       if (magic == __swab32(LOV_USER_MAGIC_V1)) {
+       if (unlikely(magic == LOV_MAGIC_V1_DEF || magic == LOV_MAGIC_V3_DEF)) {
+               /* try to use as fully defined striping */
+               rc = lod_use_defined_striping(env, lo, buf);
+               RETURN(rc);
+       }
+
+       switch (magic) {
+       case __swab32(LOV_USER_MAGIC_V1):
                lustre_swab_lov_user_md_v1(v1);
                magic = v1->lmm_magic;
-       } else if (magic == __swab32(LOV_USER_MAGIC_V3)) {
-               v3 = buf->lb_buf;
+               /* fall through */
+       case LOV_USER_MAGIC_V1:
+               size = sizeof(*v1);
+               break;
+
+       case __swab32(LOV_USER_MAGIC_V3):
                lustre_swab_lov_user_md_v3(v3);
                magic = v3->lmm_magic;
-       }
+               /* fall through */
+       case LOV_USER_MAGIC_V3:
+               size = sizeof(*v3);
+               pool_name = v3->lmm_pool_name;
+               break;
 
-       if (unlikely(magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)) {
-               /* try to use as fully defined striping */
-               rc = lod_use_defined_striping(env, lo, buf);
-               RETURN(rc);
+       case __swab32(LOV_USER_MAGIC_SPECIFIC):
+               lustre_swab_lov_user_md_v3(v3);
+               lustre_swab_lov_user_md_objects(v3->lmm_objects,
+                                               v3->lmm_stripe_count);
+               magic = v3->lmm_magic;
+               /* fall through */
+       case LOV_USER_MAGIC_SPECIFIC:
+               if (v3->lmm_stripe_offset == LOV_OFFSET_DEFAULT)
+                       v3->lmm_stripe_offset = v3->lmm_objects[0].l_ost_idx;
+               if (v3->lmm_pool_name[0] != '\0')
+                       pool_name = v3->lmm_pool_name;
+               size = lov_user_md_size(v3->lmm_stripe_count,
+                                       LOV_USER_MAGIC_SPECIFIC);
+               break;
+
+       default:
+               CERROR("%s: unrecognized magic %X\n",
+                      lod2obd(d)->obd_name, magic);
+               RETURN(-EINVAL);
        }
 
-       if (unlikely(buf->lb_len < sizeof(*v1))) {
-               CERROR("wrong size: %u\n", (unsigned) buf->lb_len);
+       if (unlikely(buf->lb_len < size)) {
+               CERROR("%s: wrong size: %zd, expect: %u\n",
+                      lod2obd(d)->obd_name, buf->lb_len, size);
                RETURN(-EINVAL);
        }
 
+       lustre_print_user_md(D_OTHER, v1, "parse config");
+
        v1->lmm_magic = magic;
        if (v1->lmm_pattern == 0)
                v1->lmm_pattern = LOV_PATTERN_RAID0;
        if (lov_pattern(v1->lmm_pattern) != LOV_PATTERN_RAID0) {
-               CERROR("invalid pattern: %x\n", v1->lmm_pattern);
+               CERROR("%s: invalid pattern: %x\n",
+                      lod2obd(d)->obd_name, v1->lmm_pattern);
                RETURN(-EINVAL);
        }
        lo->ldo_pattern = v1->lmm_pattern;
 
-       if (v1->lmm_stripe_size)
+       if (v1->lmm_stripe_size > 0)
                lo->ldo_stripe_size = v1->lmm_stripe_size;
+
        if (lo->ldo_stripe_size & (LOV_MIN_STRIPE_SIZE - 1))
                lo->ldo_stripe_size = LOV_MIN_STRIPE_SIZE;
 
-       if (v1->lmm_stripe_count)
+       if (v1->lmm_stripe_count > 0)
                lo->ldo_stripenr = v1->lmm_stripe_count;
 
-       if ((v1->lmm_stripe_offset >= d->lod_desc.ld_tgt_count) &&
-           (v1->lmm_stripe_offset != (typeof(v1->lmm_stripe_offset))(-1))) {
-               CERROR("invalid offset: %x\n", v1->lmm_stripe_offset);
-               RETURN(-EINVAL);
-       }
        lo->ldo_def_stripe_offset = v1->lmm_stripe_offset;
 
-       CDEBUG(D_OTHER, "lsm: %u size, %u stripes, %u offset\n",
-              v1->lmm_stripe_size, v1->lmm_stripe_count,
-              v1->lmm_stripe_offset);
-
-       if (v1->lmm_magic == LOV_MAGIC_V3) {
-               if (buf->lb_len < sizeof(*v3)) {
-                       CERROR("wrong size: %u\n", (unsigned) buf->lb_len);
-                       RETURN(-EINVAL);
-               }
-
-               v3 = buf->lb_buf;
-               lod_object_set_pool(lo, v3->lmm_pool_name);
+       lod_object_set_pool(lo, NULL);
+       if (pool_name != NULL) {
+               struct pool_desc *pool;
 
                /* In the function below, .hs_keycmp resolves to
                 * pool_hashkey_keycmp() */
                /* coverity[overrun-buffer-val] */
-               pool = lod_find_pool(d, v3->lmm_pool_name);
+               pool = lod_find_pool(d, pool_name);
                if (pool != NULL) {
-                       if (lo->ldo_def_stripe_offset !=
-                           (typeof(v1->lmm_stripe_offset))(-1)) {
-                               rc = lo->ldo_def_stripe_offset;
-                               rc = lod_check_index_in_pool(rc, pool);
+                       if (lo->ldo_def_stripe_offset != LOV_OFFSET_DEFAULT) {
+                               rc = lod_check_index_in_pool(
+                                              lo->ldo_def_stripe_offset, pool);
                                if (rc < 0) {
                                        lod_pool_putref(pool);
-                                       CERROR("invalid offset\n");
+                                       CERROR("%s: invalid offset, %u\n",
+                                              lod2obd(d)->obd_name,
+                                              lo->ldo_def_stripe_offset);
                                        RETURN(-EINVAL);
                                }
                        }
 
                        if (lo->ldo_stripenr > pool_tgt_count(pool))
-                               lo->ldo_stripenr= pool_tgt_count(pool);
+                               lo->ldo_stripenr = pool_tgt_count(pool);
 
                        lod_pool_putref(pool);
                }
-       } else
-               lod_object_set_pool(lo, NULL);
+
+               lod_object_set_pool(lo, pool_name);
+       }
 
        /* fixup for released file */
        if (lo->ldo_pattern & LOV_PATTERN_F_RELEASED) {
@@ -1718,6 +1844,8 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                GOTO(out, rc = 0);
 
        if (likely(lo->ldo_stripe == NULL)) {
+               struct lov_user_md *lum = NULL;
+
                /*
                 * no striping has been created so far
                 */
@@ -1728,7 +1856,7 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                 */
                lod_qos_statfs_update(env, d);
                lo->ldo_stripenr = lod_get_stripecnt(d, LOV_MAGIC,
-                               lo->ldo_stripenr);
+                                                    lo->ldo_stripenr);
 
                stripe_len = lo->ldo_stripenr;
                OBD_ALLOC(stripe, sizeof(stripe[0]) * stripe_len);
@@ -1739,7 +1867,13 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                /* XXX: support for non-0 files w/o objects */
                CDEBUG(D_OTHER, "tgt_count %d stripenr %d\n",
                                d->lod_desc.ld_tgt_count, stripe_len);
-               if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) {
+
+               if (buf != NULL && buf->lb_buf != NULL)
+                       lum = buf->lb_buf;
+
+               if (lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
+                       rc = lod_alloc_ost_list(env, lo, stripe, lum, th);
+               } else if (lo->ldo_def_stripe_offset == LOV_OFFSET_DEFAULT) {
                        rc = lod_alloc_qos(env, lo, stripe, flag, th);
                        if (rc == -EAGAIN)
                                rc = lod_alloc_rr(env, lo, stripe, flag, th);