#define D_QOS D_OTHER
-#if 0
-#define QOS_DEBUG(fmt, ...) CDEBUG(D_OTHER, fmt, ## __VA_ARGS__)
-#define QOS_CONSOLE(fmt, ...) LCONSOLE(D_OTHER, fmt, ## __VA_ARGS__)
-#else
-#define QOS_DEBUG(fmt, ...)
-#define QOS_CONSOLE(fmt, ...)
-#endif
+#define QOS_DEBUG(fmt, ...) CDEBUG(D_QOS, fmt, ## __VA_ARGS__)
+#define QOS_CONSOLE(fmt, ...) LCONSOLE(D_QOS, fmt, ## __VA_ARGS__)
#define TGT_BAVAIL(i) (OST_TGT(lod,i)->ltd_statfs.os_bavail * \
OST_TGT(lod,i)->ltd_statfs.os_bsize)
{
struct lod_tgt_desc *ost;
int rc;
+ ENTRY;
LASSERT(d);
ost = OST_TGT(d,index);
struct obd_device *obd = lod2obd(lod);
struct ost_pool *osts = &(lod->lod_pool_info);
unsigned int i;
- int idx, rc = 0;
+ int idx;
__u64 max_age, avail;
ENTRY;
down_write(&lod->lod_qos.lq_rw_sem);
if (cfs_time_beforeq_64(max_age, obd->obd_osfs_age))
- GOTO(out, rc = 0);
+ goto out;
for (i = 0; i < osts->op_count; i++) {
idx = osts->op_array[i];
avail = OST_TGT(lod,idx)->ltd_statfs.os_bavail;
- rc = lod_statfs_and_check(env, lod, idx,
- &OST_TGT(lod,idx)->ltd_statfs);
- if (rc)
- break;
+ if (lod_statfs_and_check(env, lod, idx,
+ &OST_TGT(lod, idx)->ltd_statfs))
+ continue;
if (OST_TGT(lod,idx)->ltd_statfs.os_bavail != avail)
/* recalculate weigths */
lod->lod_qos.lq_dirty = 1;
}
/**
+ * Allocate a specific striping layout on a user defined set of OSTs.
+ *
+ * Allocates new striping using the OST index range provided by the data from
+ * the lmm_objects contained in the lov_user_md passed to this method. Full
+ * OSTs are not considered. The exact order of OSTs requested by the user
+ * is respected as much as possible depending on OST status. The number of
+ * stripes needed and stripe offset are taken from the object. If that number
+ * can not be met, then the function returns a failure and then it's the
+ * caller's responsibility to release the stripes allocated. All the internal
+ * structures are protected, but no concurrent allocation is allowed on the
+ * same objects.
+ *
+ * The walk over the list starts at the entry whose l_ost_idx equals
+ * lo->ldo_def_stripe_offset and then proceeds through the list in order,
+ * wrapping around to the first entry, until lo->ldo_stripenr entries have
+ * been visited. The walk stops at the first entry that fails; objects that
+ * were already declared stay in \a stripe.
+ *
+ * NOTE(review): lmm_objects[] is indexed up to lo->ldo_stripenr here without
+ * comparing against lum->lmm_stripe_count; presumably the caller guarantees
+ * the list holds at least that many entries -- confirm.
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] lo LOD object
+ * \param[out] stripe striping created
+ * \param[in] lum stripe md to specify list of OSTs; asserted to be
+ *			non-NULL with magic LOV_USER_MAGIC_SPECIFIC and
+ *			accessed as a lov_user_md_v3
+ * \param[in] th transaction handle
+ *
+ * \retval 0 on success
+ * \retval -ENODEV OST index does not exist on file system
+ * \retval -EINVAL requested OST index is invalid: either the start
+ *			offset is not in the list, or the same OST is
+ *			listed more than once
+ * \retval negative negated errno on error
+ */
+static int lod_alloc_ost_list(const struct lu_env *env,
+ struct lod_object *lo, struct dt_object **stripe,
+ struct lov_user_md *lum, struct thandle *th)
+{
+ struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+ struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
+ struct dt_object *o;
+ struct lov_user_md_v3 *v3;
+ unsigned int array_idx = 0; /* current position in v3->lmm_objects[] */
+ int stripe_count = 0; /* number of objects declared so far */
+ int i;
+ int rc;
+ ENTRY;
+
+ /* for specific OSTs layout */
+ LASSERT(lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC);
+ lustre_print_user_md(D_OTHER, lum, __func__);
+
+ rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
+ if (rc < 0)
+ RETURN(rc);
+
+ v3 = (struct lov_user_md_v3 *)lum;
+ for (i = 0; i < lo->ldo_stripenr; i++) { /* find the start entry */
+ if (v3->lmm_objects[i].l_ost_idx == lo->ldo_def_stripe_offset) {
+ array_idx = i;
+ break;
+ }
+ }
+ if (i == lo->ldo_stripenr) { /* search ran off the end: no match */
+ CDEBUG(D_OTHER,
+ "%s: start index %d not in the specified list of OSTs\n",
+ lod2obd(m)->obd_name, lo->ldo_def_stripe_offset);
+ RETURN(-EINVAL);
+ }
+
+ for (i = 0; i < lo->ldo_stripenr;
+ i++, array_idx = (array_idx + 1) % lo->ldo_stripenr) { /* wraps */
+ __u32 ost_idx = v3->lmm_objects[array_idx].l_ost_idx;
+
+ if (!cfs_bitmap_check(m->lod_ost_bitmap, ost_idx)) {
+ rc = -ENODEV;
+ break;
+ }
+
+ /*
+ * do not put >1 objects on a single OST
+ */
+ if (lod_qos_is_ost_used(env, ost_idx, stripe_count)) {
+ rc = -EINVAL;
+ break;
+ }
+
+ rc = lod_statfs_and_check(env, m, ost_idx, sfs);
+ if (rc < 0) /* this OSP doesn't feel well */
+ break;
+
+ o = lod_qos_declare_object_on(env, m, ost_idx, th);
+ if (IS_ERR(o)) {
+ rc = PTR_ERR(o);
+ CDEBUG(D_OTHER,
+ "%s: can't declare new object on #%u: %d\n",
+ lod2obd(m)->obd_name, ost_idx, rc);
+ break;
+ }
+
+ /*
+ * We've successfully declared (reserved) an object:
+ * mark the OST as used for this allocation and store
+ * the object in the next free slot of stripe[]
+ */
+ lod_qos_ost_in_use(env, stripe_count, ost_idx);
+ stripe[stripe_count] = o;
+ stripe_count++;
+ }
+
+ RETURN(rc); /* status of the last step attempted in the loop */
+}
+
+/**
* Allocate a striping on a predefined set of OSTs.
*
* Allocates new striping starting from OST provided lo->ldo_def_stripe_offset.
struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
struct lov_user_md_v1 *v1 = NULL;
struct lov_user_md_v3 *v3 = NULL;
- struct pool_desc *pool;
+ char *pool_name = NULL;
__u32 magic;
int rc;
+ unsigned int size;
ENTRY;
if (buf == NULL || buf->lb_buf == NULL || buf->lb_len == 0)
RETURN(0);
+ v3 = buf->lb_buf;
v1 = buf->lb_buf;
magic = v1->lmm_magic;
- if (magic == __swab32(LOV_USER_MAGIC_V1)) {
+ if (unlikely(magic == LOV_MAGIC_V1_DEF || magic == LOV_MAGIC_V3_DEF)) {
+ /* try to use as fully defined striping */
+ rc = lod_use_defined_striping(env, lo, buf);
+ RETURN(rc);
+ }
+
+ switch (magic) {
+ case __swab32(LOV_USER_MAGIC_V1):
lustre_swab_lov_user_md_v1(v1);
magic = v1->lmm_magic;
- } else if (magic == __swab32(LOV_USER_MAGIC_V3)) {
- v3 = buf->lb_buf;
+ /* fall through */
+ case LOV_USER_MAGIC_V1:
+ size = sizeof(*v1);
+ break;
+
+ case __swab32(LOV_USER_MAGIC_V3):
lustre_swab_lov_user_md_v3(v3);
magic = v3->lmm_magic;
- }
+ /* fall through */
+ case LOV_USER_MAGIC_V3:
+ size = sizeof(*v3);
+ pool_name = v3->lmm_pool_name;
+ break;
- if (unlikely(magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)) {
- /* try to use as fully defined striping */
- rc = lod_use_defined_striping(env, lo, buf);
- RETURN(rc);
+ case __swab32(LOV_USER_MAGIC_SPECIFIC):
+ lustre_swab_lov_user_md_v3(v3);
+ lustre_swab_lov_user_md_objects(v3->lmm_objects,
+ v3->lmm_stripe_count);
+ magic = v3->lmm_magic;
+ /* fall through */
+ case LOV_USER_MAGIC_SPECIFIC:
+ if (v3->lmm_stripe_offset == LOV_OFFSET_DEFAULT)
+ v3->lmm_stripe_offset = v3->lmm_objects[0].l_ost_idx;
+ if (v3->lmm_pool_name[0] != '\0')
+ pool_name = v3->lmm_pool_name;
+ size = lov_user_md_size(v3->lmm_stripe_count,
+ LOV_USER_MAGIC_SPECIFIC);
+ break;
+
+ default:
+ CERROR("%s: unrecognized magic %X\n",
+ lod2obd(d)->obd_name, magic);
+ RETURN(-EINVAL);
}
- if (unlikely(buf->lb_len < sizeof(*v1))) {
- CERROR("wrong size: %u\n", (unsigned) buf->lb_len);
+ if (unlikely(buf->lb_len < size)) {
+ CERROR("%s: wrong size: %zd, expect: %u\n",
+ lod2obd(d)->obd_name, buf->lb_len, size);
RETURN(-EINVAL);
}
+ lustre_print_user_md(D_OTHER, v1, "parse config");
+
v1->lmm_magic = magic;
if (v1->lmm_pattern == 0)
v1->lmm_pattern = LOV_PATTERN_RAID0;
if (lov_pattern(v1->lmm_pattern) != LOV_PATTERN_RAID0) {
- CERROR("invalid pattern: %x\n", v1->lmm_pattern);
+ CERROR("%s: invalid pattern: %x\n",
+ lod2obd(d)->obd_name, v1->lmm_pattern);
RETURN(-EINVAL);
}
lo->ldo_pattern = v1->lmm_pattern;
- if (v1->lmm_stripe_size)
+ if (v1->lmm_stripe_size > 0)
lo->ldo_stripe_size = v1->lmm_stripe_size;
+
if (lo->ldo_stripe_size & (LOV_MIN_STRIPE_SIZE - 1))
lo->ldo_stripe_size = LOV_MIN_STRIPE_SIZE;
- if (v1->lmm_stripe_count)
+ if (v1->lmm_stripe_count > 0)
lo->ldo_stripenr = v1->lmm_stripe_count;
- if ((v1->lmm_stripe_offset >= d->lod_desc.ld_tgt_count) &&
- (v1->lmm_stripe_offset != (typeof(v1->lmm_stripe_offset))(-1))) {
- CERROR("invalid offset: %x\n", v1->lmm_stripe_offset);
- RETURN(-EINVAL);
- }
lo->ldo_def_stripe_offset = v1->lmm_stripe_offset;
- CDEBUG(D_OTHER, "lsm: %u size, %u stripes, %u offset\n",
- v1->lmm_stripe_size, v1->lmm_stripe_count,
- v1->lmm_stripe_offset);
-
- if (v1->lmm_magic == LOV_MAGIC_V3) {
- if (buf->lb_len < sizeof(*v3)) {
- CERROR("wrong size: %u\n", (unsigned) buf->lb_len);
- RETURN(-EINVAL);
- }
-
- v3 = buf->lb_buf;
- lod_object_set_pool(lo, v3->lmm_pool_name);
+ lod_object_set_pool(lo, NULL);
+ if (pool_name != NULL) {
+ struct pool_desc *pool;
/* In the function below, .hs_keycmp resolves to
* pool_hashkey_keycmp() */
/* coverity[overrun-buffer-val] */
- pool = lod_find_pool(d, v3->lmm_pool_name);
+ pool = lod_find_pool(d, pool_name);
if (pool != NULL) {
- if (lo->ldo_def_stripe_offset !=
- (typeof(v1->lmm_stripe_offset))(-1)) {
- rc = lo->ldo_def_stripe_offset;
- rc = lod_check_index_in_pool(rc, pool);
+ if (lo->ldo_def_stripe_offset != LOV_OFFSET_DEFAULT) {
+ rc = lod_check_index_in_pool(
+ lo->ldo_def_stripe_offset, pool);
if (rc < 0) {
lod_pool_putref(pool);
- CERROR("invalid offset\n");
+ CERROR("%s: invalid offset, %u\n",
+ lod2obd(d)->obd_name,
+ lo->ldo_def_stripe_offset);
RETURN(-EINVAL);
}
}
if (lo->ldo_stripenr > pool_tgt_count(pool))
- lo->ldo_stripenr= pool_tgt_count(pool);
+ lo->ldo_stripenr = pool_tgt_count(pool);
lod_pool_putref(pool);
}
- } else
- lod_object_set_pool(lo, NULL);
+
+ lod_object_set_pool(lo, pool_name);
+ }
/* fixup for released file */
if (lo->ldo_pattern & LOV_PATTERN_F_RELEASED) {
GOTO(out, rc = 0);
if (likely(lo->ldo_stripe == NULL)) {
+ struct lov_user_md *lum = NULL;
+
/*
* no striping has been created so far
*/
*/
lod_qos_statfs_update(env, d);
lo->ldo_stripenr = lod_get_stripecnt(d, LOV_MAGIC,
- lo->ldo_stripenr);
+ lo->ldo_stripenr);
stripe_len = lo->ldo_stripenr;
OBD_ALLOC(stripe, sizeof(stripe[0]) * stripe_len);
/* XXX: support for non-0 files w/o objects */
CDEBUG(D_OTHER, "tgt_count %d stripenr %d\n",
d->lod_desc.ld_tgt_count, stripe_len);
- if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) {
+
+ if (buf != NULL && buf->lb_buf != NULL)
+ lum = buf->lb_buf;
+
+ if (lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
+ rc = lod_alloc_ost_list(env, lo, stripe, lum, th);
+ } else if (lo->ldo_def_stripe_offset == LOV_OFFSET_DEFAULT) {
rc = lod_alloc_qos(env, lo, stripe, flag, th);
if (rc == -EAGAIN)
rc = lod_alloc_rr(env, lo, stripe, flag, th);