goto next;
}
- len = snprintf(name, sizeof(name),
- DFID":", PFID(&ent->lde_fid));
+ len = scnprintf(name, sizeof(name),
+ DFID":", PFID(&ent->lde_fid));
/* The ent->lde_name is composed of ${FID}:${index} */
if (ent->lde_namelen < len + 1 ||
memcmp(ent->lde_name, name, len) != 0) {
RETURN(rc = rc > 0 ? -EINVAL : rc);
if (buf->lb_buf == NULL || buf->lb_len == 0) {
- CLASSERT(sizeof(*lmv1) <= sizeof(info->lti_key));
+ BUILD_BUG_ON(sizeof(*lmv1) > sizeof(info->lti_key));
/* lti_buf is large enough for *lmv1 or a short
* (<= sizeof(struct lmv_mds_md_v1)) foreign LMV
RETURN(rc);
}
-static int lod_prep_md_striped_create(const struct lu_env *env,
- struct dt_object *dt,
- struct lu_attr *attr,
- const struct lmv_user_md_v1 *lum,
- struct dt_object_format *dof,
- struct thandle *th)
+/**
+ * Allocate a striping on a predefined set of MDTs.
+ *
+ * Allocates new striping using the MDT index range provided by the data from
+ * the lum_objects contained in the lmv_user_md passed to this method if
+ * \a is_specific is true; or allocates new layout starting from MDT index in
+ * lo->ldo_dir_stripe_offset. The exact order of MDTs is not important and
+ * varies depending on MDT status. The number of stripes needed and stripe
+ * offset are taken from the object. If that number cannot be met, the
+ * stripes that could be allocated are kept and their count is returned. If
+ * locating a stripe object fails, the stripes allocated by this function are
+ * released and the error is returned. All the internal structures are
+ * protected, but no concurrent allocation is allowed on the same objects.
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] lo LOD object
+ * \param[out] stripes striping created
+ * \param[out] mdt_indices MDT indices of striping created
+ * \param[in] is_specific true if the MDTs are provided by lum; false if
+ * only the starting MDT index is provided
+ *
+ * \retval positive	number of stripes allocated, including the first
+ *			stripe, which is allocated by the caller
+ * \retval negative errno on failure
+ */
+static int lod_mdt_alloc_specific(const struct lu_env *env,
+ struct lod_object *lo,
+ struct dt_object **stripes,
+ __u32 *mdt_indices, bool is_specific)
{
struct lod_thread_info *info = lod_env_info(env);
- struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
- struct lod_object *lo = lod_dt_obj(dt);
- struct dt_object **stripe;
- __u32 stripe_count;
- int *idx_array;
- __u32 master_index;
- int rc = 0;
- __u32 i;
- __u32 j;
- bool is_specific = false;
- ENTRY;
-
- /* The lum has been verifed in lod_verify_md_striping */
- LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC ||
- le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC);
-
- stripe_count = lo->ldo_dir_stripe_count;
-
- OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
- if (idx_array == NULL)
- RETURN(-ENOMEM);
-
- OBD_ALLOC(stripe, sizeof(stripe[0]) * stripe_count);
- if (stripe == NULL)
- GOTO(out_free, rc = -ENOMEM);
+ struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+ struct lu_tgt_descs *ltd = &lod->lod_mdt_descs;
+ struct lu_tgt_desc *tgt = NULL;
+ struct lu_object_conf conf = { .loc_flags = LOC_F_NEW };
+ struct dt_device *tgt_dt = NULL;
+ struct lu_fid fid = { 0 };
+ struct dt_object *dto;
+ u32 master_index;
+ u32 stripe_count = lo->ldo_dir_stripe_count;
+ int stripe_idx = 1;
+ int j;
+ int idx;
+ int rc;
- /* Start index must be the master MDT */
master_index = lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_node_id;
- idx_array[0] = master_index;
- if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) {
- is_specific = true;
- for (i = 1; i < stripe_count; i++)
- idx_array[i] = le32_to_cpu(lum->lum_objects[i].lum_mds);
- }
-
- for (i = 0; i < stripe_count; i++) {
- struct lod_tgt_desc *tgt = NULL;
- struct dt_object *dto;
- struct lu_fid fid = { 0 };
- int idx;
- struct lu_object_conf conf = { 0 };
- struct dt_device *tgt_dt = NULL;
+ if (stripe_count > 1)
+ /* Set the start index for the 2nd stripe allocation */
+ mdt_indices[1] = (mdt_indices[0] + 1) %
+ (lod->lod_remote_mdt_count + 1);
+ for (; stripe_idx < stripe_count; stripe_idx++) {
/* Try to find next avaible target */
- idx = idx_array[i];
+ idx = mdt_indices[stripe_idx];
for (j = 0; j < lod->lod_remote_mdt_count;
j++, idx = (idx + 1) % (lod->lod_remote_mdt_count + 1)) {
bool already_allocated = false;
__u32 k;
CDEBUG(D_INFO, "try idx %d, mdt cnt %u, allocated %u\n",
- idx, lod->lod_remote_mdt_count + 1, i);
+ idx, lod->lod_remote_mdt_count + 1, stripe_idx);
if (likely(!is_specific &&
!OBD_FAIL_CHECK(OBD_FAIL_LARGE_STRIPE))) {
/* check whether the idx already exists
* in current allocated array */
- for (k = 0; k < i; k++) {
- if (idx_array[k] == idx) {
+ for (k = 0; k < stripe_idx; k++) {
+ if (mdt_indices[k] == idx) {
already_allocated = true;
break;
}
rc = obd_fid_alloc(env, lod->lod_child_exp,
&fid, NULL);
if (rc < 0)
- GOTO(out_put, rc);
+ continue;
tgt_dt = lod->lod_child;
break;
}
/* check the status of the OSP */
tgt = LTD_TGT(ltd, idx);
- if (tgt == NULL)
+ if (!tgt)
continue;
tgt_dt = tgt->ltd_tgt;
rc = dt_statfs(env, tgt_dt, &info->lti_osfs);
- if (rc) {
+ if (rc)
/* this OSP doesn't feel well */
- rc = 0;
continue;
- }
rc = obd_fid_alloc(env, tgt->ltd_exp, &fid, NULL);
- if (rc < 0) {
- rc = 0;
+ if (rc < 0)
continue;
- }
break;
}
/* Can not allocate more stripes */
if (j == lod->lod_remote_mdt_count) {
CDEBUG(D_INFO, "%s: require stripes %u only get %d\n",
- lod2obd(lod)->obd_name, stripe_count, i);
+ lod2obd(lod)->obd_name, stripe_count,
+ stripe_idx);
break;
}
CDEBUG(D_INFO, "Get idx %d, for stripe %d "DFID"\n",
- idx, i, PFID(&fid));
- idx_array[i] = idx;
+ idx, stripe_idx, PFID(&fid));
+ mdt_indices[stripe_idx] = idx;
/* Set the start index for next stripe allocation */
- if (!is_specific && i < stripe_count - 1) {
+ if (!is_specific && stripe_idx < stripe_count - 1) {
/*
* for large dir test, put all other slaves on one
* remote MDT, otherwise we may save too many local
*/
if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_LARGE_STRIPE)))
idx = master_index;
- idx_array[i + 1] = (idx + 1) %
+ mdt_indices[stripe_idx + 1] = (idx + 1) %
(lod->lod_remote_mdt_count + 1);
}
/* tgt_dt and fid must be ready after search avaible OSP
LASSERT(fid_is_sane(&fid));
/* fail a remote stripe FID allocation */
- if (i && OBD_FAIL_CHECK(OBD_FAIL_MDS_STRIPE_FID))
+ if (stripe_idx && OBD_FAIL_CHECK(OBD_FAIL_MDS_STRIPE_FID))
continue;
- conf.loc_flags = LOC_F_NEW;
dto = dt_locate_at(env, tgt_dt, &fid,
- dt->do_lu.lo_dev->ld_site->ls_top_dev,
- &conf);
- if (IS_ERR(dto))
- GOTO(out_put, rc = PTR_ERR(dto));
- stripe[i] = dto;
+ lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
+ &conf);
+ if (IS_ERR(dto)) {
+ rc = PTR_ERR(dto);
+ goto error;
+ }
+
+ stripes[stripe_idx] = dto;
+ }
+
+ return stripe_idx;
+
+error:
+ for (j = 1; j < stripe_idx; j++) {
+ LASSERT(stripes[j] != NULL);
+ dt_object_put(env, stripes[j]);
+ stripes[j] = NULL;
+ }
+ return rc;
+}
+
+static int lod_prep_md_striped_create(const struct lu_env *env,
+ struct dt_object *dt,
+ struct lu_attr *attr,
+ const struct lmv_user_md_v1 *lum,
+ struct dt_object_format *dof,
+ struct thandle *th)
+{
+ struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
+ struct lod_object *lo = lod_dt_obj(dt);
+ struct dt_object **stripes;
+ struct lu_object_conf conf = { .loc_flags = LOC_F_NEW };
+ struct lu_fid fid = { 0 };
+ __u32 stripe_count;
+ int i;
+ int rc = 0;
+
+ ENTRY;
+
+ /* The lum has been verified in lod_verify_md_striping */
+ LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC ||
+ le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC);
+
+ stripe_count = lo->ldo_dir_stripe_count;
+
+ OBD_ALLOC(stripes, sizeof(stripes[0]) * stripe_count);
+ if (!stripes)
+ RETURN(-ENOMEM);
+
+ /* Allocate the first stripe locally */
+ rc = obd_fid_alloc(env, lod->lod_child_exp, &fid, NULL);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ stripes[0] = dt_locate_at(env, lod->lod_child, &fid,
+ dt->do_lu.lo_dev->ld_site->ls_top_dev, &conf);
+ if (IS_ERR(stripes[0]))
+ GOTO(out, rc = PTR_ERR(stripes[0]));
+
+ if (lo->ldo_dir_stripe_offset == LMV_OFFSET_DEFAULT) {
+ lod_qos_statfs_update(env, lod, &lod->lod_mdt_descs);
+ rc = lod_mdt_alloc_qos(env, lo, stripes);
+ if (rc == -EAGAIN)
+ rc = lod_mdt_alloc_rr(env, lo, stripes);
+ } else {
+ int *idx_array;
+ bool is_specific = false;
+
+ OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
+ if (!idx_array)
+ GOTO(out, rc = -ENOMEM);
+
+ if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) {
+ is_specific = true;
+ for (i = 0; i < stripe_count; i++)
+ idx_array[i] =
+ le32_to_cpu(lum->lum_objects[i].lum_mds);
+ }
+
+ /* stripe 0 is local */
+ idx_array[0] =
+ lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_node_id;
+ rc = lod_mdt_alloc_specific(env, lo, stripes, idx_array,
+ is_specific);
+ OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
}
+ if (rc < 0)
+ GOTO(out, rc);
+
+ LASSERT(rc > 0);
+
lo->ldo_dir_striped = 1;
- lo->ldo_stripe = stripe;
- lo->ldo_dir_stripe_count = i;
+ lo->ldo_stripe = stripes;
+ lo->ldo_dir_stripe_count = rc;
lo->ldo_dir_stripes_allocated = stripe_count;
smp_mb();
lo->ldo_dir_stripe_loaded = 1;
- if (lo->ldo_dir_stripe_count == 0)
- GOTO(out_put, rc = -ENOSPC);
-
rc = lod_dir_declare_create_stripes(env, dt, attr, dof, th);
- if (rc != 0)
- GOTO(out_put, rc);
+ if (rc < 0)
+ lod_striping_free(env, lo);
-out_put:
- if (rc < 0) {
- for (i = 0; i < stripe_count; i++)
- if (stripe[i] != NULL)
- dt_object_put(env, stripe[i]);
- OBD_FREE(stripe, sizeof(stripe[0]) * stripe_count);
- lo->ldo_dir_stripe_count = 0;
- lo->ldo_dir_stripes_allocated = 0;
- lo->ldo_stripe = NULL;
- }
+ RETURN(rc);
-out_free:
- OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
+out:
+ LASSERT(rc < 0);
+ if (!IS_ERR_OR_NULL(stripes[0]))
+ dt_object_put(env, stripes[0]);
+ for (i = 1; i < stripe_count; i++)
+ LASSERT(!stripes[i]);
+ OBD_FREE(stripes, sizeof(stripes[0]) * stripe_count);
- RETURN(rc);
+ return rc;
}
/**
* the '$field' can only be 'flags' now. The xattr value is binary
* lov_comp_md_v1 which contains the component ID(s) and the value of
* the field to be modified.
+ * Please update allowed_lustre_lov macro if $field groks more values
+ * in the future.
*
* \param[in] env execution environment
* \param[in] dt dt_object to be modified
bool changed = false;
ENTRY;
+ /* Please update allowed_lustre_lov macro if op
+ * groks more values in the future
+ */
if (strcmp(op, "set.flags") != 0) {
CDEBUG(D_LAYOUT, "%s: operation (%s) not supported.\n",
lod2obd(d)->obd_name, op);
lcme->lcme_id = cpu_to_le32(id);
}
- id = MAX(le32_to_cpu(lcme->lcme_id), id);
+ id = max(le32_to_cpu(lcme->lcme_id), id);
}
mirror_id = mirror_id_of(id) + 1;
strcmp(name, XATTR_LUSTRE_LOV) == 0);
rc = lod_declare_layout_split(env, dt, buf, th);
} else if (S_ISREG(mode) &&
- strlen(name) > strlen(XATTR_LUSTRE_LOV) + 1 &&
- strncmp(name, XATTR_LUSTRE_LOV,
- strlen(XATTR_LUSTRE_LOV)) == 0) {
+ strlen(name) >= sizeof(XATTR_LUSTRE_LOV) + 3 &&
+ allowed_lustre_lov(name)) {
/*
* this is a request to modify object's striping.
* add/set/del component(s).
rc = lod_declare_modify_layout(env, dt, name, buf, th);
} else if (strncmp(name, XATTR_NAME_LMV, strlen(XATTR_NAME_LMV)) == 0 &&
- strlen(name) > strlen(XATTR_NAME_LMV) + 1) {
- const char *op = name + strlen(XATTR_NAME_LMV) + 1;
+ strlen(name) > strlen(XATTR_NAME_LMV)) {
+ const char *op = name + strlen(XATTR_NAME_LMV);
rc = -ENOTSUPP;
- if (strcmp(op, "add") == 0)
+ if (strcmp(op, ".add") == 0)
rc = lod_dir_declare_layout_add(env, dt, buf, th);
- else if (strcmp(op, "del") == 0)
+ else if (strcmp(op, ".del") == 0)
rc = lod_dir_declare_layout_delete(env, dt, buf, th);
- else if (strcmp(op, "set") == 0)
+ else if (strcmp(op, ".set") == 0)
rc = lod_sub_declare_xattr_set(env, next, buf,
XATTR_NAME_LMV, fl, th);
if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
le32_to_cpu(lum->lum_stripe_offset)) &&
- le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC &&
- !(le32_to_cpu(lum->lum_hash_type) & LMV_HASH_FLAG_SPACE)) {
+ le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
rc = lod_xattr_del_internal(env, dt, name, th);
if (rc == -ENODATA)
rc = 0;
lu_object_put(env, &obj->do_lu);
lod_comp->llc_stripe[j] = NULL;
}
- OBD_FREE(lod_comp->llc_stripe, sizeof(struct dt_object *) *
+ OBD_FREE(lod_comp->llc_stripe, sizeof(*lod_comp->llc_stripe) *
lod_comp->llc_stripes_allocated);
lod_comp->llc_stripe = NULL;
OBD_FREE(lod_comp->llc_ost_indices,
RETURN(rc);
} else if (S_ISDIR(dt->do_lu.lo_header->loh_attr) &&
strncmp(name, XATTR_NAME_LMV, strlen(XATTR_NAME_LMV)) == 0 &&
- strlen(name) > strlen(XATTR_NAME_LMV) + 1) {
- const char *op = name + strlen(XATTR_NAME_LMV) + 1;
+ strlen(name) > strlen(XATTR_NAME_LMV)) {
+ const char *op = name + strlen(XATTR_NAME_LMV);
rc = -ENOTSUPP;
/*
* XATTR_NAME_LMV".add" is never called, but only declared,
* because lod_xattr_set_lmv() will do the addition.
*/
- if (strcmp(op, "del") == 0)
+ if (strcmp(op, ".del") == 0)
rc = lod_dir_layout_delete(env, dt, buf, th);
- else if (strcmp(op, "set") == 0)
+ else if (strcmp(op, ".set") == 0)
rc = lod_sub_xattr_set(env, next, buf, XATTR_NAME_LMV,
fl, th);
RETURN(rc);
} else if (S_ISDIR(dt->do_lu.lo_header->loh_attr) &&
- strcmp(name, XATTR_NAME_LOV) == 0) {
+ strcmp(name, XATTR_NAME_LOV) == 0) {
struct lod_default_striping *lds = lod_lds_buf_get(env);
struct lov_user_md_v1 *v1 = buf->lb_buf;
char pool[LOV_MAXPOOLNAME + 1];
th);
RETURN(rc);
} else if (S_ISREG(dt->do_lu.lo_header->loh_attr) &&
- (!strcmp(name, XATTR_NAME_LOV) ||
- !strncmp(name, XATTR_LUSTRE_LOV,
- strlen(XATTR_LUSTRE_LOV)))) {
+ (strcmp(name, XATTR_NAME_LOV) == 0 ||
+ strcmp(name, XATTR_LUSTRE_LOV) == 0 ||
+ allowed_lustre_lov(name))) {
/* in case of lov EA swap, just set it
* if not, it is a replay so check striping match what we
* already have during req replay, declare_xattr_set()
lo->ldo_dir_stripe_offset =
lds->lds_dir_def_stripe_offset;
if (lo->ldo_dir_hash_type == 0)
- lo->ldo_dir_hash_type = lds->lds_dir_def_hash_type &
- ~LMV_HASH_FLAG_SPACE;
+ lo->ldo_dir_hash_type = lds->lds_dir_def_hash_type;
CDEBUG(D_LAYOUT, "striping from default dir: count:%hu, "
"offset:%u, hash_type:%u\n",
return -EINVAL;
lmu = info->lti_ea_store;
- return !!(le32_to_cpu(lmu->lum_hash_type) & LMV_HASH_FLAG_SPACE);
+ return le32_to_cpu(lmu->lum_stripe_offset) == LMV_OFFSET_DEFAULT;
}
/**
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_STALE_DIR_LAYOUT))
GOTO(out, rc = -EREMOTE);
- if (lo->ldo_dir_stripe_offset == -1) {
+ if (lo->ldo_dir_stripe_offset == LMV_OFFSET_DEFAULT) {
+ struct lod_default_striping *lds;
+
+ lds = lo->ldo_def_striping;
/*
- * child and parent should be in the same MDT,
- * but if parent has plain layout, it's allowed.
+ * child and parent should be on the same MDT,
+ * but if parent has default LMV, and the start
+ * MDT offset is -1, it's allowed. This check
+ * is not necessary after 2.12.22 because client
+ * follows this already, but old client may not.
*/
if (hint->dah_parent &&
- dt_object_remote(hint->dah_parent)) {
- rc = dt_object_qos_mkdir(env,
- lo->ldo_obj.do_lu.lo_dev,
- hint->dah_parent);
- if (rc <= 0)
- GOTO(out, rc ? rc : -EREMOTE);
- }
+ dt_object_remote(hint->dah_parent) && lds &&
+ lds->lds_dir_def_stripe_offset !=
+ LMV_OFFSET_DEFAULT)
+ GOTO(out, rc = -EREMOTE);
} else if (lo->ldo_dir_stripe_offset !=
ss->ss_node_id) {
struct lod_device *lod;
* This algo can be revised later after knowing the topology of
* cluster.
*/
- lod_qos_statfs_update(env, lod);
+ lod_qos_statfs_update(env, lod, &lod->lod_ost_descs);
for (i = 0; i < lo->ldo_mirror_count; i++) {
bool ost_avail = true;
int index = (i + seq) % lo->ldo_mirror_count;