reasonably uniform, but not cryptographically strong,
hashing of the filename. (default)
.TP
-.B space
-This can only be set on plain directory default striping.
-Newly created sub-directories will be distributed on all
-MDTs by MDT space usage. Note that this is suggested to
-be set on lustre ROOT, so that all sub-directories under
-lustre ROOT are distributed among all MDTs.
-.TP
.B all_char
Sum of ASCII characters modulo number of MDTs. This
provides weak hashing of the filename, and is suitable
lprocfs_pinger_recov_seq_write(struct file *file, const char __user *buffer,
size_t count, loff_t *off);
+int lu_str_to_s64(char *buffer, unsigned long count, __s64 *val, char defunit);
extern int lprocfs_str_with_units_to_s64(const char __user *buffer,
unsigned long count, __s64 *val,
char defunit);
struct lmv_oinfo lsm_md_oinfo[0];
};
-static inline bool lmv_is_known_hash_type(__u32 type)
-{
- return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
- (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
-}
-
static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
{
return lsm && lsm->lsm_md_magic == LMV_MAGIC;
return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
}
-/* NB, this is checking directory default LMV */
-static inline bool lmv_dir_qos_mkdir(const struct lmv_stripe_md *lsm)
-{
- return lsm && (lsm->lsm_md_hash_type & LMV_HASH_FLAG_SPACE);
-}
-
static inline bool
lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
{
#define LOV_PATTERN_DEFAULT 0xffffffff
#define LOV_OFFSET_DEFAULT ((__u16)-1)
+#define LMV_OFFSET_DEFAULT ((__u32)-1)
static inline bool lov_pattern_supported(__u32 pattern)
{
* might be interpreted differently with different flags. */
#define LMV_HASH_TYPE_MASK 0x0000ffff
-/* once this is set on a plain directory default layout, newly created
- * subdirectories will be distributed on all MDTs by space usage.
- */
-#define LMV_HASH_FLAG_SPACE 0x08000000
+/* Return true iff the masked hash type is one of the hash functions this
+ * client implements (FNV-1a 64-bit or sum-of-ASCII-chars). Flag bits above
+ * LMV_HASH_TYPE_MASK are deliberately ignored by the comparison.
+ */
+static inline bool lmv_is_known_hash_type(__u32 type)
+{
+	return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
+	       (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
+}
/* The striped directory has ever lost its master LMV EA, then LFSCK
* re-generated it. This flag is used to indicate such case. It is an
/*
* open(O_CREAT | O_EXCL) needs to check
* existing name, which should be done on both
- * old and new layout, to avoid creating new
- * file under old layout, check old layout on
+ * old and new layout, check old layout on
* client side.
*/
- tgt = lmv_locate_tgt(lmv, op_data);
- if (IS_ERR(tgt))
- RETURN(PTR_ERR(tgt));
-
- rc = md_getattr_name(tgt->ltd_exp, op_data,
- reqp);
- if (!rc) {
- ptlrpc_req_finished(*reqp);
- *reqp = NULL;
- RETURN(-EEXIST);
- }
-
+ rc = lmv_migrate_existence_check(lmv, op_data);
if (rc != -ENOENT)
RETURN(rc);
int lmv_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
void *, int);
int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds);
-int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds);
int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
struct lu_fid *fid, struct md_op_data *op_data);
struct lmv_tgt_desc *lmv_locate_tgt(struct lmv_obd *lmv,
struct md_op_data *op_data);
+int lmv_migrate_existence_check(struct lmv_obd *lmv,
+ struct md_op_data *op_data);
/* lproc_lmv.c */
int lmv_tunables_init(struct obd_device *obd);
-
#endif
RETURN(rc);
}
-/**
- * This is _inode_ placement policy function (not name).
- */
-static u32 lmv_placement_policy(struct obd_device *obd,
- struct md_op_data *op_data)
+int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+ struct lu_fid *fid, struct md_op_data *op_data)
{
+ struct obd_device *obd = class_exp2obd(exp);
struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_user_md *lum;
- u32 mdt;
-
- ENTRY;
-
- if (lmv->lmv_mdt_count == 1)
- RETURN(0);
-
- lum = op_data->op_data;
- /*
- * Choose MDT by
- * 1. See if the stripe offset is specified by lum.
- * 2. If parent has default LMV, and its hash type is "space", choose
- * MDT with QoS. (see lmv_locate_tgt_qos()).
- * 3. Then check if default LMV stripe offset is not -1.
- * 4. Finally choose MDS by name hash if the parent
- * is striped directory. (see lmv_locate_tgt()).
- *
- * presently explicit MDT location is not supported
- * for foreign dirs (as it can't be embedded into free
- * format LMV, like with lum_stripe_offset), so we only
- * rely on default stripe offset or then name hashing.
- */
- if (op_data->op_cli_flags & CLI_SET_MEA && lum != NULL &&
- le32_to_cpu(lum->lum_magic != LMV_MAGIC_FOREIGN) &&
- le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) {
- mdt = le32_to_cpu(lum->lum_stripe_offset);
- } else if (op_data->op_code == LUSTRE_OPC_MKDIR &&
- !lmv_dir_striped(op_data->op_mea1) &&
- lmv_dir_qos_mkdir(op_data->op_default_mea1)) {
- mdt = op_data->op_mds;
- } else if (op_data->op_code == LUSTRE_OPC_MKDIR &&
- op_data->op_default_mea1 &&
- op_data->op_default_mea1->lsm_md_master_mdt_index !=
- (__u32)-1) {
- mdt = op_data->op_default_mea1->lsm_md_master_mdt_index;
- op_data->op_mds = mdt;
- } else {
- mdt = op_data->op_mds;
- }
-
- RETURN(mdt);
-}
-
-int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
-{
struct lmv_tgt_desc *tgt;
int rc;
ENTRY;
- tgt = lmv_tgt(lmv, mds);
+ LASSERT(op_data);
+ LASSERT(fid);
+
+ tgt = lmv_tgt(lmv, op_data->op_mds);
if (!tgt)
RETURN(-ENODEV);
+ if (!tgt->ltd_active || !tgt->ltd_exp)
+ RETURN(-ENODEV);
+
/*
* New seq alloc and FLD setup should be atomic. Otherwise we may find
* on server that seq in new allocated fid is not yet known.
*/
mutex_lock(&tgt->ltd_fid_mutex);
-
- if (tgt->ltd_active == 0 || tgt->ltd_exp == NULL)
- GOTO(out, rc = -ENODEV);
-
- /*
- * Asking underlying tgt layer to allocate new fid.
- */
rc = obd_fid_alloc(NULL, tgt->ltd_exp, fid, NULL);
+ mutex_unlock(&tgt->ltd_fid_mutex);
if (rc > 0) {
LASSERT(fid_is_sane(fid));
rc = 0;
}
- EXIT;
-out:
- mutex_unlock(&tgt->ltd_fid_mutex);
- return rc;
-}
-
-int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
- struct lu_fid *fid, struct md_op_data *op_data)
-{
- struct obd_device *obd = class_exp2obd(exp);
- struct lmv_obd *lmv = &obd->u.lmv;
- u32 mds;
- int rc;
-
- ENTRY;
-
- LASSERT(op_data != NULL);
- LASSERT(fid != NULL);
-
- mds = lmv_placement_policy(obd, op_data);
-
- rc = __lmv_fid_alloc(lmv, fid, mds);
- if (rc)
- CERROR("Can't alloc new fid, rc %d\n", rc);
-
RETURN(rc);
}
* which is set outside, and if dir is migrating, 'op_data->op_post_migrate'
* indicates whether old or new layout is used to locate.
*
- * For plain direcotry, normally it will locate MDT by FID, but if this
- * directory has default LMV, and its hash type is "space", locate MDT with QoS.
+ * For a plain directory, it just locates the MDT of op_data->op_fid1.
*
* \param[in] lmv LMV device
* \param[in] op_data client MD stack parameters, name, namelen
* index if the file under striped dir is being restored, see
* ct_restore(). */
if (op_data->op_bias & MDS_CREATE_VOLATILE &&
- (int)op_data->op_mds != -1) {
+ op_data->op_mds != LMV_OFFSET_DEFAULT) {
tgt = lmv_tgt(lmv, op_data->op_mds);
if (!tgt)
return ERR_PTR(-ENODEV);
op_data->op_mds = oinfo->lmo_mds;
tgt = lmv_tgt(lmv, oinfo->lmo_mds);
if (!tgt)
- tgt = ERR_PTR(-ENODEV);
- } else if (op_data->op_code == LUSTRE_OPC_MKDIR &&
- lmv_dir_qos_mkdir(op_data->op_default_mea1) &&
- !lmv_dir_striped(lsm)) {
- tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds);
- if (tgt == ERR_PTR(-EAGAIN))
- tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds);
- /*
- * only update statfs when mkdir under dir with "space" hash,
- * this means the cached statfs may be stale, and current mkdir
- * may not follow QoS accurately, but it's not serious, and it
- * avoids periodic statfs when client doesn't mkdir under
- * "space" hashed directories.
- *
- * TODO: after MDT support QoS object allocation, also update
- * statfs for 'lfs mkdir -i -1 ...", currently it's done in user
- * space.
- */
- if (!IS_ERR(tgt)) {
- struct obd_device *obd;
-
- obd = container_of(lmv, struct obd_device, u.lmv);
- lmv_statfs_check_update(obd, tgt);
- }
+ return ERR_PTR(-ENODEV);
} else {
tgt = lmv_locate_tgt_by_name(lmv, op_data->op_mea1,
op_data->op_name, op_data->op_namelen,
&op_data->op_mds, true);
}
+/* Check whether op_data->op_name already exists under a migrating directory.
+ *
+ * The caller must hold a migrating directory (asserted below); the target
+ * located by lmv_locate_tgt() is queried with a getattr-by-name.
+ *
+ * Return: -EEXIST if the name exists (the reply request is released here),
+ * -ENOENT if it does not, or another negative errno on lookup failure.
+ */
+int lmv_migrate_existence_check(struct lmv_obd *lmv, struct md_op_data *op_data)
+{
+	struct lu_tgt_desc *tgt;
+	struct ptlrpc_request *request;
+	int rc;
+
+	LASSERT(lmv_dir_migrating(op_data->op_mea1));
+
+	tgt = lmv_locate_tgt(lmv, op_data);
+	if (IS_ERR(tgt))
+		return PTR_ERR(tgt);
+
+	rc = md_getattr_name(tgt->ltd_exp, op_data, &request);
+	if (!rc) {
+		/* name found: release the reply, report existence */
+		ptlrpc_req_finished(request);
+		return -EEXIST;
+	}
+
+	return rc;
+}
+
+/* mkdir by QoS in two cases:
+ * 1. 'lfs mkdir -i -1'
+ * 2. parent default LMV master_mdt_index is -1
+ *
+ * NB, mkdir by QoS only if parent is not striped, this is to avoid remote
+ * directories under striped directory.
+ */
+static inline bool lmv_op_qos_mkdir(const struct md_op_data *op_data)
+{
+	const struct lmv_stripe_md *lsm = op_data->op_default_mea1;
+	const struct lmv_user_md *lum = op_data->op_data;
+
+	if (op_data->op_code != LUSTRE_OPC_MKDIR)
+		return false;
+
+	/* never QoS-balance below a striped parent */
+	if (lmv_dir_striped(op_data->op_mea1))
+		return false;
+
+	/* case 1: user explicitly asked for offset -1 ('lfs mkdir -i -1') */
+	if (op_data->op_cli_flags & CLI_SET_MEA && lum &&
+	    (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC ||
+	     le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) &&
+	    le32_to_cpu(lum->lum_stripe_offset) == LMV_OFFSET_DEFAULT)
+		return true;
+
+	/* case 2: parent's default LMV leaves the master MDT unspecified */
+	if (lsm && lsm->lsm_md_master_mdt_index == LMV_OFFSET_DEFAULT)
+		return true;
+
+	return false;
+}
+
+/* 'lfs mkdir -i <specific_MDT>': true when the user supplied an LMV EA
+ * (CLI_SET_MEA) with an explicit, non-default starting MDT index, i.e.
+ * lum_stripe_offset != LMV_OFFSET_DEFAULT (-1).
+ */
+static inline bool lmv_op_user_specific_mkdir(const struct md_op_data *op_data)
+{
+	const struct lmv_user_md *lum = op_data->op_data;
+
+	return op_data->op_code == LUSTRE_OPC_MKDIR &&
+	       op_data->op_cli_flags & CLI_SET_MEA && lum &&
+	       (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC ||
+		le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) &&
+	       le32_to_cpu(lum->lum_stripe_offset) != LMV_OFFSET_DEFAULT;
+}
+
+/* True when mkdir should be placed by the parent's default LMV, i.e. the
+ * parent carries a default LMV whose master_mdt_index is not -1.
+ */
+static inline bool
+lmv_op_default_specific_mkdir(const struct md_op_data *op_data)
+{
+	return op_data->op_code == LUSTRE_OPC_MKDIR &&
+	       op_data->op_default_mea1 &&
+	       op_data->op_default_mea1->lsm_md_master_mdt_index !=
+		LMV_OFFSET_DEFAULT;
+}
int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
const void *data, size_t datalen, umode_t mode, uid_t uid,
gid_t gid, cfs_cap_t cap_effective, __u64 rdev,
if (lmv_dir_migrating(op_data->op_mea1)) {
/*
* if parent is migrating, create() needs to lookup existing
- * name, to avoid creating new file under old layout of
- * migrating directory, check old layout here.
+ * name in both old and new layout, check old layout on client.
*/
- tgt = lmv_locate_tgt(lmv, op_data);
- if (IS_ERR(tgt))
- RETURN(PTR_ERR(tgt));
-
- rc = md_getattr_name(tgt->ltd_exp, op_data, request);
- if (!rc) {
- ptlrpc_req_finished(*request);
- *request = NULL;
- RETURN(-EEXIST);
- }
-
+ rc = lmv_migrate_existence_check(lmv, op_data);
if (rc != -ENOENT)
RETURN(rc);
if (IS_ERR(tgt))
RETURN(PTR_ERR(tgt));
- CDEBUG(D_INODE, "CREATE name '%.*s' on "DFID" -> mds #%x\n",
- (int)op_data->op_namelen, op_data->op_name,
- PFID(&op_data->op_fid1), op_data->op_mds);
+ if (lmv_op_qos_mkdir(op_data)) {
+ tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds);
+ if (tgt == ERR_PTR(-EAGAIN))
+ tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds);
+ /*
+ * only update statfs after QoS mkdir, this means the cached
+ * statfs may be stale, and current mkdir may not follow QoS
+ * accurately, but it's not serious, and avoids periodic statfs
+ * when client doesn't mkdir by QoS.
+ */
+ if (!IS_ERR(tgt))
+ lmv_statfs_check_update(obd, tgt);
+ } else if (lmv_op_user_specific_mkdir(op_data)) {
+ struct lmv_user_md *lum = op_data->op_data;
+
+ op_data->op_mds = le32_to_cpu(lum->lum_stripe_offset);
+ tgt = lmv_tgt(lmv, op_data->op_mds);
+ if (!tgt)
+ RETURN(-ENODEV);
+ } else if (lmv_op_default_specific_mkdir(op_data)) {
+ op_data->op_mds =
+ op_data->op_default_mea1->lsm_md_master_mdt_index;
+ tgt = lmv_tgt(lmv, op_data->op_mds);
+ if (!tgt)
+ RETURN(-ENODEV);
+ }
+
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
if (rc)
RETURN(rc);
- if (exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE) {
- /* Send the create request to the MDT where the object
- * will be located */
- tgt = lmv_fid2tgt(lmv, &op_data->op_fid2);
- if (IS_ERR(tgt))
- RETURN(PTR_ERR(tgt));
-
- op_data->op_mds = tgt->ltd_index;
- }
-
- CDEBUG(D_INODE, "CREATE obj "DFID" -> mds #%x\n",
- PFID(&op_data->op_fid2), op_data->op_mds);
+ CDEBUG(D_INODE, "CREATE name '%.*s' "DFID" on "DFID" -> mds #%x\n",
+ (int)op_data->op_namelen, op_data->op_name,
+ PFID(&op_data->op_fid2), PFID(&op_data->op_fid1),
+ op_data->op_mds);
op_data->op_flags |= MF_MDC_CANCEL_FID1;
rc = md_create(tgt->ltd_exp, op_data, data, datalen, mode, uid, gid,
if (IS_ERR(child_tgt))
RETURN(PTR_ERR(child_tgt));
- if (!S_ISDIR(op_data->op_mode) && tp_tgt)
- rc = __lmv_fid_alloc(lmv, &target_fid, tp_tgt->ltd_index);
- else
- rc = lmv_fid_alloc(NULL, exp, &target_fid, op_data);
+ /* for directory, migrate to MDT specified by lum_stripe_offset;
+ * otherwise migrate to the target stripe of parent, but parent
+ * directory may have finished migration (normally current file too),
+ * allocate FID on MDT lum_stripe_offset, and server will check
+ * whether file was migrated already.
+ */
+ if (S_ISDIR(op_data->op_mode) || !tp_tgt) {
+ struct lmv_user_md *lum = op_data->op_data;
+
+ op_data->op_mds = le32_to_cpu(lum->lum_stripe_offset);
+ } else {
+ op_data->op_mds = tp_tgt->ltd_index;
+ }
+ rc = lmv_fid_alloc(NULL, exp, &target_fid, op_data);
if (rc)
RETURN(rc);
* set default value -1, so lmv_locate_tgt() knows this stripe
* target is not initialized.
*/
- lsm->lsm_md_oinfo[i].lmo_mds = (u32)-1;
+ lsm->lsm_md_oinfo[i].lmo_mds = LMV_OFFSET_DEFAULT;
if (!fid_is_sane(&lsm->lsm_md_oinfo[i].lmo_fid))
continue;
void lod_fix_desc_pattern(__u32 *val);
void lod_fix_desc_stripe_count(__u32 *val);
void lod_fix_desc_stripe_size(__u64 *val);
+void lod_fix_lmv_desc_pattern(__u32 *val);
int lod_pools_init(struct lod_device *m, struct lustre_cfg *cfg);
int lod_pools_fini(struct lod_device *m);
int lod_parse_striping(const struct lu_env *env, struct lod_object *mo,
int lod_fill_mirrors(struct lod_object *lo);
/* lod_pool.c */
-int lod_ost_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count);
-int lod_ost_pool_remove(struct lu_tgt_pool *op, __u32 idx);
-int lod_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count);
+int lod_tgt_pool_init(struct lu_tgt_pool *op, unsigned int count);
+int lod_tgt_pool_free(struct lu_tgt_pool *op);
+int lod_tgt_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count);
+int lod_tgt_pool_remove(struct lu_tgt_pool *op, __u32 idx);
+int lod_tgt_pool_extend(struct lu_tgt_pool *op, unsigned int min_count);
struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname);
void lod_pool_putref(struct pool_desc *pool);
-int lod_ost_pool_free(struct lu_tgt_pool *op);
int lod_pool_del(struct obd_device *obd, char *poolname);
-int lod_ost_pool_init(struct lu_tgt_pool *op, unsigned int count);
extern struct cfs_hash_ops pool_hash_operations;
int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool);
int lod_pool_new(struct obd_device *obd, char *poolname);
};
/* lod_qos.c */
+int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
+ struct dt_object **stripes);
+int lod_mdt_alloc_rr(const struct lu_env *env, struct lod_object *lo,
+ struct dt_object **stripe);
int lod_prepare_create(const struct lu_env *env, struct lod_object *lo,
struct lu_attr *attr, const struct lu_buf *buf,
struct thandle *th);
bool is_dir);
__u16 lod_get_stripe_count(struct lod_device *lod, struct lod_object *lo,
__u16 stripe_count, bool overstriping);
-void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod);
+void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod,
+ struct lu_tgt_descs *ltd);
/* lproc_lod.c */
int lod_procfs_init(struct lod_device *lod);
continue;
list_add(&tgt_desc->ltd_kill, &kill);
- /*FIXME: only support ost pool for now */
- if (ltd == &lod->lod_ost_descs)
- lod_ost_pool_remove(<d->ltd_tgt_pool,
- tgt_desc->ltd_index);
+ lod_tgt_pool_remove(<d->ltd_tgt_pool,
+ tgt_desc->ltd_index);
ltd_del_tgt(ltd, tgt_desc);
ltd->ltd_death_row--;
}
if (rc)
GOTO(out_del_tgt, rc);
- if (for_ost) {
- /* pool is not supported for MDS stack yet */
- rc = lod_ost_pool_add(<d->ltd_tgt_pool, index,
- ltd->ltd_tgts_size);
- if (rc) {
- CERROR("%s: can't set up pool, failed with %d\n",
- obd->obd_name, rc);
- GOTO(out_del_tgt, rc);
- }
+ rc = lod_tgt_pool_add(<d->ltd_tgt_pool, index,
+ ltd->ltd_lov_desc.ld_tgt_count);
+ if (rc) {
+ CERROR("%s: can't set up pool, failed with %d\n",
+ obd->obd_name, rc);
+ GOTO(out_del_tgt, rc);
}
mutex_unlock(<d->ltd_mutex);
thread = LTD_TGT(ltd, index)->ltd_recovery_thread;
OBD_FREE_PTR(thread);
}
- lod_ost_pool_remove(<d->ltd_tgt_pool, index);
+ lod_tgt_pool_remove(<d->ltd_tgt_pool, index);
out_del_tgt:
ltd_del_tgt(ltd, tgt_desc);
out_mutex:
}
}
+/* Sanitize a default LMV hash/pattern value: if it is non-zero but not a
+ * hash type lmv_is_known_hash_type() recognizes, warn on the console and
+ * reset it to 0 (i.e. fall back to the filesystem default).
+ */
+void lod_fix_lmv_desc_pattern(__u32 *val)
+{
+	if ((*val) && !lmv_is_known_hash_type(*val)) {
+		LCONSOLE_WARN("lod: Unknown md stripe pattern: %#x\n", *val);
+		*val = 0;
+	}
+}
+
void lod_fix_desc_qos_maxage(__u32 *val)
{
/* fix qos_maxage */
lod_fix_desc_qos_maxage(&desc->ld_qos_maxage);
}
+/* Sanitize an lmv_desc in place: clear the active target count and clamp
+ * the default stripe count, hash pattern, and qos_maxage to valid values
+ * via the corresponding lod_fix_* helpers.
+ */
+static void lod_fix_lmv_desc(struct lmv_desc *desc)
+{
+	desc->ld_active_tgt_count = 0;
+	lod_fix_desc_stripe_count(&desc->ld_default_stripe_count);
+	lod_fix_lmv_desc_pattern(&desc->ld_pattern);
+	lod_fix_desc_qos_maxage(&desc->ld_qos_maxage);
+}
+
/**
* Initialize the structures used to store pools and default striping.
*
desc->ld_active_tgt_count = 0;
lod->lod_ost_descs.ltd_lov_desc = *desc;
+ /* NB: config doesn't contain lmv_desc, alter it via sysfs. */
+ lod_fix_lmv_desc(&lod->lod_mdt_descs.ltd_lmv_desc);
+
lod->lod_sp_me = LUSTRE_SP_CLI;
/* Set up OST pool environment */
INIT_LIST_HEAD(&lod->lod_pool_list);
lod->lod_pool_count = 0;
- rc = lod_ost_pool_init(&lod->lod_ost_descs.ltd_tgt_pool, 0);
+ rc = lod_tgt_pool_init(&lod->lod_mdt_descs.ltd_tgt_pool, 0);
if (rc)
GOTO(out_hash, rc);
- rc = lod_ost_pool_init(&lod->lod_ost_descs.ltd_qos.lq_rr.lqr_pool, 0);
+
+ rc = lod_tgt_pool_init(&lod->lod_mdt_descs.ltd_qos.lq_rr.lqr_pool, 0);
+ if (rc)
+ GOTO(out_mdt_pool, rc);
+
+ rc = lod_tgt_pool_init(&lod->lod_ost_descs.ltd_tgt_pool, 0);
+ if (rc)
+ GOTO(out_mdt_rr_pool, rc);
+
+ rc = lod_tgt_pool_init(&lod->lod_ost_descs.ltd_qos.lq_rr.lqr_pool, 0);
if (rc)
- GOTO(out_pool_info, rc);
+ GOTO(out_ost_pool, rc);
RETURN(0);
-out_pool_info:
- lod_ost_pool_free(&lod->lod_ost_descs.ltd_tgt_pool);
+out_ost_pool:
+ lod_tgt_pool_free(&lod->lod_ost_descs.ltd_tgt_pool);
+out_mdt_rr_pool:
+ lod_tgt_pool_free(&lod->lod_mdt_descs.ltd_qos.lq_rr.lqr_pool);
+out_mdt_pool:
+ lod_tgt_pool_free(&lod->lod_mdt_descs.ltd_tgt_pool);
out_hash:
cfs_hash_putref(lod->lod_pools_hash_body);
}
cfs_hash_putref(lod->lod_pools_hash_body);
- lod_ost_pool_free(&(lod->lod_ost_descs.ltd_qos.lq_rr.lqr_pool));
- lod_ost_pool_free(&lod->lod_ost_descs.ltd_tgt_pool);
+ lod_tgt_pool_free(&lod->lod_ost_descs.ltd_qos.lq_rr.lqr_pool);
+ lod_tgt_pool_free(&lod->lod_ost_descs.ltd_tgt_pool);
+ lod_tgt_pool_free(&lod->lod_mdt_descs.ltd_qos.lq_rr.lqr_pool);
+ lod_tgt_pool_free(&lod->lod_mdt_descs.ltd_tgt_pool);
RETURN(0);
}
RETURN(rc);
}
-static int lod_prep_md_striped_create(const struct lu_env *env,
- struct dt_object *dt,
- struct lu_attr *attr,
- const struct lmv_user_md_v1 *lum,
- struct dt_object_format *dof,
- struct thandle *th)
+/**
+ * Allocate a striping on a predefined set of MDTs.
+ *
+ * Allocates new striping using the MDT index range provided by the data from
+ * the lum_objects contained in the lmv_user_md passed to this method if
+ * \a is_specific is true; or allocates new layout starting from MDT index in
+ * lo->ldo_dir_stripe_offset. The exact order of MDTs is not important and
+ * varies depending on MDT status. The number of stripes needed and stripe
+ * offset are taken from the object. If that number cannot be met, then the
+ * function returns an error and then it's the caller's responsibility to
+ * release the stripes allocated. All the internal structures are protected,
+ * but no concurrent allocation is allowed on the same objects.
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] lo LOD object
+ * \param[out] stripes striping created
+ * \param[out] mdt_indices MDT indices of striping created
+ * \param[in] is_specific true if the MDTs are provided by lum; false if
+ * only the starting MDT index is provided
+ *
+ * \retval positive stripes allocated, including the first stripe allocated
+ * outside
+ * \retval negative errno on failure
+ */
+static int lod_mdt_alloc_specific(const struct lu_env *env,
+ struct lod_object *lo,
+ struct dt_object **stripes,
+ __u32 *mdt_indices, bool is_specific)
{
struct lod_thread_info *info = lod_env_info(env);
- struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
- struct lod_object *lo = lod_dt_obj(dt);
- struct dt_object **stripe;
- __u32 stripe_count;
- int *idx_array;
- __u32 master_index;
- int rc = 0;
- __u32 i;
- __u32 j;
- bool is_specific = false;
- ENTRY;
-
- /* The lum has been verifed in lod_verify_md_striping */
- LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC ||
- le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC);
-
- stripe_count = lo->ldo_dir_stripe_count;
-
- OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
- if (idx_array == NULL)
- RETURN(-ENOMEM);
-
- OBD_ALLOC(stripe, sizeof(stripe[0]) * stripe_count);
- if (stripe == NULL)
- GOTO(out_free, rc = -ENOMEM);
+ struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+ struct lu_tgt_descs *ltd = &lod->lod_mdt_descs;
+ struct lu_tgt_desc *tgt = NULL;
+ struct lu_object_conf conf = { .loc_flags = LOC_F_NEW };
+ struct dt_device *tgt_dt = NULL;
+ struct lu_fid fid = { 0 };
+ struct dt_object *dto;
+ u32 master_index;
+ u32 stripe_count = lo->ldo_dir_stripe_count;
+ int stripe_idx = 1;
+ int j;
+ int idx;
+ int rc;
- /* Start index must be the master MDT */
master_index = lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_node_id;
- idx_array[0] = master_index;
- if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) {
- is_specific = true;
- for (i = 1; i < stripe_count; i++)
- idx_array[i] = le32_to_cpu(lum->lum_objects[i].lum_mds);
- }
-
- for (i = 0; i < stripe_count; i++) {
- struct lod_tgt_desc *tgt = NULL;
- struct dt_object *dto;
- struct lu_fid fid = { 0 };
- int idx;
- struct lu_object_conf conf = { 0 };
- struct dt_device *tgt_dt = NULL;
+ if (stripe_count > 1)
+ /* Set the start index for the 2nd stripe allocation */
+ mdt_indices[1] = (mdt_indices[0] + 1) %
+ (lod->lod_remote_mdt_count + 1);
+ for (; stripe_idx < stripe_count; stripe_idx++) {
/* Try to find next avaible target */
- idx = idx_array[i];
+ idx = mdt_indices[stripe_idx];
for (j = 0; j < lod->lod_remote_mdt_count;
j++, idx = (idx + 1) % (lod->lod_remote_mdt_count + 1)) {
bool already_allocated = false;
__u32 k;
CDEBUG(D_INFO, "try idx %d, mdt cnt %u, allocated %u\n",
- idx, lod->lod_remote_mdt_count + 1, i);
+ idx, lod->lod_remote_mdt_count + 1, stripe_idx);
if (likely(!is_specific &&
!OBD_FAIL_CHECK(OBD_FAIL_LARGE_STRIPE))) {
/* check whether the idx already exists
* in current allocated array */
- for (k = 0; k < i; k++) {
- if (idx_array[k] == idx) {
+ for (k = 0; k < stripe_idx; k++) {
+ if (mdt_indices[k] == idx) {
already_allocated = true;
break;
}
rc = obd_fid_alloc(env, lod->lod_child_exp,
&fid, NULL);
if (rc < 0)
- GOTO(out_put, rc);
+ continue;
tgt_dt = lod->lod_child;
break;
}
/* check the status of the OSP */
tgt = LTD_TGT(ltd, idx);
- if (tgt == NULL)
+ if (!tgt)
continue;
tgt_dt = tgt->ltd_tgt;
rc = dt_statfs(env, tgt_dt, &info->lti_osfs);
- if (rc) {
+ if (rc)
/* this OSP doesn't feel well */
- rc = 0;
continue;
- }
rc = obd_fid_alloc(env, tgt->ltd_exp, &fid, NULL);
- if (rc < 0) {
- rc = 0;
+ if (rc < 0)
continue;
- }
break;
}
/* Can not allocate more stripes */
if (j == lod->lod_remote_mdt_count) {
CDEBUG(D_INFO, "%s: require stripes %u only get %d\n",
- lod2obd(lod)->obd_name, stripe_count, i);
+ lod2obd(lod)->obd_name, stripe_count,
+ stripe_idx);
break;
}
CDEBUG(D_INFO, "Get idx %d, for stripe %d "DFID"\n",
- idx, i, PFID(&fid));
- idx_array[i] = idx;
+ idx, stripe_idx, PFID(&fid));
+ mdt_indices[stripe_idx] = idx;
/* Set the start index for next stripe allocation */
- if (!is_specific && i < stripe_count - 1) {
+ if (!is_specific && stripe_idx < stripe_count - 1) {
/*
* for large dir test, put all other slaves on one
* remote MDT, otherwise we may save too many local
*/
if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_LARGE_STRIPE)))
idx = master_index;
- idx_array[i + 1] = (idx + 1) %
+ mdt_indices[stripe_idx + 1] = (idx + 1) %
(lod->lod_remote_mdt_count + 1);
}
/* tgt_dt and fid must be ready after search avaible OSP
LASSERT(fid_is_sane(&fid));
/* fail a remote stripe FID allocation */
- if (i && OBD_FAIL_CHECK(OBD_FAIL_MDS_STRIPE_FID))
+ if (stripe_idx && OBD_FAIL_CHECK(OBD_FAIL_MDS_STRIPE_FID))
continue;
- conf.loc_flags = LOC_F_NEW;
dto = dt_locate_at(env, tgt_dt, &fid,
- dt->do_lu.lo_dev->ld_site->ls_top_dev,
- &conf);
- if (IS_ERR(dto))
- GOTO(out_put, rc = PTR_ERR(dto));
- stripe[i] = dto;
+ lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
+ &conf);
+ if (IS_ERR(dto)) {
+ rc = PTR_ERR(dto);
+ goto error;
+ }
+
+ stripes[stripe_idx] = dto;
}
+ return stripe_idx;
+
+error:
+ for (j = 1; j < stripe_idx; j++) {
+ LASSERT(stripes[j] != NULL);
+ dt_object_put(env, stripes[j]);
+ stripes[j] = NULL;
+ }
+ return rc;
+}
+
+static int lod_prep_md_striped_create(const struct lu_env *env,
+ struct dt_object *dt,
+ struct lu_attr *attr,
+ const struct lmv_user_md_v1 *lum,
+ struct dt_object_format *dof,
+ struct thandle *th)
+{
+ struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
+ struct lod_object *lo = lod_dt_obj(dt);
+ struct dt_object **stripes;
+ struct lu_object_conf conf = { .loc_flags = LOC_F_NEW };
+ struct lu_fid fid = { 0 };
+ __u32 stripe_count;
+ int i;
+ int rc = 0;
+
+ ENTRY;
+
+	/* The lum has been verified in lod_verify_md_striping */
+ LASSERT(le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC ||
+ le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC);
+
+ stripe_count = lo->ldo_dir_stripe_count;
+
+ OBD_ALLOC(stripes, sizeof(stripes[0]) * stripe_count);
+ if (!stripes)
+ RETURN(-ENOMEM);
+
+ /* Allocate the first stripe locally */
+ rc = obd_fid_alloc(env, lod->lod_child_exp, &fid, NULL);
+ if (rc < 0)
+ GOTO(out, rc);
+
+ stripes[0] = dt_locate_at(env, lod->lod_child, &fid,
+ dt->do_lu.lo_dev->ld_site->ls_top_dev, &conf);
+ if (IS_ERR(stripes[0]))
+ GOTO(out, rc = PTR_ERR(stripes[0]));
+
+ if (lo->ldo_dir_stripe_offset == LMV_OFFSET_DEFAULT) {
+ lod_qos_statfs_update(env, lod, &lod->lod_mdt_descs);
+ rc = lod_mdt_alloc_qos(env, lo, stripes);
+ if (rc == -EAGAIN)
+ rc = lod_mdt_alloc_rr(env, lo, stripes);
+ } else {
+ int *idx_array;
+ bool is_specific = false;
+
+ OBD_ALLOC(idx_array, sizeof(idx_array[0]) * stripe_count);
+ if (!idx_array)
+ GOTO(out, rc = -ENOMEM);
+
+ if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) {
+ is_specific = true;
+ for (i = 0; i < stripe_count; i++)
+ idx_array[i] =
+ le32_to_cpu(lum->lum_objects[i].lum_mds);
+ }
+
+ /* stripe 0 is local */
+ idx_array[0] =
+ lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_node_id;
+ rc = lod_mdt_alloc_specific(env, lo, stripes, idx_array,
+ is_specific);
+ OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
+ }
+
+ if (rc < 0)
+ GOTO(out, rc);
+
+ LASSERT(rc > 0);
+
lo->ldo_dir_striped = 1;
- lo->ldo_stripe = stripe;
- lo->ldo_dir_stripe_count = i;
+ lo->ldo_stripe = stripes;
+ lo->ldo_dir_stripe_count = rc;
lo->ldo_dir_stripes_allocated = stripe_count;
smp_mb();
lo->ldo_dir_stripe_loaded = 1;
- if (lo->ldo_dir_stripe_count == 0)
- GOTO(out_put, rc = -ENOSPC);
-
rc = lod_dir_declare_create_stripes(env, dt, attr, dof, th);
- if (rc != 0)
- GOTO(out_put, rc);
+ if (rc < 0)
+ lod_striping_free(env, lo);
-out_put:
- if (rc < 0) {
- for (i = 0; i < stripe_count; i++)
- if (stripe[i] != NULL)
- dt_object_put(env, stripe[i]);
- OBD_FREE(stripe, sizeof(stripe[0]) * stripe_count);
- lo->ldo_dir_stripe_count = 0;
- lo->ldo_dir_stripes_allocated = 0;
- lo->ldo_stripe = NULL;
- }
+ RETURN(rc);
-out_free:
- OBD_FREE(idx_array, sizeof(idx_array[0]) * stripe_count);
+out:
+ LASSERT(rc < 0);
+ if (!IS_ERR_OR_NULL(stripes[0]))
+ dt_object_put(env, stripes[0]);
+ for (i = 1; i < stripe_count; i++)
+ LASSERT(!stripes[i]);
+ OBD_FREE(stripes, sizeof(stripes[0]) * stripe_count);
- RETURN(rc);
+ return rc;
}
/**
if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
le32_to_cpu(lum->lum_stripe_offset)) &&
- le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC &&
- !(le32_to_cpu(lum->lum_hash_type) & LMV_HASH_FLAG_SPACE)) {
+ le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
rc = lod_xattr_del_internal(env, dt, name, th);
if (rc == -ENODATA)
rc = 0;
lo->ldo_dir_stripe_offset =
lds->lds_dir_def_stripe_offset;
if (lo->ldo_dir_hash_type == 0)
- lo->ldo_dir_hash_type = lds->lds_dir_def_hash_type &
- ~LMV_HASH_FLAG_SPACE;
+ lo->ldo_dir_hash_type = lds->lds_dir_def_hash_type;
CDEBUG(D_LAYOUT, "striping from default dir: count:%hu, "
"offset:%u, hash_type:%u\n",
return -EINVAL;
lmu = info->lti_ea_store;
- return !!(le32_to_cpu(lmu->lum_hash_type) & LMV_HASH_FLAG_SPACE);
+ return le32_to_cpu(lmu->lum_stripe_offset) == LMV_OFFSET_DEFAULT;
}
/**
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_STALE_DIR_LAYOUT))
GOTO(out, rc = -EREMOTE);
- if (lo->ldo_dir_stripe_offset == -1) {
+ if (lo->ldo_dir_stripe_offset == LMV_OFFSET_DEFAULT) {
+ struct lod_default_striping *lds;
+
+ lds = lo->ldo_def_striping;
/*
- * child and parent should be in the same MDT,
- * but if parent has plain layout, it's allowed.
+ * child and parent should be on the same MDT,
+ * but if parent has default LMV, and the start
+ * MDT offset is -1, it's allowed. This check
+ * is not necessary after 2.12.22 because client
+ * follows this already, but old client may not.
*/
if (hint->dah_parent &&
- dt_object_remote(hint->dah_parent)) {
- rc = dt_object_qos_mkdir(env,
- lo->ldo_obj.do_lu.lo_dev,
- hint->dah_parent);
- if (rc <= 0)
- GOTO(out, rc ? rc : -EREMOTE);
- }
+ dt_object_remote(hint->dah_parent) && lds &&
+ lds->lds_dir_def_stripe_offset !=
+ LMV_OFFSET_DEFAULT)
+ GOTO(out, rc = -EREMOTE);
} else if (lo->ldo_dir_stripe_offset !=
ss->ss_node_id) {
struct lod_device *lod;
* This algo can be revised later after knowing the topology of
* cluster.
*/
- lod_qos_statfs_update(env, lod);
+ lod_qos_statfs_update(env, lod, &lod->lod_ost_descs);
for (i = 0; i < lo->ldo_mirror_count; i++) {
bool ost_avail = true;
int index = (i + seq) % lo->ldo_mirror_count;
LASSERT(hlist_unhashed(&pool->pool_hash));
LASSERT(list_empty(&pool->pool_list));
LASSERT(pool->pool_proc_entry == NULL);
- lod_ost_pool_free(&(pool->pool_rr.lqr_pool));
- lod_ost_pool_free(&(pool->pool_obds));
+ lod_tgt_pool_free(&(pool->pool_rr.lqr_pool));
+ lod_tgt_pool_free(&(pool->pool_obds));
OBD_FREE_PTR(pool);
EXIT;
}
* \retval negative error number on failure
*/
#define POOL_INIT_COUNT 2
-int lod_ost_pool_init(struct lu_tgt_pool *op, unsigned int count)
+int lod_tgt_pool_init(struct lu_tgt_pool *op, unsigned int count)
{
ENTRY;
* \retval 0 on success
* \retval negative error number on failure.
*/
-int lod_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count)
+int lod_tgt_pool_extend(struct lu_tgt_pool *op, unsigned int min_count)
{
__u32 *new;
__u32 new_size;
* \retval 0 if target could be added to the pool
* \retval negative error if target \a idx was not added
*/
-int lod_ost_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count)
+int lod_tgt_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count)
{
unsigned int i;
int rc = 0;
down_write(&op->op_rw_sem);
- rc = lod_ost_pool_extend(op, min_count);
+ rc = lod_tgt_pool_extend(op, min_count);
if (rc)
GOTO(out, rc);
* \retval 0 on success
* \retval negative error number on failure
*/
-int lod_ost_pool_remove(struct lu_tgt_pool *op, __u32 idx)
+int lod_tgt_pool_remove(struct lu_tgt_pool *op, __u32 idx)
{
unsigned int i;
ENTRY;
*
* \retval 0 on success or if pool was already freed
*/
-int lod_ost_pool_free(struct lu_tgt_pool *op)
+int lod_tgt_pool_free(struct lu_tgt_pool *op)
{
ENTRY;
strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name));
new_pool->pool_lobd = obd;
atomic_set(&new_pool->pool_refcount, 1);
- rc = lod_ost_pool_init(&new_pool->pool_obds, 0);
+ rc = lod_tgt_pool_init(&new_pool->pool_obds, 0);
if (rc)
GOTO(out_err, rc);
lu_qos_rr_init(&new_pool->pool_rr);
- rc = lod_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
+ rc = lod_tgt_pool_init(&new_pool->pool_rr.lqr_pool, 0);
if (rc)
GOTO(out_free_pool_obds, rc);
lprocfs_remove(&new_pool->pool_proc_entry);
- lod_ost_pool_free(&new_pool->pool_rr.lqr_pool);
+ lod_tgt_pool_free(&new_pool->pool_rr.lqr_pool);
out_free_pool_obds:
- lod_ost_pool_free(&new_pool->pool_obds);
+ lod_tgt_pool_free(&new_pool->pool_obds);
OBD_FREE_PTR(new_pool);
return rc;
}
if (rc)
GOTO(out, rc);
- rc = lod_ost_pool_add(&pool->pool_obds, tgt->ltd_index,
- lod->lod_ost_descs.ltd_tgts_size);
+ rc = lod_tgt_pool_add(&pool->pool_obds, tgt->ltd_index,
+ lod->lod_ost_count);
if (rc)
GOTO(out, rc);
if (rc)
GOTO(out, rc);
- lod_ost_pool_remove(&pool->pool_obds, ost->ltd_index);
-
+ lod_tgt_pool_remove(&pool->pool_obds, ost->ltd_index);
pool->pool_rr.lqr_dirty = 1;
CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
#define TGT_BAVAIL(i) (OST_TGT(lod,i)->ltd_statfs.os_bavail * \
OST_TGT(lod,i)->ltd_statfs.os_bsize)
+/*
+ * Map the cached statfs state of @tgt to an errno saying whether new
+ * objects may be placed on it.  Checks are ordered: space first, then
+ * read-only, then precreation, so the errno reported when several
+ * conditions hold matches the original precedence.
+ *
+ * Returns -ENOSPC when out of space (or, for OSTs only, out of inodes
+ * with no precreated objects left), -EROFS when the target is read-only,
+ * -ENOBUFS when object precreation is disabled (max_create_count=0),
+ * and 0 when the target is usable.
+ */
+static inline int lod_statfs_check(struct lu_tgt_descs *ltd,
+				   struct lu_tgt_desc *tgt)
+{
+	struct obd_statfs *sfs = &tgt->ltd_statfs;
+	bool is_ost = !ltd->ltd_is_mdt;
+
+	/* no blocks, or (OST only) no inodes and nothing precreated */
+	if ((sfs->os_state & OS_STATE_ENOSPC) ||
+	    (is_ost && (sfs->os_state & OS_STATE_ENOINO) &&
+	     sfs->os_fprecreated == 0))
+		return -ENOSPC;
+
+	/* a read-only target can't take new objects */
+	if (sfs->os_state & OS_STATE_READONLY)
+		return -EROFS;
+
+	/* precreation skipped on an OST with max_create_count=0 */
+	if (is_ost && (sfs->os_state & OS_STATE_NOPRECREATE))
+		return -ENOBUFS;
+
+	return 0;
+}
+
/**
- * Check whether the target is available for new OST objects.
+ * Check whether the target is available for new objects.
*
* Request statfs data from the given target and verify it's active and not
- * read-only. If so, then it can be used to place new OST objects. This
+ * read-only. If so, then it can be used to place new objects. This
* function also maintains the number of active/inactive targets and sets
* dirty flags if those numbers change so others can run re-balance procedures.
* No external locking is required.
* \param[in] env execution environment for this thread
* \param[in] d LOD device
* \param[in] ltd target table
- * \param[in] index target index
- * \param[out] sfs buffer for statfs data
+ * \param[in] tgt target
*
* \retval 0 if the target is good
* \retval negative negated errno on error
-
*/
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
- struct lu_tgt_descs *ltd, int index,
- struct obd_statfs *sfs)
+ struct lu_tgt_descs *ltd,
+ struct lu_tgt_desc *tgt)
{
struct lov_desc *desc = <d->ltd_lov_desc;
- struct lu_tgt_desc *tgt = LTD_TGT(ltd, index);
int rc;
- ENTRY;
-
LASSERT(d);
LASSERT(tgt);
- rc = dt_statfs(env, tgt->ltd_tgt, sfs);
-
- if (rc == 0 && ((sfs->os_state & OS_STATE_ENOSPC) ||
- (sfs->os_state & OS_STATE_ENOINO && sfs->os_fprecreated == 0)))
- RETURN(-ENOSPC);
-
+ rc = dt_statfs(env, tgt->ltd_tgt, &tgt->ltd_statfs);
if (rc && rc != -ENOTCONN)
CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc);
- /* If the OST is readonly then we can't allocate objects there */
- if (sfs->os_state & OS_STATE_READONLY)
- rc = -EROFS;
-
- /* object precreation is skipped on the OST with max_create_count=0 */
- if (sfs->os_state & OS_STATE_NOPRECREATE)
- rc = -ENOBUFS;
+ if (!rc) {
+ rc = lod_statfs_check(ltd, tgt);
+ if (rc == -ENOSPC)
+ return rc;
+ }
/* check whether device has changed state (active, inactive) */
if (rc != 0 && tgt->ltd_active) {
spin_unlock(&d->lod_lock);
}
- RETURN(rc);
+ return rc;
+}
+
+/*
+ * Decide whether @tgt can receive a new object right now: it must pass
+ * lod_statfs_check() and be connected.  Returns 0 when usable, the
+ * statfs errno when not, or -ENOTCONN when the target is inactive.
+ */
+static int lod_is_tgt_usable(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
+{
+	int rc = lod_statfs_check(ltd, tgt);
+
+	if (rc == 0 && !tgt->ltd_active)
+		rc = -ENOTCONN;
+
+	return rc;
+}
/**
*
* \param[in] env execution environment for this thread
* \param[in] lod LOD device
+ * \param[in] ltd tgt table
*/
-void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod)
+void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod,
+			   struct lu_tgt_descs *ltd)
{
	struct obd_device *obd = lod2obd(lod);
-	struct lu_tgt_pool *osts = &lod->lod_ost_descs.ltd_tgt_pool;
+	struct lu_tgt_desc *tgt;
	time64_t max_age;
-	unsigned int i;
	u64 avail;
-	int idx;
	ENTRY;
-	max_age = ktime_get_seconds() -
-		  2 * lod->lod_ost_descs.ltd_lov_desc.ld_qos_maxage;
+	/* data older than 2 * qos_maxage seconds is considered stale */
+	max_age = ktime_get_seconds() - 2 * ltd->ltd_lov_desc.ld_qos_maxage;
	if (obd->obd_osfs_age > max_age)
		/* statfs data are quite recent, don't need to refresh it */
		RETURN_EXIT;
-	down_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
+	down_write(<d->ltd_qos.lq_rw_sem);
+	/* recheck under the semaphore: another thread may have refreshed
+	 * the statfs data while we were blocked on lq_rw_sem */
	if (obd->obd_osfs_age > max_age)
		goto out;
-	for (i = 0; i < osts->op_count; i++) {
-		idx = osts->op_array[i];
-		avail = OST_TGT(lod,idx)->ltd_statfs.os_bavail;
-		if (lod_statfs_and_check(env, lod, &lod->lod_ost_descs, idx,
-					 &OST_TGT(lod, idx)->ltd_statfs))
+	/* refresh statfs for every target in @ltd; a change in available
+	 * blocks marks the QoS state dirty so weights are recalculated */
+	ltd_foreach_tgt(ltd, tgt) {
+		avail = tgt->ltd_statfs.os_bavail;
+		if (lod_statfs_and_check(env, lod, ltd, tgt))
			continue;
-		if (OST_TGT(lod,idx)->ltd_statfs.os_bavail != avail)
+
+		if (tgt->ltd_statfs.os_bavail != avail)
			/* recalculate weigths */
-			lod->lod_ost_descs.ltd_qos.lq_dirty = 1;
+			ltd->ltd_qos.lq_dirty = 1;
	}
	obd->obd_osfs_age = ktime_get_seconds();
out:
-	up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
+	up_write(<d->ltd_qos.lq_rw_sem);
	EXIT;
}
* a new target or activation/deactivation).
*
* \param[in] lod LOD device
- * \param[in] src_pool OST pool
+ * \param[in] ltd tgt table
+ * \param[in] src_pool tgt pool
* \param[in] lqr round-robin list
*
* \retval 0 on success
* \retval -ENOMEM fails to allocate the array
*/
-static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_pool *src_pool,
+static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd,
+ const struct lu_tgt_pool *src_pool,
struct lu_qos_rr *lqr)
{
- struct lu_svr_qos *oss;
- struct lod_tgt_desc *ost;
+ struct lu_svr_qos *svr;
+ struct lu_tgt_desc *tgt;
unsigned placed, real_count;
unsigned int i;
int rc;
}
/* Do actual allocation. */
- down_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
+ down_write(<d->ltd_qos.lq_rw_sem);
/*
* Check again. While we were sleeping on @lq_rw_sem something could
*/
if (!lqr->lqr_dirty) {
LASSERT(lqr->lqr_pool.op_size);
- up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
+ up_write(<d->ltd_qos.lq_rw_sem);
RETURN(0);
}
deleting from the pool. The lq_rw_sem insures that nobody else
is reading. */
lqr->lqr_pool.op_count = real_count;
- rc = lod_ost_pool_extend(&lqr->lqr_pool, real_count);
+ rc = lod_tgt_pool_extend(&lqr->lqr_pool, real_count);
if (rc) {
- up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
+ up_write(<d->ltd_qos.lq_rw_sem);
RETURN(rc);
}
for (i = 0; i < lqr->lqr_pool.op_count; i++)
lqr->lqr_pool.op_array[i] = LOV_QOS_EMPTY;
- /* Place all the OSTs from 1 OSS at the same time. */
+ /* Place all the tgts from 1 svr at the same time. */
placed = 0;
- list_for_each_entry(oss, &lod->lod_ost_descs.ltd_qos.lq_svr_list,
- lsq_svr_list) {
+ list_for_each_entry(svr, <d->ltd_qos.lq_svr_list, lsq_svr_list) {
int j = 0;
for (i = 0; i < lqr->lqr_pool.op_count; i++) {
int next;
- if (!cfs_bitmap_check(lod->lod_ost_bitmap,
- src_pool->op_array[i]))
+ if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap,
+ src_pool->op_array[i]))
continue;
- ost = OST_TGT(lod,src_pool->op_array[i]);
- LASSERT(ost && ost->ltd_tgt);
- if (ost->ltd_qos.ltq_svr != oss)
+ tgt = LTD_TGT(ltd, src_pool->op_array[i]);
+ LASSERT(tgt && tgt->ltd_tgt);
+ if (tgt->ltd_qos.ltq_svr != svr)
continue;
- /* Evenly space these OSTs across arrayspace */
- next = j * lqr->lqr_pool.op_count / oss->lsq_tgt_count;
+ /* Evenly space these tgts across arrayspace */
+ next = j * lqr->lqr_pool.op_count / svr->lsq_tgt_count;
while (lqr->lqr_pool.op_array[next] != LOV_QOS_EMPTY)
next = (next + 1) % lqr->lqr_pool.op_count;
}
lqr->lqr_dirty = 0;
- up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
+ up_write(<d->ltd_qos.lq_rw_sem);
if (placed != real_count) {
/* This should never happen */
- LCONSOLE_ERROR_MSG(0x14e, "Failed to place all OSTs in the "
+ LCONSOLE_ERROR_MSG(0x14e, "Failed to place all tgts in the "
"round-robin list (%d of %d).\n",
placed, real_count);
for (i = 0; i < lqr->lqr_pool.op_count; i++) {
- LCONSOLE(D_WARNING, "rr #%d ost idx=%d\n", i,
+ LCONSOLE(D_WARNING, "rr #%d tgt idx=%d\n", i,
lqr->lqr_pool.op_array[i]);
}
lqr->lqr_dirty = 1;
#define LOV_CREATE_RESEED_MIN 2000
/**
- * Initialize temporary OST-in-use array.
+ * Initialize temporary tgt-in-use array.
*
* Allocate or extend the array used to mark targets already assigned to a new
* striping so they are not used more than once.
* \retval 0 on success
* \retval -ENOMEM on error
*/
-static inline int lod_qos_ost_in_use_clear(const struct lu_env *env,
+static inline int lod_qos_tgt_in_use_clear(const struct lu_env *env,
__u32 stripes)
{
struct lod_thread_info *info = lod_env_info(env);
* Remember a target in the array of used targets.
*
* Mark the given target as used for a new striping being created. The status
- * of an OST in a striping can be checked with lod_qos_is_ost_used().
+ * of an tgt in a striping can be checked with lod_qos_is_tgt_used().
*
* \param[in] env execution environment for this thread
* \param[in] idx index in the array
- * \param[in] ost OST target index to mark as used
+ * \param[in] tgt_idx target index to mark as used
*/
-static inline void lod_qos_ost_in_use(const struct lu_env *env,
- int idx, int ost)
+static inline void lod_qos_tgt_in_use(const struct lu_env *env,
+ int idx, int tgt_idx)
{
struct lod_thread_info *info = lod_env_info(env);
- int *osts = info->lti_ea_store;
+ int *tgts = info->lti_ea_store;
LASSERT(info->lti_ea_store_size >= idx * sizeof(int));
- osts[idx] = ost;
+ tgts[idx] = tgt_idx;
}
/**
- * Check is OST used in a striping.
+ * Check is tgt used in a striping.
*
- * Checks whether OST with the given index is marked as used in the temporary
- * array (see lod_qos_ost_in_use()).
+ * Checks whether tgt with the given index is marked as used in the temporary
+ * array (see lod_qos_tgt_in_use()).
*
* \param[in] env execution environment for this thread
- * \param[in] ost OST target index to check
+ * \param[in] tgt_idx target index to check
* \param[in] stripes the number of items used in the array already
*
* \retval 0 not used
* \retval 1 used
*/
-static int lod_qos_is_ost_used(const struct lu_env *env, int ost, __u32 stripes)
+static int lod_qos_is_tgt_used(const struct lu_env *env, int tgt_idx,
+ __u32 stripes)
{
struct lod_thread_info *info = lod_env_info(env);
- int *osts = info->lti_ea_store;
+ int *tgts = info->lti_ea_store;
__u32 j;
for (j = 0; j < stripes; j++) {
- if (osts[j] == ost)
+ if (tgts[j] == tgt_idx)
return 1;
}
return 0;
static int lod_check_and_reserve_ost(const struct lu_env *env,
struct lod_object *lo,
struct lod_layout_component *lod_comp,
- struct obd_statfs *sfs, __u32 ost_idx,
- __u32 speed, __u32 *s_idx,
+ __u32 ost_idx, __u32 speed, __u32 *s_idx,
struct dt_object **stripe,
__u32 *ost_indices,
struct thandle *th,
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
+ struct lu_tgt_desc *ost = OST_TGT(lod, ost_idx);
struct dt_object *o;
__u32 stripe_idx = *s_idx;
int rc;
+
ENTRY;
- rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost_idx, sfs);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost);
if (rc)
RETURN(rc);
* We expect number of precreated objects in f_ffree at
* the first iteration, skip OSPs with no objects ready
*/
- if (sfs->os_fprecreated == 0 && speed == 0) {
+ if (ost->ltd_statfs.os_fprecreated == 0 && speed == 0) {
QOS_DEBUG("#%d: precreation is empty\n", ost_idx);
RETURN(rc);
}
/*
* try to use another OSP if this one is degraded
*/
- if (sfs->os_state & OS_STATE_DEGRADED && speed < 2) {
+ if (ost->ltd_statfs.os_state & OS_STATE_DEGRADED && speed < 2) {
QOS_DEBUG("#%d: degraded\n", ost_idx);
RETURN(rc);
}
* for the first and second time.
*/
if (speed < 2 && lod_should_avoid_ost(lo, lag, ost_idx)) {
- QOS_DEBUG("iter %d: OST%d used by conflicting mirror "
- "component\n", speed, ost_idx);
+ QOS_DEBUG("iter %d: OST%d used by conflicting mirror component\n",
+ speed, ost_idx);
RETURN(rc);
}
/* do not put >1 objects on a single OST, except for overstriping */
- if (lod_qos_is_ost_used(env, ost_idx, stripe_idx)) {
+ if (lod_qos_is_tgt_used(env, ost_idx, stripe_idx)) {
if (lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
*overstriped = true;
else
* We've successfully declared (reserved) an object
*/
lod_avoid_update(lo, lag);
- lod_qos_ost_in_use(env, stripe_idx, ost_idx);
+ lod_qos_tgt_in_use(env, stripe_idx, ost_idx);
stripe[stripe_idx] = o;
ost_indices[stripe_idx] = ost_idx;
OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LOV_CREATE_RACE, 2);
* \retval -ENOSPC if not enough OSTs are found
* \retval negative negated errno for other failures
*/
-static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo,
- struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+static int lod_ost_alloc_rr(const struct lu_env *env, struct lod_object *lo,
+ struct dt_object **stripe, __u32 *ost_indices,
+ int flags, struct thandle *th, int comp_idx)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
struct pool_desc *pool = NULL;
struct lu_tgt_pool *osts;
struct lu_qos_rr *lqr;
lqr = &(m->lod_ost_descs.ltd_qos.lq_rr);
}
- rc = lod_qos_calc_rr(m, osts, lqr);
+ rc = lod_qos_calc_rr(m, &m->lod_ost_descs, osts, lqr);
if (rc)
GOTO(out, rc);
- rc = lod_qos_ost_in_use_clear(env, stripe_count);
+ rc = lod_qos_tgt_in_use_clear(env, stripe_count);
if (rc)
GOTO(out, rc);
continue;
spin_unlock(&lqr->lqr_alloc);
- rc = lod_check_and_reserve_ost(env, lo, lod_comp, sfs, ost_idx,
+ rc = lod_check_and_reserve_ost(env, lo, lod_comp, ost_idx,
speed, &stripe_idx, stripe,
ost_indices, th, &overstriped);
spin_lock(&lqr->lqr_alloc);
}
/**
+ * Allocate a striping using round-robin algorithm.
+ *
+ * Allocates a new striping using round-robin algorithm. The function refreshes
+ * all the internal structures (statfs cache, array of available remote MDTs
+ * sorted with regard to MDS, etc). The number of stripes required is taken from
+ * the object (must be prepared by the caller). The caller should ensure nobody
+ * else is trying to create a striping on the object in parallel. All the
+ * internal structures (like pools, etc) are protected and no additional locking
+ * is required. The function succeeds even if a single stripe is allocated.
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] lo LOD object
+ * \param[out] stripe striping created
+ *
+ * \retval positive stripe objects allocated, including the first stripe
+ * allocated outside
+ * \retval -ENOSPC if not enough MDTs are found
+ * \retval negative negated errno for other failures
+ */
+int lod_mdt_alloc_rr(const struct lu_env *env, struct lod_object *lo,
+		     struct dt_object **stripe)
+{
+	struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+	struct lu_tgt_descs *ltd = &lod->lod_mdt_descs;
+	struct lu_tgt_pool *pool;
+	struct lu_qos_rr *lqr;
+	struct lu_tgt_desc *mdt;
+	struct lu_object_conf conf = { .loc_flags = LOC_F_NEW };
+	struct lu_fid fid = { 0 };
+	struct dt_object *dto;
+	unsigned int pool_idx;
+	unsigned int i;
+	u32 start_idx_temp;
+	u32 stripe_count = lo->ldo_dir_stripe_count;
+	u32 stripe_idx = 1;
+	u32 mdt_idx;
+	bool use_degraded = false;
+	int tgt_connecting = 0;
+	int rc;
+
+	ENTRY;
+
+	pool = &ltd->ltd_tgt_pool;
+	lqr = &ltd->ltd_qos.lq_rr;
+	rc = lod_qos_calc_rr(lod, ltd, pool, lqr);
+	if (rc)
+		RETURN(rc);
+
+	rc = lod_qos_tgt_in_use_clear(env, stripe_count);
+	if (rc)
+		RETURN(rc);
+
+	down_read(&ltd->ltd_qos.lq_rw_sem);
+	spin_lock(&lqr->lqr_alloc);
+	/* Periodically reseed the RR start position so allocation doesn't
+	 * settle into a fixed pattern; otherwise precess the start index
+	 * when the stripe/tgt counts wouldn't rotate it naturally. */
+	if (--lqr->lqr_start_count <= 0) {
+		lqr->lqr_start_idx = prandom_u32_max(pool->op_count);
+		lqr->lqr_start_count =
+			(LOV_CREATE_RESEED_MIN / max(pool->op_count, 1U) +
+			 LOV_CREATE_RESEED_MULT) * max(pool->op_count, 1U);
+	} else if (stripe_count - 1 >= pool->op_count ||
+		   lqr->lqr_start_idx > pool->op_count) {
+		/* If we have allocated from all of the tgts, slowly
+		 * precess the next start if the tgt/stripe count isn't
+		 * already doing this for us. */
+		lqr->lqr_start_idx %= pool->op_count;
+		if (stripe_count - 1 > 1 &&
+		    (pool->op_count % (stripe_count - 1)) != 1)
+			++lqr->lqr_offset_idx;
+	}
+	start_idx_temp = lqr->lqr_start_idx;
+
+repeat_find:
+	QOS_DEBUG("want %d start_idx %d start_count %d offset %d active %d count %d\n",
+		  stripe_count - 1, lqr->lqr_start_idx, lqr->lqr_start_count,
+		  lqr->lqr_offset_idx, pool->op_count, pool->op_count);
+
+	/* stripe 0 is the master object allocated by the caller, so only
+	 * stripe_count - 1 remote MDT stripes are needed here */
+	for (i = 0; i < pool->op_count && stripe_idx < stripe_count; i++) {
+		pool_idx = (lqr->lqr_start_idx + lqr->lqr_offset_idx) %
+			    pool->op_count;
+		++lqr->lqr_start_idx;
+		mdt_idx = lqr->lqr_pool.op_array[pool_idx];
+		mdt = LTD_TGT(ltd, mdt_idx);
+
+		QOS_DEBUG("#%d strt %d act %d strp %d ary %d idx %d\n",
+			  i, lqr->lqr_start_idx, /* XXX: active*/ 0,
+			  stripe_idx, pool_idx, mdt_idx);
+
+		if (mdt_idx == LOV_QOS_EMPTY ||
+		    !cfs_bitmap_check(ltd->ltd_tgt_bitmap, mdt_idx))
+			continue;
+
+		/* do not put >1 objects on one MDT */
+		if (lod_qos_is_tgt_used(env, mdt_idx, stripe_idx))
+			continue;
+
+		rc = lod_is_tgt_usable(ltd, mdt);
+		if (rc) {
+			if (mdt->ltd_connecting)
+				tgt_connecting = 1;
+			continue;
+		}
+
+		/* try to use another OSP if this one is degraded */
+		if (mdt->ltd_statfs.os_state & OS_STATE_DEGRADED &&
+		    !use_degraded) {
+			QOS_DEBUG("#%d: degraded\n", mdt_idx);
+			continue;
+		}
+		/* FID allocation and object location may block, so drop
+		 * the spinlock while talking to the target */
+		spin_unlock(&lqr->lqr_alloc);
+
+		rc = obd_fid_alloc(env, mdt->ltd_exp, &fid, NULL);
+		if (rc) {
+			QOS_DEBUG("#%d: alloc FID failed: %d\n", mdt_idx, rc);
+			spin_lock(&lqr->lqr_alloc);
+			continue;
+		}
+
+		dto = dt_locate_at(env, mdt->ltd_tgt, &fid,
+				lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
+				&conf);
+
+		spin_lock(&lqr->lqr_alloc);
+		if (IS_ERR(dto)) {
+			QOS_DEBUG("can't alloc stripe on #%u: %d\n",
+				  mdt->ltd_index, (int) PTR_ERR(dto));
+
+			if (mdt->ltd_connecting)
+				tgt_connecting = 1;
+			continue;
+		}
+
+		lod_qos_tgt_in_use(env, stripe_idx, mdt_idx);
+		stripe[stripe_idx] = dto;
+		stripe_idx++;
+	}
+
+	if (!use_degraded && stripe_idx < stripe_count) {
+		/* Try again, allowing slower OSCs */
+		use_degraded = true;
+		lqr->lqr_start_idx = start_idx_temp;
+
+		tgt_connecting = 0;
+		goto repeat_find;
+	}
+	spin_unlock(&lqr->lqr_alloc);
+	up_read(&ltd->ltd_qos.lq_rw_sem);
+
+	if (stripe_idx > 1)
+		/* at least one stripe is allocated */
+		RETURN(stripe_idx);
+
+	/* nobody provided us with a single object */
+	if (tgt_connecting)
+		RETURN(-EINPROGRESS);
+
+	RETURN(-ENOSPC);
+}
+
+/**
* Allocate a specific striping layout on a user defined set of OSTs.
*
* Allocates new striping using the OST index range provided by the data from
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
struct dt_object *o;
unsigned int array_idx = 0;
int stripe_count = 0;
LASSERT(lod_comp->llc_ostlist.op_array);
LASSERT(lod_comp->llc_ostlist.op_count);
- rc = lod_qos_ost_in_use_clear(env, lod_comp->llc_stripe_count);
+ rc = lod_qos_tgt_in_use_clear(env, lod_comp->llc_stripe_count);
if (rc < 0)
RETURN(rc);
/* do not put >1 objects on a single OST, except for
* overstriping
*/
- if (lod_qos_is_ost_used(env, ost_idx, stripe_count) &&
+ if (lod_qos_is_tgt_used(env, ost_idx, stripe_count) &&
!(lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)) {
rc = -EINVAL;
break;
}
- rc = lod_statfs_and_check(env, m, &m->lod_ost_descs, ost_idx,
- sfs);
+ rc = lod_statfs_and_check(env, m, &m->lod_ost_descs,
+ LTD_TGT(&m->lod_ost_descs, ost_idx));
if (rc < 0) /* this OSP doesn't feel well */
break;
/*
* We've successfully declared (reserved) an object
*/
- lod_qos_ost_in_use(env, stripe_count, ost_idx);
+ lod_qos_tgt_in_use(env, stripe_count, ost_idx);
stripe[stripe_count] = o;
ost_indices[stripe_count] = ost_idx;
stripe_count++;
* \retval -EINVAL requested offset is invalid
* \retval negative errno on failure
*/
-static int lod_alloc_specific(const struct lu_env *env, struct lod_object *lo,
- struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+static int lod_ost_alloc_specific(const struct lu_env *env,
+ struct lod_object *lo,
+ struct dt_object **stripe, __u32 *ost_indices,
+ int flags, struct thandle *th, int comp_idx)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
struct dt_object *o;
+ struct lu_tgt_desc *tgt;
__u32 ost_idx;
unsigned int i, array_idx, ost_count;
int rc, stripe_num = 0;
LASSERT(lo->ldo_comp_cnt > comp_idx && lo->ldo_comp_entries != NULL);
lod_comp = &lo->ldo_comp_entries[comp_idx];
- rc = lod_qos_ost_in_use_clear(env, lod_comp->llc_stripe_count);
+ rc = lod_qos_tgt_in_use_clear(env, lod_comp->llc_stripe_count);
if (rc)
GOTO(out, rc);
* do not put >1 objects on a single OST, except for
* overstriping, where it is intended
*/
- if (lod_qos_is_ost_used(env, ost_idx, stripe_num)) {
+ if (lod_qos_is_tgt_used(env, ost_idx, stripe_num)) {
if (lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
overstriped = true;
else
lod_comp_is_ost_used(env, lo, ost_idx))
continue;
+ tgt = LTD_TGT(&m->lod_ost_descs, ost_idx);
+
/* Drop slow OSCs if we can, but not for requested start idx.
*
* This means "if OSC is slow and it is not the requested
* start OST, then it can be skipped, otherwise skip it only
* if it is inactive/recovering/out-of-space." */
- rc = lod_statfs_and_check(env, m, &m->lod_ost_descs, ost_idx,
- sfs);
+ rc = lod_statfs_and_check(env, m, &m->lod_ost_descs, tgt);
if (rc) {
/* this OSP doesn't feel well */
continue;
* iteration. Skip OSPs with no objects ready. Don't apply
* this logic to OST specified with stripe_offset.
*/
- if (i != 0 && sfs->os_fprecreated == 0 && speed == 0)
+ if (i && !tgt->ltd_statfs.os_fprecreated && !speed)
continue;
o = lod_qos_declare_object_on(env, m, ost_idx, th);
/*
* We've successfully declared (reserved) an object
*/
- lod_qos_ost_in_use(env, stripe_num, ost_idx);
+ lod_qos_tgt_in_use(env, stripe_num, ost_idx);
stripe[stripe_num] = o;
ost_indices[stripe_num] = ost_idx;
stripe_num++;
* \retval -EINVAL requested OST index is invalid
* \retval negative errno on failure
*/
-static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
- struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
+ struct dt_object **stripe, __u32 *ost_indices,
+ int flags, struct thandle *th, int comp_idx)
{
struct lod_layout_component *lod_comp;
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
struct lod_tgt_desc *ost;
struct dt_object *o;
if (rc)
GOTO(out, rc);
- rc = lod_qos_ost_in_use_clear(env, lod_comp->llc_stripe_count);
+ rc = lod_qos_tgt_in_use_clear(env, lod_comp->llc_stripe_count);
if (rc)
GOTO(out, rc);
ost = OST_TGT(lod, osts->op_array[i]);
ost->ltd_qos.ltq_usable = 0;
- rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs,
- osts->op_array[i], sfs);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost);
if (rc) {
/* this OSP doesn't feel well */
continue;
}
- if (sfs->os_state & OS_STATE_DEGRADED)
+ if (ost->ltd_statfs.os_state & OS_STATE_DEGRADED)
continue;
/* Fail Check before osc_precreate() is called
- so we can only 'fail' single OSC. */
+ * so we can only 'fail' single OSC.
+ */
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) &&
osts->op_array[i] == 0)
continue;
rand = lu_prandom_u64_max(total_weight);
/* On average, this will hit larger-weighted OSTs more often.
- * 0-weight OSTs will always get used last (only when rand=0) */
+ * 0-weight OSTs will always get used last (only when rand=0)
+ */
for (i = 0; i < osts->op_count; i++) {
__u32 idx = osts->op_array[i];
!(lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING))
continue;
- if (lod_qos_is_ost_used(env, idx, nfound)) {
+ if (lod_qos_is_tgt_used(env, idx, nfound)) {
if (lod_comp->llc_pattern &
LOV_PATTERN_OVERSTRIPING)
overstriped = true;
}
lod_avoid_update(lo, lag);
- lod_qos_ost_in_use(env, nfound, idx);
+ lod_qos_tgt_in_use(env, nfound, idx);
stripe[nfound] = o;
ost_indices[nfound] = idx;
ltd_qos_update(&lod->lod_ost_descs, ost, &total_weight);
}
/**
+ * Allocate a striping using an algorithm with weights.
+ *
+ * The function allocates remote MDT objects to create a striping, the first
+ * object was already allocated on current MDT to ensure master object and
+ * the first object are on the same MDT. The algorithm used is based on weights
+ * (both free space and inodes), and it's trying to ensure the space/inodes are
+ * used evenly by MDTs and MDSs. The striping configuration (# of stripes,
+ * offset, pool) is taken from the object and is prepared by the caller.
+ *
+ * If prepared configuration can't be met due to too few MDTs, then allocation
+ * fails.
+ *
+ * No concurrent allocation is allowed on the object and this must be ensured
+ * by the caller. All the internal structures are protected by the function.
+ *
+ * The algorithm has two steps: find available MDTs and calculate their
+ * weights, then select the MDTs with their weights used as the probability.
+ * An MDT with a higher weight is proportionately more likely to be selected
+ * than one with a lower weight.
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] lo LOD object
+ * \param[out] stripes striping created
+ *
+ * \retval positive stripes allocated, and it should be equal to
+ * lo->ldo_dir_stripe_count
+ * \retval -EAGAIN not enough tgts are found for specified stripe count
+ * \retval -EINVAL requested MDT index is invalid
+ * \retval negative errno on failure
+ */
+int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
+		      struct dt_object **stripes)
+{
+	struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+	struct lu_tgt_descs *ltd = &lod->lod_mdt_descs;
+	struct lu_object_conf conf = { .loc_flags = LOC_F_NEW };
+	struct lu_fid fid = { 0 };
+	const struct lu_tgt_pool *pool;
+	struct lu_tgt_desc *mdt;
+	struct dt_object *dto;
+	u64 total_weight = 0;
+	u32 stripe_count = lo->ldo_dir_stripe_count;
+	unsigned int nfound;
+	unsigned int good_mdts;
+	unsigned int i;
+	int rc = 0;
+
+	ENTRY;
+
+	/* stripe 0 is the master object, already allocated on the current
+	 * MDT by the caller, so a single-stripe dir needs nothing here */
+	if (stripe_count == 1)
+		RETURN(1);
+
+	pool = <d->ltd_tgt_pool;
+
+	/* Detect -EAGAIN early, before expensive lock is taken. */
+	if (!ltd_qos_is_usable(ltd))
+		RETURN(-EAGAIN);
+
+	/* Do actual allocation, use write lock here. */
+	down_write(<d->ltd_qos.lq_rw_sem);
+
+	/*
+	 * Check again, while we were sleeping on @lq_rw_sem things could
+	 * change.
+	 */
+	if (!ltd_qos_is_usable(ltd))
+		GOTO(unlock, rc = -EAGAIN);
+
+	rc = ltd_qos_penalties_calc(ltd);
+	if (rc)
+		GOTO(unlock, rc);
+
+	rc = lod_qos_tgt_in_use_clear(env, stripe_count);
+	if (rc)
+		GOTO(unlock, rc);
+
+	good_mdts = 0;
+	/* Find all the tgts that are valid stripe candidates */
+	for (i = 0; i < pool->op_count; i++) {
+		if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, pool->op_array[i]))
+			continue;
+
+		mdt = LTD_TGT(ltd, pool->op_array[i]);
+		mdt->ltd_qos.ltq_usable = 0;
+
+		rc = lod_is_tgt_usable(ltd, mdt);
+		if (rc)
+			continue;
+
+		if (mdt->ltd_statfs.os_state & OS_STATE_DEGRADED)
+			continue;
+
+		mdt->ltd_qos.ltq_usable = 1;
+		lu_tgt_qos_weight_calc(mdt);
+		total_weight += mdt->ltd_qos.ltq_weight;
+
+		good_mdts++;
+	}
+
+	QOS_DEBUG("found %d good tgts\n", good_mdts);
+
+	/* not enough usable MDTs for the stripe_count - 1 remote stripes;
+	 * NOTE(review): -EAGAIN presumably lets the caller fall back to
+	 * round-robin, mirroring the OST path — confirm at the call site */
+	if (good_mdts < stripe_count - 1)
+		GOTO(unlock, rc = -EAGAIN);
+
+	/* Find enough tgts with weighted random allocation. */
+	nfound = 1;
+	while (nfound < stripe_count) {
+		u64 rand, cur_weight;
+
+		cur_weight = 0;
+		rc = -ENOSPC;
+
+		rand = lu_prandom_u64_max(total_weight);
+
+		/* On average, this will hit larger-weighted tgts more often.
+		 * 0-weight tgts will always get used last (only when rand=0) */
+		for (i = 0; i < pool->op_count; i++) {
+			__u32 idx = pool->op_array[i];
+			int rc2;
+
+			mdt = LTD_TGT(ltd, idx);
+
+			if (!mdt->ltd_qos.ltq_usable)
+				continue;
+
+			cur_weight += mdt->ltd_qos.ltq_weight;
+
+			QOS_DEBUG("idx=%d nfound=%d cur_weight=%llu rand=%llu total_weight=%llu\n",
+				  idx, nfound, cur_weight, rand,
+				  total_weight);
+
+			if (cur_weight < rand)
+				continue;
+
+			QOS_DEBUG("stripe=%d to idx=%d\n", nfound, idx);
+
+			/* do not put >1 stripes on the same MDT */
+			if (lod_qos_is_tgt_used(env, idx, nfound))
+				continue;
+
+			rc2 = obd_fid_alloc(env, mdt->ltd_exp, &fid, NULL);
+			if (rc2) {
+				QOS_DEBUG("can't alloc FID on #%u: %d\n",
+					  idx, rc2);
+				continue;
+			}
+
+			conf.loc_flags = LOC_F_NEW;
+			dto = dt_locate_at(env, mdt->ltd_tgt, &fid,
+				lo->ldo_obj.do_lu.lo_dev->ld_site->ls_top_dev,
+				&conf);
+			if (IS_ERR(dto)) {
+				QOS_DEBUG("can't alloc stripe on #%u: %d\n",
+					  idx, (int) PTR_ERR(dto));
+				continue;
+			}
+
+			lod_qos_tgt_in_use(env, nfound, idx);
+			stripes[nfound] = dto;
+			/* shrink this MDT's weight so the next pick spreads
+			 * across the remaining targets */
+			ltd_qos_update(ltd, mdt, &total_weight);
+			nfound++;
+			rc = 0;
+			break;
+		}
+
+		/* no MDT found on this iteration, give up */
+		if (rc)
+			break;
+	}
+
+	if (unlikely(nfound != stripe_count)) {
+		/*
+		 * when the decision to use weighted algorithm was made
+		 * we had enough appropriate OSPs, but this state can
+		 * change anytime (no space on MDT, broken connection, etc)
+		 * so it's possible OSP won't be able to provide us with
+		 * an object due to just changed state
+		 */
+		QOS_DEBUG("%s: wanted %d objects, found only %d\n",
+			  lod2obd(lod)->obd_name, stripe_count, nfound);
+		/* roll back: release every stripe allocated above; slot 0
+		 * belongs to the caller and is left untouched */
+		for (i = 1; i < nfound; i++) {
+			LASSERT(stripes[i] != NULL);
+			dt_object_put(env, stripes[i]);
+			stripes[i] = NULL;
+		}
+
+		/* makes sense to rebalance next time */
+		ltd->ltd_qos.lq_dirty = 1;
+		ltd->ltd_qos.lq_same_space = 0;
+
+		rc = -EAGAIN;
+	} else {
+		rc = nfound;
+	}
+
+unlock:
+	up_write(<d->ltd_qos.lq_rw_sem);
+
+	RETURN(rc);
+}
+
+/**
* Check stripe count the caller can use.
*
* For new layouts (no initialized components), check the total size of the
* statfs and check OST targets now, since ld_active_tgt_count
* could be changed if some OSTs are [de]activated manually.
*/
- lod_qos_statfs_update(env, d);
+ lod_qos_statfs_update(env, d, &d->lod_ost_descs);
stripe_len = lod_get_stripe_count(d, lo,
lod_comp->llc_stripe_count,
lod_comp->llc_pattern &
comp_idx);
lod_collect_avoidance(lo, lag, comp_idx);
- rc = lod_alloc_qos(env, lo, stripe, ost_indices, flag,
- th, comp_idx);
+ rc = lod_ost_alloc_qos(env, lo, stripe, ost_indices,
+ flag, th, comp_idx);
if (rc == -EAGAIN)
- rc = lod_alloc_rr(env, lo, stripe, ost_indices,
- flag, th, comp_idx);
+ rc = lod_ost_alloc_rr(env, lo, stripe,
+ ost_indices, flag, th,
+ comp_idx);
} else {
- rc = lod_alloc_specific(env, lo, stripe, ost_indices,
- flag, th, comp_idx);
+ rc = lod_ost_alloc_specific(env, lo, stripe,
+ ost_indices, flag, th,
+ comp_idx);
}
put_ldts:
lod_putref(d, &d->lod_ost_descs);
#ifdef CONFIG_PROC_FS
/**
- * Show default stripe size.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
+ * Show DoM default stripe size.
*/
-static int lod_dom_stripesize_seq_show(struct seq_file *m, void *v)
+static ssize_t dom_stripesize_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
- struct obd_device *dev = m->private;
- struct lod_device *lod;
+ struct dt_device *dt = container_of(kobj, struct dt_device,
+ dd_kobj);
+ struct lod_device *lod = dt2lod_dev(dt);
- LASSERT(dev != NULL);
- lod = lu2lod_dev(dev->obd_lu_dev);
- seq_printf(m, "%u\n", lod->lod_dom_max_stripesize);
- return 0;
+ return snprintf(buf, PAGE_SIZE, "%u\n", lod->lod_dom_max_stripesize);
}
/**
- * Set default stripe size.
- *
- * \param[in] file proc file
- * \param[in] buffer string containing the maximum number of bytes stored in
- * each object before moving to the next object in the
- * layout (if any)
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code if failed
+ * Set DoM default stripe size.
*/
-static ssize_t
-lod_dom_stripesize_seq_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
+static ssize_t dom_stripesize_store(struct kobject *kobj,
+ struct attribute *attr, const char *buffer,
+ size_t count)
{
- struct seq_file *m = file->private_data;
- struct obd_device *dev = m->private;
- struct lod_device *lod;
+ struct dt_device *dt = container_of(kobj, struct dt_device,
+ dd_kobj);
+ struct lod_device *lod = dt2lod_dev(dt);
+ char tbuf[22] = "";
s64 val;
int rc;
- LASSERT(dev != NULL);
- lod = lu2lod_dev(dev->obd_lu_dev);
- rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1');
+ if (count > (sizeof(tbuf) - 1))
+ return -EINVAL;
+
+ memcpy(tbuf, buffer, count);
+
+ rc = lu_str_to_s64(tbuf, count, &val, '1');
if (rc)
return rc;
+
if (val < 0)
return -ERANGE;
/* 1GB is the limit */
if (val > (1ULL << 30))
return -ERANGE;
- else if (val > 0) {
+
+ if (val > 0) {
if (val < LOV_MIN_STRIPE_SIZE) {
LCONSOLE_INFO("Increasing provided stripe size to "
"a minimum value %u\n",
return count;
}
-LPROC_SEQ_FOPS(lod_dom_stripesize);
-/**
- * Show default stripe size.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
- */
-static int lod_stripesize_seq_show(struct seq_file *m, void *v)
+LUSTRE_RW_ATTR(dom_stripesize);
+
+static ssize_t stripesize_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
- struct obd_device *dev = m->private;
- struct lod_device *lod;
+ struct dt_device *dt = container_of(kobj, struct dt_device,
+ dd_kobj);
+ struct lod_device *lod = dt2lod_dev(dt);
- LASSERT(dev != NULL);
- lod = lu2lod_dev(dev->obd_lu_dev);
- seq_printf(m, "%llu\n",
- lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_size);
- return 0;
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_size);
}
-/**
- * Set default stripe size.
- *
- * \param[in] file proc file
- * \param[in] buffer string containing the maximum number of bytes stored in
- * each object before moving to the next object in the
- * layout (if any)
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code if failed
- */
-static ssize_t
-lod_stripesize_seq_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
+static ssize_t stripesize_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
{
- struct seq_file *m = file->private_data;
- struct obd_device *dev = m->private;
- struct lod_device *lod;
+ struct dt_device *dt = container_of(kobj, struct dt_device,
+ dd_kobj);
+ struct lod_device *lod = dt2lod_dev(dt);
+ char tbuf[22] = "";
s64 val;
int rc;
- LASSERT(dev != NULL);
- lod = lu2lod_dev(dev->obd_lu_dev);
- rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1');
+ if (count > (sizeof(tbuf) - 1))
+ return -EINVAL;
+
+ memcpy(tbuf, buffer, count);
+
+ rc = lu_str_to_s64(tbuf, count, &val, '1');
if (rc)
return rc;
+
if (val < 0)
return -ERANGE;
return count;
}
-LPROC_SEQ_FOPS(lod_stripesize);
+
+LUSTRE_RW_ATTR(stripesize);
/**
* Show default stripe offset.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
static ssize_t stripeoffset_show(struct kobject *kobj, struct attribute *attr,
char *buf)
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%lld\n",
+ return snprintf(buf, PAGE_SIZE, "%lld\n",
lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_offset);
}
*
* Usually contains -1 allowing Lustre to balance objects among OST
* otherwise may cause severe OST imbalance.
- *
- * \param[in] file proc file
- * \param[in] buffer string describing starting OST index for new files
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code if failed
*/
-static ssize_t stripeoffset_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t stripeoffset_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
return count;
}
+
LUSTRE_RW_ATTR(stripeoffset);
/**
* Show default striping pattern (LOV_PATTERN_*).
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
-static ssize_t stripetype_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t __stripetype_show(struct kobject *kobj, struct attribute *attr,
+ char *buf, bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", ltd->ltd_lov_desc.ld_pattern);
+}
- return sprintf(buf, "%u\n", lod->lod_ost_descs.ltd_lov_desc.ld_pattern);
+static ssize_t mdt_stripetype_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return __stripetype_show(kobj, attr, buf, true);
+}
+
+static ssize_t stripetype_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return __stripetype_show(kobj, attr, buf, false);
}
/**
* Set default striping pattern (a number, not a human-readable string).
- *
- * \param[in] file proc file
- * \param[in] buffer string containing the default striping pattern for new
- * files. This is an integer LOV_PATTERN_* value
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code if failed
*/
-static ssize_t stripetype_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t __stripetype_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count, bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
u32 pattern;
int rc;
if (rc)
return rc;
- lod_fix_desc_pattern(&pattern);
- lod->lod_ost_descs.ltd_lov_desc.ld_pattern = pattern;
+ if (is_mdt)
+ lod_fix_lmv_desc_pattern(&pattern);
+ else
+ lod_fix_desc_pattern(&pattern);
+
+ ltd->ltd_lov_desc.ld_pattern = pattern;
return count;
}
+
+static ssize_t mdt_stripetype_store(struct kobject *kobj,
+ struct attribute *attr, const char *buffer,
+ size_t count)
+{
+ return __stripetype_store(kobj, attr, buffer, count, true);
+}
+
+static ssize_t stripetype_store(struct kobject *kobj,
+ struct attribute *attr, const char *buffer,
+ size_t count)
+{
+ return __stripetype_store(kobj, attr, buffer, count, false);
+}
+
+LUSTRE_RW_ATTR(mdt_stripetype);
LUSTRE_RW_ATTR(stripetype);
/**
* Show default number of stripes.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success,
- * \retval negative error code if failed
*/
-static ssize_t stripecount_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t __stripecount_show(struct kobject *kobj, struct attribute *attr,
+ char *buf, bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- struct lov_desc *desc = &lod->lod_ost_descs.ltd_lov_desc;
+ struct lov_desc *desc = is_mdt ? &lod->lod_mdt_descs.ltd_lov_desc :
+ &lod->lod_ost_descs.ltd_lov_desc;
- return sprintf(buf, "%d\n",
+ return snprintf(buf, PAGE_SIZE, "%d\n",
(s16)(desc->ld_default_stripe_count + 1) - 1);
}
+static ssize_t mdt_stripecount_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return __stripecount_show(kobj, attr, buf, true);
+}
+
+static ssize_t stripecount_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return __stripecount_show(kobj, attr, buf, false);
+}
+
/**
* Set default number of stripes.
- *
- * \param[in] file proc file
- * \param[in] buffer string containing the default number of stripes
- * for new files
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code otherwise
*/
-static ssize_t stripecount_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t __stripecount_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count,
+ bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
int stripe_count;
int rc;
return -ERANGE;
lod_fix_desc_stripe_count(&stripe_count);
- lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_count = stripe_count;
+ ltd->ltd_lov_desc.ld_default_stripe_count = stripe_count;
return count;
}
+
+static ssize_t mdt_stripecount_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ return __stripecount_store(kobj, attr, buffer, count, true);
+}
+
+static ssize_t stripecount_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ return __stripecount_store(kobj, attr, buffer, count, false);
+}
+
+LUSTRE_RW_ATTR(mdt_stripecount);
LUSTRE_RW_ATTR(stripecount);
/**
* Show number of targets.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
-static ssize_t numobd_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t __numobd_show(struct kobject *kobj, struct attribute *attr,
+ char *buf, bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", ltd->ltd_lov_desc.ld_tgt_count);
+}
+
+static ssize_t mdt_numobd_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return __numobd_show(kobj, attr, buf, true);
+}
- return sprintf(buf, "%u\n", lod->lod_ost_count);
+static ssize_t numobd_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return __numobd_show(kobj, attr, buf, false);
}
+
+LUSTRE_RO_ATTR(mdt_numobd);
LUSTRE_RO_ATTR(numobd);
/**
* Show number of active targets.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
-static ssize_t activeobd_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t __activeobd_show(struct kobject *kobj, struct attribute *attr,
+ char *buf, bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ ltd->ltd_lov_desc.ld_active_tgt_count);
+}
- return sprintf(buf, "%u\n",
- lod->lod_ost_descs.ltd_lov_desc.ld_active_tgt_count);
+static ssize_t mdt_activeobd_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return __activeobd_show(kobj, attr, buf, true);
}
+
+static ssize_t activeobd_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return __activeobd_show(kobj, attr, buf, false);
+}
+
+LUSTRE_RO_ATTR(mdt_activeobd);
LUSTRE_RO_ATTR(activeobd);
/**
* Show UUID of LOD device.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
static ssize_t desc_uuid_show(struct kobject *kobj, struct attribute *attr,
char *buf)
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%s\n",
+ return snprintf(buf, PAGE_SIZE, "%s\n",
lod->lod_ost_descs.ltd_lov_desc.ld_uuid.uuid);
}
LUSTRE_RO_ATTR(desc_uuid);
* of free space compared to performance. 0% means select OSTs equally
* regardless of their free space, 100% means select OSTs only by their free
* space even if it results in very imbalanced load on the OSTs.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
-static ssize_t qos_prio_free_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t __qos_prio_free_show(struct kobject *kobj,
+ struct attribute *attr, char *buf,
+ bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
+
+ return snprintf(buf, PAGE_SIZE, "%d%%\n",
+ (ltd->ltd_qos.lq_prio_free * 100 + 255) >> 8);
+}
- return sprintf(buf, "%d%%\n",
- (lod->lod_ost_descs.ltd_qos.lq_prio_free * 100 + 255) >>
- 8);
+static ssize_t mdt_qos_prio_free_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return __qos_prio_free_show(kobj, attr, buf, true);
+}
+
+static ssize_t qos_prio_free_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return __qos_prio_free_show(kobj, attr, buf, false);
}
/**
* are space imbalanced. See lod_qos_priofree_seq_show() for description of
* this parameter. See lod_qos_thresholdrr_seq_write() and lq_threshold_rr to
* determine what constitutes "space imbalanced" OSTs.
- *
- * \param[in] file proc file
- * \param[in] buffer string which contains the free space priority (0-100)
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code if failed
*/
-static ssize_t qos_prio_free_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t __qos_prio_free_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count,
+ bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
unsigned int val;
int rc;
if (val > 100)
return -EINVAL;
- lod->lod_ost_descs.ltd_qos.lq_prio_free = (val << 8) / 100;
- lod->lod_ost_descs.ltd_qos.lq_dirty = 1;
- lod->lod_ost_descs.ltd_qos.lq_reset = 1;
+ ltd->ltd_qos.lq_prio_free = (val << 8) / 100;
+ ltd->ltd_qos.lq_dirty = 1;
+ ltd->ltd_qos.lq_reset = 1;
return count;
}
+
+static ssize_t mdt_qos_prio_free_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ return __qos_prio_free_store(kobj, attr, buffer, count, true);
+}
+
+static ssize_t qos_prio_free_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ return __qos_prio_free_store(kobj, attr, buffer, count, false);
+}
+
+LUSTRE_RW_ATTR(mdt_qos_prio_free);
LUSTRE_RW_ATTR(qos_prio_free);
/**
* Show threshold for "same space on all OSTs" rule.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
-static int lod_qos_thresholdrr_seq_show(struct seq_file *m, void *v)
+static ssize_t __qos_thresholdrr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf,
+ bool is_mdt)
{
- struct obd_device *dev = m->private;
- struct lod_device *lod;
+ struct dt_device *dt = container_of(kobj, struct dt_device,
+ dd_kobj);
+ struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
- LASSERT(dev != NULL);
- lod = lu2lod_dev(dev->obd_lu_dev);
- seq_printf(m, "%d%%\n",
- (lod->lod_ost_descs.ltd_qos.lq_threshold_rr * 100 + 255) >>
- 8);
- return 0;
+ return snprintf(buf, PAGE_SIZE, "%d%%\n",
+ (ltd->ltd_qos.lq_threshold_rr * 100 + 255) >> 8);
+}
+
+static ssize_t mdt_qos_thresholdrr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return __qos_thresholdrr_show(kobj, attr, buf, true);
+}
+
+static ssize_t lod_qos_thresholdrr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return __qos_thresholdrr_show(kobj, attr, buf, false);
}
/**
* is exceeded, use the QoS allocator to select OSTs based on their available
* space so that more full OSTs are chosen less often, otherwise use the
* round-robin allocator for efficiency and performance.
-
- * \param[in] file proc file
- * \param[in] buffer string containing percentage difference of free space
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code if failed
*/
-static ssize_t
-lod_qos_thresholdrr_seq_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
+static ssize_t __qos_thresholdrr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count,
+ bool is_mdt)
{
- struct seq_file *m = file->private_data;
- struct obd_device *dev = m->private;
- struct lod_device *lod;
+ struct dt_device *dt = container_of(kobj, struct dt_device,
+ dd_kobj);
+ struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
+ unsigned int val;
int rc;
- __s64 val;
- LASSERT(dev != NULL);
- lod = lu2lod_dev(dev->obd_lu_dev);
-
- rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '%');
+ rc = kstrtouint(buffer, 0, &val);
if (rc)
return rc;
- if (val > 100 || val < 0)
+ if (val > 100)
return -EINVAL;
-
- lod->lod_ost_descs.ltd_qos.lq_threshold_rr = (val << 8) / 100;
- lod->lod_ost_descs.ltd_qos.lq_dirty = 1;
+ ltd->ltd_qos.lq_threshold_rr = (val << 8) / 100;
+ ltd->ltd_qos.lq_dirty = 1;
return count;
}
-LPROC_SEQ_FOPS(lod_qos_thresholdrr);
+
+static ssize_t mdt_qos_thresholdrr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ return __qos_thresholdrr_store(kobj, attr, buffer, count, true);
+}
+
+static ssize_t lod_qos_thresholdrr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ return __qos_thresholdrr_store(kobj, attr, buffer, count, false);
+}
+
+LUSTRE_RW_ATTR(mdt_qos_thresholdrr);
+LUSTRE_RW_ATTR(lod_qos_thresholdrr);
/**
* Show expiration period used to refresh cached statfs data, which
* is used to implement QoS/RR striping allocation algorithm.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
-static ssize_t qos_maxage_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t __qos_maxage_show(struct kobject *kobj, struct attribute *attr,
+ char *buf, bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
- return sprintf(buf, "%u Sec\n",
- lod->lod_ost_descs.ltd_lov_desc.ld_qos_maxage);
+ return snprintf(buf, PAGE_SIZE, "%u Sec\n",
+ ltd->ltd_lov_desc.ld_qos_maxage);
+}
+
+static ssize_t mdt_qos_maxage_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return __qos_maxage_show(kobj, attr, buf, true);
+}
+
+static ssize_t qos_maxage_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+	return __qos_maxage_show(kobj, attr, buf, false);
}
/**
* Set expiration period used to refresh cached statfs data.
- *
- * \param[in] file proc file
- * \param[in] buffer string contains maximum age of statfs data in seconds
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code if failed
*/
-static ssize_t qos_maxage_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t __qos_maxage_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count, bool is_mdt)
{
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
struct lustre_cfg_bufs bufs;
struct lu_device *next;
struct lustre_cfg *lcfg;
if (val <= 0)
return -EINVAL;
- lod->lod_ost_descs.ltd_lov_desc.ld_qos_maxage = val;
+
+ ltd->ltd_lov_desc.ld_qos_maxage = val;
/*
* propogate the value down to OSPs
return -ENOMEM;
lustre_cfg_init(lcfg, LCFG_PARAM, &bufs);
- lod_getref(&lod->lod_ost_descs);
- lod_foreach_ost(lod, tgt) {
+ lod_getref(ltd);
+ ltd_foreach_tgt(ltd, tgt) {
next = &tgt->ltd_tgt->dd_lu_dev;
rc = next->ld_ops->ldo_process_config(NULL, next, lcfg);
if (rc)
CERROR("can't set maxage on #%d: %d\n",
tgt->ltd_index, rc);
}
- lod_putref(lod, &lod->lod_ost_descs);
+ lod_putref(lod, ltd);
OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens));
return count;
}
+
+static ssize_t mdt_qos_maxage_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ return __qos_maxage_store(kobj, attr, buffer, count, true);
+}
+
+static ssize_t qos_maxage_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ return __qos_maxage_store(kobj, attr, buffer, count, false);
+}
+
+LUSTRE_RW_ATTR(mdt_qos_maxage);
LUSTRE_RW_ATTR(qos_maxage);
-static void *lod_osts_seq_start(struct seq_file *p, loff_t *pos)
+static void *lod_tgts_seq_start(struct seq_file *p, loff_t *pos, bool is_mdt)
{
struct obd_device *dev = p->private;
- struct lod_device *lod;
+ struct lod_device *lod = lu2lod_dev(dev->obd_lu_dev);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
LASSERT(dev != NULL);
- lod = lu2lod_dev(dev->obd_lu_dev);
- lod_getref(&lod->lod_ost_descs); /* released in lod_osts_seq_stop */
- if (*pos >= lod->lod_ost_bitmap->size)
+ lod_getref(ltd); /* released in lod_tgts_seq_stop */
+ if (*pos >= ltd->ltd_tgt_bitmap->size)
return NULL;
- *pos = find_next_bit(lod->lod_ost_bitmap->data,
- lod->lod_ost_bitmap->size, *pos);
- if (*pos < lod->lod_ost_bitmap->size)
- return OST_TGT(lod,*pos);
+ *pos = find_next_bit(ltd->ltd_tgt_bitmap->data,
+ ltd->ltd_tgt_bitmap->size, *pos);
+ if (*pos < ltd->ltd_tgt_bitmap->size)
+ return LTD_TGT(ltd, *pos);
else
return NULL;
}
-static void lod_osts_seq_stop(struct seq_file *p, void *v)
+static void *lod_mdts_seq_start(struct seq_file *p, loff_t *pos)
+{
+ return lod_tgts_seq_start(p, pos, true);
+}
+
+static void *lod_osts_seq_start(struct seq_file *p, loff_t *pos)
+{
+ return lod_tgts_seq_start(p, pos, false);
+}
+
+static void lod_tgts_seq_stop(struct seq_file *p, void *v, bool is_mdt)
{
struct obd_device *dev = p->private;
- struct lod_device *lod;
+ struct lod_device *lod = lu2lod_dev(dev->obd_lu_dev);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
LASSERT(dev != NULL);
- lod = lu2lod_dev(dev->obd_lu_dev);
- lod_putref(lod, &lod->lod_ost_descs);
+ lod_putref(lod, ltd);
}
-static void *lod_osts_seq_next(struct seq_file *p, void *v, loff_t *pos)
+static void lod_mdts_seq_stop(struct seq_file *p, void *v)
+{
+ lod_tgts_seq_stop(p, v, true);
+}
+
+static void lod_osts_seq_stop(struct seq_file *p, void *v)
+{
+ lod_tgts_seq_stop(p, v, false);
+}
+
+static void *lod_tgts_seq_next(struct seq_file *p, void *v, loff_t *pos,
+ bool is_mdt)
{
struct obd_device *dev = p->private;
struct lod_device *lod = lu2lod_dev(dev->obd_lu_dev);
+ struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+ &lod->lod_ost_descs;
- if (*pos >= lod->lod_ost_bitmap->size - 1)
+ if (*pos >= ltd->ltd_tgt_bitmap->size - 1)
return NULL;
- *pos = find_next_bit(lod->lod_ost_bitmap->data,
- lod->lod_ost_bitmap->size, *pos + 1);
- if (*pos < lod->lod_ost_bitmap->size)
- return OST_TGT(lod,*pos);
+ *pos = find_next_bit(ltd->ltd_tgt_bitmap->data,
+ ltd->ltd_tgt_bitmap->size, *pos + 1);
+ if (*pos < ltd->ltd_tgt_bitmap->size)
+ return LTD_TGT(ltd, *pos);
else
return NULL;
}
+static void *lod_mdts_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ return lod_tgts_seq_next(p, v, pos, true);
+}
+
+static void *lod_osts_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ return lod_tgts_seq_next(p, v, pos, false);
+}
+
/**
* Show active/inactive status for OST found by lod_osts_seq_next().
*
* \retval 0 on success
* \retval negative error code if failed
*/
-static int lod_osts_seq_show(struct seq_file *p, void *v)
+static int lod_tgts_seq_show(struct seq_file *p, void *v)
{
- struct obd_device *obd = p->private;
- struct lu_tgt_desc *ost_desc = v;
- struct lod_device *lod;
- int idx, rc, active;
- struct dt_device *next;
- struct obd_statfs sfs;
+ struct obd_device *obd = p->private;
+ struct lu_tgt_desc *tgt = v;
+ struct dt_device *next;
+ int rc, active;
LASSERT(obd->obd_lu_dev);
- lod = lu2lod_dev(obd->obd_lu_dev);
- idx = ost_desc->ltd_index;
- next = OST_TGT(lod, idx)->ltd_tgt;
- if (next == NULL)
+ next = tgt->ltd_tgt;
+ if (!next)
return -EINVAL;
/* XXX: should be non-NULL env, but it's very expensive */
active = 1;
- rc = dt_statfs(NULL, next, &sfs);
+ rc = dt_statfs(NULL, next, &tgt->ltd_statfs);
if (rc == -ENOTCONN) {
active = 0;
rc = 0;
} else if (rc)
return rc;
- seq_printf(p, "%d: %s %sACTIVE\n", idx,
- obd_uuid2str(&ost_desc->ltd_uuid),
+ seq_printf(p, "%d: %s %sACTIVE\n", tgt->ltd_index,
+ obd_uuid2str(&tgt->ltd_uuid),
active ? "" : "IN");
return 0;
}
+static const struct seq_operations lod_mdts_sops = {
+ .start = lod_mdts_seq_start,
+ .stop = lod_mdts_seq_stop,
+ .next = lod_mdts_seq_next,
+ .show = lod_tgts_seq_show,
+};
+
static const struct seq_operations lod_osts_sops = {
.start = lod_osts_seq_start,
.stop = lod_osts_seq_stop,
.next = lod_osts_seq_next,
- .show = lod_osts_seq_show,
+ .show = lod_tgts_seq_show,
};
+static int lod_mdts_seq_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int rc;
+
+ rc = seq_open(file, &lod_mdts_sops);
+ if (rc)
+ return rc;
+
+ seq = file->private_data;
+ seq->private = PDE_DATA(inode);
+ return 0;
+}
+
static int lod_osts_seq_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
/**
* Show whether special failout mode for testing is enabled or not.
- *
- * \param[in] m seq file
- * \param[in] v unused for single entry
- *
- * \retval 0 on success
- * \retval negative error code if failed
*/
static ssize_t lmv_failout_show(struct kobject *kobj, struct attribute *attr,
char *buf)
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%d\n", lod->lod_lmv_failout ? 1 : 0);
+ return snprintf(buf, PAGE_SIZE, "%d\n", lod->lod_lmv_failout ? 1 : 0);
}
/**
* This determines whether the LMV will try to continue processing a striped
* directory even if it has a (partly) corrupted entry in the master directory,
* or if it will abort upon finding a corrupted slave directory entry.
- *
- * \param[in] file proc file
- * \param[in] buffer string: 0 or non-zero to disable or enable LMV failout
- * \param[in] count @buffer length
- * \param[in] off unused for single entry
- *
- * \retval @count on success
- * \retval negative error code if failed
*/
static ssize_t lmv_failout_store(struct kobject *kobj, struct attribute *attr,
const char *buffer, size_t count)
LUSTRE_RW_ATTR(lmv_failout);
static struct lprocfs_vars lprocfs_lod_obd_vars[] = {
- { .name = "stripesize",
- .fops = &lod_stripesize_fops },
- { .name = "qos_threshold_rr",
- .fops = &lod_qos_thresholdrr_fops },
- { .name = "dom_stripesize",
- .fops = &lod_dom_stripesize_fops },
{ NULL }
};
+static const struct file_operations lod_proc_mdt_fops = {
+ .owner = THIS_MODULE,
+ .open = lod_mdts_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = lprocfs_seq_release,
+};
+
static const struct file_operations lod_proc_target_fops = {
.owner = THIS_MODULE,
.open = lod_osts_seq_open,
};
static struct attribute *lod_attrs[] = {
+ &lustre_attr_dom_stripesize.attr,
+ &lustre_attr_stripesize.attr,
&lustre_attr_stripeoffset.attr,
&lustre_attr_stripecount.attr,
&lustre_attr_stripetype.attr,
&lustre_attr_numobd.attr,
&lustre_attr_qos_maxage.attr,
&lustre_attr_qos_prio_free.attr,
+ &lustre_attr_lod_qos_thresholdrr.attr,
+ &lustre_attr_mdt_stripecount.attr,
+ &lustre_attr_mdt_stripetype.attr,
+ &lustre_attr_mdt_activeobd.attr,
+ &lustre_attr_mdt_numobd.attr,
+ &lustre_attr_mdt_qos_maxage.attr,
+ &lustre_attr_mdt_qos_prio_free.attr,
+ &lustre_attr_mdt_qos_thresholdrr.attr,
NULL,
};
GOTO(out, rc);
}
+ rc = lprocfs_seq_create(obd->obd_proc_entry, "mdt_obd",
+ 0444, &lod_proc_mdt_fops, obd);
+ if (rc) {
+		CWARN("%s: Error adding the mdt_obd file %d\n",
+ obd->obd_name, rc);
+ GOTO(out, rc);
+ }
+
rc = lprocfs_seq_create(obd->obd_proc_entry, "target_obd",
0444, &lod_proc_target_fops, obd);
if (rc) {
}
#endif /* CONFIG_PROC_FS */
-
buf->lb_buf = lmu;
buf->lb_len = ma->ma_lmv_size;
-
- if (le32_to_cpu(lmu->lum_hash_type) &
- LMV_HASH_FLAG_SPACE) {
- /*
- * only allow setting "space" hash flag on
- * plain directory.
- */
- rc = mdt_object_striped(info, mo);
- if (rc)
- GOTO(out_put,
- rc = (rc == 1) ? -EPERM : rc);
- }
-
name = XATTR_NAME_DEFAULT_LMV;
/* force client to update dir default layout */
lockpart |= MDS_INODELOCK_LOOKUP;
* have a unit as the last character. The function handles overflow/underflow
* of the signed integer.
*/
-static int str_to_s64_internal(const char __user *buffer, unsigned long count,
- __s64 *val, __u64 def_mult, bool allow_units)
+int lu_str_to_s64(char *buffer, unsigned long count, __s64 *val, char defunit)
{
- char kernbuf[22];
+ __u64 mult = 1;
__u64 tmp;
unsigned int offset = 0;
int signed sign = 1;
__u64 max = LLONG_MAX;
int rc = 0;
- if (count > (sizeof(kernbuf) - 1))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
-
- kernbuf[count] = '\0';
+ if (defunit != '1') {
+ rc = get_mult(defunit, &mult);
+ if (rc)
+ return rc;
+ }
/* keep track of our sign */
- if (*kernbuf == '-') {
+ if (*buffer == '-') {
sign = -1;
offset++;
/* equivalent to max = -LLONG_MIN, avoids overflow */
max++;
}
- rc = str_to_u64_parse(kernbuf + offset, count - offset,
- &tmp, def_mult, allow_units);
+ rc = str_to_u64_parse(buffer + offset, count - offset,
+ &tmp, mult, true);
if (rc)
return rc;
return 0;
}
+EXPORT_SYMBOL(lu_str_to_s64);
/* identical to s64 version, but does not handle overflow */
static int str_to_u64_internal(const char __user *buffer, unsigned long count,
int lprocfs_str_with_units_to_s64(const char __user *buffer,
unsigned long count, __s64 *val, char defunit)
{
- __u64 mult = 1;
- int rc;
+ char kernbuf[22];
- if (defunit != '1') {
- rc = get_mult(defunit, &mult);
- if (rc)
- return rc;
- }
+ if (count > (sizeof(kernbuf) - 1))
+ return -EINVAL;
+
+ if (copy_from_user(kernbuf, buffer, count))
+ return -EFAULT;
+
+ kernbuf[count] = '\0';
- return str_to_s64_internal(buffer, count, val, mult, true);
+ return lu_str_to_s64(kernbuf, count, val, defunit);
}
EXPORT_SYMBOL(lprocfs_str_with_units_to_s64);
ENTRY;
- /* tgt not connected, this function will be called again later */
- if (!exp)
- RETURN(0);
-
down_write(&qos->lq_rw_sem);
/*
* a bit hacky approach to learn NID of corresponding connection
* per-tgt penalty is
* prio * bavail * iavail / (num_tgt - 1) / 2
*/
- tgt->ltd_qos.ltq_penalty_per_obj = prio_wide * ba * ia;
+ tgt->ltd_qos.ltq_penalty_per_obj = prio_wide * ba * ia >> 8;
do_div(tgt->ltd_qos.ltq_penalty_per_obj, num_active);
tgt->ltd_qos.ltq_penalty_per_obj >>= 1;
list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
ba = svr->lsq_bavail;
ia = svr->lsq_iavail;
- svr->lsq_penalty_per_obj = prio_wide * ba * ia;
- do_div(ba, svr->lsq_tgt_count * num_active);
+ svr->lsq_penalty_per_obj = prio_wide * ba * ia >> 8;
+ do_div(svr->lsq_penalty_per_obj,
+ svr->lsq_tgt_count * num_active);
svr->lsq_penalty_per_obj >>= 1;
age = (now - svr->lsq_used) >> 3;
if (!tgt->ltd_active)
continue;
+ ltq = &tgt->ltd_qos;
if (ltq->ltq_penalty < ltq->ltq_penalty_per_obj)
ltq->ltq_penalty = 0;
else
if (ltq->ltq_usable)
*total_wt += ltq->ltq_weight;
- CDEBUG(D_OTHER, "recalc tgt %d usable=%d avail=%llu tgtppo=%llu tgtp=%llu svrppo=%llu svrp=%llu wt=%llu\n",
+ CDEBUG(D_OTHER, "recalc tgt %d usable=%d bavail=%llu ffree=%llu tgtppo=%llu tgtp=%llu svrppo=%llu svrp=%llu wt=%llu\n",
tgt->ltd_index, ltq->ltq_usable,
- tgt_statfs_bavail(tgt) >> 10,
+ tgt_statfs_bavail(tgt) >> 16,
+ tgt_statfs_iavail(tgt) >> 8,
ltq->ltq_penalty_per_obj >> 10,
ltq->ltq_penalty >> 10,
ltq->ltq_svr->lsq_penalty_per_obj >> 10,
CLASSERT(LMV_MAGIC_V1 == 0x0CD20CD0);
CLASSERT(LMV_MAGIC_STRIPE == 0x0CD40CD0);
CLASSERT(LMV_HASH_TYPE_MASK == 0x0000ffff);
- CLASSERT(LMV_HASH_FLAG_SPACE == 0x08000000);
CLASSERT(LMV_HASH_FLAG_LOST_LMV == 0x10000000);
CLASSERT(LMV_HASH_FLAG_BAD_TYPE == 0x20000000);
CLASSERT(LMV_HASH_FLAG_MIGRATION == 0x80000000);
}
run_test 412 "mkdir on specific MDTs"
-test_413a() {
- [ $MDSCOUNT -lt 2 ] &&
- skip "We need at least 2 MDTs for this test"
-
- if [ $(lustre_version_code mds1) -lt $(version_code 2.10.55) ]; then
- skip "Need server version at least 2.10.55"
- fi
-
- mkdir $DIR/$tdir || error "mkdir failed"
-
- # find MDT that is the most full
- local max=$($LFS df | grep MDT |
- awk 'BEGIN { a=0 }
- { sub("%", "", $5)
- if (0+$5 >= a)
- {
- a = $5
- b = $6
- }
- }
- END { split(b, c, ":")
- sub("]", "", c[2])
- print c[2]
- }')
-
- for i in $(seq $((MDSCOUNT - 1))); do
- $LFS mkdir -c $i $DIR/$tdir/d$i ||
- error "mkdir d$i failed"
- $LFS getdirstripe $DIR/$tdir/d$i
- local stripe_index=$($LFS getdirstripe -i $DIR/$tdir/d$i)
- [ $stripe_index -ne $max ] ||
- error "don't expect $max"
- done
-}
-run_test 413a "mkdir on less full MDTs"
-
-test_413b() {
- [ $MDSCOUNT -lt 2 ] &&
- skip "We need at least 2 MDTs for this test"
-
- [ $MDS1_VERSION -lt $(version_code 2.12.52) ] &&
- skip "Need server version at least 2.12.52"
-
- mkdir $DIR/$tdir || error "mkdir failed"
- $LFS setdirstripe -D -i -1 -H space $DIR/$tdir ||
- error "setdirstripe failed"
+test_qos_mkdir() {
+ local mkdir_cmd=$1
+ local stripe_count=$2
+ local mdts=$(comma_list $(mdts_nodes))
- local qos_prio_free
- local qos_threshold_rr
+ local testdir
+ local lmv_qos_prio_free
+ local lmv_qos_threshold_rr
+ local lmv_qos_maxage
+ local lod_qos_prio_free
+ local lod_qos_threshold_rr
+ local lod_qos_maxage
local count
+ local i
- qos_prio_free=$($LCTL get_param -n lmv.*.qos_prio_free | head -n1)
- qos_prio_free=${qos_prio_free%%%}
- qos_threshold_rr=$($LCTL get_param -n lmv.*.qos_threshold_rr | head -n1)
- qos_threshold_rr=${qos_threshold_rr%%%}
- qos_maxage=$($LCTL get_param -n lmv.*.qos_maxage)
-
- stack_trap "$LCTL set_param lmv.*.qos_prio_free=$qos_prio_free" EXIT
- stack_trap "$LCTL set_param lmv.*.qos_threshold_rr=$qos_threshold_rr" \
+ lmv_qos_prio_free=$($LCTL get_param -n lmv.*.qos_prio_free | head -n1)
+ lmv_qos_prio_free=${lmv_qos_prio_free%%%}
+ lmv_qos_threshold_rr=$($LCTL get_param -n lmv.*.qos_threshold_rr |
+ head -n1)
+ lmv_qos_threshold_rr=${lmv_qos_threshold_rr%%%}
+ lmv_qos_maxage=$($LCTL get_param -n lmv.*.qos_maxage)
+ stack_trap "$LCTL set_param \
+ lmv.*.qos_prio_free=$lmv_qos_prio_free > /dev/null" EXIT
+ stack_trap "$LCTL set_param \
+ lmv.*.qos_threshold_rr=$lmv_qos_threshold_rr > /dev/null" EXIT
+ stack_trap "$LCTL set_param \
+ lmv.*.qos_maxage=$lmv_qos_maxage > /dev/null" EXIT
+
+ lod_qos_prio_free=$(do_facet mds1 $LCTL get_param -n \
+ lod.lustre-MDT0000-mdtlov.mdt_qos_prio_free | head -n1)
+ lod_qos_prio_free=${lod_qos_prio_free%%%}
+ lod_qos_threshold_rr=$(do_facet mds1 $LCTL get_param -n \
+ lod.lustre-MDT0000-mdtlov.mdt_qos_thresholdrr | head -n1)
+ lod_qos_threshold_rr=${lod_qos_threshold_rr%%%}
+ lod_qos_maxage=$(do_facet mds1 $LCTL get_param -n \
+ lod.lustre-MDT0000-mdtlov.qos_maxage | awk '{ print $1 }')
+ stack_trap "do_nodes $mdts $LCTL set_param \
+ lod.*.mdt_qos_prio_free=$lod_qos_prio_free > /dev/null" EXIT
+ stack_trap "do_nodes $mdts $LCTL set_param \
+ lod.*.mdt_qos_thresholdrr=$lod_qos_threshold_rr > /dev/null" \
EXIT
- stack_trap "$LCTL set_param lmv.*.qos_maxage=$qos_maxage" EXIT
+ stack_trap "do_nodes $mdts $LCTL set_param \
+ lod.*.mdt_qos_maxage=$lod_qos_maxage > /dev/null" EXIT
+
+ echo
+ echo "Mkdir (stripe_count $stripe_count) roundrobin:"
- echo "mkdir with roundrobin"
+ $LCTL set_param lmv.*.qos_threshold_rr=100 > /dev/null
+ do_nodes $mdts $LCTL set_param lod.*.mdt_qos_thresholdrr=100 > /dev/null
+
+ testdir=$DIR/$tdir-s$stripe_count/rr
- $LCTL set_param lmv.*.qos_threshold_rr=100
for i in $(seq $((100 * MDSCOUNT))); do
- mkdir $DIR/$tdir/subdir$i || error "mkdir subdir$i failed"
+ eval $mkdir_cmd $testdir/subdir$i ||
+ error "$mkdir_cmd subdir$i failed"
done
+
for i in $(seq $MDSCOUNT); do
- count=$($LFS getdirstripe -i $DIR/$tdir/* | grep ^$((i - 1))$ |
- wc -w)
+ count=$($LFS getdirstripe -i $testdir/* |
+ grep ^$((i - 1))$ | wc -l)
echo "$count directories created on MDT$((i - 1))"
[ $count -eq 100 ] || error "subdirs are not evenly distributed"
+
+ if [ $stripe_count -gt 1 ]; then
+ count=$($LFS getdirstripe $testdir/* |
+ grep -P "^\s+$((i - 1))\t" | wc -l)
+ echo "$count stripes created on MDT$((i - 1))"
+			# deviation should be < 5% of average
+ [ $count -lt $((95 * stripe_count)) ] ||
+ [ $count -gt $((105 * stripe_count)) ] &&
+ error "stripes are not evenly distributed"
+ fi
done
- rm -rf $DIR/$tdir/*
+ $LCTL set_param lmv.*.qos_threshold_rr=$lmv_qos_threshold_rr > /dev/null
+ do_nodes $mdts $LCTL set_param \
+ lod.*.mdt_qos_thresholdrr=$lod_qos_threshold_rr > /dev/null
- $LCTL set_param lmv.*.qos_threshold_rr=$qos_threshold_rr
- # Shorten statfs result age, so that it can be updated in time
- $LCTL set_param lmv.*.qos_maxage=1
- sleep_maxage
+ echo
+ echo "Check for uneven MDTs: "
local ffree
local bavail
# Check if we need to generate uneven MDTs
local threshold=50
- local diff=$(((max - min ) * 100 / min))
+ local diff=$(((max - min) * 100 / min))
local value="$(generate_string 1024)"
- local i
while [ $diff -lt $threshold ]; do
# generate uneven MDTs, create till $threshold% diff
error "mkdir $tdir-MDT$min_index failed"
for i in $(seq $count); do
$OPENFILE -f O_CREAT:O_LOV_DELAY_CREATE \
- $DIR/$tdir-MDT$min_index/f$i > /dev/null ||
- error "create f$i failed"
+				$DIR/$tdir-MDT$min_index/f${j}_$i > /dev/null ||
+				error "create f${j}_$i failed"
setfattr -n user.413b -v $value \
- $DIR/$tdir-MDT$min_index/f$i ||
- error "setfattr f$i failed"
+				$DIR/$tdir-MDT$min_index/f${j}_$i ||
+				error "setfattr f${j}_$i failed"
done
ffree=($(lctl get_param -n mdc.*[mM][dD][cC]-*.filesfree))
echo "MDT blocks available: ${bavail[@]}"
echo "weight diff=$diff%"
- echo "mkdir with balanced space usage"
- $LCTL set_param lmv.*.qos_prio_free=100
+ echo
+ echo "Mkdir (stripe_count $stripe_count) with balanced space usage:"
+
+ $LCTL set_param lmv.*.qos_prio_free=100 > /dev/null
+ do_nodes $mdts $LCTL set_param lod.*.mdt_qos_prio_free=100 > /dev/null
+ # decrease statfs age, so that it can be updated in time
+ $LCTL set_param lmv.*.qos_maxage=1 > /dev/null
+ do_nodes $mdts $LCTL set_param lod.*.mdt_qos_maxage=1 > /dev/null
+
+ sleep 1
+
+ testdir=$DIR/$tdir-s$stripe_count/qos
+
for i in $(seq $((100 * MDSCOUNT))); do
- mkdir $DIR/$tdir/subdir$i || error "mkdir subdir$i failed"
+ eval $mkdir_cmd $testdir/subdir$i ||
+ error "$mkdir_cmd subdir$i failed"
done
for i in $(seq $MDSCOUNT); do
- count=$($LFS getdirstripe -i $DIR/$tdir/* | grep ^$((i - 1))$ |
- wc -w)
+ count=$($LFS getdirstripe -i $testdir/* | grep ^$((i - 1))$ |
+ wc -l)
echo "$count directories created on MDT$((i - 1))"
+
+ if [ $stripe_count -gt 1 ]; then
+ count=$($LFS getdirstripe $testdir/* |
+ grep -P "^\s+$((i - 1))\t" | wc -l)
+ echo "$count stripes created on MDT$((i - 1))"
+ fi
done
- max=$($LFS getdirstripe -i $DIR/$tdir/* | grep ^$max_index$ | wc -l)
- min=$($LFS getdirstripe -i $DIR/$tdir/* | grep ^$min_index$ | wc -l)
+ max=$($LFS getdirstripe -i $testdir/* | grep ^$max_index$ | wc -l)
+ min=$($LFS getdirstripe -i $testdir/* | grep ^$min_index$ | wc -l)
+	# D-value should be > 10% of average
[ $((max - min)) -lt 10 ] &&
error "subdirs shouldn't be evenly distributed"
- which getfattr > /dev/null 2>&1 || skip_env "no getfattr command"
+ # ditto
+ if [ $stripe_count -gt 1 ]; then
+ max=$($LFS getdirstripe $testdir/* |
+ grep -P "^\s+$max_index\t" | wc -l)
+ min=$($LFS getdirstripe $testdir/* |
+ grep -P "^\s+$min_index\t" | wc -l)
+ [ $((max - min)) -le $((10 * stripe_count)) ] &&
+			error "stripes shouldn't be evenly distributed" || true
+ fi
+}
+
+test_413a() {
+ [ $MDSCOUNT -lt 2 ] &&
+ skip "We need at least 2 MDTs for this test"
- $LFS setdirstripe -D -d $DIR/$tdir || error "setdirstripe -d failed"
- getfattr -n trusted.dmv $DIR/$tdir &&
- error "default dir layout exists" || true
+ [ $MDS1_VERSION -lt $(version_code 2.12.52) ] &&
+ skip "Need server version at least 2.12.52"
+
+ local stripe_count
+
+ for stripe_count in $(seq 1 $((MDSCOUNT - 1))); do
+ mkdir $DIR/$tdir-s$stripe_count || error "mkdir failed"
+ mkdir $DIR/$tdir-s$stripe_count/rr || error "mkdir failed"
+ mkdir $DIR/$tdir-s$stripe_count/qos || error "mkdir failed"
+ test_qos_mkdir "$LFS mkdir -c $stripe_count" $stripe_count
+ done
+}
+run_test 413a "QoS mkdir with 'lfs mkdir -i -1'"
+
+test_413b() {
+ [ $MDSCOUNT -lt 2 ] &&
+ skip "We need at least 2 MDTs for this test"
+
+ [ $MDS1_VERSION -lt $(version_code 2.12.52) ] &&
+ skip "Need server version at least 2.12.52"
+
+ local stripe_count
+
+ for stripe_count in $(seq 1 $((MDSCOUNT - 1))); do
+ mkdir $DIR/$tdir-s$stripe_count || error "mkdir failed"
+ mkdir $DIR/$tdir-s$stripe_count/rr || error "mkdir failed"
+ mkdir $DIR/$tdir-s$stripe_count/qos || error "mkdir failed"
+ $LFS setdirstripe -D -c $stripe_count \
+ $DIR/$tdir-s$stripe_count/rr ||
+ error "setdirstripe failed"
+ $LFS setdirstripe -D -c $stripe_count \
+ $DIR/$tdir-s$stripe_count/qos ||
+ error "setdirstripe failed"
+ test_qos_mkdir "mkdir" $stripe_count
+ done
}
-run_test 413b "mkdir with balanced space usage"
+run_test 413b "QoS mkdir under dir whose default LMV starting MDT offset is -1"
test_414() {
#define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521
if (strcmp(hashtype, mdt_hash_name[i]) == 0)
return i;
- if (!strcmp(hashtype, LMV_HASH_NAME_SPACE))
- return LMV_HASH_TYPE_DEFAULT | LMV_HASH_FLAG_SPACE;
-
return 0;
}
return rc;
}
-static int ll_statfs_data_comp(const void *sd1, const void *sd2)
-{
- const struct obd_statfs *st1 = &((const struct ll_statfs_data *)sd1)->
- sd_st;
- const struct obd_statfs *st2 = &((const struct ll_statfs_data *)sd2)->
- sd_st;
- int r1 = obd_statfs_ratio(st1, false);
- int r2 = obd_statfs_ratio(st2, false);
- int64_t result = r1 - r2;
-
- /* if both space usage are above 90, compare free inodes */
- if (r1 > 90 && r2 > 90)
- result = st2->os_ffree - st1->os_ffree;
-
- if (result < 0)
- return -1;
- else if (result == 0)
- return 0;
- else
- return 1;
-}
-
/* functions */
static int lfs_setdirstripe(int argc, char **argv)
{
char *mode_opt = NULL;
bool default_stripe = false;
bool delete = false;
- bool auto_distributed = false;
bool foreign_mode = false;
mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO;
mode_t previous_mode = 0;
- struct ll_statfs_buf *lsb = NULL;
- char mntdir[PATH_MAX] = "";
char *xattr = NULL;
__u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0;
struct option long_opts[] = {
memcpy(param->lsp_tgts, mdts, sizeof(*mdts) * lsa.lsa_nr_tgts);
}
- if (!default_stripe && (lsa.lsa_pattern & LMV_HASH_FLAG_SPACE)) {
- fprintf(stderr, "%s %s: can only specify -H space with -D\n",
- progname, argv[0]);
- free(param);
- return CMD_HELP;
- }
-
- if (param->lsp_stripe_offset != -1 &&
- lsa.lsa_pattern & LMV_HASH_FLAG_SPACE) {
- fprintf(stderr, "%s %s: can only specify -H space with -i -1\n",
- progname, argv[0]);
- free(param);
- return CMD_HELP;
- }
-
dname = argv[optind];
do {
if (default_stripe) {
continue;
}
- /*
- * if current \a dname isn't under the same \a mntdir as the
- * last one, and the last one was auto-distributed, restore
- * \a param.
- */
- if (mntdir[0] != '\0' &&
- strncmp(dname, mntdir, strlen(mntdir)) &&
- auto_distributed) {
- param->lsp_is_specific = false;
- param->lsp_stripe_offset = -1;
- auto_distributed = false;
- }
-
- /*
- * TODO: when MDT can allocate object with QoS (LU-9435), below
- * code should be removed, instead we should let LMV to allocate
- * the starting MDT object, and then let LOD allocate other MDT
- * objects.
- */
- if (!param->lsp_is_specific && param->lsp_stripe_offset == -1) {
- char path[PATH_MAX] = "";
-
- if (!lsb) {
- lsb = malloc(sizeof(*lsb));
- if (!lsb) {
- result = -ENOMEM;
- break;
- }
- }
- lsb->sb_count = 0;
-
- /* use mntdir for dirname() temporarily */
- strncpy(mntdir, dname, sizeof(mntdir) - 1);
- if (!realpath(dirname(mntdir), path)) {
- result = -errno;
- fprintf(stderr,
- "error: invalid path '%s': %s\n",
- argv[optind], strerror(errno));
- break;
- }
- mntdir[0] = '\0';
-
- result = llapi_search_mounts(path, 0, mntdir, NULL);
- if (result < 0 || mntdir[0] == '\0') {
- fprintf(stderr,
- "No suitable Lustre mount found\n");
- break;
- }
-
- result = mntdf(mntdir, NULL, NULL, 0, LL_STATFS_LMV,
- lsb);
- if (result < 0)
- break;
-
- if (param->lsp_stripe_count > lsb->sb_count) {
- fprintf(stderr,
- "error: stripe count %d is too big\n",
- param->lsp_stripe_count);
- result = -ERANGE;
- break;
- }
-
- qsort(lsb->sb_buf, lsb->sb_count,
- sizeof(struct ll_statfs_data),
- ll_statfs_data_comp);
-
- auto_distributed = true;
- }
-
- if (auto_distributed) {
- int r;
- int nr = MAX(param->lsp_stripe_count,
- lsb->sb_count / 2);
-
- /* don't use server whose usage is above 90% */
- while (nr != param->lsp_stripe_count &&
- obd_statfs_ratio(&lsb->sb_buf[nr].sd_st, false) >
- 90)
- nr = MAX(param->lsp_stripe_count, nr / 2);
-
- /* get \a r between [0, nr) */
- r = rand() % nr;
-
- param->lsp_stripe_offset = lsb->sb_buf[r].sd_index;
- if (param->lsp_stripe_count > 1) {
- int i = 0;
-
- param->lsp_is_specific = true;
- for (; i < param->lsp_stripe_count; i++)
- param->lsp_tgts[(i + r) % nr] =
- lsb->sb_buf[i].sd_index;
- }
- }
-
result = llapi_dir_create(dname, mode, param);
if (result)
fprintf(stderr,
if (mode_opt != NULL)
umask(previous_mode);
- free(lsb);
free(param);
return result;
}
}
}
- if (lmu.lum_stripe_offset == -1) {
+ if (lmu.lum_stripe_offset == LMV_OFFSET_DEFAULT) {
fprintf(stderr, "%s mv: MDT index must be specified\n",
progname);
return CMD_HELP;
else
llapi_printf(LLAPI_MSG_NORMAL, "%#x", type);
- if (flags & LMV_HASH_FLAG_SPACE)
- llapi_printf(LLAPI_MSG_NORMAL, ",space");
if (flags & LMV_HASH_FLAG_MIGRATION)
llapi_printf(LLAPI_MSG_NORMAL, ",migrating");
if (flags & LMV_HASH_FLAG_BAD_TYPE)
lum->lum_magic = LMV_USER_MAGIC;
lum->lum_stripe_count = 0;
- lum->lum_stripe_offset = -1;
+ lum->lum_stripe_offset = LMV_OFFSET_DEFAULT;
goto dump;
} else if (param->fp_get_lmv) {
struct lmv_user_md *lum = param->fp_lmv_md;
CHECK_CDEFINE(LMV_MAGIC_V1);
CHECK_CDEFINE(LMV_MAGIC_STRIPE);
CHECK_CDEFINE(LMV_HASH_TYPE_MASK);
- CHECK_CDEFINE(LMV_HASH_FLAG_SPACE);
CHECK_CDEFINE(LMV_HASH_FLAG_LOST_LMV);
CHECK_CDEFINE(LMV_HASH_FLAG_BAD_TYPE);
CHECK_CDEFINE(LMV_HASH_FLAG_MIGRATION);
CLASSERT(LMV_MAGIC_V1 == 0x0CD20CD0);
CLASSERT(LMV_MAGIC_STRIPE == 0x0CD40CD0);
CLASSERT(LMV_HASH_TYPE_MASK == 0x0000ffff);
- CLASSERT(LMV_HASH_FLAG_SPACE == 0x08000000);
CLASSERT(LMV_HASH_FLAG_LOST_LMV == 0x10000000);
CLASSERT(LMV_HASH_FLAG_BAD_TYPE == 0x20000000);
CLASSERT(LMV_HASH_FLAG_MIGRATION == 0x80000000);