static int lmv_check_connect(struct obd_device *obd);
-static void lmv_activate_target(struct lmv_obd *lmv,
- struct lmv_tgt_desc *tgt,
- int activate)
+void lmv_activate_target(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt,
+ int activate)
{
if (tgt->ltd_active == activate)
return;
int rc;
ENTRY;
- mdc_obd = class_find_client_obd(&tgt->ltd_uuid, LUSTRE_MDC_NAME,
- &obd->obd_uuid);
- if (!mdc_obd) {
- CERROR("target %s not attached\n", tgt->ltd_uuid.uuid);
- RETURN(-EINVAL);
- }
+ mdc_obd = class_find_client_obd(&tgt->ltd_uuid, LUSTRE_MDC_NAME,
+ &obd->obd_uuid);
+ if (!mdc_obd) {
+ CERROR("target %s not attached\n", tgt->ltd_uuid.uuid);
+ RETURN(-EINVAL);
+ }
CDEBUG(D_CONFIG, "connect to %s(%s) - %s, %s\n",
mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
tgt->ltd_uuid.uuid, obd->obd_uuid.uuid);
- if (!mdc_obd->obd_set_up) {
- CERROR("target %s is not set up\n", tgt->ltd_uuid.uuid);
- RETURN(-EINVAL);
- }
+ if (!mdc_obd->obd_set_up) {
+ CERROR("target %s is not set up\n", tgt->ltd_uuid.uuid);
+ RETURN(-EINVAL);
+ }
rc = obd_connect(NULL, &mdc_exp, mdc_obd, &obd->obd_uuid,
&lmv->conn_data, lmv->lmv_cache);
if (rc)
RETURN(rc);
- target.ft_srv = NULL;
- target.ft_exp = mdc_exp;
- target.ft_idx = tgt->ltd_idx;
+ target.ft_srv = NULL;
+ target.ft_exp = mdc_exp;
+ target.ft_idx = tgt->ltd_index;
- fld_client_add_target(&lmv->lmv_fld, &target);
+ fld_client_add_target(&lmv->lmv_fld, &target);
- rc = obd_register_observer(mdc_obd, obd);
- if (rc) {
- obd_disconnect(mdc_exp);
- CERROR("target %s register_observer error %d\n",
- tgt->ltd_uuid.uuid, rc);
- RETURN(rc);
- }
+ rc = obd_register_observer(mdc_obd, obd);
+ if (rc) {
+ obd_disconnect(mdc_exp);
+ CERROR("target %s register_observer error %d\n",
+ tgt->ltd_uuid.uuid, rc);
+ RETURN(rc);
+ }
if (obd->obd_observer) {
/*
md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize);
+ rc = lqos_add_tgt(&lmv->lmv_qos, tgt);
+ if (rc) {
+ obd_disconnect(mdc_exp);
+ RETURN(rc);
+ }
+
CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n",
mdc_obd->obd_name, mdc_obd->obd_uuid.uuid,
atomic_read(&obd->obd_refcount));
if (lmv->tgts[index] == NULL)
return;
+ lqos_del_tgt(&lmv->lmv_qos, lmv->tgts[index]);
+
OBD_FREE_PTR(lmv->tgts[index]);
lmv->tgts[index] = NULL;
return;
__u32 index, int gen)
{
struct obd_device *mdc_obd;
- struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_tgt_desc *tgt;
- int orig_tgt_count = 0;
- int rc = 0;
- ENTRY;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_tgt_desc *tgt;
+ int orig_tgt_count = 0;
+ int rc = 0;
+
+ ENTRY;
CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME,
}
mutex_init(&tgt->ltd_fid_mutex);
- tgt->ltd_idx = index;
+ tgt->ltd_index = index;
tgt->ltd_uuid = *uuidp;
tgt->ltd_active = 0;
lmv->tgts[index] = tgt;
RETURN(-EINVAL);
/* only files on same MDT can have their layouts swapped */
- if (tgt1->ltd_idx != tgt2->ltd_idx)
+ if (tgt1->ltd_index != tgt2->ltd_index)
RETURN(-EPERM);
rc = obd_iocontrol(cmd, tgt1->ltd_exp, len, karg, uarg);
static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
{
- struct lmv_obd *lmv = &obd->u.lmv;
+ struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_desc *desc;
- int rc;
+ struct lnet_process_id lnet_id;
+ int i = 0;
+ int rc;
+
ENTRY;
if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid);
lmv->desc.ld_tgt_count = 0;
lmv->desc.ld_active_tgt_count = 0;
- lmv->desc.ld_qos_maxage = 60;
+ lmv->desc.ld_qos_maxage = LMV_DESC_QOS_MAXAGE_DEFAULT;
lmv->max_def_easize = 0;
lmv->max_easize = 0;
spin_lock_init(&lmv->lmv_lock);
mutex_init(&lmv->lmv_init_mutex);
+ /* Set up allocation policy (QoS and RR) */
+ INIT_LIST_HEAD(&lmv->lmv_qos.lq_svr_list);
+ init_rwsem(&lmv->lmv_qos.lq_rw_sem);
+ lmv->lmv_qos.lq_dirty = 1;
+ lmv->lmv_qos.lq_rr.lqr_dirty = 1;
+ lmv->lmv_qos.lq_reset = 1;
+ /* Default priority is toward free space balance */
+ lmv->lmv_qos.lq_prio_free = 232;
+ /* Default threshold for rr (roughly 17%) */
+ lmv->lmv_qos.lq_threshold_rr = 43;
+
+ /*
+ * Initialize rr_index to the lower 32 bits of the first non-loopback
+ * NID, so that clients distribute subdirs evenly from the beginning.
+ */
+ while (LNetGetId(i++, &lnet_id) != -ENOENT) {
+ if (LNET_NETTYP(LNET_NIDNET(lnet_id.nid)) != LOLND) {
+ lmv->lmv_qos_rr_index = (u32)lnet_id.nid;
+ break;
+ }
+ }
+
rc = lmv_tunables_init(obd);
if (rc)
CWARN("%s: error adding LMV sysfs/debugfs files: rc = %d\n",
tgt->ltd_statfs = *osfs;
tgt->ltd_statfs_age = ktime_get_seconds();
spin_unlock(&lmv->lmv_lock);
+ lmv->lmv_qos.lq_dirty = 1;
}
return rc;
RETURN(PTR_ERR(tgt));
if (op_data->op_flags & MF_GET_MDT_IDX) {
- op_data->op_mds = tgt->ltd_idx;
+ op_data->op_mds = tgt->ltd_index;
RETURN(0);
}
RETURN(rc);
}
-static struct lmv_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
-{
- static unsigned int rr_index;
-
- /* locate MDT round-robin is the first step */
- *mdt = rr_index % lmv->tgts_size;
- rr_index++;
-
- return lmv->tgts[*mdt];
-}
-
static struct lmv_tgt_desc *
lmv_locate_tgt_by_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
const char *name, int namelen, struct lu_fid *fid,
if (IS_ERR(tgt))
return tgt;
- *mds = tgt->ltd_idx;
+ *mds = tgt->ltd_index;
return tgt;
}
lmv_dir_space_hashed(op_data->op_default_mea1) &&
!lmv_dir_striped(lsm)) {
tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds);
+ if (tgt == ERR_PTR(-EAGAIN))
+ tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds);
/*
* only update statfs when mkdir under dir with "space" hash,
* this means the cached statfs may be stale, and current mkdir
* may not follow QoS accurately, but it's not serious, and it
* avoids periodic statfs when client doesn't mkdir under
* "space" hashed directories.
+ *
+ * TODO: once MDTs support QoS object allocation, also update
+ * statfs for 'lfs mkdir -i -1 ...'; currently that is done in
+ * user space.
*/
if (!IS_ERR(tgt)) {
struct obd_device *obd;
if (IS_ERR(tgt))
RETURN(PTR_ERR(tgt));
- op_data->op_mds = tgt->ltd_idx;
+ op_data->op_mds = tgt->ltd_index;
}
CDEBUG(D_INODE, "CREATE obj "DFID" -> mds #%x\n",
RETURN(PTR_ERR(tgt));
CDEBUG(D_INODE, "ENQUEUE on "DFID" -> mds #%u\n",
- PFID(&op_data->op_fid1), tgt->ltd_idx);
+ PFID(&op_data->op_fid1), tgt->ltd_index);
rc = md_enqueue(tgt->ltd_exp, einfo, policy, op_data, lockh,
extra_lock_flags);
CDEBUG(D_INODE, "GETATTR_NAME for %*s on "DFID" -> mds #%d\n",
(int)op_data->op_namelen, op_data->op_name,
- PFID(&op_data->op_fid1), tgt->ltd_idx);
+ PFID(&op_data->op_fid1), tgt->ltd_index);
rc = md_getattr_name(tgt->ltd_exp, op_data, preq);
if (rc == -ENOENT && lmv_dir_retry_check_update(op_data)) {
RETURN(PTR_ERR(tgt));
}
- if (tgt->ltd_idx != op_tgt) {
+ if (tgt->ltd_index != op_tgt) {
CDEBUG(D_INODE, "EARLY_CANCEL on "DFID"\n", PFID(fid));
policy.l_inodebits.bits = bits;
rc = md_cancel_unused(tgt->ltd_exp, fid, &policy,
* Cancel UPDATE lock on child (fid1).
*/
op_data->op_flags |= MF_MDC_CANCEL_FID2;
- rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX,
+ rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_index, LCK_EX,
MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1);
if (rc != 0)
RETURN(rc);
RETURN(PTR_ERR(child_tgt));
if (!S_ISDIR(op_data->op_mode) && tp_tgt)
- rc = __lmv_fid_alloc(lmv, &target_fid, tp_tgt->ltd_idx);
+ rc = __lmv_fid_alloc(lmv, &target_fid, tp_tgt->ltd_index);
else
rc = lmv_fid_alloc(NULL, exp, &target_fid, op_data);
if (rc)
}
/* cancel UPDATE lock of parent master object */
- rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_idx, LCK_EX,
+ rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_index, LCK_EX,
MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1);
if (rc)
RETURN(rc);
op_data->op_fid4 = target_fid;
/* cancel UPDATE locks of target parent */
- rc = lmv_early_cancel(exp, tp_tgt, op_data, tgt->ltd_idx, LCK_EX,
+ rc = lmv_early_cancel(exp, tp_tgt, op_data, tgt->ltd_index, LCK_EX,
MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID2);
if (rc)
RETURN(rc);
/* cancel LOOKUP lock of source if source is remote object */
if (child_tgt != sp_tgt) {
- rc = lmv_early_cancel(exp, sp_tgt, op_data, tgt->ltd_idx,
+ rc = lmv_early_cancel(exp, sp_tgt, op_data, tgt->ltd_index,
LCK_EX, MDS_INODELOCK_LOOKUP,
MF_MDC_CANCEL_FID3);
if (rc)
}
/* cancel ELC locks of source */
- rc = lmv_early_cancel(exp, child_tgt, op_data, tgt->ltd_idx, LCK_EX,
+ rc = lmv_early_cancel(exp, child_tgt, op_data, tgt->ltd_index, LCK_EX,
MDS_INODELOCK_ELC, MF_MDC_CANCEL_FID3);
if (rc)
RETURN(rc);
op_data->op_flags |= MF_MDC_CANCEL_FID4;
/* cancel UPDATE locks of target parent */
- rc = lmv_early_cancel(exp, tp_tgt, op_data, tgt->ltd_idx, LCK_EX,
+ rc = lmv_early_cancel(exp, tp_tgt, op_data, tgt->ltd_index, LCK_EX,
MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID2);
if (rc != 0)
RETURN(rc);
/* cancel LOOKUP lock of target on target parent */
if (tgt != tp_tgt) {
rc = lmv_early_cancel(exp, tp_tgt, op_data,
- tgt->ltd_idx, LCK_EX,
+ tgt->ltd_index, LCK_EX,
MDS_INODELOCK_LOOKUP,
MF_MDC_CANCEL_FID4);
if (rc != 0)
RETURN(PTR_ERR(src_tgt));
/* cancel ELC locks of source */
- rc = lmv_early_cancel(exp, src_tgt, op_data, tgt->ltd_idx,
+ rc = lmv_early_cancel(exp, src_tgt, op_data, tgt->ltd_index,
LCK_EX, MDS_INODELOCK_ELC,
MF_MDC_CANCEL_FID3);
if (rc != 0)
RETURN(PTR_ERR(sp_tgt));
/* cancel UPDATE locks of source parent */
- rc = lmv_early_cancel(exp, sp_tgt, op_data, tgt->ltd_idx, LCK_EX,
+ rc = lmv_early_cancel(exp, sp_tgt, op_data, tgt->ltd_index, LCK_EX,
MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1);
if (rc != 0)
RETURN(rc);
/* cancel LOOKUP lock of source on source parent */
if (src_tgt != sp_tgt) {
rc = lmv_early_cancel(exp, sp_tgt, op_data,
- tgt->ltd_idx, LCK_EX,
+ tgt->ltd_index, LCK_EX,
MDS_INODELOCK_LOOKUP,
MF_MDC_CANCEL_FID3);
if (rc != 0)
/* cancel LOOKUP lock of target on target parent */
if (tgt != tp_tgt) {
rc = lmv_early_cancel(exp, tp_tgt, op_data,
- tgt->ltd_idx, LCK_EX,
+ tgt->ltd_index, LCK_EX,
MDS_INODELOCK_LOOKUP,
MF_MDC_CANCEL_FID4);
if (rc != 0)
op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3;
if (parent_tgt != tgt)
- rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_idx,
+ rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_index,
LCK_EX, MDS_INODELOCK_LOOKUP,
MF_MDC_CANCEL_FID3);
- rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX,
+ rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_index, LCK_EX,
MDS_INODELOCK_ELC, MF_MDC_CANCEL_FID3);
if (rc)
RETURN(rc);
CDEBUG(D_INODE, "unlink with fid="DFID"/"DFID" -> mds #%u\n",
- PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx);
+ PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
+ tgt->ltd_index);
rc = md_unlink(tgt->ltd_exp, op_data, request);
if (rc == -ENOENT && lmv_dir_retry_check_update(op_data)) {