return lu_device_is_cl(o->lo_dev);
}
-/* Generic subset of OSTs */
-struct ost_pool {
+/* Generic subset of tgts */
+struct lu_tgt_pool {
__u32 *op_array; /* array of index of
* lov_obd->lov_tgts */
- unsigned int op_count; /* number of OSTs in the array */
- unsigned int op_size; /* allocated size of lp_array */
- struct rw_semaphore op_rw_sem; /* to protect ost_pool use */
+ unsigned int op_count; /* number of tgts in the array */
+ unsigned int op_size; /* allocated size of op_array */
+ struct rw_semaphore op_rw_sem; /* to protect lu_tgt_pool use */
};
/* round-robin QoS data for LOD/LMV */
__u32 lqr_start_idx; /* start index of new inode */
__u32 lqr_offset_idx;/* aliasing for start_idx */
int lqr_start_count;/* reseed counter */
- struct ost_pool lqr_pool; /* round-robin optimized list */
+ struct lu_tgt_pool lqr_pool; /* round-robin optimized list */
unsigned long lqr_dirty:1; /* recalc round-robin list */
};
struct lu_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK];
};
+/* QoS data for LOD/LMV */
+struct lu_qos {
+ struct list_head lq_svr_list; /* lu_svr_qos list */
+ struct rw_semaphore lq_rw_sem;
+ __u32 lq_active_svr_count;
+ unsigned int lq_prio_free; /* priority for free space */
+ unsigned int lq_threshold_rr;/* priority for rr */
+ struct lu_qos_rr lq_rr; /* round robin qos data */
+ unsigned long lq_dirty:1, /* recalc qos data */
+ lq_same_space:1,/* the servers all have approx.
+ * the same space avail */
+ lq_reset:1; /* zero current penalties */
+};
+
struct lu_tgt_descs {
+ union {
+ struct lov_desc ltd_lov_desc;
+ struct lmv_desc ltd_lmv_desc;
+ };
/* list of known TGTs */
struct lu_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS];
/* Size of the lu_tgts array, granted to be a power of 2 */
__u32 ltd_tgts_size;
- /* number of registered TGTs */
- __u32 ltd_tgtnr;
/* bitmap of TGTs available */
struct cfs_bitmap *ltd_tgt_bitmap;
/* TGTs scheduled to be deleted */
struct mutex ltd_mutex;
/* read/write semaphore used for array relocation */
struct rw_semaphore ltd_rw_sem;
+ /* QoS */
+ struct lu_qos ltd_qos;
+ /* all tgts in a packed array */
+ struct lu_tgt_pool ltd_tgt_pool;
+ /* true if tgt is MDT */
+ bool ltd_is_mdt;
};
#define LTD_TGT(ltd, index) \
(ltd)->ltd_tgt_idx[(index) / \
TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK]
-/* QoS data for LOD/LMV */
-struct lu_qos {
- struct list_head lq_svr_list; /* lu_svr_qos list */
- struct rw_semaphore lq_rw_sem;
- __u32 lq_active_svr_count;
- unsigned int lq_prio_free; /* priority for free space */
- unsigned int lq_threshold_rr;/* priority for rr */
- struct lu_qos_rr lq_rr; /* round robin qos data */
- unsigned long lq_dirty:1, /* recalc qos data */
- lq_same_space:1,/* the servers all have approx.
- * the same space avail */
- lq_reset:1; /* zero current penalties */
-};
-
-void lu_qos_rr_init(struct lu_qos_rr *lqr);
-int lqos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd);
-int lqos_del_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd);
-bool lqos_is_usable(struct lu_qos *qos, __u32 active_tgt_nr);
-int lqos_calc_penalties(struct lu_qos *qos, struct lu_tgt_descs *ltd,
- __u32 active_tgt_nr, __u32 maxage, bool is_mdt);
-void lqos_calc_weight(struct lu_tgt_desc *tgt);
-int lqos_recalc_weight(struct lu_qos *qos, struct lu_tgt_descs *ltd,
- struct lu_tgt_desc *tgt, __u32 active_tgt_nr,
- __u64 *total_wt);
u64 lu_prandom_u64_max(u64 ep_ro);
+void lu_qos_rr_init(struct lu_qos_rr *lqr);
+int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd);
+void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt);
-int lu_tgt_descs_init(struct lu_tgt_descs *ltd);
+int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt);
void lu_tgt_descs_fini(struct lu_tgt_descs *ltd);
-int lu_tgt_descs_add(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt);
-void lu_tgt_descs_del(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt);
+int ltd_add_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt);
+void ltd_del_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt);
+bool ltd_qos_is_usable(struct lu_tgt_descs *ltd);
+int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd);
+int ltd_qos_update(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt,
+ __u64 *total_wt);
static inline struct lu_tgt_desc *ltd_first_tgt(struct lu_tgt_descs *ltd)
{
struct lov_obd {
struct lov_desc desc;
struct lov_tgt_desc **lov_tgts; /* sparse array */
- struct ost_pool lov_packed; /* all OSTs in a packed
+ struct lu_tgt_pool lov_packed; /* all OSTs in a packed
array */
struct mutex lov_lock;
struct obd_connect_data lov_ocd;
struct lmv_obd {
struct lu_client_fld lmv_fld;
spinlock_t lmv_lock;
- struct lmv_desc desc;
int connected;
int max_easize;
struct kobject *lmv_tgts_kobj;
void *lmv_cache;
- struct lu_qos lmv_qos;
__u32 lmv_qos_rr_index;
};
+#define lmv_mdt_count lmv_mdt_descs.ltd_lmv_desc.ld_tgt_count
+#define lmv_qos lmv_mdt_descs.ltd_qos
+
/* Minimum sector size is 512 */
#define MAX_GUARD_NUMBER (PAGE_SIZE / 512)
CDEBUG(D_INODE, "FLD lookup got mds #%x for fid="DFID"\n",
*mds, PFID(fid));
- if (*mds >= lmv->desc.ld_tgt_count) {
+ if (*mds >= lmv->lmv_mdt_descs.ltd_tgts_size) {
rc = -EINVAL;
CERROR("%s: FLD lookup got invalid mds #%x (max: %x) for fid="DFID": rc = %d\n",
- obd->obd_name, *mds, lmv->desc.ld_tgt_count, PFID(fid),
- rc);
+ obd->obd_name, *mds, lmv->lmv_mdt_descs.ltd_tgts_size,
+ PFID(fid), rc);
}
RETURN(rc);
}
u32 mdt_idx;
int rc;
- if (lmv->desc.ld_tgt_count < 2)
+ if (lmv->lmv_mdt_count < 2)
return 0;
rc = lmv_fld_lookup(lmv, fid, &mdt_idx);
void lmv_activate_target(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt,
int activate)
{
- if (tgt->ltd_active == activate)
- return;
+ if (tgt->ltd_active == activate)
+ return;
- tgt->ltd_active = activate;
- lmv->desc.ld_active_tgt_count += (activate ? 1 : -1);
+ tgt->ltd_active = activate;
+ lmv->lmv_mdt_descs.ltd_lmv_desc.ld_active_tgt_count +=
+ (activate ? 1 : -1);
tgt->ltd_exp->exp_obd->obd_inactive = !activate;
}
tgt->ltd_active = 1;
tgt->ltd_exp = mdc_exp;
- lmv->desc.ld_active_tgt_count++;
+ lmv->lmv_mdt_descs.ltd_lmv_desc.ld_active_tgt_count++;
md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize);
- rc = lqos_add_tgt(&lmv->lmv_qos, tgt);
+ rc = lu_qos_add_tgt(&lmv->lmv_qos, tgt);
if (rc) {
obd_disconnect(mdc_exp);
RETURN(rc);
static void lmv_del_target(struct lmv_obd *lmv, struct lu_tgt_desc *tgt)
{
LASSERT(tgt);
- lqos_del_tgt(&lmv->lmv_qos, tgt);
- lu_tgt_descs_del(&lmv->lmv_mdt_descs, tgt);
+ ltd_del_tgt(&lmv->lmv_mdt_descs, tgt);
OBD_FREE_PTR(tgt);
}
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt;
struct lu_tgt_descs *ltd = &lmv->lmv_mdt_descs;
- int orig_tgt_count = 0;
int rc = 0;
ENTRY;
tgt->ltd_active = 0;
mutex_lock(&ltd->ltd_mutex);
- rc = lu_tgt_descs_add(ltd, tgt);
- if (!rc && index >= lmv->desc.ld_tgt_count) {
- orig_tgt_count = lmv->desc.ld_tgt_count;
- lmv->desc.ld_tgt_count = index + 1;
- }
+ rc = ltd_add_tgt(ltd, tgt);
mutex_unlock(&ltd->ltd_mutex);
if (rc)
RETURN(0);
rc = lmv_connect_mdc(obd, tgt);
- if (rc != 0) {
- mutex_lock(&ltd->ltd_mutex);
- lmv->desc.ld_tgt_count = orig_tgt_count;
- memset(tgt, 0, sizeof(*tgt));
- mutex_unlock(&ltd->ltd_mutex);
- } else {
+ if (!rc) {
int easize = sizeof(struct lmv_stripe_md) +
- lmv->desc.ld_tgt_count * sizeof(struct lu_fid);
+ lmv->lmv_mdt_count * sizeof(struct lu_fid);
+
lmv_init_ea_size(obd->obd_self_export, easize, 0);
}
if (lmv->connected)
GOTO(unlock, rc = 0);
- if (lmv->desc.ld_tgt_count == 0) {
+ if (!lmv->lmv_mdt_count) {
CERROR("%s: no targets configured: rc = -EINVAL\n",
obd->obd_name);
GOTO(unlock, rc = -EINVAL);
}
lmv->connected = 1;
- easize = lmv_mds_md_size(lmv->desc.ld_tgt_count, LMV_MAGIC);
+ easize = lmv_mds_md_size(lmv->lmv_mdt_count, LMV_MAGIC);
lmv_init_ea_size(obd->obd_self_export, easize, 0);
EXIT;
unlock:
if (!tgt->ltd_exp)
continue;
- --lmv->desc.ld_active_tgt_count;
+ --lmv->lmv_mdt_descs.ltd_lmv_desc.ld_active_tgt_count;
obd_disconnect(tgt->ltd_exp);
}
struct lmv_obd *lmv = &obddev->u.lmv;
struct lu_tgt_desc *tgt = NULL;
int set = 0;
- __u32 count = lmv->desc.ld_tgt_count;
+ __u32 count = lmv->lmv_mdt_count;
int rc = 0;
ENTRY;
__u32 index;
memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
- if (index >= count)
+
+ if (index >= lmv->lmv_mdt_descs.ltd_tgts_size)
RETURN(-ENODEV);
tgt = lmv_tgt(lmv, index);
struct obd_quotactl *oqctl;
if (qctl->qc_valid == QC_MDTIDX) {
- if (count <= qctl->qc_idx)
- RETURN(-EINVAL);
-
tgt = lmv_tgt(lmv, qctl->qc_idx);
- if (!tgt || !tgt->ltd_exp)
- RETURN(-EINVAL);
} else if (qctl->qc_valid == QC_UUID) {
lmv_foreach_tgt(lmv, tgt) {
if (!obd_uuid_equals(&tgt->ltd_uuid,
RETURN(-EINVAL);
}
- if (tgt->ltd_index >= count)
- RETURN(-EAGAIN);
+ if (!tgt || !tgt->ltd_exp)
+ RETURN(-EINVAL);
- LASSERT(tgt != NULL && tgt->ltd_exp != NULL);
OBD_ALLOC_PTR(oqctl);
if (!oqctl)
RETURN(-ENOMEM);
ENTRY;
- if (lmv->desc.ld_tgt_count == 1)
+ if (lmv->lmv_mdt_count == 1)
RETURN(0);
lum = op_data->op_data;
RETURN(-EINVAL);
}
- obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid);
- lmv->desc.ld_tgt_count = 0;
- lmv->desc.ld_active_tgt_count = 0;
- lmv->desc.ld_qos_maxage = LMV_DESC_QOS_MAXAGE_DEFAULT;
+ obd_str2uuid(&lmv->lmv_mdt_descs.ltd_lmv_desc.ld_uuid,
+ desc->ld_uuid.uuid);
+ lmv->lmv_mdt_descs.ltd_lmv_desc.ld_tgt_count = 0;
+ lmv->lmv_mdt_descs.ltd_lmv_desc.ld_active_tgt_count = 0;
+ lmv->lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage =
+ LMV_DESC_QOS_MAXAGE_DEFAULT;
lmv->max_def_easize = 0;
lmv->max_easize = 0;
spin_lock_init(&lmv->lmv_lock);
- /* Set up allocation policy (QoS and RR) */
- INIT_LIST_HEAD(&lmv->lmv_qos.lq_svr_list);
- init_rwsem(&lmv->lmv_qos.lq_rw_sem);
- lmv->lmv_qos.lq_dirty = 1;
- lmv->lmv_qos.lq_reset = 1;
- /* Default priority is toward free space balance */
- lmv->lmv_qos.lq_prio_free = 232;
- /* Default threshold for rr (roughly 17%) */
- lmv->lmv_qos.lq_threshold_rr = 43;
-
- lu_qos_rr_init(&lmv->lmv_qos.lq_rr);
-
/*
* initialize rr_index to lower 32bit of netid, so that client
* can distribute subdirs evenly from the beginning.
if (rc)
CERROR("Can't init FLD, err %d\n", rc);
- rc = lu_tgt_descs_init(&lmv->lmv_mdt_descs);
+ rc = lu_tgt_descs_init(&lmv->lmv_mdt_descs, true);
if (rc)
CWARN("%s: error initialize target table: rc = %d\n",
obd->obd_name, rc);
if (flags & OBD_STATFS_FOR_MDT0)
return 0;
- if (lmv->lmv_statfs_start || lmv->desc.ld_tgt_count == 1)
+ if (lmv->lmv_statfs_start || lmv->lmv_mdt_count == 1)
return lmv->lmv_statfs_start;
/* choose initial MDT for this client */
/* We dont need a full 64-bit modulus, just enough
* to distribute the requests across MDTs evenly.
*/
- lmv->lmv_statfs_start =
- (u32)lnet_id.nid % lmv->desc.ld_tgt_count;
+ lmv->lmv_statfs_start = (u32)lnet_id.nid %
+ lmv->lmv_mdt_count;
break;
}
}
/* distribute statfs among MDTs */
idx = lmv_select_statfs_mdt(lmv, flags);
- for (i = 0; i < lmv->desc.ld_tgt_count; i++, idx++) {
- idx = idx % lmv->desc.ld_tgt_count;
+ for (i = 0; i < lmv->lmv_mdt_descs.ltd_tgts_size; i++, idx++) {
+ idx = idx % lmv->lmv_mdt_descs.ltd_tgts_size;
tgt = lmv_tgt(lmv, idx);
if (!tgt || !tgt->ltd_exp)
continue;
int rc;
if (ktime_get_seconds() - tgt->ltd_statfs_age <
- obd->u.lmv.desc.ld_qos_maxage)
+ obd->u.lmv.lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage)
return 0;
rc = obd_statfs_async(tgt->ltd_exp, &oinfo, 0, NULL);
ENTRY;
- if (!lqos_is_usable(&lmv->lmv_qos, lmv->desc.ld_active_tgt_count))
+ if (!ltd_qos_is_usable(&lmv->lmv_mdt_descs))
RETURN(ERR_PTR(-EAGAIN));
down_write(&lmv->lmv_qos.lq_rw_sem);
- if (!lqos_is_usable(&lmv->lmv_qos, lmv->desc.ld_active_tgt_count))
+ if (!ltd_qos_is_usable(&lmv->lmv_mdt_descs))
GOTO(unlock, tgt = ERR_PTR(-EAGAIN));
- rc = lqos_calc_penalties(&lmv->lmv_qos, &lmv->lmv_mdt_descs,
- lmv->desc.ld_active_tgt_count,
- lmv->desc.ld_qos_maxage, true);
+ rc = ltd_qos_penalties_calc(&lmv->lmv_mdt_descs);
if (rc)
GOTO(unlock, tgt = ERR_PTR(rc));
continue;
tgt->ltd_qos.ltq_usable = 1;
- lqos_calc_weight(tgt);
+ lu_tgt_qos_weight_calc(tgt);
total_weight += tgt->ltd_qos.ltq_weight;
}
continue;
*mdt = tgt->ltd_index;
- lqos_recalc_weight(&lmv->lmv_qos, &lmv->lmv_mdt_descs, tgt,
- lmv->desc.ld_active_tgt_count,
- &total_weight);
+ ltd_qos_update(&lmv->lmv_mdt_descs, tgt, &total_weight);
GOTO(unlock, rc = 0);
}
ENTRY;
spin_lock(&lmv->lmv_qos.lq_rr.lqr_alloc);
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
- index = (i + lmv->lmv_qos_rr_index) % lmv->desc.ld_tgt_count;
+ for (i = 0; i < lmv->lmv_mdt_descs.ltd_tgts_size; i++) {
+ index = (i + lmv->lmv_qos_rr_index) %
+ lmv->lmv_mdt_descs.ltd_tgts_size;
tgt = lmv_tgt(lmv, index);
if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
continue;
*mdt = tgt->ltd_index;
- lmv->lmv_qos_rr_index = (*mdt + 1) % lmv->desc.ld_tgt_count;
+ lmv->lmv_qos_rr_index = (*mdt + 1) %
+ lmv->lmv_mdt_descs.ltd_tgts_size;
spin_unlock(&lmv->lmv_qos.lq_rr.lqr_alloc);
RETURN(tgt);
ENTRY;
- if (!lmv->desc.ld_active_tgt_count)
+ if (!lmv->lmv_mdt_descs.ltd_lmv_desc.ld_active_tgt_count)
RETURN(-EIO);
if (lmv_dir_bad_hash(op_data->op_mea1))
exp->exp_connect_data = *(struct obd_connect_data *)val;
RETURN(rc);
} else if (KEY_IS(KEY_TGT_COUNT)) {
- *((int *)val) = lmv->desc.ld_tgt_count;
+ *((int *)val) = lmv->lmv_mdt_descs.ltd_tgts_size;
RETURN(0);
}
struct obd_device *obddev = class_exp2obd(exp);
struct ptlrpc_request_set *set = _set;
struct lmv_obd *lmv = &obddev->u.lmv;
- int tgt_count = lmv->desc.ld_tgt_count;
+ int tgt_count = lmv->lmv_mdt_count;
struct lu_tgt_desc *tgt;
struct fid_array *fat, **fas = NULL;
int i, rc, **rcs = NULL;
* since this can be easily found, and only try others if that fails.
*/
for (i = 0, index = lmv_fid2tgt_index(lmv, fid);
- i < lmv->desc.ld_tgt_count;
- i++, index = (index + 1) % lmv->desc.ld_tgt_count) {
+ i < lmv->lmv_mdt_descs.ltd_tgts_size;
+ i++, index = (index + 1) % lmv->lmv_mdt_descs.ltd_tgts_size) {
if (index < 0) {
CDEBUG(D_HA, "%s: "DFID" is inaccessible: rc = %d\n",
obd->obd_name, PFID(fid), index);
{
struct obd_device *dev = container_of(kobj, struct obd_device,
obd_kset.kobj);
- struct lmv_desc *desc;
- desc = &dev->u.lmv.desc;
- return sprintf(buf, "%u\n", desc->ld_tgt_count);
+ return sprintf(buf, "%u\n", dev->u.lmv.lmv_mdt_count);
}
LUSTRE_RO_ATTR(numobd);
{
struct obd_device *dev = container_of(kobj, struct obd_device,
obd_kset.kobj);
- struct lmv_desc *desc;
- desc = &dev->u.lmv.desc;
- return sprintf(buf, "%u\n", desc->ld_active_tgt_count);
+ return sprintf(buf, "%u\n",
+ dev->u.lmv.lmv_mdt_descs.ltd_lmv_desc.ld_active_tgt_count);
}
LUSTRE_RO_ATTR(activeobd);
{
struct obd_device *dev = container_of(kobj, struct obd_device,
obd_kset.kobj);
- struct lmv_desc *desc;
- desc = &dev->u.lmv.desc;
- return sprintf(buf, "%s\n", desc->ld_uuid.uuid);
+ return sprintf(buf, "%s\n",
+ dev->u.lmv.lmv_mdt_descs.ltd_lmv_desc.ld_uuid.uuid);
}
LUSTRE_RO_ATTR(desc_uuid);
struct obd_device *dev = container_of(kobj, struct obd_device,
obd_kset.kobj);
- return sprintf(buf, "%u\n", dev->u.lmv.desc.ld_qos_maxage);
+ return sprintf(buf, "%u\n",
+ dev->u.lmv.lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage);
}
static ssize_t qos_maxage_store(struct kobject *kobj,
if (rc)
return rc;
- dev->u.lmv.desc.ld_qos_maxage = val;
+ dev->u.lmv.lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage = val;
return count;
}
struct llog_ctxt *ctxt = NULL;
struct lu_env env;
struct lu_target *lut;
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
- struct lod_tgt_desc *tgt = NULL;
+ struct lu_tgt_desc *mdt = NULL;
time64_t start;
int retries = 0;
int rc;
GOTO(out, rc = 0);
}
- ltd_foreach_tgt(ltd, tgt) {
- if (!tgt->ltd_got_update_log) {
+ lod_foreach_mdt(lod, mdt) {
+ if (!mdt->ltd_got_update_log) {
spin_unlock(&lod->lod_lock);
GOTO(out, rc = 0);
}
thread = &lod->lod_child_recovery_thread;
index = master_index;
} else {
- struct lu_tgt_desc *tgt;
+ struct lu_tgt_desc *mdt;
- ltd_foreach_tgt(&lod->lod_mdt_descs, tgt) {
- if (tgt->ltd_tgt == dt) {
- index = tgt->ltd_index;
- subtgt = tgt;
+ lod_foreach_mdt(lod, mdt) {
+ if (mdt->ltd_tgt == dt) {
+ index = mdt->ltd_index;
+ subtgt = mdt;
break;
}
}
static void lod_sub_stop_recovery_threads(const struct lu_env *env,
struct lod_device *lod)
{
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
struct ptlrpc_thread *thread;
- struct lu_tgt_desc *tgt;
+ struct lu_tgt_desc *mdt;
/*
* Stop the update log commit cancel threads and finish master
wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
}
- lod_getref(ltd);
- ltd_foreach_tgt(ltd, tgt) {
- thread = tgt->ltd_recovery_thread;
+ lod_getref(&lod->lod_mdt_descs);
+ lod_foreach_mdt(lod, mdt) {
+ thread = mdt->ltd_recovery_thread;
if (thread && thread->t_flags & SVC_RUNNING) {
thread->t_flags = SVC_STOPPING;
wake_up(&thread->t_ctl_waitq);
wait_event(thread->t_ctl_waitq,
thread->t_flags & SVC_STOPPED);
- OBD_FREE_PTR(tgt->ltd_recovery_thread);
- tgt->ltd_recovery_thread = NULL;
+ OBD_FREE_PTR(mdt->ltd_recovery_thread);
+ mdt->ltd_recovery_thread = NULL;
}
}
-
- lod_putref(lod, ltd);
+ lod_putref(lod, &lod->lod_mdt_descs);
}
/**
static void lod_sub_fini_all_llogs(const struct lu_env *env,
struct lod_device *lod)
{
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
- struct lu_tgt_desc *tgt;
+ struct lu_tgt_desc *mdt;
/*
* Stop the update log commit cancel threads and finish master
*/
lod_sub_fini_llog(env, lod->lod_child,
&lod->lod_child_recovery_thread);
- lod_getref(ltd);
- ltd_foreach_tgt(ltd, tgt)
- lod_sub_fini_llog(env, tgt->ltd_tgt,
- tgt->ltd_recovery_thread);
- lod_putref(lod, ltd);
+ lod_getref(&lod->lod_mdt_descs);
+ lod_foreach_mdt(lod, mdt)
+ lod_sub_fini_llog(env, mdt->ltd_tgt,
+ mdt->ltd_recovery_thread);
+ lod_putref(lod, &lod->lod_mdt_descs);
}
static char *lod_show_update_logs_retrievers(void *data, int *size, int *count)
{
struct lod_device *lod = (struct lod_device *)data;
struct lu_target *lut = lod2lu_dev(lod)->ld_site->ls_tgt;
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
- struct lod_tgt_desc *tgt = NULL;
+ struct lu_tgt_desc *mdt = NULL;
char *buf;
int len = 0;
int rc;
(*count)++;
}
- ltd_foreach_tgt(ltd, tgt) {
- if (!tgt->ltd_got_update_log) {
+ lod_foreach_mdt(lod, mdt) {
+ if (!mdt->ltd_got_update_log) {
rc = snprintf(buf + len, *size - len, " %04x",
- tgt->ltd_index);
+ mdt->ltd_index);
if (unlikely(rc <= 0))
break;
rc = lod_add_device(env, lod, arg1, index, gen,
mdt_index, LUSTRE_OSC_NAME, 0);
} else {
- rc = lod_del_device(env, lod,
- &lod->lod_ost_descs,
- arg1, index, gen, true);
+ rc = lod_del_device(env, lod, &lod->lod_ost_descs,
+ arg1, index, gen);
}
break;
*/
param = lustre_cfg_buf(lcfg, 1);
if (strstr(param, "osp") && strstr(param, ".active=")) {
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
struct lod_tgt_desc *sub_tgt = NULL;
- struct lu_tgt_desc *tgt;
+ struct lu_tgt_desc *mdt;
char *ptr;
char *tmp;
GOTO(out, rc);
}
- ltd_foreach_tgt(ltd, tgt) {
- if (tgt->ltd_tgt->dd_lu_dev.ld_obd == obd) {
- sub_tgt = tgt;
+ lod_foreach_mdt(lod, mdt) {
+ if (mdt->ltd_tgt->dd_lu_dev.ld_obd == obd) {
+ sub_tgt = mdt;
break;
}
}
{
struct lod_device *lod = lu2lod_dev(dev);
struct lu_device *next = &lod->lod_child->dd_lu_dev;
- unsigned int i;
+ struct lod_tgt_desc *tgt;
int rc;
ENTRY;
rc = next->ld_ops->ldo_recovery_complete(env, next);
lod_getref(&lod->lod_ost_descs);
- if (lod->lod_osts_size > 0) {
- cfs_foreach_bit(lod->lod_ost_bitmap, i) {
- struct lod_tgt_desc *tgt;
-
- tgt = OST_TGT(lod, i);
+ if (lod->lod_ost_descs.ltd_tgts_size > 0) {
+ lod_foreach_ost(lod, tgt) {
LASSERT(tgt && tgt->ltd_tgt);
- next = &tgt->ltd_ost->dd_lu_dev;
+ next = &tgt->ltd_tgt->dd_lu_dev;
rc = next->ld_ops->ldo_recovery_complete(env, next);
if (rc)
CERROR("%s: can't complete recovery on #%d: rc = %d\n",
- lod2obd(lod)->obd_name, i, rc);
+ lod2obd(lod)->obd_name, tgt->ltd_index,
+ rc);
}
}
lod_putref(lod, &lod->lod_ost_descs);
*/
static int lod_sub_init_llogs(const struct lu_env *env, struct lod_device *lod)
{
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
- struct lu_tgt_desc *tgt;
+ struct lu_tgt_desc *mdt;
int rc;
ENTRY;
if (rc < 0)
RETURN(rc);
- ltd_foreach_tgt(ltd, tgt) {
- rc = lod_sub_init_llog(env, lod, tgt->ltd_tgt);
+ lod_foreach_mdt(lod, mdt) {
+ rc = lod_sub_init_llog(env, lod, mdt->ltd_tgt);
if (rc != 0)
break;
}
struct obd_statfs *sfs, struct obd_statfs_info *info)
{
struct lod_device *lod = dt2lod_dev(dev);
- struct lod_ost_desc *ost;
- struct lod_mdt_desc *mdt;
+ struct lu_tgt_desc *tgt;
struct obd_statfs ost_sfs;
u64 ost_files = 0;
u64 ost_ffree = 0;
- int i, rc, bs;
+ int rc, bs;
rc = dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs);
if (rc)
sfs->os_granted = 0;
lod_getref(&lod->lod_mdt_descs);
- lod_foreach_mdt(lod, i) {
- mdt = MDT_TGT(lod, i);
- LASSERT(mdt && mdt->ltd_mdt);
- rc = dt_statfs(env, mdt->ltd_mdt, &ost_sfs);
+ lod_foreach_mdt(lod, tgt) {
+ rc = dt_statfs(env, tgt->ltd_tgt, &ost_sfs);
/* ignore errors */
if (rc)
continue;
* just fallback to pre-DoM policy if any OST is alive
*/
lod_getref(&lod->lod_ost_descs);
- lod_foreach_ost(lod, i) {
- ost = OST_TGT(lod, i);
- LASSERT(ost && ost->ltd_ost);
- rc = dt_statfs(env, ost->ltd_ost, &ost_sfs);
+ lod_foreach_ost(lod, tgt) {
+ rc = dt_statfs(env, tgt->ltd_tgt, &ost_sfs);
/* ignore errors */
if (rc || ost_sfs.os_bsize == 0)
continue;
static int lod_sync(const struct lu_env *env, struct dt_device *dev)
{
struct lod_device *lod = dt2lod_dev(dev);
- struct lod_ost_desc *ost;
- struct lod_mdt_desc *mdt;
- unsigned int i;
+ struct lu_tgt_desc *tgt;
int rc = 0;
ENTRY;
lod_getref(&lod->lod_ost_descs);
- lod_foreach_ost(lod, i) {
- ost = OST_TGT(lod, i);
- LASSERT(ost && ost->ltd_ost);
- if (!ost->ltd_active)
+ lod_foreach_ost(lod, tgt) {
+ if (!tgt->ltd_active)
continue;
- rc = dt_sync(env, ost->ltd_ost);
+ rc = dt_sync(env, tgt->ltd_tgt);
if (rc) {
if (rc != -ENOTCONN) {
CERROR("%s: can't sync ost %u: rc = %d\n",
- lod2obd(lod)->obd_name, i, rc);
+ lod2obd(lod)->obd_name, tgt->ltd_index,
+ rc);
break;
}
rc = 0;
RETURN(rc);
lod_getref(&lod->lod_mdt_descs);
- lod_foreach_mdt(lod, i) {
- mdt = MDT_TGT(lod, i);
- LASSERT(mdt && mdt->ltd_mdt);
- if (!mdt->ltd_active)
+ lod_foreach_mdt(lod, tgt) {
+ if (!tgt->ltd_active)
continue;
- rc = dt_sync(env, mdt->ltd_mdt);
+ rc = dt_sync(env, tgt->ltd_tgt);
if (rc) {
if (rc != -ENOTCONN) {
CERROR("%s: can't sync mdt %u: rc = %d\n",
- lod2obd(lod)->obd_name, i, rc);
+ lod2obd(lod)->obd_name, tgt->ltd_index,
+ rc);
break;
}
rc = 0;
spin_lock_init(&lod->lod_lock);
spin_lock_init(&lod->lod_connects_lock);
- lu_tgt_descs_init(&lod->lod_mdt_descs);
- lu_tgt_descs_init(&lod->lod_ost_descs);
+ lu_tgt_descs_init(&lod->lod_mdt_descs, true);
+ lu_tgt_descs_init(&lod->lod_ost_descs, false);
RETURN(0);
lod_procfs_fini(lod);
- rc = lod_fini_tgt(env, lod, &lod->lod_ost_descs, true);
+ rc = lod_fini_tgt(env, lod, &lod->lod_ost_descs);
if (rc)
CERROR("%s: can not fini ost descriptors: rc = %d\n",
lod2obd(lod)->obd_name, rc);
- rc = lod_fini_tgt(env, lod, &lod->lod_mdt_descs, false);
+ rc = lod_fini_tgt(env, lod, &lod->lod_mdt_descs);
if (rc)
CERROR("%s: can not fini mdt descriptors: rc = %d\n",
lod2obd(lod)->obd_name, rc);
struct obd_device *obd = exp->exp_obd;
struct lod_device *d;
struct lod_tgt_desc *tgt;
- unsigned int i;
int rc = 1;
if (!obd->obd_set_up || obd->obd_stopping)
d = lu2lod_dev(obd->obd_lu_dev);
lod_getref(&d->lod_ost_descs);
- lod_foreach_ost(d, i) {
- tgt = OST_TGT(d, i);
- LASSERT(tgt && tgt->ltd_tgt);
+ lod_foreach_ost(d, tgt) {
rc = obd_get_info(env, tgt->ltd_exp, keylen, key,
vallen, val);
/* one healthy device is enough */
lod_putref(d, &d->lod_ost_descs);
lod_getref(&d->lod_mdt_descs);
- lod_foreach_mdt(d, i) {
+ lod_foreach_mdt(d, tgt) {
struct llog_ctxt *ctxt;
- tgt = MDT_TGT(d, i);
- LASSERT(tgt != NULL);
- LASSERT(tgt->ltd_tgt != NULL);
if (!tgt->ltd_active)
continue;
struct lod_device *d;
struct lod_tgt_desc *tgt;
int no_set = 0;
- int i, rc = 0, rc2;
+ int rc = 0, rc2;
ENTRY;
d = lu2lod_dev(obd->obd_lu_dev);
lod_getref(&d->lod_ost_descs);
- lod_foreach_ost(d, i) {
- tgt = OST_TGT(d, i);
- LASSERT(tgt && tgt->ltd_tgt);
+ lod_foreach_ost(d, tgt) {
if (!tgt->ltd_active)
continue;
lod_putref(d, &d->lod_ost_descs);
lod_getref(&d->lod_mdt_descs);
- lod_foreach_mdt(d, i) {
- tgt = MDT_TGT(d, i);
- LASSERT(tgt && tgt->ltd_tgt);
+ lod_foreach_mdt(d, tgt) {
if (!tgt->ltd_active)
continue;
rc2 = obd_set_info_async(env, tgt->ltd_exp, keylen, key,
struct pool_desc {
char pool_name[LOV_MAXPOOLNAME + 1];
- struct ost_pool pool_obds; /* pool members */
+ struct lu_tgt_pool pool_obds; /* pool members */
atomic_t pool_refcount;
struct lu_qos_rr pool_rr;
struct hlist_node pool_hash; /* access by poolname */
lod_lmv_failout:1,
lod_child_got_update_log:1;
- /* lov settings descriptor storing static information */
- struct lov_desc lod_desc;
-
/* protect ld_active_tgt_count, ltd_active and lod_md_root */
spinlock_t lod_lock;
/* maximum size of MDT stripe for Data-on-MDT files. */
unsigned int lod_dom_max_stripesize;
- /*FIXME: When QOS and pool is implemented for MDT, probably these
- * structure should be moved to lod_tgt_descs as well.
- */
- /* QoS info per LOD */
- struct lu_qos lod_qos; /* qos info per lod */
-
/* OST pool data */
- struct ost_pool lod_pool_info; /* all OSTs in a packed array */
int lod_pool_count;
struct cfs_hash *lod_pools_hash_body; /* used for key access */
struct list_head lod_pool_list; /* used for sequential access */
struct lod_object *lod_md_root;
};
-#define lod_osts lod_ost_descs.ltd_tgts
-#define lod_ost_bitmap lod_ost_descs.ltd_tgt_bitmap
-#define lod_ostnr lod_ost_descs.ltd_tgtnr
-#define lod_osts_size lod_ost_descs.ltd_tgts_size
-#define ltd_ost ltd_tgt
-#define lod_ost_desc lu_tgt_desc
-
-#define lod_mdts lod_mdt_descs.ltd_tgts
-#define lod_mdt_bitmap lod_mdt_descs.ltd_tgt_bitmap
-#define lod_remote_mdt_count lod_mdt_descs.ltd_tgtnr
-#define lod_mdts_size lod_mdt_descs.ltd_tgts_size
-#define ltd_mdt ltd_tgt
-#define lod_mdt_desc lu_tgt_desc
+#define lod_ost_bitmap lod_ost_descs.ltd_tgt_bitmap
+#define lod_ost_count lod_ost_descs.ltd_lov_desc.ld_tgt_count
+#define lod_remote_mdt_count lod_mdt_descs.ltd_lmv_desc.ld_tgt_count
struct lod_layout_component {
struct lu_extent llc_extent;
__u64 llc_timestamp; /* snapshot time */
char *llc_pool;
/* ost list specified with LOV_USER_MAGIC_SPECIFIC lum */
- struct ost_pool llc_ostlist;
+ struct lu_tgt_pool llc_ostlist;
struct dt_object **llc_stripe;
__u32 *llc_ost_indices;
};
}
}
-#define lod_foreach_ost(__dev, index) \
- if ((__dev)->lod_osts_size > 0) \
- cfs_foreach_bit((__dev)->lod_ost_bitmap, (index))
-
-#define lod_foreach_mdt(mdt_dev, index) \
- cfs_foreach_bit((mdt_dev)->lod_mdt_bitmap, (index))
+#define lod_foreach_mdt(lod, mdt) ltd_foreach_tgt(&(lod)->lod_mdt_descs, mdt)
+#define lod_foreach_ost(lod, ost) ltd_foreach_tgt(&(lod)->lod_ost_descs, ost)
/* lod_dev.c */
extern struct kmem_cache *lod_object_kmem;
char *osp, unsigned index, unsigned gen, int mdt_index,
char *type, int active);
int lod_del_device(const struct lu_env *env, struct lod_device *lod,
- struct lod_tgt_descs *ltd, char *osp, unsigned idx,
- unsigned gen, bool for_ost);
+ struct lod_tgt_descs *ltd, char *osp, unsigned int idx,
+ unsigned int gen);
int lod_fini_tgt(const struct lu_env *env, struct lod_device *lod,
- struct lod_tgt_descs *ltd, bool for_ost);
+ struct lod_tgt_descs *ltd);
int lod_striping_load(const struct lu_env *env, struct lod_object *lo);
int lod_striping_reload(const struct lu_env *env, struct lod_object *lo,
const struct lu_buf *buf);
int lod_fill_mirrors(struct lod_object *lo);
/* lod_pool.c */
-int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count);
-int lod_ost_pool_remove(struct ost_pool *op, __u32 idx);
-int lod_ost_pool_extend(struct ost_pool *op, unsigned int min_count);
+int lod_ost_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count);
+int lod_ost_pool_remove(struct lu_tgt_pool *op, __u32 idx);
+int lod_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count);
struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname);
void lod_pool_putref(struct pool_desc *pool);
-int lod_ost_pool_free(struct ost_pool *op);
+int lod_ost_pool_free(struct lu_tgt_pool *op);
int lod_pool_del(struct obd_device *obd, char *poolname);
-int lod_ost_pool_init(struct ost_pool *op, unsigned int count);
+int lod_ost_pool_init(struct lu_tgt_pool *op, unsigned int count);
extern struct cfs_hash_ops pool_hash_operations;
int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool);
int lod_pool_new(struct obd_device *obd, char *poolname);
if (ltd->ltd_refcount == 0 && ltd->ltd_death_row) {
struct lod_tgt_desc *tgt_desc, *tmp;
struct list_head kill;
- unsigned int idx;
CDEBUG(D_CONFIG, "destroying %d ltd desc\n",
ltd->ltd_death_row);
INIT_LIST_HEAD(&kill);
- cfs_foreach_bit(ltd->ltd_tgt_bitmap, idx) {
- tgt_desc = LTD_TGT(ltd, idx);
+ ltd_foreach_tgt_safe(ltd, tgt_desc, tmp) {
LASSERT(tgt_desc);
-
if (!tgt_desc->ltd_reap)
continue;
list_add(&tgt_desc->ltd_kill, &kill);
/*FIXME: only support ost pool for now */
- if (ltd == &lod->lod_ost_descs) {
- lod_ost_pool_remove(&lod->lod_pool_info, idx);
- if (tgt_desc->ltd_active)
- lod->lod_desc.ld_active_tgt_count--;
- }
- lu_tgt_descs_del(ltd, tgt_desc);
+ if (ltd == &lod->lod_ost_descs)
+ lod_ost_pool_remove(&ltd->ltd_tgt_pool,
+ tgt_desc->ltd_index);
+ ltd_del_tgt(ltd, tgt_desc);
ltd->ltd_death_row--;
}
mutex_unlock(&ltd->ltd_mutex);
list_for_each_entry_safe(tgt_desc, tmp, &kill, ltd_kill) {
int rc;
+
list_del(&tgt_desc->ltd_kill);
- if (ltd == &lod->lod_ost_descs) {
- /* remove from QoS structures */
- rc = lqos_del_tgt(&lod->lod_qos, tgt_desc);
- if (rc)
- CERROR("%s: qos_del_tgt(%s) failed:"
- "rc = %d\n",
- lod2obd(lod)->obd_name,
- obd_uuid2str(&tgt_desc->ltd_uuid),
- rc);
- }
rc = obd_disconnect(tgt_desc->ltd_exp);
if (rc)
CERROR("%s: failed to disconnect %s: rc = %d\n",
down_write(&ltd->ltd_rw_sem);
mutex_lock(&ltd->ltd_mutex);
- lu_tgt_descs_add(ltd, tgt_desc);
+ rc = ltd_add_tgt(ltd, tgt_desc);
+ if (rc)
+ GOTO(out_mutex, rc);
+
+ rc = lu_qos_add_tgt(&ltd->ltd_qos, tgt_desc);
+ if (rc)
+ GOTO(out_del_tgt, rc);
+
if (for_ost) {
- /* pool and qos are not supported for MDS stack yet */
- rc = lod_ost_pool_add(&lod->lod_pool_info, index,
- lod->lod_osts_size);
+ /* pool is not supported for MDS stack yet */
+ rc = lod_ost_pool_add(&ltd->ltd_tgt_pool, index,
+ ltd->ltd_tgts_size);
if (rc) {
CERROR("%s: can't set up pool, failed with %d\n",
obd->obd_name, rc);
- GOTO(out_mutex, rc);
- }
-
- rc = lqos_add_tgt(&lod->lod_qos, tgt_desc);
- if (rc) {
- CERROR("%s: qos_add_tgt failed with %d\n",
- obd->obd_name, rc);
- GOTO(out_pool, rc);
+ GOTO(out_del_tgt, rc);
}
-
- /* The new OST is now a full citizen */
- if (index >= lod->lod_desc.ld_tgt_count)
- lod->lod_desc.ld_tgt_count = index + 1;
- if (active)
- lod->lod_desc.ld_active_tgt_count++;
}
+
mutex_unlock(&ltd->ltd_mutex);
up_write(&ltd->ltd_rw_sem);
thread = LTD_TGT(ltd, index)->ltd_recovery_thread;
OBD_FREE_PTR(thread);
}
-out_pool:
- lod_ost_pool_remove(&lod->lod_pool_info, index);
+ lod_ost_pool_remove(&ltd->ltd_tgt_pool, index);
+out_del_tgt:
+ ltd_del_tgt(ltd, tgt_desc);
out_mutex:
- lu_tgt_descs_del(ltd, tgt_desc);
mutex_unlock(&ltd->ltd_mutex);
up_write(&ltd->ltd_rw_sem);
OBD_FREE_PTR(tgt_desc);
* \param[in] env execution environment for this thread
* \param[in] lod LOD device the target table belongs to
* \param[in] ltd target table
- * \param[in] idx index of the target
- * \param[in] for_ost type of the target: 0 - MDT, 1 - OST
+ * \param[in] tgt target
*/
static void __lod_del_device(const struct lu_env *env, struct lod_device *lod,
- struct lod_tgt_descs *ltd, unsigned idx,
- bool for_ost)
+ struct lod_tgt_descs *ltd, struct lu_tgt_desc *tgt)
{
- LASSERT(LTD_TGT(ltd, idx));
-
- lfsck_del_target(env, lod->lod_child, LTD_TGT(ltd, idx)->ltd_tgt,
- idx, for_ost);
+ /*
+ * Drop the target from LFSCK. The last argument is the "for OST"
+ * flag, now derived from the table type instead of being passed in.
+ */
+ lfsck_del_target(env, lod->lod_child, tgt->ltd_tgt, tgt->ltd_index,
+ !ltd->ltd_is_mdt);
- if (!for_ost && LTD_TGT(ltd, idx)->ltd_recovery_thread != NULL) {
- struct ptlrpc_thread *thread;
+ /*
+ * The recovery thread descriptor is only freed for MDT tables.
+ * Clear the pointer after freeing it: the target stays in the table
+ * until it is reaped, so a repeated call for the same target, or any
+ * later reader of ltd_recovery_thread, must not see freed memory.
+ */
+ if (ltd->ltd_is_mdt && tgt->ltd_recovery_thread) {
+ OBD_FREE_PTR(tgt->ltd_recovery_thread);
+ tgt->ltd_recovery_thread = NULL;
+ }
- thread = LTD_TGT(ltd, idx)->ltd_recovery_thread;
- OBD_FREE_PTR(thread);
- }
-
- if (LTD_TGT(ltd, idx)->ltd_reap == 0) {
- LTD_TGT(ltd, idx)->ltd_reap = 1;
+ /* Mark the target for reaping and count it on death row only once. */
+ if (!tgt->ltd_reap) {
+ tgt->ltd_reap = 1;
ltd->ltd_death_row++;
}
}
* \param[in] env execution environment for this thread
* \param[in] lod LOD device the target table belongs to
* \param[in] ltd target table
- * \param[in] for_ost type of the target: MDT or OST
*
* \retval 0 always
*/
int lod_fini_tgt(const struct lu_env *env, struct lod_device *lod,
- struct lod_tgt_descs *ltd, bool for_ost)
+ struct lod_tgt_descs *ltd)
{
- unsigned int idx;
+ struct lu_tgt_desc *tgt;
if (ltd->ltd_tgts_size <= 0)
return 0;
lod_getref(ltd);
mutex_lock(&ltd->ltd_mutex);
- cfs_foreach_bit(ltd->ltd_tgt_bitmap, idx)
- __lod_del_device(env, lod, ltd, idx, for_ost);
+ ltd_foreach_tgt(ltd, tgt)
+ __lod_del_device(env, lod, ltd, tgt);
mutex_unlock(&ltd->ltd_mutex);
lod_putref(lod, ltd);
* \param[in] osp name of OSP device to be removed
* \param[in] idx index of the target
* \param[in] gen generation number, not used currently
- * \param[in] for_ost type of the target: 0 - MDT, 1 - OST
*
* \retval 0 if the device was scheduled for removal
* \retval -EINVAL if no device was found
*/
int lod_del_device(const struct lu_env *env, struct lod_device *lod,
- struct lod_tgt_descs *ltd, char *osp, unsigned idx,
- unsigned gen, bool for_ost)
+ struct lod_tgt_descs *ltd, char *osp, unsigned int idx,
+ unsigned int gen)
{
struct obd_device *obd;
- int rc = 0;
- struct obd_uuid uuid;
+ struct lu_tgt_desc *tgt;
+ struct obd_uuid uuid;
+ int rc = 0;
+
ENTRY;
CDEBUG(D_CONFIG, "osp:%s idx:%d gen:%d\n", osp, idx, gen);
lod_getref(ltd);
mutex_lock(&ltd->ltd_mutex);
+ tgt = LTD_TGT(ltd, idx);
/* check that the index is allocated in the bitmap */
- if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx) ||
- !LTD_TGT(ltd, idx)) {
+ if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx) || !tgt) {
CERROR("%s: device %d is not set up\n", obd->obd_name, idx);
GOTO(out, rc = -EINVAL);
}
/* check that the UUID matches */
- if (!obd_uuid_equals(&uuid, &LTD_TGT(ltd, idx)->ltd_uuid)) {
+ if (!obd_uuid_equals(&uuid, &tgt->ltd_uuid)) {
CERROR("%s: LOD target UUID %s at index %d does not match %s\n",
- obd->obd_name, obd_uuid2str(&LTD_TGT(ltd,idx)->ltd_uuid),
- idx, osp);
+ obd->obd_name, obd_uuid2str(&tgt->ltd_uuid), idx, osp);
GOTO(out, rc = -EINVAL);
}
- __lod_del_device(env, lod, ltd, idx, for_ost);
+ __lod_del_device(env, lod, ltd, tgt);
EXIT;
out:
mutex_unlock(&ltd->ltd_mutex);
return -EINVAL;
}
- if (unlikely(OST_TGT(md, idx)->ltd_ost == NULL)) {
+ if (unlikely(OST_TGT(md, idx)->ltd_tgt == NULL)) {
CERROR("%s: invalid lod device, for idx: %d\n",
lod2obd(md)->obd_name , idx);
return -EINVAL;
GOTO(out, rc);
}
- nd = &OST_TGT(md,idx)->ltd_ost->dd_lu_dev;
+ nd = &OST_TGT(md, idx)->ltd_tgt->dd_lu_dev;
lod_putref(md, &md->lod_ost_descs);
/* In the function below, .hs_keycmp resolves to
if (!is_from_disk && stripe_offset != LOV_OFFSET_DEFAULT &&
lov_pattern(le32_to_cpu(lum->lmm_pattern)) != LOV_PATTERN_MDT) {
/* if offset is not within valid range [0, osts_size) */
- if (stripe_offset >= d->lod_osts_size) {
+ if (stripe_offset >= d->lod_ost_descs.ltd_tgts_size) {
CDEBUG(D_LAYOUT, "stripe offset %u >= bitmap size %u\n",
- stripe_offset, d->lod_osts_size);
+ stripe_offset, d->lod_ost_descs.ltd_tgts_size);
GOTO(out, rc = -EINVAL);
}
int lod_verify_striping(struct lod_device *d, struct lod_object *lo,
const struct lu_buf *buf, bool is_from_disk)
{
- struct lov_desc *desc = &d->lod_desc;
+ struct lov_desc *desc = &d->lod_ost_descs.ltd_lov_desc;
struct lov_user_md_v1 *lum;
struct lov_comp_md_v1 *comp_v1;
struct lov_comp_md_entry_v1 *ent;
lod_fix_desc(desc);
desc->ld_active_tgt_count = 0;
- lod->lod_desc = *desc;
+ lod->lod_ost_descs.ltd_lov_desc = *desc;
lod->lod_sp_me = LUSTRE_SP_CLI;
- /* Set up allocation policy (QoS and RR) */
- INIT_LIST_HEAD(&lod->lod_qos.lq_svr_list);
- init_rwsem(&lod->lod_qos.lq_rw_sem);
- lod->lod_qos.lq_dirty = 1;
- lod->lod_qos.lq_reset = 1;
- /* Default priority is toward free space balance */
- lod->lod_qos.lq_prio_free = 232;
- /* Default threshold for rr (roughly 17%) */
- lod->lod_qos.lq_threshold_rr = 43;
-
- lu_qos_rr_init(&lod->lod_qos.lq_rr);
-
/* Set up OST pool environment */
lod->lod_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS,
HASH_POOLS_MAX_BITS,
INIT_LIST_HEAD(&lod->lod_pool_list);
lod->lod_pool_count = 0;
- rc = lod_ost_pool_init(&lod->lod_pool_info, 0);
+ rc = lod_ost_pool_init(&lod->lod_ost_descs.ltd_tgt_pool, 0);
if (rc)
GOTO(out_hash, rc);
- rc = lod_ost_pool_init(&lod->lod_qos.lq_rr.lqr_pool, 0);
+ rc = lod_ost_pool_init(&lod->lod_ost_descs.ltd_qos.lq_rr.lqr_pool, 0);
if (rc)
GOTO(out_pool_info, rc);
RETURN(0);
out_pool_info:
- lod_ost_pool_free(&lod->lod_pool_info);
+ lod_ost_pool_free(&lod->lod_ost_descs.ltd_tgt_pool);
out_hash:
cfs_hash_putref(lod->lod_pools_hash_body);
}
cfs_hash_putref(lod->lod_pools_hash_body);
- lod_ost_pool_free(&(lod->lod_qos.lq_rr.lqr_pool));
- lod_ost_pool_free(&lod->lod_pool_info);
+ lod_ost_pool_free(&(lod->lod_ost_descs.ltd_qos.lq_rr.lqr_pool));
+ lod_ost_pool_free(&lod->lod_ost_descs.ltd_tgt_pool);
RETURN(0);
}
if (is_root && strcmp(XATTR_NAME_LOV, name) == 0) {
struct lov_user_md *lum = buf->lb_buf;
- struct lov_desc *desc = &dev->lod_desc;
+ struct lov_desc *desc = &dev->lod_ost_descs.ltd_lov_desc;
if (buf->lb_buf == NULL) {
rc = sizeof(*lum);
else if (lod_comp_inited(entry))
return entry->llc_stripe_count;
else if ((__u16)-1 == entry->llc_stripe_count)
- return lod->lod_desc.ld_tgt_count;
+ return lod->lod_ost_count;
else
return lod_get_stripe_count(lod, lo,
entry->llc_stripe_count, false);
{
struct lod_thread_info *info = lod_env_info(env);
struct lod_layout_component *comp_array, *lod_comp, *old_array;
- struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
+ struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
struct dt_object *next = dt_object_child(dt);
- struct lov_desc *desc = &d->lod_desc;
- struct lod_object *lo = lod_dt_obj(dt);
- struct lov_user_md_v3 *v3;
- struct lov_comp_md_v1 *comp_v1 = buf->lb_buf;
- __u32 magic;
- int i, rc, array_cnt, old_array_cnt;
+ struct lov_desc *desc = &d->lod_ost_descs.ltd_lov_desc;
+ struct lod_object *lo = lod_dt_obj(dt);
+ struct lov_user_md_v3 *v3;
+ struct lov_comp_md_v1 *comp_v1 = buf->lb_buf;
+ __u32 magic;
+ int i, rc, array_cnt, old_array_cnt;
ENTRY;
LASSERT(lo->ldo_is_composite);
umode_t mode)
{
struct lod_device *d = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- struct lov_desc *desc = &d->lod_desc;
int i, rc;
if (lds->lds_def_striping_set && S_ISREG(mode)) {
+ struct lov_desc *desc = &d->lod_ost_descs.ltd_lov_desc;
+
rc = lod_alloc_comp_entries(lo, lds->lds_def_mirror_cnt,
lds->lds_def_comp_cnt);
if (rc != 0)
LASSERT(child);
if (ah->dah_append_stripes == -1)
- ah->dah_append_stripes = d->lod_desc.ld_tgt_count;
+ ah->dah_append_stripes =
+ d->lod_ost_descs.ltd_lov_desc.ld_tgt_count;
if (likely(parent)) {
nextp = dt_object_child(parent);
}
LASSERT(!lc->ldo_is_composite);
lod_comp = &lc->ldo_comp_entries[0];
- desc = &d->lod_desc;
+ desc = &d->lod_ost_descs.ltd_lov_desc;
lod_adjust_stripe_info(lod_comp, desc, ah->dah_append_stripes);
if (ah->dah_append_pool && ah->dah_append_pool[0])
lod_obj_set_pool(lc, 0, ah->dah_append_pool);
} else if (lo->ldo_dir_stripe_offset !=
ss->ss_node_id) {
struct lod_device *lod;
- struct lod_tgt_descs *ltd;
- struct lod_tgt_desc *tgt = NULL;
+ struct lu_tgt_desc *mdt = NULL;
bool found_mdt = false;
lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- ltd = &lod->lod_mdt_descs;
- ltd_foreach_tgt(ltd, tgt) {
- if (tgt->ltd_index ==
+ lod_foreach_mdt(lod, mdt) {
+ if (mdt->ltd_index ==
lo->ldo_dir_stripe_offset) {
found_mdt = true;
break;
break;
}
- rc = dt_statfs_info(env, ost->ltd_ost, sfs, &info);
+ rc = dt_statfs_info(env, ost->ltd_tgt, sfs, &info);
if (rc) {
CDEBUG(D_LAYOUT, "statfs failed for ost %d, error %d\n",
index, rc);
* \retval negative error number on failure
*/
#define POOL_INIT_COUNT 2
-int lod_ost_pool_init(struct ost_pool *op, unsigned int count)
+int lod_ost_pool_init(struct lu_tgt_pool *op, unsigned int count)
{
ENTRY;
* \retval 0 on success
* \retval negative error number on failure.
*/
-int lod_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
+int lod_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count)
{
__u32 *new;
__u32 new_size;
* \retval 0 if target could be added to the pool
* \retval negative error if target \a idx was not added
*/
-int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
+int lod_ost_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count)
{
unsigned int i;
int rc = 0;
* \retval 0 on success
* \retval negative error number on failure
*/
-int lod_ost_pool_remove(struct ost_pool *op, __u32 idx)
+int lod_ost_pool_remove(struct lu_tgt_pool *op, __u32 idx)
{
unsigned int i;
ENTRY;
*
* \retval 0 on success or if pool was already freed
*/
-int lod_ost_pool_free(struct ost_pool *op)
+int lod_ost_pool_free(struct lu_tgt_pool *op)
{
ENTRY;
*/
int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
{
- struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev);
- struct obd_uuid ost_uuid;
- struct pool_desc *pool;
- unsigned int idx;
- int rc = -EINVAL;
+ struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev);
+ struct obd_uuid ost_uuid;
+ struct pool_desc *pool;
+ struct lu_tgt_desc *tgt;
+ int rc = -EINVAL;
ENTRY;
pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
/* search ost in lod array */
lod_getref(&lod->lod_ost_descs);
- lod_foreach_ost(lod, idx) {
- if (obd_uuid_equals(&ost_uuid, &OST_TGT(lod, idx)->ltd_uuid)) {
+ lod_foreach_ost(lod, tgt) {
+ if (obd_uuid_equals(&ost_uuid, &tgt->ltd_uuid)) {
rc = 0;
break;
}
if (rc)
GOTO(out, rc);
- rc = lod_ost_pool_add(&pool->pool_obds, idx, lod->lod_osts_size);
+ rc = lod_ost_pool_add(&pool->pool_obds, tgt->ltd_index,
+ lod->lod_ost_descs.ltd_tgts_size);
if (rc)
GOTO(out, rc);
*/
int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
{
- struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev);
- struct obd_uuid ost_uuid;
- struct pool_desc *pool;
- unsigned int idx;
- int rc = -EINVAL;
+ struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev);
+ struct lu_tgt_desc *ost;
+ struct obd_uuid ost_uuid;
+ struct pool_desc *pool;
+ int rc = -EINVAL;
ENTRY;
pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
obd_str2uuid(&ost_uuid, ostname);
lod_getref(&lod->lod_ost_descs);
- cfs_foreach_bit(lod->lod_ost_bitmap, idx) {
- if (obd_uuid_equals(&ost_uuid, &OST_TGT(lod, idx)->ltd_uuid)) {
+ lod_foreach_ost(lod, ost) {
+ if (obd_uuid_equals(&ost_uuid, &ost->ltd_uuid)) {
rc = 0;
break;
}
if (rc)
GOTO(out, rc);
- lod_ost_pool_remove(&pool->pool_obds, idx);
+ lod_ost_pool_remove(&pool->pool_obds, ost->ltd_index);
pool->pool_rr.lqr_dirty = 1;
*
* \param[in] env execution environment for this thread
* \param[in] d LOD device
- * \param[in] index index of OST target to check
+ * \param[in] ltd target table
+ * \param[in] index target index
* \param[out] sfs buffer for statfs data
*
* \retval 0 if the target is good
*/
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
- int index, struct obd_statfs *sfs)
+ struct lu_tgt_descs *ltd, int index,
+ struct obd_statfs *sfs)
{
- struct lod_tgt_desc *ost;
- int rc;
+ struct lov_desc *desc = &ltd->ltd_lov_desc;
+ struct lu_tgt_desc *tgt = LTD_TGT(ltd, index);
+ int rc;
+
ENTRY;
LASSERT(d);
- ost = OST_TGT(d,index);
- LASSERT(ost);
+ LASSERT(tgt);
- rc = dt_statfs(env, ost->ltd_ost, sfs);
+ rc = dt_statfs(env, tgt->ltd_tgt, sfs);
if (rc == 0 && ((sfs->os_state & OS_STATE_ENOSPC) ||
(sfs->os_state & OS_STATE_ENOINO && sfs->os_fprecreated == 0)))
rc = -ENOBUFS;
/* check whether device has changed state (active, inactive) */
- if (rc != 0 && ost->ltd_active) {
+ if (rc != 0 && tgt->ltd_active) {
/* turned inactive? */
spin_lock(&d->lod_lock);
- if (ost->ltd_active) {
- ost->ltd_active = 0;
+ if (tgt->ltd_active) {
+ tgt->ltd_active = 0;
if (rc == -ENOTCONN)
- ost->ltd_connecting = 1;
+ tgt->ltd_connecting = 1;
- LASSERT(d->lod_desc.ld_active_tgt_count > 0);
- d->lod_desc.ld_active_tgt_count--;
- d->lod_qos.lq_dirty = 1;
- d->lod_qos.lq_rr.lqr_dirty = 1;
+ LASSERT(desc->ld_active_tgt_count > 0);
+ desc->ld_active_tgt_count--;
+ ltd->ltd_qos.lq_dirty = 1;
+ ltd->ltd_qos.lq_rr.lqr_dirty = 1;
CDEBUG(D_CONFIG, "%s: turns inactive\n",
- ost->ltd_exp->exp_obd->obd_name);
+ tgt->ltd_exp->exp_obd->obd_name);
}
spin_unlock(&d->lod_lock);
- } else if (rc == 0 && ost->ltd_active == 0) {
+ } else if (rc == 0 && tgt->ltd_active == 0) {
/* turned active? */
- LASSERTF(d->lod_desc.ld_active_tgt_count < d->lod_ostnr,
- "active tgt count %d, ost nr %d\n",
- d->lod_desc.ld_active_tgt_count, d->lod_ostnr);
+ LASSERTF(desc->ld_active_tgt_count < desc->ld_tgt_count,
+ "active tgt count %d, tgt nr %d\n",
+ desc->ld_active_tgt_count, desc->ld_tgt_count);
spin_lock(&d->lod_lock);
- if (ost->ltd_active == 0) {
- ost->ltd_active = 1;
- ost->ltd_connecting = 0;
- d->lod_desc.ld_active_tgt_count++;
- d->lod_qos.lq_dirty = 1;
- d->lod_qos.lq_rr.lqr_dirty = 1;
+ if (tgt->ltd_active == 0) {
+ tgt->ltd_active = 1;
+ tgt->ltd_connecting = 0;
+ desc->ld_active_tgt_count++;
+ ltd->ltd_qos.lq_dirty = 1;
+ ltd->ltd_qos.lq_rr.lqr_dirty = 1;
CDEBUG(D_CONFIG, "%s: turns active\n",
- ost->ltd_exp->exp_obd->obd_name);
+ tgt->ltd_exp->exp_obd->obd_name);
}
spin_unlock(&d->lod_lock);
}
void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod)
{
struct obd_device *obd = lod2obd(lod);
- struct ost_pool *osts = &(lod->lod_pool_info);
+ struct lu_tgt_pool *osts = &lod->lod_ost_descs.ltd_tgt_pool;
time64_t max_age;
unsigned int i;
u64 avail;
int idx;
ENTRY;
- max_age = ktime_get_seconds() - 2 * lod->lod_desc.ld_qos_maxage;
+ max_age = ktime_get_seconds() -
+ 2 * lod->lod_ost_descs.ltd_lov_desc.ld_qos_maxage;
if (obd->obd_osfs_age > max_age)
/* statfs data are quite recent, don't need to refresh it */
RETURN_EXIT;
- down_write(&lod->lod_qos.lq_rw_sem);
+ down_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
if (obd->obd_osfs_age > max_age)
goto out;
for (i = 0; i < osts->op_count; i++) {
idx = osts->op_array[i];
avail = OST_TGT(lod,idx)->ltd_statfs.os_bavail;
- if (lod_statfs_and_check(env, lod, idx,
+ if (lod_statfs_and_check(env, lod, &lod->lod_ost_descs, idx,
&OST_TGT(lod, idx)->ltd_statfs))
continue;
if (OST_TGT(lod,idx)->ltd_statfs.os_bavail != avail)
/* recalculate weigths */
- lod->lod_qos.lq_dirty = 1;
+ lod->lod_ost_descs.ltd_qos.lq_dirty = 1;
}
obd->obd_osfs_age = ktime_get_seconds();
out:
- up_write(&lod->lod_qos.lq_rw_sem);
+ up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
EXIT;
}
* \retval 0 on success
* \retval -ENOMEM fails to allocate the array
*/
-static int lod_qos_calc_rr(struct lod_device *lod, struct ost_pool *src_pool,
+static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_pool *src_pool,
struct lu_qos_rr *lqr)
{
struct lu_svr_qos *oss;
}
/* Do actual allocation. */
- down_write(&lod->lod_qos.lq_rw_sem);
+ down_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
/*
* Check again. While we were sleeping on @lq_rw_sem something could
*/
if (!lqr->lqr_dirty) {
LASSERT(lqr->lqr_pool.op_size);
- up_write(&lod->lod_qos.lq_rw_sem);
+ up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
RETURN(0);
}
lqr->lqr_pool.op_count = real_count;
rc = lod_ost_pool_extend(&lqr->lqr_pool, real_count);
if (rc) {
- up_write(&lod->lod_qos.lq_rw_sem);
+ up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
RETURN(rc);
}
for (i = 0; i < lqr->lqr_pool.op_count; i++)
/* Place all the OSTs from 1 OSS at the same time. */
placed = 0;
- list_for_each_entry(oss, &lod->lod_qos.lq_svr_list, lsq_svr_list) {
+ list_for_each_entry(oss, &lod->lod_ost_descs.ltd_qos.lq_svr_list,
+ lsq_svr_list) {
int j = 0;
for (i = 0; i < lqr->lqr_pool.op_count; i++) {
continue;
ost = OST_TGT(lod,src_pool->op_array[i]);
- LASSERT(ost && ost->ltd_ost);
+ LASSERT(ost && ost->ltd_tgt);
if (ost->ltd_qos.ltq_svr != oss)
continue;
}
lqr->lqr_dirty = 0;
- up_write(&lod->lod_qos.lq_rw_sem);
+ up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
if (placed != real_count) {
/* This should never happen */
ENTRY;
LASSERT(d);
- LASSERT(ost_idx < d->lod_osts_size);
+ LASSERT(ost_idx < d->lod_ost_descs.ltd_tgts_size);
ost = OST_TGT(d,ost_idx);
LASSERT(ost);
- LASSERT(ost->ltd_ost);
+ LASSERT(ost->ltd_tgt);
- nd = &ost->ltd_ost->dd_lu_dev;
+ nd = &ost->ltd_tgt->dd_lu_dev;
/*
* allocate anonymous object with zero fid, real fid
int rc;
ENTRY;
- rc = lod_statfs_and_check(env, lod, ost_idx, sfs);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost_idx, sfs);
if (rc)
RETURN(rc);
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
struct pool_desc *pool = NULL;
- struct ost_pool *osts;
+ struct lu_tgt_pool *osts;
struct lu_qos_rr *lqr;
- unsigned int i, array_idx;
+ unsigned int i, array_idx;
__u32 ost_start_idx_temp;
__u32 stripe_idx = 0;
__u32 stripe_count, stripe_count_min, ost_idx;
osts = &(pool->pool_obds);
lqr = &(pool->pool_rr);
} else {
- osts = &(m->lod_pool_info);
- lqr = &(m->lod_qos.lq_rr);
+ osts = &m->lod_ost_descs.ltd_tgt_pool;
+ lqr = &(m->lod_ost_descs.ltd_qos.lq_rr);
}
rc = lod_qos_calc_rr(m, osts, lqr);
if (rc)
GOTO(out, rc);
- down_read(&m->lod_qos.lq_rw_sem);
+ down_read(&m->lod_ost_descs.ltd_qos.lq_rw_sem);
spin_lock(&lqr->lqr_alloc);
if (--lqr->lqr_start_count <= 0) {
lqr->lqr_start_idx = prandom_u32_max(osts->op_count);
}
spin_unlock(&lqr->lqr_alloc);
- up_read(&m->lod_qos.lq_rw_sem);
+ up_read(&m->lod_ost_descs.ltd_qos.lq_rw_sem);
/* If there are enough OSTs, a component with overstriping requested
* will not actually end up overstriped. The comp should reflect this.
break;
}
- rc = lod_statfs_and_check(env, m, ost_idx, sfs);
+ rc = lod_statfs_and_check(env, m, &m->lod_ost_descs, ost_idx,
+ sfs);
if (rc < 0) /* this OSP doesn't feel well */
break;
unsigned int i, array_idx, ost_count;
int rc, stripe_num = 0;
int speed = 0;
- struct pool_desc *pool = NULL;
- struct ost_pool *osts;
+ struct pool_desc *pool = NULL;
+ struct lu_tgt_pool *osts;
int stripes_per_ost = 1;
bool overstriped = false;
ENTRY;
down_read(&pool_tgt_rw_sem(pool));
osts = &(pool->pool_obds);
} else {
- osts = &(m->lod_pool_info);
+ osts = &m->lod_ost_descs.ltd_tgt_pool;
}
ost_count = osts->op_count;
* start OST, then it can be skipped, otherwise skip it only
* if it is inactive/recovering/out-of-space." */
- rc = lod_statfs_and_check(env, m, ost_idx, sfs);
+ rc = lod_statfs_and_check(env, m, &m->lod_ost_descs, ost_idx,
+ sfs);
if (rc) {
/* this OSP doesn't feel well */
continue;
struct dt_object *o;
__u64 total_weight = 0;
struct pool_desc *pool = NULL;
- struct ost_pool *osts;
+ struct lu_tgt_pool *osts;
unsigned int i;
__u32 nfound, good_osts, stripe_count, stripe_count_min;
bool overstriped = false;
down_read(&pool_tgt_rw_sem(pool));
osts = &(pool->pool_obds);
} else {
- osts = &(lod->lod_pool_info);
+ osts = &lod->lod_ost_descs.ltd_tgt_pool;
}
/* Detect -EAGAIN early, before expensive lock is taken. */
- if (!lqos_is_usable(&lod->lod_qos, lod->lod_desc.ld_active_tgt_count))
+ if (!ltd_qos_is_usable(&lod->lod_ost_descs))
GOTO(out_nolock, rc = -EAGAIN);
if (lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
(lod_comp->llc_stripe_count - 1)/osts->op_count + 1;
/* Do actual allocation, use write lock here. */
- down_write(&lod->lod_qos.lq_rw_sem);
+ down_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
/*
* Check again, while we were sleeping on @lq_rw_sem things could
* change.
*/
- if (!lqos_is_usable(&lod->lod_qos, lod->lod_desc.ld_active_tgt_count))
+ if (!ltd_qos_is_usable(&lod->lod_ost_descs))
GOTO(out, rc = -EAGAIN);
- rc = lqos_calc_penalties(&lod->lod_qos, &lod->lod_ost_descs,
- lod->lod_desc.ld_active_tgt_count,
- lod->lod_desc.ld_qos_maxage, false);
+ rc = ltd_qos_penalties_calc(&lod->lod_ost_descs);
if (rc)
GOTO(out, rc);
ost = OST_TGT(lod, osts->op_array[i]);
ost->ltd_qos.ltq_usable = 0;
- rc = lod_statfs_and_check(env, lod, osts->op_array[i], sfs);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs,
+ osts->op_array[i], sfs);
if (rc) {
/* this OSP doesn't feel well */
continue;
continue;
ost->ltd_qos.ltq_usable = 1;
- lqos_calc_weight(ost);
+ lu_tgt_qos_weight_calc(ost);
total_weight += ost->ltd_qos.ltq_weight;
good_osts++;
lod_qos_ost_in_use(env, nfound, idx);
stripe[nfound] = o;
ost_indices[nfound] = idx;
- lqos_recalc_weight(&lod->lod_qos, &lod->lod_ost_descs,
- ost,
- lod->lod_desc.ld_active_tgt_count,
- &total_weight);
+ ltd_qos_update(&lod->lod_ost_descs, ost, &total_weight);
nfound++;
rc = 0;
break;
}
/* makes sense to rebalance next time */
- lod->lod_qos.lq_dirty = 1;
- lod->lod_qos.lq_same_space = 0;
+ lod->lod_ost_descs.ltd_qos.lq_dirty = 1;
+ lod->lod_ost_descs.ltd_qos.lq_same_space = 0;
rc = -EAGAIN;
}
lod_comp->llc_pattern &= ~LOV_PATTERN_OVERSTRIPING;
out:
- up_write(&lod->lod_qos.lq_rw_sem);
+ up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
out_nolock:
if (pool != NULL) {
if (!stripe_count)
- stripe_count = lod->lod_desc.ld_default_stripe_count;
+ stripe_count =
+ lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_count;
if (!stripe_count)
stripe_count = 1;
/* Overstriping allows more stripes than targets */
- if (stripe_count > lod->lod_desc.ld_active_tgt_count && !overstriping)
- stripe_count = lod->lod_desc.ld_active_tgt_count;
+ if (stripe_count >
+ lod->lod_ost_descs.ltd_lov_desc.ld_active_tgt_count &&
+ !overstriping)
+ stripe_count =
+ lod->lod_ost_descs.ltd_lov_desc.ld_active_tgt_count;
if (lo->ldo_is_composite) {
struct lod_layout_component *lod_comp;
{
struct lod_layout_component *lod_comp;
struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
- struct lov_desc *desc = &d->lod_desc;
+ struct lov_desc *desc = &d->lod_ost_descs.ltd_lov_desc;
struct lov_user_md_v1 *v1 = NULL;
struct lov_user_md_v3 *v3 = NULL;
struct lov_comp_md_v1 *comp_v1 = NULL;
int lod_prepare_avoidance(const struct lu_env *env, struct lod_object *lo)
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- struct lod_tgt_descs *ltds = &lod->lod_ost_descs;
struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
struct cfs_bitmap *bitmap = NULL;
__u32 *new_oss = NULL;
- lag->lag_ost_avail = ltds->ltd_tgtnr;
+ lag->lag_ost_avail = lod->lod_ost_count;
/* reset OSS avoid guide array */
lag->lag_oaa_count = 0;
- if (lag->lag_oss_avoid_array && lag->lag_oaa_size < ltds->ltd_tgtnr) {
+ if (lag->lag_oss_avoid_array &&
+ lag->lag_oaa_size < lod->lod_ost_count) {
OBD_FREE(lag->lag_oss_avoid_array,
sizeof(__u32) * lag->lag_oaa_size);
lag->lag_oss_avoid_array = NULL;
/* init OST avoid guide bitmap */
if (lag->lag_ost_avoid_bitmap) {
- if (ltds->ltd_tgtnr <= lag->lag_ost_avoid_bitmap->size) {
+ if (lod->lod_ost_count <= lag->lag_ost_avoid_bitmap->size) {
CFS_RESET_BITMAP(lag->lag_ost_avoid_bitmap);
} else {
CFS_FREE_BITMAP(lag->lag_ost_avoid_bitmap);
}
if (!lag->lag_ost_avoid_bitmap) {
- bitmap = CFS_ALLOCATE_BITMAP(ltds->ltd_tgtnr);
+ bitmap = CFS_ALLOCATE_BITMAP(lod->lod_ost_count);
if (!bitmap)
return -ENOMEM;
}
* using OST count to allocate the array to store the OSS
* id.
*/
- OBD_ALLOC(new_oss, sizeof(*new_oss) * ltds->ltd_tgtnr);
+ OBD_ALLOC(new_oss, sizeof(*new_oss) * lod->lod_ost_count);
if (!new_oss) {
CFS_FREE_BITMAP(bitmap);
return -ENOMEM;
if (new_oss) {
lag->lag_oss_avoid_array = new_oss;
- lag->lag_oaa_size = ltds->ltd_tgtnr;
+ lag->lag_oaa_size = lod->lod_ost_count;
}
if (bitmap)
lag->lag_ost_avoid_bitmap = bitmap;
lod_getref(&d->lod_ost_descs);
/* XXX: support for non-0 files w/o objects */
CDEBUG(D_OTHER, "tgt_count %d stripe_count %d\n",
- d->lod_desc.ld_tgt_count, stripe_len);
+ d->lod_ost_count, stripe_len);
if (lod_comp->llc_ostlist.op_array &&
lod_comp->llc_ostlist.op_count) {
/* no OST available */
/* XXX: should we be waiting a bit to prevent failures during
* cluster initialization? */
- if (d->lod_ostnr == 0)
+ if (!d->lod_ost_count)
RETURN(-EIO);
/*
LASSERT(dev != NULL);
lod = lu2lod_dev(dev->obd_lu_dev);
seq_printf(m, "%llu\n",
- lod->lod_desc.ld_default_stripe_size);
+ lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_size);
return 0;
}
return -ERANGE;
lod_fix_desc_stripe_size(&val);
- lod->lod_desc.ld_default_stripe_size = val;
+ lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_size = val;
return count;
}
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%lld\n", lod->lod_desc.ld_default_stripe_offset);
+ return sprintf(buf, "%lld\n",
+ lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_offset);
}
/**
if (val < -1 || val > LOV_MAX_STRIPE_COUNT)
return -ERANGE;
- lod->lod_desc.ld_default_stripe_offset = val;
+ lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_offset = val;
return count;
}
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%u\n", lod->lod_desc.ld_pattern);
+ return sprintf(buf, "%u\n", lod->lod_ost_descs.ltd_lov_desc.ld_pattern);
}
/**
return rc;
lod_fix_desc_pattern(&pattern);
- lod->lod_desc.ld_pattern = pattern;
+ lod->lod_ost_descs.ltd_lov_desc.ld_pattern = pattern;
return count;
}
struct dt_device *dt = container_of(kobj, struct dt_device,
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
+ struct lov_desc *desc = &lod->lod_ost_descs.ltd_lov_desc;
return sprintf(buf, "%d\n",
- (s16)(lod->lod_desc.ld_default_stripe_count + 1) - 1);
+ (s16)(desc->ld_default_stripe_count + 1) - 1);
}
/**
return -ERANGE;
lod_fix_desc_stripe_count(&stripe_count);
- lod->lod_desc.ld_default_stripe_count = stripe_count;
+ lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_count = stripe_count;
return count;
}
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%u\n", lod->lod_desc.ld_tgt_count);
+ return sprintf(buf, "%u\n", lod->lod_ost_count);
}
LUSTRE_RO_ATTR(numobd);
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%u\n", lod->lod_desc.ld_active_tgt_count);
+ return sprintf(buf, "%u\n",
+ lod->lod_ost_descs.ltd_lov_desc.ld_active_tgt_count);
}
LUSTRE_RO_ATTR(activeobd);
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%s\n", lod->lod_desc.ld_uuid.uuid);
+ return sprintf(buf, "%s\n",
+ lod->lod_ost_descs.ltd_lov_desc.ld_uuid.uuid);
}
LUSTRE_RO_ATTR(desc_uuid);
struct lod_device *lod = dt2lod_dev(dt);
return sprintf(buf, "%d%%\n",
- (lod->lod_qos.lq_prio_free * 100 + 255) >> 8);
+ (lod->lod_ost_descs.ltd_qos.lq_prio_free * 100 + 255) >>
+ 8);
}
/**
if (val > 100)
return -EINVAL;
- lod->lod_qos.lq_prio_free = (val << 8) / 100;
- lod->lod_qos.lq_dirty = 1;
- lod->lod_qos.lq_reset = 1;
+ lod->lod_ost_descs.ltd_qos.lq_prio_free = (val << 8) / 100;
+ lod->lod_ost_descs.ltd_qos.lq_dirty = 1;
+ lod->lod_ost_descs.ltd_qos.lq_reset = 1;
return count;
}
LASSERT(dev != NULL);
lod = lu2lod_dev(dev->obd_lu_dev);
seq_printf(m, "%d%%\n",
- (lod->lod_qos.lq_threshold_rr * 100 + 255) >> 8);
+ (lod->lod_ost_descs.ltd_qos.lq_threshold_rr * 100 + 255) >>
+ 8);
return 0;
}
if (val > 100 || val < 0)
return -EINVAL;
- lod->lod_qos.lq_threshold_rr = (val << 8) / 100;
- lod->lod_qos.lq_dirty = 1;
+ lod->lod_ost_descs.ltd_qos.lq_threshold_rr = (val << 8) / 100;
+ lod->lod_ost_descs.ltd_qos.lq_dirty = 1;
return count;
}
dd_kobj);
struct lod_device *lod = dt2lod_dev(dt);
- return sprintf(buf, "%u Sec\n", lod->lod_desc.ld_qos_maxage);
+ return sprintf(buf, "%u Sec\n",
+ lod->lod_ost_descs.ltd_lov_desc.ld_qos_maxage);
}
/**
struct lu_device *next;
struct lustre_cfg *lcfg;
char str[32];
- unsigned int i;
+ struct lu_tgt_desc *tgt;
int rc;
u32 val;
if (val <= 0)
return -EINVAL;
- lod->lod_desc.ld_qos_maxage = val;
+ lod->lod_ost_descs.ltd_lov_desc.ld_qos_maxage = val;
/*
* propogate the value down to OSPs
lustre_cfg_init(lcfg, LCFG_PARAM, &bufs);
lod_getref(&lod->lod_ost_descs);
- lod_foreach_ost(lod, i) {
- next = &OST_TGT(lod,i)->ltd_ost->dd_lu_dev;
+ lod_foreach_ost(lod, tgt) {
+ next = &tgt->ltd_tgt->dd_lu_dev;
rc = next->ld_ops->ldo_process_config(NULL, next, lcfg);
if (rc)
- CERROR("can't set maxage on #%d: %d\n", i, rc);
+ CERROR("can't set maxage on #%d: %d\n",
+ tgt->ltd_index, rc);
}
lod_putref(lod, &lod->lod_ost_descs);
OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens));
static int lod_osts_seq_show(struct seq_file *p, void *v)
{
struct obd_device *obd = p->private;
- struct lod_ost_desc *ost_desc = v;
+ struct lu_tgt_desc *ost_desc = v;
struct lod_device *lod;
int idx, rc, active;
struct dt_device *next;
lod = lu2lod_dev(obd->obd_lu_dev);
idx = ost_desc->ltd_index;
- next = OST_TGT(lod,idx)->ltd_ost;
+ next = OST_TGT(lod, idx)->ltd_tgt;
if (next == NULL)
return -EINVAL;
struct pool_desc {
char pool_name[LOV_MAXPOOLNAME + 1];
- struct ost_pool pool_obds;
+ struct lu_tgt_pool pool_obds;
atomic_t pool_refcount;
struct hlist_node pool_hash; /* access by poolname */
struct list_head pool_list; /* serial access */
/* pools */
extern struct cfs_hash_ops pool_hash_operations;
-/* ost_pool methods */
-int lov_ost_pool_init(struct ost_pool *op, unsigned int count);
-int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count);
-int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count);
-int lov_ost_pool_remove(struct ost_pool *op, __u32 idx);
-int lov_ost_pool_free(struct ost_pool *op);
+/* lu_tgt_pool methods */
+int lov_ost_pool_init(struct lu_tgt_pool *op, unsigned int count);
+int lov_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count);
+int lov_ost_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count);
+int lov_ost_pool_remove(struct lu_tgt_pool *op, __u32 idx);
+int lov_ost_pool_free(struct lu_tgt_pool *op);
/* high level pool methods */
int lov_pool_new(struct obd_device *obd, char *poolname);
}
#define LOV_POOL_INIT_COUNT 2
-int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
+int lov_ost_pool_init(struct lu_tgt_pool *op, unsigned int count)
{
ENTRY;
}
/* Caller must hold write op_rwlock */
-int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
+int lov_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count)
{
__u32 *new;
__u32 new_size;
return 0;
}
-int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
+int lov_ost_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count)
{
int rc = 0, i;
ENTRY;
return rc;
}
-int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
+int lov_ost_pool_remove(struct lu_tgt_pool *op, __u32 idx)
{
int i;
ENTRY;
RETURN(-EINVAL);
}
-int lov_ost_pool_free(struct ost_pool *op)
+int lov_ost_pool_free(struct lu_tgt_pool *op)
{
ENTRY;
obdclass-all-objs += linkea.o
obdclass-all-objs += kernelcomm.o jobid.o
obdclass-all-objs += integrity.o obd_cksum.o
-obdclass-all-objs += lu_qos.o
obdclass-all-objs += lu_tgt_descs.o
@SERVER_TRUE@obdclass-all-objs += acl.o
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- *
- * lustre/obdclass/lu_qos.c
- *
- * Lustre QoS.
- * These are the only exported functions, they provide some generic
- * infrastructure for object allocation QoS
- *
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/random.h>
-#include <libcfs/libcfs.h>
-#include <libcfs/libcfs_hash.h> /* hash_long() */
-#include <libcfs/linux/linux-mem.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <lustre_disk.h>
-#include <lustre_fid.h>
-#include <lu_object.h>
-
-void lu_qos_rr_init(struct lu_qos_rr *lqr)
-{
- spin_lock_init(&lqr->lqr_alloc);
- lqr->lqr_dirty = 1;
-}
-EXPORT_SYMBOL(lu_qos_rr_init);
-
-/**
- * Add a new target to Quality of Service (QoS) target table.
- *
- * Add a new MDT/OST target to the structure representing an OSS. Resort the
- * list of known MDSs/OSSs by the number of MDTs/OSTs attached to each MDS/OSS.
- * The MDS/OSS list is protected internally and no external locking is required.
- *
- * \param[in] qos lu_qos data
- * \param[in] ltd target description
- *
- * \retval 0 on success
- * \retval -ENOMEM on error
- */
-int lqos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd)
-{
- struct lu_svr_qos *svr = NULL;
- struct lu_svr_qos *tempsvr;
- struct obd_export *exp = ltd->ltd_exp;
- int found = 0;
- __u32 id = 0;
- int rc = 0;
-
- ENTRY;
-
- down_write(&qos->lq_rw_sem);
- /*
- * a bit hacky approach to learn NID of corresponding connection
- * but there is no official API to access information like this
- * with OSD API.
- */
- list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
- if (obd_uuid_equals(&svr->lsq_uuid,
- &exp->exp_connection->c_remote_uuid)) {
- found++;
- break;
- }
- if (svr->lsq_id > id)
- id = svr->lsq_id;
- }
-
- if (!found) {
- OBD_ALLOC_PTR(svr);
- if (!svr)
- GOTO(out, rc = -ENOMEM);
- memcpy(&svr->lsq_uuid, &exp->exp_connection->c_remote_uuid,
- sizeof(svr->lsq_uuid));
- ++id;
- svr->lsq_id = id;
- } else {
- /* Assume we have to move this one */
- list_del(&svr->lsq_svr_list);
- }
-
- svr->lsq_tgt_count++;
- ltd->ltd_qos.ltq_svr = svr;
-
- CDEBUG(D_OTHER, "add tgt %s to server %s (%d targets)\n",
- obd_uuid2str(&ltd->ltd_uuid), obd_uuid2str(&svr->lsq_uuid),
- svr->lsq_tgt_count);
-
- /*
- * Add sorted by # of tgts. Find the first entry that we're
- * bigger than...
- */
- list_for_each_entry(tempsvr, &qos->lq_svr_list, lsq_svr_list) {
- if (svr->lsq_tgt_count > tempsvr->lsq_tgt_count)
- break;
- }
- /*
- * ...and add before it. If we're the first or smallest, tempsvr
- * points to the list head, and we add to the end.
- */
- list_add_tail(&svr->lsq_svr_list, &tempsvr->lsq_svr_list);
-
- qos->lq_dirty = 1;
- qos->lq_rr.lqr_dirty = 1;
-
-out:
- up_write(&qos->lq_rw_sem);
- RETURN(rc);
-}
-EXPORT_SYMBOL(lqos_add_tgt);
-
-/**
- * Remove MDT/OST target from QoS table.
- *
- * Removes given MDT/OST target from QoS table and releases related
- * MDS/OSS structure if no target remain on the MDS/OSS.
- *
- * \param[in] qos lu_qos data
- * \param[in] ltd target description
- *
- * \retval 0 on success
- * \retval -ENOENT if no server was found
- */
-int lqos_del_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd)
-{
- struct lu_svr_qos *svr;
- int rc = 0;
-
- ENTRY;
-
- down_write(&qos->lq_rw_sem);
- svr = ltd->ltd_qos.ltq_svr;
- if (!svr)
- GOTO(out, rc = -ENOENT);
-
- svr->lsq_tgt_count--;
- if (svr->lsq_tgt_count == 0) {
- CDEBUG(D_OTHER, "removing server %s\n",
- obd_uuid2str(&svr->lsq_uuid));
- list_del(&svr->lsq_svr_list);
- ltd->ltd_qos.ltq_svr = NULL;
- OBD_FREE_PTR(svr);
- }
-
- qos->lq_dirty = 1;
- qos->lq_rr.lqr_dirty = 1;
-out:
- up_write(&qos->lq_rw_sem);
- RETURN(rc);
-}
-EXPORT_SYMBOL(lqos_del_tgt);
-
-/**
- * lu_prandom_u64_max - returns a pseudo-random u64 number in interval
- * [0, ep_ro)
- *
- * \param[in] ep_ro right open interval endpoint
- *
- * \retval a pseudo-random 64-bit number that is in interval [0, ep_ro).
- */
-u64 lu_prandom_u64_max(u64 ep_ro)
-{
- u64 rand = 0;
-
- if (ep_ro) {
-#if BITS_PER_LONG == 32
- /*
- * If ep_ro > 32-bit, first generate the high
- * 32 bits of the random number, then add in the low
- * 32 bits (truncated to the upper limit, if needed)
- */
- if (ep_ro > 0xffffffffULL)
- rand = prandom_u32_max((u32)(ep_ro >> 32)) << 32;
-
- if (rand == (ep_ro & 0xffffffff00000000ULL))
- rand |= prandom_u32_max((u32)ep_ro);
- else
- rand |= prandom_u32();
-#else
- rand = ((u64)prandom_u32() << 32 | prandom_u32()) % ep_ro;
-#endif
- }
-
- return rand;
-}
-EXPORT_SYMBOL(lu_prandom_u64_max);
-
-static inline __u64 tgt_statfs_bavail(struct lu_tgt_desc *tgt)
-{
- struct obd_statfs *statfs = &tgt->ltd_statfs;
-
- return statfs->os_bavail * statfs->os_bsize;
-}
-
-static inline __u64 tgt_statfs_iavail(struct lu_tgt_desc *tgt)
-{
- return tgt->ltd_statfs.os_ffree;
-}
-
-/**
- * Calculate penalties per-tgt and per-server
- *
- * Re-calculate penalties when the configuration changes, active targets
- * change and after statfs refresh (all these are reflected by lq_dirty flag).
- * On every tgt and server: decay the penalty by half for every 8x the update
- * interval that the device has been idle. That gives lots of time for the
- * statfs information to be updated (which the penalty is only a proxy for),
- * and avoids penalizing server/tgt under light load.
- * See lqos_calc_weight() for how penalties are factored into the weight.
- *
- * \param[in] qos lu_qos
- * \param[in] ltd lu_tgt_descs
- * \param[in] active_tgt_nr active tgt number
- * \param[in] maxage qos max age
- * \param[in] is_mdt MDT will count inode usage
- *
- * \retval 0 on success
- * \retval -EAGAIN the number of tgt isn't enough or all tgt spaces are
- * almost the same
- */
-int lqos_calc_penalties(struct lu_qos *qos, struct lu_tgt_descs *ltd,
- __u32 active_tgt_nr, __u32 maxage, bool is_mdt)
-{
- struct lu_tgt_desc *tgt;
- struct lu_svr_qos *svr;
- __u64 ba_max, ba_min, ba;
- __u64 ia_max, ia_min, ia = 1;
- __u32 num_active;
- int prio_wide;
- time64_t now, age;
- int rc;
-
- ENTRY;
-
- if (!qos->lq_dirty)
- GOTO(out, rc = 0);
-
- num_active = active_tgt_nr - 1;
- if (num_active < 1)
- GOTO(out, rc = -EAGAIN);
-
- /* find bavail on each server */
- list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
- svr->lsq_bavail = 0;
- /* if inode is not counted, set to 1 to ignore */
- svr->lsq_iavail = is_mdt ? 0 : 1;
- }
- qos->lq_active_svr_count = 0;
-
- /*
- * How badly user wants to select targets "widely" (not recently chosen
- * and not on recent MDS's). As opposed to "freely" (free space avail.)
- * 0-256
- */
- prio_wide = 256 - qos->lq_prio_free;
-
- ba_min = (__u64)(-1);
- ba_max = 0;
- ia_min = (__u64)(-1);
- ia_max = 0;
- now = ktime_get_real_seconds();
-
- /* Calculate server penalty per object */
- ltd_foreach_tgt(ltd, tgt) {
- if (!tgt->ltd_active)
- continue;
-
- /* when inode is counted, bavail >> 16 to avoid overflow */
- ba = tgt_statfs_bavail(tgt);
- if (is_mdt)
- ba >>= 16;
- else
- ba >>= 8;
- if (!ba)
- continue;
-
- ba_min = min(ba, ba_min);
- ba_max = max(ba, ba_max);
-
- /* Count the number of usable servers */
- if (tgt->ltd_qos.ltq_svr->lsq_bavail == 0)
- qos->lq_active_svr_count++;
- tgt->ltd_qos.ltq_svr->lsq_bavail += ba;
-
- if (is_mdt) {
- /* iavail >> 8 to avoid overflow */
- ia = tgt_statfs_iavail(tgt) >> 8;
- if (!ia)
- continue;
-
- ia_min = min(ia, ia_min);
- ia_max = max(ia, ia_max);
-
- tgt->ltd_qos.ltq_svr->lsq_iavail += ia;
- }
-
- /*
- * per-tgt penalty is
- * prio * bavail * iavail / (num_tgt - 1) / 2
- */
- tgt->ltd_qos.ltq_penalty_per_obj = prio_wide * ba * ia >> 8;
- do_div(tgt->ltd_qos.ltq_penalty_per_obj, num_active);
- tgt->ltd_qos.ltq_penalty_per_obj >>= 1;
-
- age = (now - tgt->ltd_qos.ltq_used) >> 3;
- if (qos->lq_reset || age > 32 * maxage)
- tgt->ltd_qos.ltq_penalty = 0;
- else if (age > maxage)
- /* Decay tgt penalty. */
- tgt->ltd_qos.ltq_penalty >>= (age / maxage);
- }
-
- num_active = qos->lq_active_svr_count - 1;
- if (num_active < 1) {
- /*
- * If there's only 1 server, we can't penalize it, so instead
- * we have to double the tgt penalty
- */
- num_active = 1;
- ltd_foreach_tgt(ltd, tgt) {
- if (!tgt->ltd_active)
- continue;
-
- tgt->ltd_qos.ltq_penalty_per_obj <<= 1;
- }
- }
-
- /*
- * Per-server penalty is
- * prio * bavail * iavail / server_tgts / (num_svr - 1) / 2
- */
- list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
- ba = svr->lsq_bavail;
- ia = svr->lsq_iavail;
- svr->lsq_penalty_per_obj = prio_wide * ba * ia >> 8;
- do_div(ba, svr->lsq_tgt_count * num_active);
- svr->lsq_penalty_per_obj >>= 1;
-
- age = (now - svr->lsq_used) >> 3;
- if (qos->lq_reset || age > 32 * maxage)
- svr->lsq_penalty = 0;
- else if (age > maxage)
- /* Decay server penalty. */
- svr->lsq_penalty >>= age / maxage;
- }
-
- qos->lq_dirty = 0;
- qos->lq_reset = 0;
-
- /*
- * If each tgt has almost same free space, do rr allocation for better
- * creation performance
- */
- qos->lq_same_space = 0;
- if ((ba_max * (256 - qos->lq_threshold_rr)) >> 8 < ba_min &&
- (ia_max * (256 - qos->lq_threshold_rr)) >> 8 < ia_min) {
- qos->lq_same_space = 1;
- /* Reset weights for the next time we enter qos mode */
- qos->lq_reset = 1;
- }
- rc = 0;
-
-out:
- if (!rc && qos->lq_same_space)
- RETURN(-EAGAIN);
-
- RETURN(rc);
-}
-EXPORT_SYMBOL(lqos_calc_penalties);
-
-bool lqos_is_usable(struct lu_qos *qos, __u32 active_tgt_nr)
-{
- if (!qos->lq_dirty && qos->lq_same_space)
- return false;
-
- if (active_tgt_nr < 2)
- return false;
-
- return true;
-}
-EXPORT_SYMBOL(lqos_is_usable);
-
-/**
- * Calculate weight for a given tgt.
- *
- * The final tgt weight is bavail >> 16 * iavail >> 8 minus the tgt and server
- * penalties. See lqos_calc_ppts() for how penalties are calculated.
- *
- * \param[in] tgt target descriptor
- */
-void lqos_calc_weight(struct lu_tgt_desc *tgt)
-{
- struct lu_tgt_qos *ltq = &tgt->ltd_qos;
- __u64 temp, temp2;
-
- temp = (tgt_statfs_bavail(tgt) >> 16) * (tgt_statfs_iavail(tgt) >> 8);
- temp2 = ltq->ltq_penalty + ltq->ltq_svr->lsq_penalty;
- if (temp < temp2)
- ltq->ltq_weight = 0;
- else
- ltq->ltq_weight = temp - temp2;
-}
-EXPORT_SYMBOL(lqos_calc_weight);
-
-/**
- * Re-calculate weights.
- *
- * The function is called when some target was used for a new object. In
- * this case we should re-calculate all the weights to keep new allocations
- * balanced well.
- *
- * \param[in] qos lu_qos
- * \param[in] ltd lu_tgt_descs
- * \param[in] tgt target where a new object was placed
- * \param[in] active_tgt_nr active tgt number
- * \param[out] total_wt new total weight for the pool
- *
- * \retval 0
- */
-int lqos_recalc_weight(struct lu_qos *qos, struct lu_tgt_descs *ltd,
- struct lu_tgt_desc *tgt, __u32 active_tgt_nr,
- __u64 *total_wt)
-{
- struct lu_tgt_qos *ltq;
- struct lu_svr_qos *svr;
-
- ENTRY;
-
- ltq = &tgt->ltd_qos;
- LASSERT(ltq);
-
- /* Don't allocate on this device anymore, until the next alloc_qos */
- ltq->ltq_usable = 0;
-
- svr = ltq->ltq_svr;
-
- /*
- * Decay old penalty by half (we're adding max penalty, and don't
- * want it to run away.)
- */
- ltq->ltq_penalty >>= 1;
- svr->lsq_penalty >>= 1;
-
- /* mark the server and tgt as recently used */
- ltq->ltq_used = svr->lsq_used = ktime_get_real_seconds();
-
- /* Set max penalties for this tgt and server */
- ltq->ltq_penalty += ltq->ltq_penalty_per_obj * active_tgt_nr;
- svr->lsq_penalty += svr->lsq_penalty_per_obj * active_tgt_nr;
-
- /* Decrease all MDS penalties */
- list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
- if (svr->lsq_penalty < svr->lsq_penalty_per_obj)
- svr->lsq_penalty = 0;
- else
- svr->lsq_penalty -= svr->lsq_penalty_per_obj;
- }
-
- *total_wt = 0;
- /* Decrease all tgt penalties */
- ltd_foreach_tgt(ltd, tgt) {
- if (!tgt->ltd_active)
- continue;
-
- if (ltq->ltq_penalty < ltq->ltq_penalty_per_obj)
- ltq->ltq_penalty = 0;
- else
- ltq->ltq_penalty -= ltq->ltq_penalty_per_obj;
-
- lqos_calc_weight(tgt);
-
- /* Recalc the total weight of usable osts */
- if (ltq->ltq_usable)
- *total_wt += ltq->ltq_weight;
-
- CDEBUG(D_OTHER, "recalc tgt %d usable=%d avail=%llu"
- " tgtppo=%llu tgtp=%llu svrppo=%llu"
- " svrp=%llu wt=%llu\n",
- tgt->ltd_index, ltq->ltq_usable,
- tgt_statfs_bavail(tgt) >> 10,
- ltq->ltq_penalty_per_obj >> 10,
- ltq->ltq_penalty >> 10,
- ltq->ltq_svr->lsq_penalty_per_obj >> 10,
- ltq->ltq_svr->lsq_penalty >> 10,
- ltq->ltq_weight >> 10);
- }
-
- RETURN(0);
-}
-EXPORT_SYMBOL(lqos_recalc_weight);
#include <linux/module.h>
#include <linux/list.h>
+#include <linux/random.h>
#include <libcfs/libcfs.h>
#include <libcfs/libcfs_hash.h> /* hash_long() */
#include <libcfs/linux/linux-mem.h>
#include <lu_object.h>
/**
+ * lu_prandom_u64_max - returns a pseudo-random u64 number in interval
+ * [0, ep_ro)
+ *
+ * \param[in] ep_ro right open interval endpoint
+ *
+ * \retval a pseudo-random 64-bit number that is in interval [0, ep_ro).
+ */
+u64 lu_prandom_u64_max(u64 ep_ro)
+{
+ u64 rand = 0;
+
+ if (ep_ro) {
+#if BITS_PER_LONG == 32
+ /*
+ * If ep_ro > 32-bit, first generate the high
+ * 32 bits of the random number, then add in the low
+ * 32 bits (truncated to the upper limit, if needed)
+ */
+ if (ep_ro > 0xffffffffULL)
+ rand = prandom_u32_max((u32)(ep_ro >> 32)) << 32;
+
+ if (rand == (ep_ro & 0xffffffff00000000ULL))
+ rand |= prandom_u32_max((u32)ep_ro);
+ else
+ rand |= prandom_u32();
+#else
+ rand = ((u64)prandom_u32() << 32 | prandom_u32()) % ep_ro;
+#endif
+ }
+
+ return rand;
+}
+EXPORT_SYMBOL(lu_prandom_u64_max);
+
+void lu_qos_rr_init(struct lu_qos_rr *lqr)
+{
+ spin_lock_init(&lqr->lqr_alloc);
+ lqr->lqr_dirty = 1;
+}
+EXPORT_SYMBOL(lu_qos_rr_init);
+
+/**
+ * Add a new target to Quality of Service (QoS) target table.
+ *
+ * Add a new MDT/OST target to the structure representing an OSS. Resort the
+ * list of known MDSs/OSSs by the number of MDTs/OSTs attached to each MDS/OSS.
+ * The MDS/OSS list is protected internally and no external locking is required.
+ *
+ * \param[in] qos lu_qos data
+ * \param[in] tgt target description
+ *
+ * \retval 0 on success
+ * \retval -ENOMEM on error
+ */
+int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *tgt)
+{
+ struct lu_svr_qos *svr = NULL;
+ struct lu_svr_qos *tempsvr;
+ struct obd_export *exp = tgt->ltd_exp;
+ int found = 0;
+ __u32 id = 0;
+ int rc = 0;
+
+ ENTRY;
+
+ /* tgt not connected, this function will be called again later */
+ if (!exp)
+ RETURN(0);
+
+ down_write(&qos->lq_rw_sem);
+ /*
+ * a bit hacky approach to learn NID of corresponding connection
+ * but there is no official API to access information like this
+ * with OSD API.
+ */
+ list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
+ if (obd_uuid_equals(&svr->lsq_uuid,
+ &exp->exp_connection->c_remote_uuid)) {
+ found++;
+ break;
+ }
+ if (svr->lsq_id > id)
+ id = svr->lsq_id;
+ }
+
+ if (!found) {
+ OBD_ALLOC_PTR(svr);
+ if (!svr)
+ GOTO(out, rc = -ENOMEM);
+ memcpy(&svr->lsq_uuid, &exp->exp_connection->c_remote_uuid,
+ sizeof(svr->lsq_uuid));
+ ++id;
+ svr->lsq_id = id;
+ } else {
+ /* Assume we have to move this one */
+ list_del(&svr->lsq_svr_list);
+ }
+
+ svr->lsq_tgt_count++;
+ tgt->ltd_qos.ltq_svr = svr;
+
+ CDEBUG(D_OTHER, "add tgt %s to server %s (%d targets)\n",
+ obd_uuid2str(&tgt->ltd_uuid), obd_uuid2str(&svr->lsq_uuid),
+ svr->lsq_tgt_count);
+
+ /*
+ * Add sorted by # of tgts. Find the first entry that we're
+ * bigger than...
+ */
+ list_for_each_entry(tempsvr, &qos->lq_svr_list, lsq_svr_list) {
+ if (svr->lsq_tgt_count > tempsvr->lsq_tgt_count)
+ break;
+ }
+ /*
+ * ...and add before it. If we're the first or smallest, tempsvr
+ * points to the list head, and we add to the end.
+ */
+ list_add_tail(&svr->lsq_svr_list, &tempsvr->lsq_svr_list);
+
+ qos->lq_dirty = 1;
+ qos->lq_rr.lqr_dirty = 1;
+
+out:
+ up_write(&qos->lq_rw_sem);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lu_qos_add_tgt);
+
+/**
+ * Remove MDT/OST target from QoS table.
+ *
+ * Removes given MDT/OST target from QoS table and releases related
+ * MDS/OSS structure if no target remain on the MDS/OSS.
+ *
+ * \param[in] qos lu_qos data
+ * \param[in] ltd target description
+ *
+ * \retval 0 on success
+ * \retval -ENOENT if no server was found
+ */
+static int lu_qos_del_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd)
+{
+ struct lu_svr_qos *svr;
+ int rc = 0;
+
+ ENTRY;
+
+ down_write(&qos->lq_rw_sem);
+ svr = ltd->ltd_qos.ltq_svr;
+ if (!svr)
+ GOTO(out, rc = -ENOENT);
+
+ svr->lsq_tgt_count--;
+ if (svr->lsq_tgt_count == 0) {
+ CDEBUG(D_OTHER, "removing server %s\n",
+ obd_uuid2str(&svr->lsq_uuid));
+ list_del(&svr->lsq_svr_list);
+ ltd->ltd_qos.ltq_svr = NULL;
+ OBD_FREE_PTR(svr);
+ }
+
+ qos->lq_dirty = 1;
+ qos->lq_rr.lqr_dirty = 1;
+out:
+ up_write(&qos->lq_rw_sem);
+ RETURN(rc);
+}
+
+static inline __u64 tgt_statfs_bavail(struct lu_tgt_desc *tgt)
+{
+ struct obd_statfs *statfs = &tgt->ltd_statfs;
+
+ return statfs->os_bavail * statfs->os_bsize;
+}
+
+static inline __u64 tgt_statfs_iavail(struct lu_tgt_desc *tgt)
+{
+ return tgt->ltd_statfs.os_ffree;
+}
+
+/**
+ * Calculate weight for a given tgt.
+ *
+ * The final tgt weight is bavail >> 16 * iavail >> 8 minus the tgt and server
+ * penalties. See ltd_qos_penalties_calc() for how penalties are calculated.
+ *
+ * \param[in] tgt target descriptor
+ */
+void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt)
+{
+ struct lu_tgt_qos *ltq = &tgt->ltd_qos;
+ __u64 temp, temp2;
+
+ temp = (tgt_statfs_bavail(tgt) >> 16) * (tgt_statfs_iavail(tgt) >> 8);
+ temp2 = ltq->ltq_penalty + ltq->ltq_svr->lsq_penalty;
+ if (temp < temp2)
+ ltq->ltq_weight = 0;
+ else
+ ltq->ltq_weight = temp - temp2;
+}
+EXPORT_SYMBOL(lu_tgt_qos_weight_calc);
+
+/**
* Allocate and initialize target table.
*
* A helper function to initialize the target table and allocate
* a bitmap of the available targets.
*
* \param[in] ltd target's table to initialize
+ * \param[in] is_mdt target table for MDTs
*
* \retval 0 on success
* \retval negative negated errno on error
**/
-int lu_tgt_descs_init(struct lu_tgt_descs *ltd)
+int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt)
{
mutex_init(&ltd->ltd_mutex);
init_rwsem(&ltd->ltd_rw_sem);
return -ENOMEM;
ltd->ltd_tgts_size = BITS_PER_LONG;
- ltd->ltd_tgtnr = 0;
-
ltd->ltd_death_row = 0;
ltd->ltd_refcount = 0;
+ /* Set up allocation policy (QoS and RR) */
+ INIT_LIST_HEAD(&ltd->ltd_qos.lq_svr_list);
+ init_rwsem(&ltd->ltd_qos.lq_rw_sem);
+ ltd->ltd_qos.lq_dirty = 1;
+ ltd->ltd_qos.lq_reset = 1;
+ /* Default priority is toward free space balance */
+ ltd->ltd_qos.lq_prio_free = 232;
+ /* Default threshold for rr (roughly 17%) */
+ ltd->ltd_qos.lq_threshold_rr = 43;
+ ltd->ltd_is_mdt = is_mdt;
+
+ lu_qos_rr_init(&ltd->ltd_qos.lq_rr);
+
return 0;
}
EXPORT_SYMBOL(lu_tgt_descs_init);
* \retval -ENOMEM if reallocation failed
* -EEXIST if target existed
*/
-int lu_tgt_descs_add(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
+int ltd_add_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
{
__u32 index = tgt->ltd_index;
int rc;
LTD_TGT(ltd, tgt->ltd_index) = tgt;
cfs_bitmap_set(ltd->ltd_tgt_bitmap, tgt->ltd_index);
- ltd->ltd_tgtnr++;
+
+ ltd->ltd_lov_desc.ld_tgt_count++;
+ if (tgt->ltd_active)
+ ltd->ltd_lov_desc.ld_active_tgt_count++;
RETURN(0);
}
-EXPORT_SYMBOL(lu_tgt_descs_add);
+EXPORT_SYMBOL(ltd_add_tgt);
/**
* Delete target from target table
*/
-void lu_tgt_descs_del(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
+void ltd_del_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
{
+ lu_qos_del_tgt(&ltd->ltd_qos, tgt);
LTD_TGT(ltd, tgt->ltd_index) = NULL;
cfs_bitmap_clear(ltd->ltd_tgt_bitmap, tgt->ltd_index);
- ltd->ltd_tgtnr--;
+ ltd->ltd_lov_desc.ld_tgt_count--;
+ if (tgt->ltd_active)
+ ltd->ltd_lov_desc.ld_active_tgt_count--;
+}
+EXPORT_SYMBOL(ltd_del_tgt);
+
+/**
+ * Whether QoS data is up-to-date and QoS can be applied.
+ */
+bool ltd_qos_is_usable(struct lu_tgt_descs *ltd)
+{
+ if (!ltd->ltd_qos.lq_dirty && ltd->ltd_qos.lq_same_space)
+ return false;
+
+ if (ltd->ltd_lov_desc.ld_active_tgt_count < 2)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL(ltd_qos_is_usable);
+
+/**
+ * Calculate penalties per-tgt and per-server
+ *
+ * Re-calculate penalties when the configuration changes, active targets
+ * change and after statfs refresh (all these are reflected by lq_dirty flag).
+ * On every tgt and server: decay the penalty by half for every 8x the update
+ * interval that the device has been idle. That gives lots of time for the
+ * statfs information to be updated (which the penalty is only a proxy for),
+ * and avoids penalizing server/tgt under light load.
+ * See lu_tgt_qos_weight_calc() for how penalties are factored into the weight.
+ *
+ * \param[in] ltd lu_tgt_descs
+ *
+ * \retval 0 on success
+ * \retval -EAGAIN the number of tgt isn't enough or all tgt spaces are
+ * almost the same
+ */
+int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd)
+{
+ struct lu_qos *qos = &ltd->ltd_qos;
+ struct lov_desc *desc = &ltd->ltd_lov_desc;
+ struct lu_tgt_desc *tgt;
+ struct lu_svr_qos *svr;
+ __u64 ba_max, ba_min, ba;
+ __u64 ia_max, ia_min, ia = 1;
+ __u32 num_active;
+ int prio_wide;
+ time64_t now, age;
+ int rc;
+
+ ENTRY;
+
+ if (!qos->lq_dirty)
+ GOTO(out, rc = 0);
+
+ num_active = desc->ld_active_tgt_count - 1;
+ if (num_active < 1)
+ GOTO(out, rc = -EAGAIN);
+
+ /* find bavail on each server */
+ list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
+ svr->lsq_bavail = 0;
+ /* if inode is not counted, set to 1 to ignore */
+ svr->lsq_iavail = ltd->ltd_is_mdt ? 0 : 1;
+ }
+ qos->lq_active_svr_count = 0;
+
+ /*
+ * How badly user wants to select targets "widely" (not recently chosen
+ * and not on recent MDS's). As opposed to "freely" (free space avail.)
+ * 0-256
+ */
+ prio_wide = 256 - qos->lq_prio_free;
+
+ ba_min = (__u64)(-1);
+ ba_max = 0;
+ ia_min = (__u64)(-1);
+ ia_max = 0;
+ now = ktime_get_real_seconds();
+
+ /* Calculate server penalty per object */
+ ltd_foreach_tgt(ltd, tgt) {
+ if (!tgt->ltd_active)
+ continue;
+
+ /* when inode is counted, bavail >> 16 to avoid overflow */
+ ba = tgt_statfs_bavail(tgt);
+ if (ltd->ltd_is_mdt)
+ ba >>= 16;
+ else
+ ba >>= 8;
+ if (!ba)
+ continue;
+
+ ba_min = min(ba, ba_min);
+ ba_max = max(ba, ba_max);
+
+ /* Count the number of usable servers */
+ if (tgt->ltd_qos.ltq_svr->lsq_bavail == 0)
+ qos->lq_active_svr_count++;
+ tgt->ltd_qos.ltq_svr->lsq_bavail += ba;
+
+ if (ltd->ltd_is_mdt) {
+ /* iavail >> 8 to avoid overflow */
+ ia = tgt_statfs_iavail(tgt) >> 8;
+ if (!ia)
+ continue;
+
+ ia_min = min(ia, ia_min);
+ ia_max = max(ia, ia_max);
+
+ tgt->ltd_qos.ltq_svr->lsq_iavail += ia;
+ }
+
+ /*
+ * per-tgt penalty is
+ * prio * bavail * iavail / (num_tgt - 1) / 2
+ */
+ tgt->ltd_qos.ltq_penalty_per_obj = prio_wide * ba * ia;
+ do_div(tgt->ltd_qos.ltq_penalty_per_obj, num_active);
+ tgt->ltd_qos.ltq_penalty_per_obj >>= 1;
+
+ age = (now - tgt->ltd_qos.ltq_used) >> 3;
+ if (qos->lq_reset || age > 32 * desc->ld_qos_maxage)
+ tgt->ltd_qos.ltq_penalty = 0;
+ else if (age > desc->ld_qos_maxage)
+ /* Decay tgt penalty. */
+ tgt->ltd_qos.ltq_penalty >>= age / desc->ld_qos_maxage;
+ }
+
+ num_active = qos->lq_active_svr_count - 1;
+ if (num_active < 1) {
+ /*
+ * If there's only 1 server, we can't penalize it, so instead
+ * we have to double the tgt penalty
+ */
+ num_active = 1;
+ ltd_foreach_tgt(ltd, tgt) {
+ if (!tgt->ltd_active)
+ continue;
+
+ tgt->ltd_qos.ltq_penalty_per_obj <<= 1;
+ }
+ }
+
+ /*
+ * Per-server penalty is
+ * prio * bavail * iavail / server_tgts / (num_svr - 1) / 2
+ */
+ list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
+ ba = svr->lsq_bavail;
+ ia = svr->lsq_iavail;
+ svr->lsq_penalty_per_obj = prio_wide * ba * ia;
+ do_div(ba, svr->lsq_tgt_count * num_active);
+ svr->lsq_penalty_per_obj >>= 1;
+
+ age = (now - svr->lsq_used) >> 3;
+ if (qos->lq_reset || age > 32 * desc->ld_qos_maxage)
+ svr->lsq_penalty = 0;
+ else if (age > desc->ld_qos_maxage)
+ /* Decay server penalty. */
+ svr->lsq_penalty >>= age / desc->ld_qos_maxage;
+ }
+
+ qos->lq_dirty = 0;
+ qos->lq_reset = 0;
+
+ /*
+ * If each tgt has almost same free space, do rr allocation for better
+ * creation performance
+ */
+ qos->lq_same_space = 0;
+ if ((ba_max * (256 - qos->lq_threshold_rr)) >> 8 < ba_min &&
+ (ia_max * (256 - qos->lq_threshold_rr)) >> 8 < ia_min) {
+ qos->lq_same_space = 1;
+ /* Reset weights for the next time we enter qos mode */
+ qos->lq_reset = 1;
+ }
+ rc = 0;
+
+out:
+ if (!rc && qos->lq_same_space)
+ RETURN(-EAGAIN);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(ltd_qos_penalties_calc);
+
+/**
+ * Re-calculate penalties and weights of all tgts.
+ *
+ * The function is called when some target was used for a new object. In
+ * this case we should re-calculate all the weights to keep new allocations
+ * balanced well.
+ *
+ * \param[in] ltd lu_tgt_descs
+ * \param[in] tgt recently used tgt
+ * \param[out] total_wt new total weight for the pool
+ *
+ * \retval 0
+ */
+int ltd_qos_update(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt,
+ __u64 *total_wt)
+{
+ struct lu_qos *qos = &ltd->ltd_qos;
+ struct lu_tgt_qos *ltq;
+ struct lu_svr_qos *svr;
+
+ ENTRY;
+
+ ltq = &tgt->ltd_qos;
+ LASSERT(ltq);
+
+ /* Don't allocate on this device anymore, until the next alloc_qos */
+ ltq->ltq_usable = 0;
+
+ svr = ltq->ltq_svr;
+
+ /*
+ * Decay old penalty by half (we're adding max penalty, and don't
+ * want it to run away.)
+ */
+ ltq->ltq_penalty >>= 1;
+ svr->lsq_penalty >>= 1;
+
+ /* mark the server and tgt as recently used */
+ ltq->ltq_used = svr->lsq_used = ktime_get_real_seconds();
+
+ /* Set max penalties for this tgt and server */
+ ltq->ltq_penalty += ltq->ltq_penalty_per_obj *
+ ltd->ltd_lov_desc.ld_active_tgt_count;
+ svr->lsq_penalty += svr->lsq_penalty_per_obj *
+ ltd->ltd_lov_desc.ld_active_tgt_count;
+
+ /* Decrease all MDS penalties */
+ list_for_each_entry(svr, &qos->lq_svr_list, lsq_svr_list) {
+ if (svr->lsq_penalty < svr->lsq_penalty_per_obj)
+ svr->lsq_penalty = 0;
+ else
+ svr->lsq_penalty -= svr->lsq_penalty_per_obj;
+ }
+
+ *total_wt = 0;
+ /* Decrease all tgt penalties */
+ ltd_foreach_tgt(ltd, tgt) {
+ if (!tgt->ltd_active)
+ continue;
+
+ if (ltq->ltq_penalty < ltq->ltq_penalty_per_obj)
+ ltq->ltq_penalty = 0;
+ else
+ ltq->ltq_penalty -= ltq->ltq_penalty_per_obj;
+
+ lu_tgt_qos_weight_calc(tgt);
+
+ /* Recalc the total weight of usable osts */
+ if (ltq->ltq_usable)
+ *total_wt += ltq->ltq_weight;
+
+ CDEBUG(D_OTHER, "recalc tgt %d usable=%d avail=%llu tgtppo=%llu tgtp=%llu svrppo=%llu svrp=%llu wt=%llu\n",
+ tgt->ltd_index, ltq->ltq_usable,
+ tgt_statfs_bavail(tgt) >> 10,
+ ltq->ltq_penalty_per_obj >> 10,
+ ltq->ltq_penalty >> 10,
+ ltq->ltq_svr->lsq_penalty_per_obj >> 10,
+ ltq->ltq_svr->lsq_penalty >> 10,
+ ltq->ltq_weight >> 10);
+ }
+
+ RETURN(0);
}
-EXPORT_SYMBOL(lu_tgt_descs_del);
+EXPORT_SYMBOL(ltd_qos_update);