- osp_statfs() returns -ENOTCONN if the corresponding OST is found
to be not connected. This lets us remove a few additional checks in
the allocation policy functions.
- struct obd_statfs gets a new field: os_fprecreated.
LOD uses this to skip OSPs with no objects ready to use.
- osp_statfs() returns the number of already precreated objects
in the new os_fprecreated field
- OSTs in OS_STATE_DEGRADED are skipped on the first 2 passes of the RR policy
- lod_alloc_specific() now verifies and skips OSPs already used in
the striping
Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Change-Id: I86351bc1dcca7182bc5adf4eb3e03c054e33e95f
Reviewed-on: http://review.whamcloud.com/4242
Tested-by: Hudson
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Mike Pershin <tappro@whamcloud.com>
Reviewed-by: Johann Lombardi <johann.lombardi@intel.com>
__u32 os_namelen;
__u64 os_maxbytes;
__u32 os_state; /**< obd_statfs_state OS_STATE_* flag */
__u32 os_namelen;
__u64 os_maxbytes;
__u32 os_state; /**< obd_statfs_state OS_STATE_* flag */
+ __u32 os_fprecreated; /* objs available now to the caller */
+ /* used in QoS code to find preferred
+ * OSTs */
__u32 os_spare2;
__u32 os_spare3;
__u32 os_spare4;
__u32 os_spare2;
__u32 os_spare3;
__u32 os_spare4;
LASSERT(ost);
rc = dt_statfs(env, ost->ltd_ost, sfs);
LASSERT(ost);
rc = dt_statfs(env, ost->ltd_ost, sfs);
+ if (rc && rc != -ENOTCONN)
+ CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc);
/* check whether device has changed state (active, inactive) */
/* check whether device has changed state (active, inactive) */
- if (unlikely(sfs->os_blocks == 0 && ost->ltd_active)) {
+ if (rc != 0 && ost->ltd_active) {
/* turned inactive? */
cfs_spin_lock(&d->lod_desc_lock);
/* turned inactive? */
cfs_spin_lock(&d->lod_desc_lock);
- if (sfs->os_blocks == 0 && ost->ltd_active) {
ost->ltd_active = 0;
LASSERT(d->lod_desc.ld_active_tgt_count > 0);
d->lod_desc.ld_active_tgt_count--;
ost->ltd_active = 0;
LASSERT(d->lod_desc.ld_active_tgt_count > 0);
d->lod_desc.ld_active_tgt_count--;
ost->ltd_exp->exp_obd->obd_name);
}
cfs_spin_unlock(&d->lod_desc_lock);
ost->ltd_exp->exp_obd->obd_name);
}
cfs_spin_unlock(&d->lod_desc_lock);
- } else if (unlikely(sfs->os_blocks && ost->ltd_active == 0)) {
+ } else if (rc == 0 && ost->ltd_active == 0) {
/* turned active? */
LASSERT(d->lod_desc.ld_active_tgt_count < d->lod_ostnr);
cfs_spin_lock(&d->lod_desc_lock);
/* turned active? */
LASSERT(d->lod_desc.ld_active_tgt_count < d->lod_ostnr);
cfs_spin_lock(&d->lod_desc_lock);
- if (sfs->os_blocks && ost->ltd_active == 0) {
+ if (ost->ltd_active == 0) {
ost->ltd_active = 1;
d->lod_desc.ld_active_tgt_count++;
d->lod_qos.lq_dirty = 1;
ost->ltd_active = 1;
d->lod_desc.ld_active_tgt_count++;
d->lod_qos.lq_dirty = 1;
-/*
- * Update statfs data if the current osfs age is older than max_age.
- * If wait is not set, it means that we are called from lov_create()
- * and we should just issue the rpcs without waiting for them to complete.
- * If wait is set, we are called from alloc_qos() and we just have
- * to wait for the request set to complete.
- */
static void lod_qos_statfs_update(const struct lu_env *env,
struct lod_device *lod)
{
static void lod_qos_statfs_update(const struct lu_env *env,
struct lod_device *lod)
{
__u64 max_age, avail;
ENTRY;
__u64 max_age, avail;
ENTRY;
- max_age = cfs_time_shift_64(-2*lod->lod_desc.ld_qos_maxage);
+ max_age = cfs_time_shift_64(-2 * lod->lod_desc.ld_qos_maxage);
if (cfs_time_beforeq_64(max_age, obd->obd_osfs_age))
/* statfs data are quite recent, don't need to refresh it */
if (cfs_time_beforeq_64(max_age, obd->obd_osfs_age))
/* statfs data are quite recent, don't need to refresh it */
avail = OST_TGT(lod,idx)->ltd_statfs.os_bavail;
rc = lod_statfs_and_check(env, lod, idx,
&OST_TGT(lod,idx)->ltd_statfs);
avail = OST_TGT(lod,idx)->ltd_statfs.os_bavail;
rc = lod_statfs_and_check(env, lod, idx,
&OST_TGT(lod,idx)->ltd_statfs);
- if (rc) {
- /* XXX: disable this OST till next refresh? */
- CERROR("can't refresh statfs: %d\n", rc);
if (OST_TGT(lod,idx)->ltd_statfs.os_bavail != avail)
/* recalculate weigths */
lod->lod_qos.lq_dirty = 1;
if (OST_TGT(lod,idx)->ltd_statfs.os_bavail != avail)
/* recalculate weigths */
lod->lod_qos.lq_dirty = 1;
oss->lqo_bavail = 0;
lod->lod_qos.lq_active_oss_count = 0;
oss->lqo_bavail = 0;
lod->lod_qos.lq_active_oss_count = 0;
- /* How badly user wants to select osts "widely" (not recently chosen
- and not on recent oss's). As opposed to "freely" (free space
- avail.) 0-256. */
+ /*
+ * How badly user wants to select OSTs "widely" (not recently chosen
+ * and not on recent OSS's). As opposed to "freely" (free space
+ * avail.) 0-256
+ */
prio_wide = 256 - lod->lod_qos.lq_prio_free;
ba_min = (__u64)(-1);
prio_wide = 256 - lod->lod_qos.lq_prio_free;
ba_min = (__u64)(-1);
age = (now - OST_TGT(lod,i)->ltd_qos.ltq_used) >> 3;
if (lod->lod_qos.lq_reset ||
age = (now - OST_TGT(lod,i)->ltd_qos.ltq_used) >> 3;
if (lod->lod_qos.lq_reset ||
- age > 32 * lod->lod_desc.ld_qos_maxage)
+ age > 32 * lod->lod_desc.ld_qos_maxage)
OST_TGT(lod,i)->ltd_qos.ltq_penalty = 0;
else if (age > lod->lod_desc.ld_qos_maxage)
/* Decay the penalty by half for every 8x the update
OST_TGT(lod,i)->ltd_qos.ltq_penalty = 0;
else if (age > lod->lod_desc.ld_qos_maxage)
/* Decay the penalty by half for every 8x the update
-/* Allocate objects on osts with round-robin algorithm */
+/* Allocate objects on OSTs with round-robin algorithm */
static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo,
int flags, struct thandle *th)
{
static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo,
int flags, struct thandle *th)
{
lqr->lqr_offset_idx, osts->op_count, osts->op_count,
array_idx);
lqr->lqr_offset_idx, osts->op_count, osts->op_count,
array_idx);
- for (i = 0; i < osts->op_count;
- i++, array_idx = (array_idx + 1) % osts->op_count) {
+ for (i = 0; i < osts->op_count && stripe_idx < lo->ldo_stripenr;
+ i++, array_idx = (array_idx + 1) % osts->op_count) {
++lqr->lqr_start_idx;
ost_idx = lqr->lqr_pool.op_array[array_idx];
++lqr->lqr_start_idx;
ost_idx = lqr->lqr_pool.op_array[array_idx];
stripe_idx, array_idx, ost_idx);
if ((ost_idx == LOV_QOS_EMPTY) ||
stripe_idx, array_idx, ost_idx);
if ((ost_idx == LOV_QOS_EMPTY) ||
- !cfs_bitmap_check(m->lod_ost_bitmap, ost_idx))
+ !cfs_bitmap_check(m->lod_ost_bitmap, ost_idx))
continue;
/* Fail Check before osc_precreate() is called
continue;
/* Fail Check before osc_precreate() is called
rc = lod_statfs_and_check(env, m, ost_idx, sfs);
if (rc) {
/* this OSP doesn't feel well */
rc = lod_statfs_and_check(env, m, ost_idx, sfs);
if (rc) {
/* this OSP doesn't feel well */
- CERROR("can't statfs #%u: %d\n", ost_idx, rc);
- continue;
- }
-
- /*
- * skip empty devices - usually it means inactive device
- */
- if (sfs->os_blocks == 0) {
- QOS_DEBUG("#%d: inactive\n", ost_idx);
* We expect number of precreated objects in f_ffree at
* the first iteration, skip OSPs with no objects ready
*/
* We expect number of precreated objects in f_ffree at
* the first iteration, skip OSPs with no objects ready
*/
- if (sfs->os_ffree == 0 && speed == 0) {
+ if (sfs->os_fprecreated == 0 && speed == 0) {
QOS_DEBUG("#%d: precreation is empty\n", ost_idx);
continue;
}
QOS_DEBUG("#%d: precreation is empty\n", ost_idx);
continue;
}
/*
* try to use another OSP if this one is degraded
*/
/*
* try to use another OSP if this one is degraded
*/
- if (sfs->os_state == OS_STATE_DEGRADED && speed == 0) {
+ if (sfs->os_state == OS_STATE_DEGRADED && speed < 2) {
QOS_DEBUG("#%d: degraded\n", ost_idx);
continue;
}
QOS_DEBUG("#%d: degraded\n", ost_idx);
continue;
}
lo->ldo_stripe[stripe_idx] = o;
stripe_idx++;
lo->ldo_stripe[stripe_idx] = o;
stripe_idx++;
- /* We have enough stripes */
- if (stripe_idx == lo->ldo_stripenr)
- break;
}
if ((speed < 2) && (stripe_idx < stripe_cnt_min)) {
/* Try again, allowing slower OSCs */
}
if ((speed < 2) && (stripe_idx < stripe_cnt_min)) {
/* Try again, allowing slower OSCs */
struct ost_pool *osts;
ENTRY;
struct ost_pool *osts;
ENTRY;
+ rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
+ if (rc)
+ GOTO(out, rc);
+
if (lo->ldo_pool)
pool = lod_find_pool(m, lo->ldo_pool);
if (lo->ldo_pool)
pool = lod_find_pool(m, lo->ldo_pool);
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && ost_idx == 0)
continue;
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && ost_idx == 0)
continue;
+ /*
+ * do not put >1 objects on a single OST
+ */
+ if (lod_qos_is_ost_used(env, ost_idx, stripe_num))
+ continue;
+
/* Drop slow OSCs if we can, but not for requested start idx.
*
* This means "if OSC is slow and it is not the requested
/* Drop slow OSCs if we can, but not for requested start idx.
*
* This means "if OSC is slow and it is not the requested
rc = lod_statfs_and_check(env, m, ost_idx, sfs);
if (rc) {
/* this OSP doesn't feel well */
rc = lod_statfs_and_check(env, m, ost_idx, sfs);
if (rc) {
/* this OSP doesn't feel well */
- CERROR("can't statfs #%u: %d\n", ost_idx, rc);
- * skip empty devices - usually it means inactive device
- */
- if (sfs->os_blocks == 0)
- continue;
-
- /*
* We expect number of precreated objects in f_ffree at
* the first iteration, skip OSPs with no objects ready
* don't apply this logic to OST specified with stripe_offset
*/
* We expect number of precreated objects in f_ffree at
* the first iteration, skip OSPs with no objects ready
* don't apply this logic to OST specified with stripe_offset
*/
- if (i != 0 && sfs->os_ffree == 0 && speed == 0)
+ if (i != 0 && sfs->os_fprecreated == 0 && speed == 0)
continue;
o = lod_qos_declare_object_on(env, m, ost_idx, th);
continue;
o = lod_qos_declare_object_on(env, m, ost_idx, th);
-/* Alloc objects on osts with optimization based on:
+/* Alloc objects on OSTs with optimization based on:
- free space
- network resources (shared OSS's)
*/
- free space
- network resources (shared OSS's)
*/
rc = lod_statfs_and_check(env, m, osts->op_array[i], sfs);
if (rc) {
/* this OSP doesn't feel well */
rc = lod_statfs_and_check(env, m, osts->op_array[i], sfs);
if (rc) {
/* this OSP doesn't feel well */
- CERROR("can't statfs #%u: %d\n", i, rc);
- * skip empty devices - usually it means inactive device
- */
- if (sfs->os_blocks == 0)
- continue;
-
- /*
* skip full devices
*/
if (lod_qos_dev_is_full(sfs))
* skip full devices
*/
if (lod_qos_dev_is_full(sfs))
struct obd_device *obd = p->private;
struct lod_ost_desc *ost_desc = v;
struct lod_device *lod;
struct obd_device *obd = p->private;
struct lod_ost_desc *ost_desc = v;
struct lod_device *lod;
struct dt_device *next;
struct obd_statfs sfs;
struct dt_device *next;
struct obd_statfs sfs;
return -EINVAL;
/* XXX: should be non-NULL env, but it's very expensive */
return -EINVAL;
/* XXX: should be non-NULL env, but it's very expensive */
rc = dt_statfs(NULL, next, &sfs);
rc = dt_statfs(NULL, next, &sfs);
+ if (rc == -ENOTCONN) {
+ active = 0;
+ rc = 0;
+ } else if (rc)
return rc;
return seq_printf(p, "%d: %s %sACTIVE\n", idx,
obd_uuid2str(&ost_desc->ltd_uuid),
return rc;
return seq_printf(p, "%d: %s %sACTIVE\n", idx,
obd_uuid2str(&ost_desc->ltd_uuid),
- sfs.os_blocks > 0 ? "" : "IN");
}
struct seq_operations lod_osts_sops = {
}
struct seq_operations lod_osts_sops = {
- if (unlikely(d->opd_imp_active == 0)) {
- /*
- * in case of inactive OST we return nulls
- * so that caller can understand this device
- * is unusable for new objects
- *
- * XXX: shouldn't we take normal statfs and fill
- * just few specific fields with zeroes?
- */
- memset(sfs, 0, sizeof(*sfs));
- sfs->os_bsize = 4096;
- RETURN(0);
- }
+ if (unlikely(d->opd_imp_active == 0))
+ RETURN(-ENOTCONN);
/* return recently updated data */
*sfs = d->opd_statfs;
/* return recently updated data */
*sfs = d->opd_statfs;
* how many objects are available for immediate creation
*/
cfs_spin_lock(&d->opd_pre_lock);
* how many objects are available for immediate creation
*/
cfs_spin_lock(&d->opd_pre_lock);
- sfs->os_ffree = d->opd_pre_last_created - d->opd_pre_next;
+ sfs->os_fprecreated = d->opd_pre_last_created - d->opd_pre_next;
cfs_spin_unlock(&d->opd_pre_lock);
CDEBUG(D_OTHER, "%s: "LPU64" blocks, "LPU64" free, "LPU64" avail, "
cfs_spin_unlock(&d->opd_pre_lock);
CDEBUG(D_OTHER, "%s: "LPU64" blocks, "LPU64" free, "LPU64" avail, "
__swab32s (&os->os_namelen);
__swab64s (&os->os_maxbytes);
__swab32s (&os->os_state);
__swab32s (&os->os_namelen);
__swab64s (&os->os_maxbytes);
__swab32s (&os->os_state);
- CLASSERT(offsetof(typeof(*os), os_spare1) != 0);
+ CLASSERT(offsetof(typeof(*os), os_fprecreated) != 0);
CLASSERT(offsetof(typeof(*os), os_spare2) != 0);
CLASSERT(offsetof(typeof(*os), os_spare3) != 0);
CLASSERT(offsetof(typeof(*os), os_spare4) != 0);
CLASSERT(offsetof(typeof(*os), os_spare2) != 0);
CLASSERT(offsetof(typeof(*os), os_spare3) != 0);
CLASSERT(offsetof(typeof(*os), os_spare4) != 0);
(long long)(int)offsetof(struct obd_statfs, os_state));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
(long long)(int)offsetof(struct obd_statfs, os_state));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare1) == 108, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare1));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare1));
+ LASSERTF((int)offsetof(struct obd_statfs, os_fprecreated) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
(long long)(int)offsetof(struct obd_statfs, os_spare2));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
(long long)(int)offsetof(struct obd_statfs, os_spare2));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
(long long)(int)offsetof(struct obd_statfs, os_state));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
(long long)(int)offsetof(struct obd_statfs, os_state));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n",
(long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
- LASSERTF((int)offsetof(struct obd_statfs, os_spare1) == 108, "found %lld\n",
- (long long)(int)offsetof(struct obd_statfs, os_spare1));
- LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare1) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare1));
+ LASSERTF((int)offsetof(struct obd_statfs, os_fprecreated) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
(long long)(int)offsetof(struct obd_statfs, os_spare2));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
(long long)(int)offsetof(struct obd_statfs, os_spare2));
LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",