#include <asm/div64.h>
#include <libcfs/libcfs.h>
-#include <obd_class.h>
#include <lustre/lustre_idl.h>
+#include <lustre_swab.h>
+#include <obd_class.h>
+
#include "lod_internal.h"
/*
rc = dt_statfs(env, ost->ltd_ost, sfs);
- if (rc == -ENOSPC)
- RETURN(rc);
+ if (rc == 0 && ((sfs->os_state & OS_STATE_ENOSPC) ||
+ (sfs->os_state & OS_STATE_ENOINO && sfs->os_fprecreated == 0)))
+ RETURN(-ENOSPC);
if (rc && rc != -ENOTCONN)
CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc);
/* check whether device has changed state (active, inactive) */
if (rc != 0 && ost->ltd_active) {
/* turned inactive? */
- spin_lock(&d->lod_desc_lock);
+ spin_lock(&d->lod_lock);
if (ost->ltd_active) {
ost->ltd_active = 0;
+ if (rc == -ENOTCONN)
+ ost->ltd_connecting = 1;
+
LASSERT(d->lod_desc.ld_active_tgt_count > 0);
d->lod_desc.ld_active_tgt_count--;
d->lod_qos.lq_dirty = 1;
CDEBUG(D_CONFIG, "%s: turns inactive\n",
ost->ltd_exp->exp_obd->obd_name);
}
- spin_unlock(&d->lod_desc_lock);
+ spin_unlock(&d->lod_lock);
} else if (rc == 0 && ost->ltd_active == 0) {
/* turned active? */
LASSERTF(d->lod_desc.ld_active_tgt_count < d->lod_ostnr,
"active tgt count %d, ost nr %d\n",
d->lod_desc.ld_active_tgt_count, d->lod_ostnr);
- spin_lock(&d->lod_desc_lock);
+ spin_lock(&d->lod_lock);
if (ost->ltd_active == 0) {
ost->ltd_active = 1;
+ ost->ltd_connecting = 0;
d->lod_desc.ld_active_tgt_count++;
d->lod_qos.lq_dirty = 1;
d->lod_qos.lq_rr.lqr_dirty = 1;
CDEBUG(D_CONFIG, "%s: turns active\n",
ost->ltd_exp->exp_obd->obd_name);
}
- spin_unlock(&d->lod_desc_lock);
+ spin_unlock(&d->lod_lock);
}
RETURN(rc);
if (ost->ltd_qos.ltq_usable)
*total_wt += ost->ltd_qos.ltq_weight;
- QOS_DEBUG("recalc tgt %d usable=%d avail="LPU64
- " ostppo="LPU64" ostp="LPU64" ossppo="LPU64
- " ossp="LPU64" wt="LPU64"\n",
+ QOS_DEBUG("recalc tgt %d usable=%d avail=%llu"
+ " ostppo=%llu ostp=%llu ossppo=%llu"
+ " ossp=%llu wt=%llu\n",
i, ost->ltd_qos.ltq_usable, TGT_BAVAIL(i) >> 10,
ost->ltd_qos.ltq_penalty_per_obj >> 10,
ost->ltd_qos.ltq_penalty >> 10,
/*
* do not put >1 objects on a single OST
*/
- if (speed && lod_qos_is_ost_used(env, ost_idx, stripe_idx))
+ if (lod_qos_is_ost_used(env, ost_idx, stripe_idx))
goto out_return;
o = lod_qos_declare_object_on(env, m, ost_idx, th);
*/
lod_qos_ost_in_use(env, stripe_idx, ost_idx);
stripe[stripe_idx] = o;
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LOV_CREATE_RACE, 2);
stripe_idx++;
*s_idx = stripe_idx;
int rc;
__u32 ost_start_idx_temp;
int speed = 0;
+ int ost_connecting = 0;
__u32 stripe_idx = 0;
__u32 stripe_cnt = lo->ldo_stripenr;
__u32 stripe_cnt_min = min_stripe_count(stripe_cnt, flags);
rc = lod_check_and_reserve_ost(env, m, sfs, ost_idx, speed,
&stripe_idx, stripe, th);
spin_lock(&lqr->lqr_alloc);
+
+ if (rc != 0 && OST_TGT(m, ost_idx)->ltd_connecting)
+ ost_connecting = 1;
}
if ((speed < 2) && (stripe_idx < stripe_cnt_min)) {
/* Try again, allowing slower OSCs */
speed++;
lqr->lqr_start_idx = ost_start_idx_temp;
+
+ ost_connecting = 0;
goto repeat_find;
}
rc = 0;
} else {
/* nobody provided us with a single object */
- rc = -ENOSPC;
+ if (ost_connecting)
+ rc = -EINPROGRESS;
+ else
+ rc = -ENOSPC;
}
out:
v3 = (struct lov_user_md_v3 *)lum;
for (i = 0; i < lo->ldo_stripenr; i++) {
- if (v3->lmm_objects[i].l_ost_idx == lo->ldo_def_stripe_offset) {
+ if (v3->lmm_objects[i].l_ost_idx == lo->ldo_stripe_offset) {
array_idx = i;
break;
}
if (i == lo->ldo_stripenr) {
CDEBUG(D_OTHER,
"%s: start index %d not in the specified list of OSTs\n",
- lod2obd(m)->obd_name, lo->ldo_def_stripe_offset);
+ lod2obd(m)->obd_name, lo->ldo_stripe_offset);
RETURN(-EINVAL);
}
/**
* Allocate a striping on a predefined set of OSTs.
*
- * Allocates new layout starting from OST index in lo->ldo_def_stripe_offset.
+ * Allocates new layout starting from OST index in lo->ldo_stripe_offset.
* Full OSTs are not considered. The exact order of OSTs is not important and
* varies depending on OST status. The allocation procedure prefers the targets
* with precreated objects ready. The number of stripes needed and stripe
/* search loi_ost_idx in ost array */
array_idx = 0;
for (i = 0; i < ost_count; i++) {
- if (osts->op_array[i] == lo->ldo_def_stripe_offset) {
+ if (osts->op_array[i] == lo->ldo_stripe_offset) {
array_idx = i;
break;
}
}
if (i == ost_count) {
CERROR("Start index %d not found in pool '%s'\n",
- lo->ldo_def_stripe_offset,
- lo->ldo_pool ? lo->ldo_pool : "");
+ lo->ldo_stripe_offset, lo->ldo_pool ?: "");
GOTO(out, rc = -EINVAL);
}
struct dt_object **stripe, int flags,
struct thandle *th)
{
- struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
+ struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+ struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
struct lod_tgt_desc *ost;
- struct dt_object *o;
- __u64 total_weight = 0;
- unsigned int i;
- int rc = 0;
- __u32 nfound, good_osts;
- __u32 stripe_cnt = lo->ldo_stripenr;
- __u32 stripe_cnt_min;
- struct pool_desc *pool = NULL;
- struct ost_pool *osts;
+ struct dt_object *o;
+ __u64 total_weight = 0;
+ __u32 nfound, good_osts;
+ __u32 stripe_cnt = lo->ldo_stripenr;
+ __u32 stripe_cnt_min;
+ struct pool_desc *pool = NULL;
+ struct ost_pool *osts;
+ unsigned int i;
+ int rc = 0;
ENTRY;
stripe_cnt_min = min_stripe_count(stripe_cnt, flags);
RETURN(-EINVAL);
if (lo->ldo_pool)
- pool = lod_find_pool(m, lo->ldo_pool);
+ pool = lod_find_pool(lod, lo->ldo_pool);
if (pool != NULL) {
down_read(&pool_tgt_rw_sem(pool));
osts = &(pool->pool_obds);
} else {
- osts = &(m->lod_pool_info);
+ osts = &(lod->lod_pool_info);
}
/* Detect -EAGAIN early, before expensive lock is taken. */
- if (!lod_qos_is_usable(m))
+ if (!lod_qos_is_usable(lod))
GOTO(out_nolock, rc = -EAGAIN);
/* Do actual allocation, use write lock here. */
- down_write(&m->lod_qos.lq_rw_sem);
+ down_write(&lod->lod_qos.lq_rw_sem);
/*
* Check again, while we were sleeping on @lq_rw_sem things could
* change.
*/
- if (!lod_qos_is_usable(m))
+ if (!lod_qos_is_usable(lod))
GOTO(out, rc = -EAGAIN);
- rc = lod_qos_calc_ppo(m);
+ rc = lod_qos_calc_ppo(lod);
if (rc)
GOTO(out, rc);
good_osts = 0;
/* Find all the OSTs that are valid stripe candidates */
for (i = 0; i < osts->op_count; i++) {
- if (!cfs_bitmap_check(m->lod_ost_bitmap, osts->op_array[i]))
+ if (!cfs_bitmap_check(lod->lod_ost_bitmap, osts->op_array[i]))
continue;
- rc = lod_statfs_and_check(env, m, osts->op_array[i], sfs);
+ ost = OST_TGT(lod, osts->op_array[i]);
+ ost->ltd_qos.ltq_usable = 0;
+
+ rc = lod_statfs_and_check(env, lod, osts->op_array[i], sfs);
if (rc) {
/* this OSP doesn't feel well */
continue;
}
+ if (sfs->os_state & OS_STATE_DEGRADED)
+ continue;
+
/* Fail Check before osc_precreate() is called
so we can only 'fail' single OSC. */
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) &&
osts->op_array[i] == 0)
continue;
- ost = OST_TGT(m,osts->op_array[i]);
ost->ltd_qos.ltq_usable = 1;
- lod_qos_calc_weight(m, osts->op_array[i]);
+ lod_qos_calc_weight(lod, osts->op_array[i]);
total_weight += ost->ltd_qos.ltq_weight;
good_osts++;
for (i = 0; i < osts->op_count; i++) {
__u32 idx = osts->op_array[i];
- if (!cfs_bitmap_check(m->lod_ost_bitmap, idx))
+ if (!cfs_bitmap_check(lod->lod_ost_bitmap, idx))
continue;
- ost = OST_TGT(m,idx);
+ ost = OST_TGT(lod, idx);
if (!ost->ltd_qos.ltq_usable)
continue;
cur_weight += ost->ltd_qos.ltq_weight;
- QOS_DEBUG("stripe_cnt=%d nfound=%d cur_weight="LPU64
- " rand="LPU64" total_weight="LPU64"\n",
+ QOS_DEBUG("stripe_cnt=%d nfound=%d cur_weight=%llu"
+ " rand=%llu total_weight=%llu\n",
stripe_cnt, nfound, cur_weight, rand,
total_weight);
continue;
lod_qos_ost_in_use(env, nfound, idx);
- o = lod_qos_declare_object_on(env, m, idx, th);
+ o = lod_qos_declare_object_on(env, lod, idx, th);
if (IS_ERR(o)) {
QOS_DEBUG("can't declare object on #%u: %d\n",
idx, (int) PTR_ERR(o));
continue;
}
stripe[nfound++] = o;
- lod_qos_used(m, osts, idx, &total_weight);
+ lod_qos_used(lod, osts, idx, &total_weight);
rc = 0;
break;
}
* so it's possible OSP won't be able to provide us with
* an object due to just changed state
*/
- LCONSOLE_INFO("wanted %d, found %d\n", stripe_cnt, nfound);
+ QOS_DEBUG("%s: wanted %d objects, found only %d\n",
+ lod2obd(lod)->obd_name, stripe_cnt, nfound);
for (i = 0; i < nfound; i++) {
LASSERT(stripe[i] != NULL);
lu_object_put(env, &stripe[i]->do_lu);
}
/* makes sense to rebalance next time */
- m->lod_qos.lq_dirty = 1;
- m->lod_qos.lq_same_space = 0;
+ lod->lod_qos.lq_dirty = 1;
+ lod->lod_qos.lq_same_space = 0;
rc = -EAGAIN;
}
out:
- up_write(&m->lod_qos.lq_rw_sem);
+ up_write(&lod->lod_qos.lq_rw_sem);
out_nolock:
if (pool != NULL) {
if (v1->lmm_stripe_count > 0)
lo->ldo_stripenr = v1->lmm_stripe_count;
- lo->ldo_def_stripe_offset = v1->lmm_stripe_offset;
+ lo->ldo_stripe_offset = v1->lmm_stripe_offset;
lod_object_set_pool(lo, NULL);
if (pool_name != NULL) {
/* coverity[overrun-buffer-val] */
pool = lod_find_pool(d, pool_name);
if (pool != NULL) {
- if (lo->ldo_def_stripe_offset != LOV_OFFSET_DEFAULT) {
+ if (lo->ldo_stripe_offset != LOV_OFFSET_DEFAULT) {
rc = lod_check_index_in_pool(
- lo->ldo_def_stripe_offset, pool);
+ lo->ldo_stripe_offset, pool);
if (rc < 0) {
lod_pool_putref(pool);
CERROR("%s: invalid offset, %u\n",
lod2obd(d)->obd_name,
- lo->ldo_def_stripe_offset);
+ lo->ldo_stripe_offset);
RETURN(-EINVAL);
}
}
if (lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
rc = lod_alloc_ost_list(env, lo, stripe, lum, th);
- } else if (lo->ldo_def_stripe_offset == LOV_OFFSET_DEFAULT) {
+ } else if (lo->ldo_stripe_offset == LOV_OFFSET_DEFAULT) {
rc = lod_alloc_qos(env, lo, stripe, flag, th);
if (rc == -EAGAIN)
rc = lod_alloc_rr(env, lo, stripe, flag, th);