*/
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
struct lu_tgt_descs *ltd,
- struct lu_tgt_desc *tgt)
+ struct lu_tgt_desc *tgt, __u64 reserve)
{
+ struct obd_statfs_info info = { 0 };
	struct lov_desc *desc = &ltd->ltd_lov_desc;
int rc;
ENTRY;
LASSERT(d);
LASSERT(tgt);
- rc = dt_statfs(env, tgt->ltd_tgt, &tgt->ltd_statfs);
+ info.os_enable_pre = 1;
+ rc = dt_statfs_info(env, tgt->ltd_tgt, &tgt->ltd_statfs, &info);
if (rc && rc != -ENOTCONN)
CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc);
return rc;
}
+ if (reserve &&
+ (reserve + (info.os_reserved_mb_low << 20) >
+ tgt->ltd_statfs.os_bavail * tgt->ltd_statfs.os_bsize))
+ return -ENOSPC;
+
/* check whether device has changed state (active, inactive) */
if (rc != 0 && tgt->ltd_active) {
/* turned inactive? */
LASSERT(desc->ld_active_tgt_count > 0);
desc->ld_active_tgt_count--;
- ltd->ltd_qos.lq_dirty = 1;
- ltd->ltd_qos.lq_rr.lqr_dirty = 1;
+ set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
+ set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_rr.lqr_flags);
CDEBUG(D_CONFIG, "%s: turns inactive\n",
tgt->ltd_exp->exp_obd->obd_name);
}
tgt->ltd_active = 1;
tgt->ltd_connecting = 0;
desc->ld_active_tgt_count++;
- ltd->ltd_qos.lq_dirty = 1;
- ltd->ltd_qos.lq_rr.lqr_dirty = 1;
+ set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
+ set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_rr.lqr_flags);
CDEBUG(D_CONFIG, "%s: turns active\n",
tgt->ltd_exp->exp_obd->obd_name);
}
ltd_foreach_tgt(ltd, tgt) {
avail = tgt->ltd_statfs.os_bavail;
- if (lod_statfs_and_check(env, lod, ltd, tgt))
+ if (lod_statfs_and_check(env, lod, ltd, tgt, 0))
continue;
if (tgt->ltd_statfs.os_bavail != avail)
/* recalculate weigths */
- ltd->ltd_qos.lq_dirty = 1;
+ set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
}
obd->obd_osfs_age = ktime_get_seconds();
int rc;
ENTRY;
- if (!lqr->lqr_dirty) {
+ if (!test_bit(LQ_DIRTY, &lqr->lqr_flags)) {
LASSERT(lqr->lqr_pool.op_size);
RETURN(0);
}
* Check again. While we were sleeping on @lq_rw_sem something could
* change.
*/
- if (!lqr->lqr_dirty) {
+ if (!test_bit(LQ_DIRTY, &lqr->lqr_flags)) {
LASSERT(lqr->lqr_pool.op_size);
up_write(&ltd->ltd_qos.lq_rw_sem);
RETURN(0);
}
}
- lqr->lqr_dirty = 0;
+ clear_bit(LQ_DIRTY, &lqr->lqr_flags);
up_write(&ltd->ltd_qos.lq_rw_sem);
if (placed != real_count) {
LCONSOLE(D_WARNING, "rr #%d tgt idx=%d\n", i,
lqr->lqr_pool.op_array[i]);
}
- lqr->lqr_dirty = 1;
+ set_bit(LQ_DIRTY, &lqr->lqr_flags);
RETURN(-EAGAIN);
}
static struct dt_object *lod_qos_declare_object_on(const struct lu_env *env,
struct lod_device *d,
__u32 ost_idx,
+ bool can_block,
struct thandle *th)
{
+ struct dt_allocation_hint *ah = &lod_env_info(env)->lti_ah;
struct lod_tgt_desc *ost;
struct lu_object *o, *n;
struct lu_device *nd;
*/
o = lu_object_anon(env, nd, NULL);
if (IS_ERR(o))
- GOTO(out, dt = ERR_PTR(PTR_ERR(o)));
+ GOTO(out, dt = ERR_CAST(o));
n = lu_object_locate(o->lo_header, nd->ld_type);
if (unlikely(n == NULL)) {
dt = container_of(n, struct dt_object, do_lu);
- rc = lod_sub_declare_create(env, dt, NULL, NULL, NULL, th);
+ ah->dah_can_block = can_block;
+ rc = lod_sub_declare_create(env, dt, NULL, ah, NULL, th);
if (rc < 0) {
CDEBUG(D_OTHER, "can't declare creation on #%u: %d\n",
ost_idx, rc);
return false;
/* if the OSS has been used, check whether the OST has been used */
- if (!cfs_bitmap_check(lag->lag_ost_avoid_bitmap, index))
+ if (!test_bit(index, lag->lag_ost_avoid_bitmap))
used = false;
else
QOS_DEBUG("OST%d: been used in conflicting mirror component\n",
struct dt_object **stripe,
__u32 *ost_indices,
struct thandle *th,
- bool *overstriped)
+ bool *overstriped,
+ __u64 reserve)
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
ENTRY;
- rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost, reserve);
if (rc)
RETURN(rc);
RETURN(rc);
}
- o = lod_qos_declare_object_on(env, lod, ost_idx, th);
+ o = lod_qos_declare_object_on(env, lod, ost_idx, true, th);
if (IS_ERR(o)) {
CDEBUG(D_OTHER, "can't declare new object on #%u: %d\n",
ost_idx, (int) PTR_ERR(o));
*/
static int lod_ost_alloc_rr(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
spin_unlock(&lqr->lqr_alloc);
rc = lod_check_and_reserve_ost(env, lo, lod_comp, ost_idx,
speed, &stripe_idx, stripe,
- ost_indices, th, &overstriped);
+ ost_indices, th, &overstriped,
+ reserve);
spin_lock(&lqr->lqr_alloc);
if (rc != 0 && OST_TGT(m, ost_idx)->ltd_connecting)
*/
static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- struct thandle *th, int comp_idx)
+ struct thandle *th, int comp_idx, __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
}
rc = lod_statfs_and_check(env, m, &m->lod_ost_descs,
- LTD_TGT(&m->lod_ost_descs, ost_idx));
+ LTD_TGT(&m->lod_ost_descs, ost_idx),
+ reserve);
if (rc < 0) /* this OSP doesn't feel well */
break;
- o = lod_qos_declare_object_on(env, m, ost_idx, th);
+ o = lod_qos_declare_object_on(env, m, ost_idx, true, th);
if (IS_ERR(o)) {
rc = PTR_ERR(o);
CDEBUG(D_OTHER,
static int lod_ost_alloc_specific(const struct lu_env *env,
struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
* start OST, then it can be skipped, otherwise skip it only
* if it is inactive/recovering/out-of-space." */
- rc = lod_statfs_and_check(env, m, &m->lod_ost_descs, tgt);
+ rc = lod_statfs_and_check(env, m, &m->lod_ost_descs,
+ tgt, reserve);
if (rc) {
/* this OSP doesn't feel well */
continue;
if (i && !tgt->ltd_statfs.os_fprecreated && !speed)
continue;
- o = lod_qos_declare_object_on(env, m, ost_idx, th);
+ o = lod_qos_declare_object_on(env, m, ost_idx, true, th);
if (IS_ERR(o)) {
CDEBUG(D_OTHER, "can't declare new object on #%u: %d\n",
ost_idx, (int) PTR_ERR(o));
*/
static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
__u32 nfound, good_osts, stripe_count, stripe_count_min;
bool overstriped = false;
int stripes_per_ost = 1;
+ bool slow = false;
int rc = 0;
ENTRY;
ost = OST_TGT(lod, osts->op_array[i]);
ost->ltd_qos.ltq_usable = 0;
- rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs,
+ ost, reserve);
if (rc) {
/* this OSP doesn't feel well */
continue;
*/
for (i = 0; i < osts->op_count; i++) {
__u32 idx = osts->op_array[i];
+ struct lod_tgt_desc *ost = OST_TGT(lod, idx);
if (lod_should_avoid_ost(lo, lag, idx))
continue;
continue;
}
- o = lod_qos_declare_object_on(env, lod, idx, th);
+ o = lod_qos_declare_object_on(env, lod, idx, slow, th);
if (IS_ERR(o)) {
QOS_DEBUG("can't declare object on #%u: %d\n",
idx, (int) PTR_ERR(o));
break;
}
+ if (rc && !slow && nfound < stripe_count) {
+ /* couldn't allocate using precreated objects
+ * so try to wait for new precreations */
+ slow = true;
+ rc = 0;
+ }
+
if (rc) {
/* no OST found on this iteration, give up */
break;
}
/* makes sense to rebalance next time */
- lod->lod_ost_descs.ltd_qos.lq_dirty = 1;
- lod->lod_ost_descs.ltd_qos.lq_same_space = 0;
-
+ set_bit(LQ_DIRTY, &lod->lod_ost_descs.ltd_qos.lq_flags);
+ clear_bit(LQ_SAME_SPACE, &lod->lod_ost_descs.ltd_qos.lq_flags);
rc = -EAGAIN;
}
}
/* makes sense to rebalance next time */
- ltd->ltd_qos.lq_dirty = 1;
- ltd->ltd_qos.lq_same_space = 0;
+ set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
+ clear_bit(LQ_SAME_SPACE, &ltd->ltd_qos.lq_flags);
rc = -EAGAIN;
} else {
*
* \param[in] lod LOD device
* \param[in] lo The lod_object
+ * \param[in] comp_idx The component id, which the amount of stripes is
+ calculated for
* \param[in] stripe_count count the caller would like to use
*
* \retval the maximum usable stripe count
*/
__u16 lod_get_stripe_count(struct lod_device *lod, struct lod_object *lo,
- __u16 stripe_count, bool overstriping)
+ int comp_idx, __u16 stripe_count, bool overstriping)
{
__u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD;
/* max stripe count is based on OSD ea size */
unsigned int easize = lod->lod_osd_max_easize;
int i;
-
if (!stripe_count)
stripe_count =
lod->lod_ost_descs.ltd_lov_desc.ld_default_stripe_count;
lo->ldo_comp_cnt;
for (i = 0; i < lo->ldo_comp_cnt; i++) {
+ unsigned int stripes;
+
+ if (i == comp_idx)
+ continue;
+
lod_comp = &lo->ldo_comp_entries[i];
- comp_sz = lov_mds_md_size(lod_comp->llc_stripe_count,
- LOV_MAGIC_V3);
+ /* Extension comp is never inited - 0 stripes on disk */
+ stripes = lod_comp->llc_flags & LCME_FL_EXTENSION ? 0 :
+ lod_comp->llc_stripe_count;
+
+ comp_sz = lov_mds_md_size(stripes, LOV_MAGIC_V3);
total_comp_sz += comp_sz;
if (lod_comp->llc_flags & LCME_FL_INIT)
init_comp_sz += comp_sz;
}
pool_name = NULL;
+ if (def_pool[0] != '\0')
+ pool_name = def_pool;
+
if (v1->lmm_magic == LOV_USER_MAGIC_V3 ||
v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
v3 = (struct lov_user_md_v3 *)v1;
+
if (v3->lmm_pool_name[0] != '\0')
pool_name = v3->lmm_pool_name;
rc = lod_comp_copy_ost_lists(lod_comp, v3);
if (rc)
GOTO(free_comp, rc);
+
+ pool_name = NULL;
}
}
- if (pool_name == NULL && def_pool[0] != '\0')
- pool_name = def_pool;
-
if (v1->lmm_pattern == 0)
v1->lmm_pattern = LOV_PATTERN_RAID0;
if (lov_pattern(v1->lmm_pattern) != LOV_PATTERN_RAID0 &&
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
- struct cfs_bitmap *bitmap = NULL;
+ unsigned long *bitmap = NULL;
__u32 *new_oss = NULL;
lag->lag_ost_avail = lod->lod_ost_count;
/* init OST avoid guide bitmap */
if (lag->lag_ost_avoid_bitmap) {
- if (lod->lod_ost_count <= lag->lag_ost_avoid_bitmap->size) {
- CFS_RESET_BITMAP(lag->lag_ost_avoid_bitmap);
+ if (lod->lod_ost_count <= lag->lag_ost_avoid_size) {
+ bitmap_zero(lag->lag_ost_avoid_bitmap,
+ lag->lag_ost_avoid_size);
} else {
- CFS_FREE_BITMAP(lag->lag_ost_avoid_bitmap);
+ bitmap_free(lag->lag_ost_avoid_bitmap);
lag->lag_ost_avoid_bitmap = NULL;
}
}
if (!lag->lag_ost_avoid_bitmap) {
- bitmap = CFS_ALLOCATE_BITMAP(lod->lod_ost_count);
+ bitmap = bitmap_zalloc(lod->lod_ost_count, GFP_KERNEL);
if (!bitmap)
return -ENOMEM;
}
*/
OBD_ALLOC_PTR_ARRAY(new_oss, lod->lod_ost_count);
if (!new_oss) {
- CFS_FREE_BITMAP(bitmap);
+ bitmap_free(bitmap);
return -ENOMEM;
}
}
lag->lag_oss_avoid_array = new_oss;
lag->lag_oaa_size = lod->lod_ost_count;
}
- if (bitmap)
+ if (bitmap) {
lag->lag_ost_avoid_bitmap = bitmap;
+ lag->lag_ost_avoid_size = lod->lod_ost_count;
+ }
return 0;
}
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lod_layout_component *lod_comp = &lo->ldo_comp_entries[comp_idx];
- struct cfs_bitmap *bitmap = lag->lag_ost_avoid_bitmap;
+ unsigned long *bitmap = lag->lag_ost_avoid_bitmap;
int i, j;
/* iterate mirrors */
ost = OST_TGT(lod, comp->llc_ost_indices[j]);
lsq = ost->ltd_qos.ltq_svr;
- if (cfs_bitmap_check(bitmap, ost->ltd_index))
+ if (test_bit(ost->ltd_index, bitmap))
continue;
QOS_DEBUG("OST%d used in conflicting mirror "
"component\n", ost->ltd_index);
- cfs_bitmap_set(bitmap, ost->ltd_index);
+ set_bit(ost->ltd_index, bitmap);
lag->lag_ost_avail--;
for (k = 0; k < lag->lag_oaa_count; k++) {
*/
int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
struct lu_attr *attr, struct thandle *th,
- int comp_idx)
+ int comp_idx, __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
* could be changed if some OSTs are [de]activated manually.
*/
lod_qos_statfs_update(env, d, &d->lod_ost_descs);
- stripe_len = lod_get_stripe_count(d, lo,
+ stripe_len = lod_get_stripe_count(d, lo, comp_idx,
lod_comp->llc_stripe_count,
lod_comp->llc_pattern &
LOV_PATTERN_OVERSTRIPING);
if (!ost_indices)
GOTO(out, rc = -ENOMEM);
+repeat:
lod_getref(&d->lod_ost_descs);
/* XXX: support for non-0 files w/o objects */
CDEBUG(D_OTHER, "tgt_count %d stripe_count %d\n",
if (lod_comp->llc_ostlist.op_array &&
lod_comp->llc_ostlist.op_count) {
rc = lod_alloc_ost_list(env, lo, stripe, ost_indices,
- th, comp_idx);
+ th, comp_idx, reserve);
} else if (lod_comp->llc_stripe_offset == LOV_OFFSET_DEFAULT) {
/**
* collect OSTs and OSSs used in other mirrors whose
lod_collect_avoidance(lo, lag, comp_idx);
rc = lod_ost_alloc_qos(env, lo, stripe, ost_indices,
- flag, th, comp_idx);
+ flag, th, comp_idx, reserve);
if (rc == -EAGAIN)
rc = lod_ost_alloc_rr(env, lo, stripe,
ost_indices, flag, th,
- comp_idx);
+ comp_idx, reserve);
} else {
rc = lod_ost_alloc_specific(env, lo, stripe,
ost_indices, flag, th,
- comp_idx);
+ comp_idx, reserve);
}
put_ldts:
lod_putref(d, &d->lod_ost_descs);
for (i = 0; i < stripe_len; i++)
if (stripe[i] != NULL)
dt_object_put(env, stripe[i]);
+
+ /* In case there is no space on any OST, let's ignore
+ * the @reserve space to avoid an error at the init
+ * time, probably the actual IO will be less than the
+ * given @reserve space (aka extension_size). */
+ if (reserve) {
+ reserve = 0;
+ goto repeat;
+ }
lod_comp->llc_stripe_count = 0;
} else {
lod_comp->llc_stripe = stripe;
extent = &lod_comp->llc_extent;
QOS_DEBUG("comp[%d] %lld "DEXT"\n", i, size, PEXT(extent));
if (!lo->ldo_is_composite || size >= extent->e_start) {
- rc = lod_qos_prep_create(env, lo, attr, th, i);
+ rc = lod_qos_prep_create(env, lo, attr, th, i, 0);
if (rc)
break;
}