const struct lu_buf *buf);
int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
struct lu_attr *attr, struct thandle *th,
- int comp_idx);
+ int comp_idx, __u64 reserve);
__u16 lod_comp_entry_stripe_count(struct lod_object *lo,
struct lod_layout_component *entry,
bool is_dir);
new_comp->llc_flags &= ~LCME_FL_INIT;
new_comp->llc_stripe = NULL;
new_comp->llc_stripes_allocated = 0;
+ new_comp->llc_ost_indices = NULL;
new_comp->llc_stripe_offset = LOV_OFFSET_DEFAULT;
/* for uninstantiated components, layout gen stores default stripe
* offset */
}
static int lod_declare_instantiate_components(const struct lu_env *env,
- struct lod_object *lo, struct thandle *th)
+ struct lod_object *lo,
+ struct thandle *th,
+ __u64 reserve)
{
struct lod_thread_info *info = lod_env_info(env);
int i;
for (i = 0; i < info->lti_count; i++) {
rc = lod_qos_prep_create(env, lo, NULL, th,
- info->lti_comp_idx[i]);
+ info->lti_comp_idx[i], reserve);
if (rc)
break;
}
*/
static bool lod_sel_osts_allowed(const struct lu_env *env,
struct lod_object *lo,
- int index, __u64 extension_size,
+ int index, __u64 reserve,
struct lu_extent *extent,
struct lu_extent *comp_extent, int write)
{
struct lod_layout_component *lod_comp = &lo->ldo_comp_entries[index];
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
- struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
+ struct lod_thread_info *tinfo = lod_env_info(env);
+ struct obd_statfs *sfs = &tinfo->lti_osfs;
__u64 available = 0;
- __u64 size;
bool ret = true;
int i, rc;
LASSERT(lod_comp->llc_stripe_count != 0);
- if (write == 0 ||
- (extent->e_start == 0 && extent->e_end == OBD_OBJECT_EOF)) {
- /* truncate or append */
- size = extension_size;
- } else {
- /* In case of write op, check the real write extent,
- * it may be larger than the extension_size */
- size = roundup(min(extent->e_end, comp_extent->e_end) -
- max(extent->e_start, comp_extent->e_start),
- extension_size);
- }
- /* extension_size is file level, so we must divide by stripe count to
- * compare it to available space on a single OST */
- size /= lod_comp->llc_stripe_count;
-
lod_getref(&lod->lod_ost_descs);
for (i = 0; i < lod_comp->llc_stripe_count; i++) {
int index = lod_comp->llc_ost_indices[i];
(100ull * sfs->os_bavail) / sfs->os_blocks,
(100ull * sfs->os_bfree) / sfs->os_blocks);
- if (size * repeated > available) {
+ if (reserve * repeated > available) {
ret = false;
CDEBUG(D_LAYOUT, "low space on ost %d, available %llu "
- "< extension size %llu\n", index, available,
- extension_size);
+ "< extension size %llu repeated %d\n", index,
+ available, reserve, repeated);
break;
}
}
return new_end;
}
+/**
+ * Calculate the exact reservation (per-OST extension_size) on the OSTs being
+ * instantiated. It needs to be calculated in advance and taken into account at
+ * instantiation time, because otherwise lod_statfs_and_check() may consider
+ * an OST as OK while SEL needs its extension_size to fit the free space; the
+ * OST may then turn out to be low on space, so an inappropriate OST may be
+ * used and ENOSPC occurs.
+ *
+ * \param[in] lod_comp lod component we are checking
+ *
+ * \retval size to be reserved on each OST of lod_comp's stripes.
+ */
+static __u64 lod_sel_stripe_reserved(struct lod_layout_component *lod_comp)
+{
+ /* extension_size is file level, so we must divide by stripe count to
+ * compare it to available space on a single OST */
+ return lod_comp->llc_stripe_size * SEL_UNIT_SIZE /
+ lod_comp->llc_stripe_count;
+}
+
/* As lod_sel_handler() could be re-entered for the same component several
* times, this is the data for the next call. Fields could be changed to
* component indexes when needed, (e.g. if there is no need to instantiate
struct lod_layout_component *lod_comp;
struct lod_layout_component *prev;
struct lod_layout_component *next = NULL;
- __u64 extension_size;
+ __u64 extension_size, reserve;
__u64 new_end = 0;
bool repeated;
int change = 0;
RETURN(-EINVAL);
}
+ reserve = lod_sel_stripe_reserved(lod_comp);
+
if (!prev->llc_stripe) {
CDEBUG(D_LAYOUT, "Previous component not inited\n");
info->lti_count = 1;
info->lti_comp_idx[0] = index - 1;
- rc = lod_declare_instantiate_components(env, lo, th);
+ rc = lod_declare_instantiate_components(env, lo, th, reserve);
/* ENOSPC tells us we can't use this component. If there is
* a next or we are repeating, we either spill over (next) or
* extend the original comp (repeat). Otherwise, return the
}
if (sd->sd_force == 0 && rc == 0)
- rc = !lod_sel_osts_allowed(env, lo, index - 1,
- extension_size, extent,
+ rc = !lod_sel_osts_allowed(env, lo, index - 1, reserve, extent,
&lod_comp->llc_extent, write);
repeated = !!(sd->sd_repeat);
RETURN(-EALREADY);
lod_obj_inc_layout_gen(lo);
- rc = lod_declare_instantiate_components(env, lo, th);
+ rc = lod_declare_instantiate_components(env, lo, th, 0);
EXIT;
out:
if (rc)
lo->ldo_layout_gen = layout_version & 0xffff;
}
- rc = lod_declare_instantiate_components(env, lo, th);
+ rc = lod_declare_instantiate_components(env, lo, th, 0);
if (rc)
GOTO(out, rc);
lo->ldo_flr_state = LCM_FL_SYNC_PENDING;
}
- rc = lod_declare_instantiate_components(env, lo, th);
+ rc = lod_declare_instantiate_components(env, lo, th, 0);
if (rc)
GOTO(out, rc);
*/
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
struct lu_tgt_descs *ltd,
- struct lu_tgt_desc *tgt)
+ struct lu_tgt_desc *tgt, __u64 reserve)
{
+ struct obd_statfs_info info = { 0 };
struct lov_desc *desc = &ltd->ltd_lov_desc;
int rc;
ENTRY;
LASSERT(d);
LASSERT(tgt);
- rc = dt_statfs(env, tgt->ltd_tgt, &tgt->ltd_statfs);
+ info.os_enable_pre = 1;
+ rc = dt_statfs_info(env, tgt->ltd_tgt, &tgt->ltd_statfs, &info);
if (rc && rc != -ENOTCONN)
CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc);
return rc;
}
+ if (reserve &&
+ (reserve + (info.os_reserved_mb_low << 20) >
+ tgt->ltd_statfs.os_bavail * tgt->ltd_statfs.os_bsize))
+ return -ENOSPC;
+
/* check whether device has changed state (active, inactive) */
if (rc != 0 && tgt->ltd_active) {
/* turned inactive? */
ltd_foreach_tgt(ltd, tgt) {
avail = tgt->ltd_statfs.os_bavail;
- if (lod_statfs_and_check(env, lod, ltd, tgt))
+ if (lod_statfs_and_check(env, lod, ltd, tgt, 0))
continue;
if (tgt->ltd_statfs.os_bavail != avail)
struct dt_object **stripe,
__u32 *ost_indices,
struct thandle *th,
- bool *overstriped)
+ bool *overstriped,
+ __u64 reserve)
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
ENTRY;
- rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost, reserve);
if (rc)
RETURN(rc);
*/
static int lod_ost_alloc_rr(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
spin_unlock(&lqr->lqr_alloc);
rc = lod_check_and_reserve_ost(env, lo, lod_comp, ost_idx,
speed, &stripe_idx, stripe,
- ost_indices, th, &overstriped);
+ ost_indices, th, &overstriped,
+ reserve);
spin_lock(&lqr->lqr_alloc);
if (rc != 0 && OST_TGT(m, ost_idx)->ltd_connecting)
*/
static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- struct thandle *th, int comp_idx)
+ struct thandle *th, int comp_idx, __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
}
rc = lod_statfs_and_check(env, m, &m->lod_ost_descs,
- LTD_TGT(&m->lod_ost_descs, ost_idx));
+ LTD_TGT(&m->lod_ost_descs, ost_idx),
+ reserve);
if (rc < 0) /* this OSP doesn't feel well */
break;
static int lod_ost_alloc_specific(const struct lu_env *env,
struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
* start OST, then it can be skipped, otherwise skip it only
* if it is inactive/recovering/out-of-space." */
- rc = lod_statfs_and_check(env, m, &m->lod_ost_descs, tgt);
+ rc = lod_statfs_and_check(env, m, &m->lod_ost_descs,
+ tgt, reserve);
if (rc) {
/* this OSP doesn't feel well */
continue;
*/
static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
ost = OST_TGT(lod, osts->op_array[i]);
ost->ltd_qos.ltq_usable = 0;
- rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs,
+ ost, reserve);
if (rc) {
/* this OSP doesn't feel well */
continue;
*/
int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
struct lu_attr *attr, struct thandle *th,
- int comp_idx)
+ int comp_idx, __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
if (!ost_indices)
GOTO(out, rc = -ENOMEM);
+repeat:
lod_getref(&d->lod_ost_descs);
/* XXX: support for non-0 files w/o objects */
CDEBUG(D_OTHER, "tgt_count %d stripe_count %d\n",
if (lod_comp->llc_ostlist.op_array &&
lod_comp->llc_ostlist.op_count) {
rc = lod_alloc_ost_list(env, lo, stripe, ost_indices,
- th, comp_idx);
+ th, comp_idx, reserve);
} else if (lod_comp->llc_stripe_offset == LOV_OFFSET_DEFAULT) {
/**
* collect OSTs and OSSs used in other mirrors whose
lod_collect_avoidance(lo, lag, comp_idx);
rc = lod_ost_alloc_qos(env, lo, stripe, ost_indices,
- flag, th, comp_idx);
+ flag, th, comp_idx, reserve);
if (rc == -EAGAIN)
rc = lod_ost_alloc_rr(env, lo, stripe,
ost_indices, flag, th,
- comp_idx);
+ comp_idx, reserve);
} else {
rc = lod_ost_alloc_specific(env, lo, stripe,
ost_indices, flag, th,
- comp_idx);
+ comp_idx, reserve);
}
put_ldts:
lod_putref(d, &d->lod_ost_descs);
for (i = 0; i < stripe_len; i++)
if (stripe[i] != NULL)
dt_object_put(env, stripe[i]);
+
+ /* In case there is no space on any OST, ignore the
+ * @reserve space and retry once to avoid an error at
+ * init time; the actual IO will probably be less than
+ * the given @reserve space (aka extension_size). */
+ if (reserve) {
+ reserve = 0;
+ goto repeat;
+ }
lod_comp->llc_stripe_count = 0;
} else {
lod_comp->llc_stripe = stripe;
extent = &lod_comp->llc_extent;
QOS_DEBUG("comp[%d] %lld "DEXT"\n", i, size, PEXT(extent));
if (!lo->ldo_is_composite || size >= extent->e_start) {
- rc = lod_qos_prep_create(env, lo, attr, th, i);
+ rc = lod_qos_prep_create(env, lo, attr, th, i, 0);
if (rc)
break;
}
if (d->opd_pre == NULL)
RETURN(0);
- CDEBUG(D_OTHER,
- "%s: %llu blocks, %llu free, %llu avail, %u reserved mb low, %u reserved mb high, %llu files, %llu free files\n",
- d->opd_obd->obd_name,
- sfs->os_blocks, sfs->os_bfree, sfs->os_bavail,
+ CDEBUG(D_OTHER, "%s: %llu blocks, %llu free, %llu avail, "
+ "%u bsize, %u reserved mb low, %u reserved mb high, "
+ "%llu files, %llu free files\n", d->opd_obd->obd_name,
+ sfs->os_blocks, sfs->os_bfree, sfs->os_bavail, sfs->os_bsize,
d->opd_reserved_mb_low, d->opd_reserved_mb_high,
sfs->os_files, sfs->os_ffree);
union ptlrpc_async_args *aa = args;
struct obd_import *imp = req->rq_import;
struct obd_statfs *msfs;
+ struct obd_statfs *sfs;
struct osp_device *d;
u64 maxage_ns;
jiffies + cfs_time_seconds(d->opd_statfs_maxage));
d->opd_statfs_update_in_progress = 0;
- CDEBUG(D_CACHE, "updated statfs %p\n", d);
+ sfs = &d->opd_statfs;
+ CDEBUG(D_CACHE, "%s (%p): %llu blocks, %llu free, %llu avail, "
+ "%u bsize, %u reserved mb low, %u reserved mb high,"
+ "%llu files, %llu free files\n", d->opd_obd->obd_name, d,
+ sfs->os_blocks, sfs->os_bfree, sfs->os_bavail, sfs->os_bsize,
+ d->opd_reserved_mb_low, d->opd_reserved_mb_high,
+ sfs->os_files, sfs->os_ffree);
RETURN(0);
out:
wait_mds_ost_sync
# First component is on OST0
- $LFS setstripe -E 256M -i 0 -z 64M -E -1 -z 1G $comp_file ||
+ $LFS setstripe -E 256M -i 0 -z 64M -E -1 -z 128M $comp_file ||
error "Create $comp_file failed"
# write past end of first component, so it is extended
$LFS getstripe $comp_file
flg_opts="--comp-flags init"
- found=$($LFS find --comp-start 128M -E 1152M $flg_opts $comp_file | \
+ found=$($LFS find --comp-start 128M -E 256M $flg_opts $comp_file | \
wc -l)
[ $found -eq 1 ] || error "Write: third component not found"
flg_opts="--comp-flags extension"
- found=$($LFS find --comp-start 1152M -E EOF $flg_opts $comp_file |wc -l)
+ found=$($LFS find --comp-start 256M -E EOF $flg_opts $comp_file |wc -l)
[ $found -eq 1 ] || error "Write: fourth extension component not found"
sel_layout_sanity $comp_file 3
# normal component to 10M, extendable component to 1G
# further extendable to EOF
- $LFS setstripe -E 10M -E 1G -p $TESTNAME -z 64M -E -1 -p "" -z 512M \
+ $LFS setstripe -E 10M -E 1G -p $TESTNAME -z 64M -E -1 -p "" -z 128M \
$comp_file || error "Create $comp_file failed"
replay_barrier $SINGLEMDS
[ $found -eq 0 ] || error "Write: zero length component still present"
flg_opts="--comp-flags init"
- found=$($LFS find --comp-start 10M -E 522M $flg_opts $comp_file | wc -l)
+ found=$($LFS find --comp-start 10M -E 138M $flg_opts $comp_file | wc -l)
[ $found -eq 1 ] || error "Write: second component not found"
flg_opts="--comp-flags extension"
- found=$($LFS find --comp-start 522M -E EOF $flg_opts $comp_file | wc -l)
+ found=$($LFS find --comp-start 138M -E EOF $flg_opts $comp_file | wc -l)
[ $found -eq 1 ] || error "Write: third component not found"
fail $SINGLEMDS
[ $found -eq 0 ] || error "Failover: 0-length component still present"
flg_opts="--comp-flags init"
- found=$($LFS find --comp-start 10M -E 522M $flg_opts $comp_file | wc -l)
+ found=$($LFS find --comp-start 10M -E 138M $flg_opts $comp_file | wc -l)
[ $found -eq 1 ] || error "Failover: second component not found"
flg_opts="--comp-flags extension"
- found=$($LFS find --comp-start 522M -E EOF $flg_opts $comp_file | wc -l)
+ found=$($LFS find --comp-start 138M -E EOF $flg_opts $comp_file | wc -l)
[ $found -eq 1 ] || error "Failover: third component not found"
sel_layout_sanity $comp_file 3
test_mkdir -p $DIR/$tdir
# DoM, extendable component, further extendable component
- $LFS setstripe -E 1M -L mdt -E 256M -i 0 -z 64M -E -1 -z 1G \
+ $LFS setstripe -E 1M -L mdt -E 256M -i 0 -z 64M -E -1 -z 128M \
$comp_file || error "Create $comp_file failed"
found=$($LFS find --comp-start 1M -E 1M $flg_opts $comp_file | wc -l)
[ $found -eq 0 ] || error "Write: Zero length component still present"
flg_opts="--comp-flags init"
- found=$($LFS find --comp-start 1M -E 1025M $flg_opts $comp_file | wc -l)
+ found=$($LFS find --comp-start 1M -E 129M $flg_opts $comp_file | wc -l)
[ $found -eq 1 ] || error "Write: extended component not found"
flg_opts="--comp-flags extension"
- found=$($LFS find --comp-start 1025M -E EOF $flg_opts $comp_file |wc -l)
+ found=$($LFS find --comp-start 129M -E EOF $flg_opts $comp_file |wc -l)
[ $found -eq 1 ] || error "Write: extension component not found"
sel_layout_sanity $comp_file 3