Whamcloud - gitweb
git://git.whamcloud.com
/
fs
/
lustre-release.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
| inline |
side by side
LU-13366 lod: check for extension size at instantiation time
[fs/lustre-release.git]
/
lustre
/
lod
/
lod_qos.c
diff --git
a/lustre/lod/lod_qos.c
b/lustre/lod/lod_qos.c
index
69221ce
..
644ced9
100644
(file)
--- a/
lustre/lod/lod_qos.c
+++ b/
lustre/lod/lod_qos.c
@@
-65,17
+65,17
@@
static inline int lod_statfs_check(struct lu_tgt_descs *ltd,
{
struct obd_statfs *sfs = &tgt->ltd_statfs;
- if (((sfs->os_state & OS_STATE_ENOSPC) ||
- (!ltd->ltd_is_mdt && sfs->os_state & OS_STATE_ENOINO &&
+ if (((sfs->os_state & OS_STATFS_ENOSPC) ||
+ (!ltd->ltd_is_mdt && sfs->os_state & OS_STATFS_ENOINO &&
sfs->os_fprecreated == 0)))
return -ENOSPC;
/* If the OST is readonly then we can't allocate objects there */
- if (sfs->os_state & OS_STATE_READONLY)
+ if (sfs->os_state & OS_STATFS_READONLY)
return -EROFS;
/* object precreation is skipped on the OST with max_create_count=0 */
- if (!ltd->ltd_is_mdt && sfs->os_state & OS_STATE_NOPRECREATE)
+ if (!ltd->ltd_is_mdt && sfs->os_state & OS_STATFS_NOPRECREATE)
return -ENOBUFS;
return 0;
@@
-100,8
+100,9
@@
static inline int lod_statfs_check(struct lu_tgt_descs *ltd,
*/
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
struct lu_tgt_descs *ltd,
- struct lu_tgt_desc *tgt)
+ struct lu_tgt_desc *tgt, __u64 reserve)
{
+ struct obd_statfs_info info = { 0 };
struct lov_desc *desc = &ltd->ltd_lov_desc;
int rc;
ENTRY;
@@
-109,7
+110,8
@@
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
LASSERT(d);
LASSERT(tgt);
- rc = dt_statfs(env, tgt->ltd_tgt, &tgt->ltd_statfs);
+ info.os_enable_pre = 1;
+ rc = dt_statfs_info(env, tgt->ltd_tgt, &tgt->ltd_statfs, &info);
if (rc && rc != -ENOTCONN)
CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc);
@@
-119,6
+121,11
@@
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
return rc;
}
+ if (reserve &&
+ (reserve + (info.os_reserved_mb_low << 20) >
+ tgt->ltd_statfs.os_bavail * tgt->ltd_statfs.os_bsize))
+ return -ENOSPC;
+
/* check whether device has changed state (active, inactive) */
if (rc != 0 && tgt->ltd_active) {
/* turned inactive? */
@@
-211,7
+218,7
@@
void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod,
ltd_foreach_tgt(ltd, tgt) {
avail = tgt->ltd_statfs.os_bavail;
- if (lod_statfs_and_check(env, lod, ltd, tgt))
+ if (lod_statfs_and_check(env, lod, ltd, tgt, 0))
continue;
if (tgt->ltd_statfs.os_bavail != avail)
@@
-296,8
+303,8
@@
static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd,
for (i = 0; i < lqr->lqr_pool.op_count; i++) {
int next;
- if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap,
- src_pool->op_array[i]))
+ if (!test_bit(src_pool->op_array[i],
+ ltd->ltd_tgt_bitmap))
continue;
tgt = LTD_TGT(ltd, src_pool->op_array[i]);
@@
-387,7
+394,7
@@
static struct dt_object *lod_qos_declare_object_on(const struct lu_env *env,
*/
o = lu_object_anon(env, nd, NULL);
if (IS_ERR(o))
- GOTO(out, dt = ERR_PTR(PTR_ERR(o)));
+ GOTO(out, dt = ERR_CAST(o));
n = lu_object_locate(o->lo_header, nd->ld_type);
if (unlikely(n == NULL)) {
@@
-572,7
+579,7
@@
static inline bool lod_should_avoid_ost(struct lod_object *lo,
bool used = false;
int i;
- if (!cfs_bitmap_check(lod->lod_ost_bitmap, index)) {
+ if (!test_bit(index, lod->lod_ost_bitmap)) {
QOS_DEBUG("OST%d: been used in conflicting mirror component\n",
index);
return true;
@@
-615,7
+622,8
@@
static int lod_check_and_reserve_ost(const struct lu_env *env,
struct dt_object **stripe,
__u32 *ost_indices,
struct thandle *th,
- bool *overstriped)
+ bool *overstriped,
+ __u64 reserve)
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
@@
-626,7
+634,7
@@
static int lod_check_and_reserve_ost(const struct lu_env *env,
ENTRY;
- rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost, reserve);
if (rc)
RETURN(rc);
@@
-642,7
+650,7
@@
static int lod_check_and_reserve_ost(const struct lu_env *env,
/*
* try to use another OSP if this one is degraded
*/
- if (ost->ltd_statfs.os_state & OS_STATE_DEGRADED && speed < 2) {
+ if (ost->ltd_statfs.os_state & OS_STATFS_DEGRADED && speed < 2) {
QOS_DEBUG("#%d: degraded\n", ost_idx);
RETURN(rc);
}
@@
-725,7
+733,8
@@
static int lod_check_and_reserve_ost(const struct lu_env *env,
*/
static int lod_ost_alloc_rr(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
@@
-808,7
+817,7
@@
repeat_find:
stripe_idx, array_idx, ost_idx);
if ((ost_idx == LOV_QOS_EMPTY) ||
- !cfs_bitmap_check(m->lod_ost_bitmap, ost_idx))
+ !test_bit(ost_idx, m->lod_ost_bitmap))
continue;
/* Fail Check before osc_precreate() is called
@@
-819,7
+828,8
@@
repeat_find:
spin_unlock(&lqr->lqr_alloc);
rc = lod_check_and_reserve_ost(env, lo, lod_comp, ost_idx,
speed, &stripe_idx, stripe,
- ost_indices, th, &overstriped);
+ ost_indices, th, &overstriped,
+ reserve);
spin_lock(&lqr->lqr_alloc);
if (rc != 0 && OST_TGT(m, ost_idx)->ltd_connecting)
@@
-890,7
+900,7
@@
lod_qos_mdt_in_use_init(const struct lu_env *env,
for (j = 0; j < pool->op_count; j++) {
mdt_idx = pool->op_array[j];
- if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, mdt_idx))
+ if (!test_bit(mdt_idx, ltd->ltd_tgt_bitmap))
continue;
mdt = LTD_TGT(ltd, mdt_idx);
@@
-993,7
+1003,7
@@
repeat_find:
stripe_idx, pool_idx, mdt_idx);
if (mdt_idx == LOV_QOS_EMPTY ||
- !cfs_bitmap_check(ltd->ltd_tgt_bitmap, mdt_idx))
+ !test_bit(mdt_idx, ltd->ltd_tgt_bitmap))
continue;
/* do not put >1 objects on one MDT */
@@
-1008,14
+1018,14
@@
repeat_find:
}
/* try to use another OSP if this one is degraded */
- if (mdt->ltd_statfs.os_state & OS_STATE_DEGRADED &&
+ if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED &&
!use_degraded) {
QOS_DEBUG("#%d: degraded\n", mdt_idx);
continue;
}
spin_unlock(&lqr->lqr_alloc);
- rc = obd_fid_alloc(env, mdt->ltd_exp, &fid, NULL);
+ rc = dt_fid_alloc(env, mdt->ltd_tgt, &fid, NULL, NULL);
if (rc < 0) {
QOS_DEBUG("#%d: alloc FID failed: %dl\n", mdt_idx, rc);
spin_lock(&lqr->lqr_alloc);
@@
-1089,7
+1099,7
@@
repeat_find:
*/
static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- struct thandle *th, int comp_idx)
+ struct thandle *th, int comp_idx, __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
@@
-1132,7
+1142,7
@@
static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
i++, array_idx = (array_idx + 1) % lod_comp->llc_stripe_count) {
__u32 ost_idx = lod_comp->llc_ostlist.op_array[array_idx];
- if (!cfs_bitmap_check(m->lod_ost_bitmap, ost_idx)) {
+ if (!test_bit(ost_idx, m->lod_ost_bitmap)) {
rc = -ENODEV;
break;
}
@@
-1147,7
+1157,8
@@
static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
}
rc = lod_statfs_and_check(env, m, &m->lod_ost_descs,
- LTD_TGT(&m->lod_ost_descs, ost_idx));
+ LTD_TGT(&m->lod_ost_descs, ost_idx),
+ reserve);
if (rc < 0) /* this OSP doesn't feel well */
break;
@@
-1201,7
+1212,8
@@
static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
static int lod_ost_alloc_specific(const struct lu_env *env,
struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
@@
-1260,7
+1272,7
@@
repeat_find:
i++, array_idx = (array_idx + 1) % ost_count) {
ost_idx = osts->op_array[array_idx];
- if (!cfs_bitmap_check(m->lod_ost_bitmap, ost_idx))
+ if (!test_bit(ost_idx, m->lod_ost_bitmap))
continue;
/* Fail Check before osc_precreate() is called
@@
-1294,7
+1306,8
@@
repeat_find:
* start OST, then it can be skipped, otherwise skip it only
* if it is inactive/recovering/out-of-space." */
- rc = lod_statfs_and_check(env, m, &m->lod_ost_descs, tgt);
+ rc = lod_statfs_and_check(env, m, &m->lod_ost_descs,
+ tgt, reserve);
if (rc) {
/* this OSP doesn't feel well */
continue;
@@
-1395,7
+1408,8
@@
out:
*/
static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
struct dt_object **stripe, __u32 *ost_indices,
- int flags, struct thandle *th, int comp_idx)
+ int flags, struct thandle *th, int comp_idx,
+ __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
@@
-1458,19
+1472,20
@@
static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
good_osts = 0;
/* Find all the OSTs that are valid stripe candidates */
for (i = 0; i < osts->op_count; i++) {
- if (!cfs_bitmap_check(lod->lod_ost_bitmap, osts->op_array[i]))
+ if (!test_bit(osts->op_array[i], lod->lod_ost_bitmap))
continue;
ost = OST_TGT(lod, osts->op_array[i]);
ost->ltd_qos.ltq_usable = 0;
- rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs, ost);
+ rc = lod_statfs_and_check(env, lod, &lod->lod_ost_descs,
+ ost, reserve);
if (rc) {
/* this OSP doesn't feel well */
continue;
}
- if (ost->ltd_statfs.os_state & OS_STATE_DEGRADED)
+ if (ost->ltd_statfs.os_state & OS_STATFS_DEGRADED)
continue;
/* Fail Check before osc_precreate() is called
@@
-1702,7
+1717,7
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
good_mdts = 0;
/* Find all the MDTs that are valid stripe candidates */
for (i = 0; i < pool->op_count; i++) {
- if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, pool->op_array[i]))
+ if (!test_bit(pool->op_array[i], ltd->ltd_tgt_bitmap))
continue;
mdt = LTD_TGT(ltd, pool->op_array[i]);
@@
-1712,7
+1727,7
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
if (rc)
continue;
- if (mdt->ltd_statfs.os_state & OS_STATE_DEGRADED)
+ if (mdt->ltd_statfs.os_state & OS_STATFS_DEGRADED)
continue;
mdt->ltd_qos.ltq_usable = 1;
@@
-1762,7
+1777,7
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
if (lod_qos_is_tgt_used(env, mdt_idx, stripe_idx))
continue;
- rc2 = obd_fid_alloc(env, mdt->ltd_exp, &fid, NULL);
+ rc2 = dt_fid_alloc(env, mdt->ltd_tgt, &fid, NULL, NULL);
if (rc2 < 0) {
QOS_DEBUG("can't alloc FID on #%u: %d\n",
mdt_idx, rc2);
@@
-2306,8
+2321,7
@@
int lod_prepare_avoidance(const struct lu_env *env, struct lod_object *lo)
lag->lag_oaa_count = 0;
if (lag->lag_oss_avoid_array &&
lag->lag_oaa_size < lod->lod_ost_count) {
- OBD_FREE(lag->lag_oss_avoid_array,
- sizeof(__u32) * lag->lag_oaa_size);
+ OBD_FREE_PTR_ARRAY(lag->lag_oss_avoid_array, lag->lag_oaa_size);
lag->lag_oss_avoid_array = NULL;
lag->lag_oaa_size = 0;
}
@@
-2335,7
+2349,7
@@
int lod_prepare_avoidance(const struct lu_env *env, struct lod_object *lo)
* using OST count to allocate the array to store the OSS
* id.
*/
- OBD_ALLOC(new_oss, sizeof(*new_oss) * lod->lod_ost_count);
+ OBD_ALLOC_PTR_ARRAY(new_oss, lod->lod_ost_count);
if (!new_oss) {
CFS_FREE_BITMAP(bitmap);
return -ENOMEM;
@@
-2451,7
+2465,7
@@
void lod_collect_avoidance(struct lod_object *lo, struct lod_avoid_guide *lag,
*/
int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
struct lu_attr *attr, struct thandle *th,
- int comp_idx)
+ int comp_idx, __u64 reserve)
{
struct lod_layout_component *lod_comp;
struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
@@
-2494,13
+2508,14
@@
int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
if (stripe_len == 0)
GOTO(out, rc = -ERANGE);
lod_comp->llc_stripe_count = stripe_len;
- OBD_ALLOC(stripe, sizeof(stripe[0]) * stripe_len);
+ OBD_ALLOC_PTR_ARRAY(stripe, stripe_len);
if (stripe == NULL)
GOTO(out, rc = -ENOMEM);
- OBD_ALLOC(ost_indices, sizeof(*ost_indices) * stripe_len);
+ OBD_ALLOC_PTR_ARRAY(ost_indices, stripe_len);
if (!ost_indices)
GOTO(out, rc = -ENOMEM);
+repeat:
lod_getref(&d->lod_ost_descs);
/* XXX: support for non-0 files w/o objects */
CDEBUG(D_OTHER, "tgt_count %d stripe_count %d\n",
@@
-2509,7
+2524,7
@@
int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
if (lod_comp->llc_ostlist.op_array &&
lod_comp->llc_ostlist.op_count) {
rc = lod_alloc_ost_list(env, lo, stripe, ost_indices,
- th, comp_idx);
+ th, comp_idx, reserve);
} else if (lod_comp->llc_stripe_offset == LOV_OFFSET_DEFAULT) {
/**
* collect OSTs and OSSs used in other mirrors whose
@@
-2524,15
+2539,15
@@
int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
lod_collect_avoidance(lo, lag, comp_idx);
rc = lod_ost_alloc_qos(env, lo, stripe, ost_indices,
- flag, th, comp_idx);
+ flag, th, comp_idx, reserve);
if (rc == -EAGAIN)
rc = lod_ost_alloc_rr(env, lo, stripe,
ost_indices, flag, th,
- comp_idx);
+ comp_idx, reserve);
} else {
rc = lod_ost_alloc_specific(env, lo, stripe,
ost_indices, flag, th,
- comp_idx);
+ comp_idx, reserve);
}
put_ldts:
lod_putref(d, &d->lod_ost_descs);
@@
-2540,6
+2555,15
@@
put_ldts:
for (i = 0; i < stripe_len; i++)
if (stripe[i] != NULL)
dt_object_put(env, stripe[i]);
+
+ /* In case there is no space on any OST, let's ignore
+ * the @reserve space to avoid an error at the init
+ * time, probably the actual IO will be less than the
+ * given @reserve space (aka extension_size). */
+ if (reserve) {
+ reserve = 0;
+ goto repeat;
+ }
lod_comp->llc_stripe_count = 0;
} else {
lod_comp->llc_stripe = stripe;
@@
-2576,10
+2600,9
@@
put_ldts:
out:
if (rc < 0) {
if (stripe)
- OBD_FREE(stripe, sizeof(stripe[0]) * stripe_len);
+ OBD_FREE_PTR_ARRAY(stripe, stripe_len);
if (ost_indices)
- OBD_FREE(ost_indices,
- sizeof(*ost_indices) * stripe_len);
+ OBD_FREE_PTR_ARRAY(ost_indices, stripe_len);
}
RETURN(rc);
}
@@
-2631,7
+2654,7
@@
int lod_prepare_create(const struct lu_env *env, struct lod_object *lo,
extent = &lod_comp->llc_extent;
QOS_DEBUG("comp[%d] %lld "DEXT"\n", i, size, PEXT(extent));
if (!lo->ldo_is_composite || size >= extent->e_start) {
- rc = lod_qos_prep_create(env, lo, attr, th, i);
+ rc = lod_qos_prep_create(env, lo, attr, th, i, 0);
if (rc)
break;
}