Whamcloud - gitweb
git://git.whamcloud.com
/
fs
/
lustre-release.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
| inline |
side by side
LU-9859 libcfs: move tgt_descs to standard Linux bitmaps.
[fs/lustre-release.git]
/
lustre
/
lod
/
lod_qos.c
diff --git
a/lustre/lod/lod_qos.c
b/lustre/lod/lod_qos.c
index
a027947
..
7fe0826
100644
(file)
--- a/
lustre/lod/lod_qos.c
+++ b/
lustre/lod/lod_qos.c
@@
-65,17
+65,17
@@
static inline int lod_statfs_check(struct lu_tgt_descs *ltd,
{
struct obd_statfs *sfs = &tgt->ltd_statfs;
- if (((sfs->os_state & OS_STAT
E
_ENOSPC) ||
- (!ltd->ltd_is_mdt && sfs->os_state & OS_STAT
E
_ENOINO &&
+ if (((sfs->os_state & OS_STAT
FS
_ENOSPC) ||
+ (!ltd->ltd_is_mdt && sfs->os_state & OS_STAT
FS
_ENOINO &&
sfs->os_fprecreated == 0)))
return -ENOSPC;
/* If the OST is readonly then we can't allocate objects there */
- if (sfs->os_state & OS_STAT
E
_READONLY)
+ if (sfs->os_state & OS_STAT
FS
_READONLY)
return -EROFS;
/* object precreation is skipped on the OST with max_create_count=0 */
- if (!ltd->ltd_is_mdt && sfs->os_state & OS_STAT
E
_NOPRECREATE)
+ if (!ltd->ltd_is_mdt && sfs->os_state & OS_STAT
FS
_NOPRECREATE)
return -ENOBUFS;
return 0;
@@
-104,6
+104,7
@@
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
{
struct lov_desc *desc = &ltd->ltd_lov_desc;
int rc;
+ ENTRY;
LASSERT(d);
LASSERT(tgt);
@@
-152,8
+153,15
@@
static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
}
spin_unlock(&d->lod_lock);
}
+ if (rc == -ENOTCONN) {
+ /* In case the ENOTCONN for an inactive OST state is
+ * mistreated as an MDT disconnection state by the client,
+ * this error should be changed to something else.
+ */
+ rc = -EREMOTEIO;
+ }
-
return rc
;
+
RETURN(rc)
;
}
static int lod_is_tgt_usable(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
@@
-272,7
+280,7
@@
static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd,
deleting from the pool. The lq_rw_sem ensures that nobody else
is reading. */
lqr->lqr_pool.op_count = real_count;
- rc =
lod_
tgt_pool_extend(&lqr->lqr_pool, real_count);
+ rc = tgt_pool_extend(&lqr->lqr_pool, real_count);
if (rc) {
up_write(&ltd->ltd_qos.lq_rw_sem);
RETURN(rc);
@@
-288,8
+296,8
@@
static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd,
for (i = 0; i < lqr->lqr_pool.op_count; i++) {
int next;
- if (!
cfs_bitmap_check(ltd->ltd_tgt_bitmap
,
-
src_pool->op_array[i]
))
+ if (!
test_bit(src_pool->op_array[i]
,
+
ltd->ltd_tgt_bitmap
))
continue;
tgt = LTD_TGT(ltd, src_pool->op_array[i]);
@@
-442,7
+450,7
@@
static inline int lod_qos_tgt_in_use_clear(const struct lu_env *env,
if (info->lti_ea_store_size < sizeof(int) * stripes)
lod_ea_store_resize(info, stripes * sizeof(int));
if (info->lti_ea_store_size < sizeof(int) * stripes) {
- CERROR("can't allocate memory for
os
t-in-use array\n");
+ CERROR("can't allocate memory for
tg
t-in-use array\n");
return -ENOMEM;
}
memset(info->lti_ea_store, -1, sizeof(int) * stripes);
@@
-564,7
+572,7
@@
static inline bool lod_should_avoid_ost(struct lod_object *lo,
bool used = false;
int i;
- if (!
cfs_bitmap_check(lod->lod_ost_bitmap, index
)) {
+ if (!
test_bit(index, lod->lod_ost_bitmap
)) {
QOS_DEBUG("OST%d: been used in conflicting mirror component\n",
index);
return true;
@@
-634,7
+642,7
@@
static int lod_check_and_reserve_ost(const struct lu_env *env,
/*
* try to use another OSP if this one is degraded
*/
- if (ost->ltd_statfs.os_state & OS_STAT
E
_DEGRADED && speed < 2) {
+ if (ost->ltd_statfs.os_state & OS_STAT
FS
_DEGRADED && speed < 2) {
QOS_DEBUG("#%d: degraded\n", ost_idx);
RETURN(rc);
}
@@
-800,7
+808,7
@@
repeat_find:
stripe_idx, array_idx, ost_idx);
if ((ost_idx == LOV_QOS_EMPTY) ||
- !
cfs_bitmap_check(m->lod_ost_bitmap, ost_idx
))
+ !
test_bit(ost_idx, m->lod_ost_bitmap
))
continue;
/* Fail Check before osc_precreate() is called
@@
-857,6
+865,44
@@
out:
RETURN(rc);
}
+static int /* Prepare the per-thread "in use" table before MDT stripe
+ * allocation: clear it for stripe_count entries, then record the MDT
+ * index of each already-existing stripe in [1, stripe_idx).
+ * Returns 0 on success, or the non-zero result of
+ * lod_qos_tgt_in_use_clear() if the table could not be prepared.
+ */
+lod_qos_mdt_in_use_init(const struct lu_env *env,
+ const struct lu_tgt_descs *ltd, /* MDT descriptors: bitmap + targets */
+ u32 stripe_idx, u32 stripe_count, /* first stripe to allocate / total */
+ const struct lu_tgt_pool *pool, /* candidate MDT indices (op_array) */
+ struct dt_object **stripes) /* existing stripes; [1, stripe_idx) must be set */
+{
+ u32 mdt_idx;
+ struct lu_tgt_desc *mdt;
+ int i, j; /* NOTE(review): signed, but compared against u32 bounds below */
+ int rc;
+
+ rc = lod_qos_tgt_in_use_clear(env, stripe_count);
+ if (rc)
+ return rc;
+
+ /* If stripe_idx > 1 we are splitting a directory: mark the MDTs of
+ * the existing stripes as in use so they are not picked again.
+ * For both split and creation, stripe 0 is local, so it is never
+ * marked in use (the loop starts at 1).
+ */
+ for (i = 1; i < stripe_idx; i++) {
+ LASSERT(stripes[i]);
+ for (j = 0; j < pool->op_count; j++) {
+ mdt_idx = pool->op_array[j];
+
+ if (!test_bit(mdt_idx, ltd->ltd_tgt_bitmap)) /* index not set in target bitmap */
+ continue;
+
+ mdt = LTD_TGT(ltd, mdt_idx);
+ if (&mdt->ltd_tgt->dd_lu_dev == /* stripe i's object belongs to this MDT's device */
+ stripes[i]->do_lu.lo_dev)
+ lod_qos_tgt_in_use(env, i, mdt_idx);
+ }
+ }
+
+ return 0;
+}
+
/**
* Allocate a striping using round-robin algorithm.
*
@@
-870,7
+916,7
@@
out:
*
* \param[in] env execution environment for this thread
* \param[in] lo LOD object
- * \param[out] stripe
striping created
+ * \param[out] stripe
s
striping created
*
* \retval positive stripe objects allocated, including the first stripe
* allocated outside
@@
-878,7
+924,8
@@
out:
* \retval negative negated errno for other failures
*/
int lod_mdt_alloc_rr(const struct lu_env *env, struct lod_object *lo,
- struct dt_object **stripe)
+ struct dt_object **stripes, u32 stripe_idx,
+ u32 stripe_count)
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lu_tgt_descs *ltd = &lod->lod_mdt_descs;
@@
-890,9
+937,8
@@
int lod_mdt_alloc_rr(const struct lu_env *env, struct lod_object *lo,
struct dt_object *dto;
unsigned int pool_idx;
unsigned int i;
- u32 start_idx_temp;
- u32 stripe_count = lo->ldo_dir_stripe_count;
- u32 stripe_idx = 1;
+ u32 saved_idx = stripe_idx;
+ u32 start_mdt;
u32 mdt_idx;
bool use_degraded = false;
int tgt_connecting = 0;
@@
-906,7
+952,8
@@
int lod_mdt_alloc_rr(const struct lu_env *env, struct lod_object *lo,
if (rc)
RETURN(rc);
- rc = lod_qos_tgt_in_use_clear(env, stripe_count);
+ rc = lod_qos_mdt_in_use_init(env, ltd, stripe_idx, stripe_count, pool,
+ stripes);
if (rc)
RETURN(rc);
@@
-927,10
+974,10
@@
int lod_mdt_alloc_rr(const struct lu_env *env, struct lod_object *lo,
(pool->op_count % (stripe_count - 1)) != 1)
++lqr->lqr_offset_idx;
}
- start_
idx_temp
= lqr->lqr_start_idx;
+ start_
mdt
= lqr->lqr_start_idx;
repeat_find:
- QOS_DEBUG("want
%d start_idx %d start_count %d offset %d active %d count
%d\n",
+ QOS_DEBUG("want
=%d start_idx=%d start_count=%d offset=%d active=%d count=
%d\n",
stripe_count - 1, lqr->lqr_start_idx, lqr->lqr_start_count,
lqr->lqr_offset_idx, pool->op_count, pool->op_count);
@@
-946,7
+993,7
@@
repeat_find:
stripe_idx, pool_idx, mdt_idx);
if (mdt_idx == LOV_QOS_EMPTY ||
- !
cfs_bitmap_check(ltd->ltd_tgt_bitmap, mdt_idx
))
+ !
test_bit(mdt_idx, ltd->ltd_tgt_bitmap
))
continue;
/* do not put >1 objects on one MDT */
@@
-961,15
+1008,15
@@
repeat_find:
}
/* try to use another OSP if this one is degraded */
- if (mdt->ltd_statfs.os_state & OS_STAT
E
_DEGRADED &&
+ if (mdt->ltd_statfs.os_state & OS_STAT
FS
_DEGRADED &&
!use_degraded) {
QOS_DEBUG("#%d: degraded\n", mdt_idx);
continue;
}
spin_unlock(&lqr->lqr_alloc);
- rc =
obd_fid_alloc(env, mdt->ltd_exp, &fid
, NULL);
- if (rc) {
+ rc =
dt_fid_alloc(env, mdt->ltd_tgt, &fid, NULL
, NULL);
+ if (rc
< 0
) {
QOS_DEBUG("#%d: alloc FID failed: %dl\n", mdt_idx, rc);
spin_lock(&lqr->lqr_alloc);
continue;
@@
-990,14
+1037,13
@@
repeat_find:
}
lod_qos_tgt_in_use(env, stripe_idx, mdt_idx);
- stripe[stripe_idx] = dto;
- stripe_idx++;
+ stripes[stripe_idx++] = dto;
}
if (!use_degraded && stripe_idx < stripe_count) {
- /* Try again, allowing slower
OSC
s */
+ /* Try again, allowing slower
MDT
s */
use_degraded = true;
- lqr->lqr_start_idx = start_
idx_temp
;
+ lqr->lqr_start_idx = start_
mdt
;
tgt_connecting = 0;
goto repeat_find;
@@
-1005,7
+1051,7
@@
repeat_find:
spin_unlock(&lqr->lqr_alloc);
up_read(&ltd->ltd_qos.lq_rw_sem);
- if (stripe_idx >
1
)
+ if (stripe_idx >
saved_idx
)
/* at least one stripe is allocated */
RETURN(stripe_idx);
@@
-1086,7
+1132,7
@@
static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
i++, array_idx = (array_idx + 1) % lod_comp->llc_stripe_count) {
__u32 ost_idx = lod_comp->llc_ostlist.op_array[array_idx];
- if (!
cfs_bitmap_check(m->lod_ost_bitmap, ost_idx
)) {
+ if (!
test_bit(ost_idx, m->lod_ost_bitmap
)) {
rc = -ENODEV;
break;
}
@@
-1214,7
+1260,7
@@
repeat_find:
i++, array_idx = (array_idx + 1) % ost_count) {
ost_idx = osts->op_array[array_idx];
- if (!
cfs_bitmap_check(m->lod_ost_bitmap, ost_idx
))
+ if (!
test_bit(ost_idx, m->lod_ost_bitmap
))
continue;
/* Fail Check before osc_precreate() is called
@@
-1412,7
+1458,7
@@
static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
good_osts = 0;
/* Find all the OSTs that are valid stripe candidates */
for (i = 0; i < osts->op_count; i++) {
- if (!
cfs_bitmap_check(lod->lod_ost_bitmap, osts->op_array[i]
))
+ if (!
test_bit(osts->op_array[i], lod->lod_ost_bitmap
))
continue;
ost = OST_TGT(lod, osts->op_array[i]);
@@
-1424,7
+1470,7
@@
static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
continue;
}
- if (ost->ltd_statfs.os_state & OS_STAT
E
_DEGRADED)
+ if (ost->ltd_statfs.os_state & OS_STAT
FS
_DEGRADED)
continue;
/* Fail Check before osc_precreate() is called
@@
-1590,6
+1636,9
@@
out_nolock:
*
* \param[in] env execution environment for this thread
* \param[in] lo LOD object
+ * \param[in] stripe_idx starting stripe index to allocate, if it's not
+ * 0, we are restriping directory
+ * \param[in] stripe_count total stripe count
* \param[out] stripes striping created
*
* \retval positive stripes allocated, and it should be equal to
@@
-1599,7
+1648,8
@@
out_nolock:
* \retval negative errno on failure
*/
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
- struct dt_object **stripes)
+ struct dt_object **stripes, u32 stripe_idx,
+ u32 stripe_count)
{
struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lu_tgt_descs *ltd = &lod->lod_mdt_descs;
@@
-1609,23
+1659,32
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
struct lu_tgt_desc *mdt;
struct dt_object *dto;
u64 total_weight = 0;
- u32 s
tripe_count = lo->ldo_dir_stripe_count
;
- u
nsigned int nfound
;
+ u32 s
aved_idx = stripe_idx
;
+ u
32 mdt_idx
;
unsigned int good_mdts;
unsigned int i;
int rc = 0;
ENTRY;
- if (stripe_count == 1)
- RETURN(1);
+ LASSERT(stripe_idx <= stripe_count);
+ if (stripe_idx == stripe_count)
+ RETURN(stripe_count);
+ /* use MDT pool in @ltd, once MDT pool is supported in the future, it
+ * can be passed in as argument like OST object allocation.
+ */
pool = &ltd->ltd_tgt_pool;
/* Detect -EAGAIN early, before expensive lock is taken. */
if (!ltd_qos_is_usable(ltd))
RETURN(-EAGAIN);
+ rc = lod_qos_mdt_in_use_init(env, ltd, stripe_idx, stripe_count, pool,
+ stripes);
+ if (rc)
+ RETURN(rc);
+
/* Do actual allocation, use write lock here. */
down_write(&ltd->ltd_qos.lq_rw_sem);
@@
-1640,14
+1699,10
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
if (rc)
GOTO(unlock, rc);
- rc = lod_qos_tgt_in_use_clear(env, stripe_count);
- if (rc)
- GOTO(unlock, rc);
-
good_mdts = 0;
- /* Find all the
tgt
s that are valid stripe candidates */
+ /* Find all the
MDT
s that are valid stripe candidates */
for (i = 0; i < pool->op_count; i++) {
- if (!
cfs_bitmap_check(ltd->ltd_tgt_bitmap, pool->op_array[i]
))
+ if (!
test_bit(pool->op_array[i], ltd->ltd_tgt_bitmap
))
continue;
mdt = LTD_TGT(ltd, pool->op_array[i]);
@@
-1657,7
+1712,7
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
if (rc)
continue;
- if (mdt->ltd_statfs.os_state & OS_STAT
E
_DEGRADED)
+ if (mdt->ltd_statfs.os_state & OS_STAT
FS
_DEGRADED)
continue;
mdt->ltd_qos.ltq_usable = 1;
@@
-1667,14
+1722,13
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
good_mdts++;
}
- QOS_DEBUG("found %d good
tgt
s\n", good_mdts);
+ QOS_DEBUG("found %d good
MDT
s\n", good_mdts);
- if (good_mdts < stripe_count -
1
)
+ if (good_mdts < stripe_count -
stripe_idx
)
GOTO(unlock, rc = -EAGAIN);
- /* Find enough tgts with weighted random allocation. */
- nfound = 1;
- while (nfound < stripe_count) {
+ /* Find enough MDTs with weighted random allocation. */
+ while (stripe_idx < stripe_count) {
u64 rand, cur_weight;
cur_weight = 0;
@@
-1682,35
+1736,36
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
rand = lu_prandom_u64_max(total_weight);
- /* On average, this will hit larger-weighted
tgt
s more often.
- * 0-weight
tgts
will always get used last (only when rand=0) */
+ /* On average, this will hit larger-weighted
MDT
s more often.
+ * 0-weight
MDT
will always get used last (only when rand=0) */
for (i = 0; i < pool->op_count; i++) {
- __u32 idx = pool->op_array[i];
int rc2;
- mdt = LTD_TGT(ltd, idx);
+ mdt_idx = pool->op_array[i];
+ mdt = LTD_TGT(ltd, mdt_idx);
if (!mdt->ltd_qos.ltq_usable)
continue;
cur_weight += mdt->ltd_qos.ltq_weight;
- QOS_DEBUG("
idx=%d nfound
=%d cur_weight=%llu rand=%llu total_weight=%llu\n",
-
idx, nfound
, cur_weight, rand,
+ QOS_DEBUG("
stripe_count=%d stripe_index
=%d cur_weight=%llu rand=%llu total_weight=%llu\n",
+
stripe_count, stripe_idx
, cur_weight, rand,
total_weight);
if (cur_weight < rand)
continue;
- QOS_DEBUG("stripe=%d to idx=%d\n", nfound, idx);
+ QOS_DEBUG("stripe=%d to idx=%d\n",
+ stripe_idx, mdt_idx);
- if (lod_qos_is_tgt_used(env,
idx, nfound
))
+ if (lod_qos_is_tgt_used(env,
mdt_idx, stripe_idx
))
continue;
- rc2 =
obd_fid_alloc(env, mdt->ltd_exp, &fid
, NULL);
- if (rc2) {
+ rc2 =
dt_fid_alloc(env, mdt->ltd_tgt, &fid, NULL
, NULL);
+ if (rc2
< 0
) {
QOS_DEBUG("can't alloc FID on #%u: %d\n",
- idx, rc2);
+
mdt_
idx, rc2);
continue;
}
@@
-1720,14
+1775,14
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
&conf);
if (IS_ERR(dto)) {
QOS_DEBUG("can't alloc stripe on #%u: %d\n",
- idx, (int) PTR_ERR(dto));
+
mdt_
idx, (int) PTR_ERR(dto));
continue;
}
- lod_qos_tgt_in_use(env,
nfound,
idx);
- stripes[
nfound
] = dto;
+ lod_qos_tgt_in_use(env,
stripe_idx, mdt_
idx);
+ stripes[
stripe_idx
] = dto;
ltd_qos_update(ltd, mdt, &total_weight);
-
nfound
++;
+
stripe_idx
++;
rc = 0;
break;
}
@@
-1737,7
+1792,7
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
break;
}
- if (unlikely(
nfound
!= stripe_count)) {
+ if (unlikely(
stripe_idx
!= stripe_count)) {
/*
* when the decision to use weighted algorithm was made
* we had enough appropriate OSPs, but this state can
@@
-1746,8
+1801,8
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
* an object due to just changed state
*/
QOS_DEBUG("%s: wanted %d objects, found only %d\n",
- lod2obd(lod)->obd_name, stripe_count,
nfound
);
- for (i =
1; i < nfound
; i++) {
+ lod2obd(lod)->obd_name, stripe_count,
stripe_idx
);
+ for (i =
saved_idx; i < stripe_idx
; i++) {
LASSERT(stripes[i] != NULL);
dt_object_put(env, stripes[i]);
stripes[i] = NULL;
@@
-1759,7
+1814,7
@@
int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
rc = -EAGAIN;
} else {
- rc =
nfound
;
+ rc =
stripe_idx
;
}
unlock:
@@
-2047,7
+2102,7
@@
int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo,
else
lod_free_comp_entries(lo);
- rc = lod_verify_striping(d, lo, buf, false);
+ rc = lod_verify_striping(
env,
d, lo, buf, false);
if (rc)
RETURN(-EINVAL);
@@
-2180,9
+2235,8
@@
int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo,
}
lod_comp->llc_pattern = v1->lmm_pattern;
- lod_comp->llc_stripe_size = desc->ld_default_stripe_size;
- if (v1->lmm_stripe_size)
- lod_comp->llc_stripe_size = v1->lmm_stripe_size;
+ lod_comp->llc_stripe_size = v1->lmm_stripe_size;
+ lod_adjust_stripe_size(lod_comp, desc->ld_default_stripe_size);
lod_comp->llc_stripe_count = desc->ld_default_stripe_count;
if (v1->lmm_stripe_count ||
@@
-2252,8
+2306,7
@@
int lod_prepare_avoidance(const struct lu_env *env, struct lod_object *lo)
lag->lag_oaa_count = 0;
if (lag->lag_oss_avoid_array &&
lag->lag_oaa_size < lod->lod_ost_count) {
- OBD_FREE(lag->lag_oss_avoid_array,
- sizeof(__u32) * lag->lag_oaa_size);
+ OBD_FREE_PTR_ARRAY(lag->lag_oss_avoid_array, lag->lag_oaa_size);
lag->lag_oss_avoid_array = NULL;
lag->lag_oaa_size = 0;
}
@@
-2281,7
+2334,7
@@
int lod_prepare_avoidance(const struct lu_env *env, struct lod_object *lo)
* using OST count to allocate the array to store the OSS
* id.
*/
- OBD_ALLOC
(new_oss, sizeof(*new_oss) *
lod->lod_ost_count);
+ OBD_ALLOC
_PTR_ARRAY(new_oss,
lod->lod_ost_count);
if (!new_oss) {
CFS_FREE_BITMAP(bitmap);
return -ENOMEM;
@@
-2440,10
+2493,10
@@
int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
if (stripe_len == 0)
GOTO(out, rc = -ERANGE);
lod_comp->llc_stripe_count = stripe_len;
- OBD_ALLOC
(stripe, sizeof(stripe[0]) *
stripe_len);
+ OBD_ALLOC
_PTR_ARRAY(stripe,
stripe_len);
if (stripe == NULL)
GOTO(out, rc = -ENOMEM);
- OBD_ALLOC
(ost_indices, sizeof(*ost_indices) *
stripe_len);
+ OBD_ALLOC
_PTR_ARRAY(ost_indices,
stripe_len);
if (!ost_indices)
GOTO(out, rc = -ENOMEM);
@@
-2522,10
+2575,9
@@
put_ldts:
out:
if (rc < 0) {
if (stripe)
- OBD_FREE
(stripe, sizeof(stripe[0]) *
stripe_len);
+ OBD_FREE
_PTR_ARRAY(stripe,
stripe_len);
if (ost_indices)
- OBD_FREE(ost_indices,
- sizeof(*ost_indices) * stripe_len);
+ OBD_FREE_PTR_ARRAY(ost_indices, stripe_len);
}
RETURN(rc);
}