From: Bobi Jam Date: Mon, 4 Mar 2013 11:21:15 +0000 (+0800) Subject: LU-2899 lod: get ldo_stripenr correctly X-Git-Tag: 2.3.63~81 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=89b733638db0f137b0b2b499e238284e2cdcc7ec;p=fs%2Flustre-release.git LU-2899 lod: get ldo_stripenr correctly Current code relies on lod_statfs_and_check() to count the number of activated LOD targets, while lod::ldo_stripenr derivation happens before calling lod_statfs_and_check(), and that makes lod::ldo_stripenr inaccurate. This patch makes sure lod_statfs_and_check() is called before updating ::ldo_stripenr. Also, if an OST target is [de]activated, the client needs to wait 2*lod_qos_maxage seconds to get an accurate ld_active_tgt_count number. Signed-off-by: Bobi Jam Change-Id: I37bebc69f876dd68da609fb5180bc6db36f01e84 Reviewed-on: http://review.whamcloud.com/5573 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Reviewed-by: Fan Yong Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/libcfs/include/libcfs/libcfs_debug.h b/libcfs/include/libcfs/libcfs_debug.h index 0e8aaa1..7a72eef 100644 --- a/libcfs/include/libcfs/libcfs_debug.h +++ b/libcfs/include/libcfs/libcfs_debug.h @@ -362,10 +362,10 @@ do { \ #endif /* !CDEBUG_ENTRY_EXIT */ -#define RETURN_EXIT \ -do { \ - EXIT_NESTING; \ - return; \ +#define RETURN_EXIT \ +do { \ + EXIT; \ + return; \ } while (0) extern int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 2b81ad8..29c582a 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -175,7 +175,9 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, spin_unlock(&d->lod_desc_lock); } else if (rc == 0 && ost->ltd_active == 0) { /* turned active? 
*/ - LASSERT(d->lod_desc.ld_active_tgt_count < d->lod_ostnr); + LASSERTF(d->lod_desc.ld_active_tgt_count < d->lod_ostnr, + "active tgt count %d, ost nr %d\n", + d->lod_desc.ld_active_tgt_count, d->lod_ostnr); spin_lock(&d->lod_desc_lock); if (ost->ltd_active == 0) { ost->ltd_active = 1; @@ -188,7 +190,7 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, spin_unlock(&d->lod_desc_lock); } - return rc; + RETURN(rc); } static void lod_qos_statfs_update(const struct lu_env *env, @@ -225,6 +227,7 @@ static void lod_qos_statfs_update(const struct lu_env *env, out: up_write(&lod->lod_qos.lq_rw_sem); + EXIT; } /* Recalculate per-object penalties for OSSs and OSTs, @@ -1384,6 +1387,11 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, * no striping has been created so far */ LASSERT(lo->ldo_stripenr > 0); + /* + * statfs and check OST targets now, since ld_active_tgt_count + * could be changed if some OSTs are [de]activated manually. + */ + lod_qos_statfs_update(env, d); lo->ldo_stripenr = lod_get_stripecnt(d, LOV_MAGIC, lo->ldo_stripenr); @@ -1394,8 +1402,9 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, lod_getref(&d->lod_ost_descs); /* XXX: support for non-0 files w/o objects */ + CDEBUG(D_OTHER, "tgt_count %d stripenr %d\n", + d->lod_desc.ld_tgt_count, stripe_len); if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) { - lod_qos_statfs_update(env, d); rc = lod_alloc_qos(env, lo, stripe, flag, th); if (rc == -EAGAIN) rc = lod_alloc_rr(env, lo, stripe, flag, th); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index e9753ca..234dd48 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -1638,6 +1638,19 @@ test_27y() { do_facet $SINGLEMDS lctl --device %$OSC activate fi done + + # all osp devices get activated, hence -1 stripe count restored + local stripecnt=0 + + # sleep 2*lod_qos_maxage seconds waiting for lod qos to notice osp + # devices get activated. 
+ sleep_maxage + $SETSTRIPE -c -1 $DIR/$tfile + stripecnt=$($GETSTRIPE -c $DIR/$tfile) + rm -f $DIR/$tfile + [ $stripecnt -ne $OSTCOUNT ] && + error "Of $OSTCOUNT OSTs, only $stripecnt is available" + return 0 } run_test 27y "create files while OST0 is degraded and the rest inactive"