From 6c0b4329d046de283eeb254fca561be9386df68a Mon Sep 17 00:00:00 2001
From: Sergey Cheremencev
Date: Wed, 9 Feb 2022 14:53:51 +0300
Subject: [PATCH] LU-16339 quota: notify OSTs until lge_qunit_nu is set

There is a window when locks are not yet granted, but the lqe has
already been added to qmt_reba_list to send updates to OSTs:

t1: lqe_init()->qmt_setup_lqe_gd->qmt_seed_glbe()
t1: lqe_init()->qmt_setup_lqe_gd->qmt_id_lock_notify()
t2: qmt_glimpse_lock() lustre-QMT0000: no granted locks to send glimpse
t1: ldlm_lock_enqueue()->ldlm_granted_list_add_lock()
...

If lge_qunit_nu was set to 1 in qmt_seed_glbe and the corresponding
qunit is equal to the least_qunit, the new qunit won't be sent to
OSTs, and finally lqe_revoke_time will not be set, causing endless
-115 (EINPROGRESS) errors.

The fix calls qmt_id_lock_notify from qmt_dqacq0 for an lqe that has
lge_qunit_nu or lge_edquot_nu set.

Add test 85 to sanity-quota to check that a write doesn't hang if the
initial qunit value is equal to the least_qunit due to a small block
hard limit.

HPE-bug-id: LUS-10711
Change-Id: Icd1ac29beab87c0ebf00bcb20b25c33b771b74c1
Signed-off-by: Sergey Cheremencev
Reviewed-on: https://es-gerrit.dev.cray.com/160034
Tested-by: Jenkins Build User
Reviewed-by: Alexander Boyko
Reviewed-by: Alexey Lyashkov
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49228
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Alexander Boyko
Reviewed-by: Oleg Drokin
---
 lustre/quota/qmt_entry.c     | 33 +++++++++++++++++++++++++++------
 lustre/quota/qmt_handler.c   |  8 +++++---
 lustre/quota/qmt_internal.h  | 18 +++++++++++-------
 lustre/quota/qmt_lock.c      | 36 +++++++++++++++++++-----------------
 lustre/quota/qmt_pool.c      |  2 +-
 lustre/tests/sanity-quota.sh | 37 +++++++++++++++++++++++++++++++++++++
 6 files changed, 100 insertions(+), 34 deletions(-)

diff --git a/lustre/quota/qmt_entry.c b/lustre/quota/qmt_entry.c
index ee55585..a8f0672 100644
--- a/lustre/quota/qmt_entry.c
+++ b/lustre/quota/qmt_entry.c
@@ -826,10 +826,12 @@ done:
 bool qmt_adjust_edquot_qunit_notify(const struct lu_env *env,
 				    struct qmt_device *qmt,
 				    __u64 now, bool edquot,
-				    bool qunit, __u32 qb_flags)
+				    bool qunit, __u32 qb_flags,
+				    int idx)
 {
 	struct lquota_entry *lqe_gl, *lqe;
 	bool need_reseed = false;
+	bool need_notify = false;
 	int i;
 
 	lqe_gl = qti_lqes_glbl(env);
@@ -852,15 +854,34 @@ bool qmt_adjust_edquot_qunit_notify(const struct lu_env *env,
 		return need_reseed;
 	}
 
-	if (need_reseed) {
+	if (need_reseed || idx >= 0) {
 		mutex_lock(&lqe_gl->lqe_glbl_data_lock);
-		if (lqe_gl->lqe_glbl_data)
-			qmt_seed_glbe_all(env, lqe_gl->lqe_glbl_data, qunit,
-					  edquot);
+		if (lqe_gl->lqe_glbl_data) {
+			struct lqe_glbl_data *lgd = lqe_gl->lqe_glbl_data;
+
+			if (need_reseed) {
+				qmt_seed_glbe_all(env, lgd, qunit, edquot);
+			} else if (idx >= 0) {
+				LASSERT(idx <= lgd->lqeg_num_used);
+				/* If there are no locks yet when
+				 * lge_qunit/edquot_nu is set, slaves
+				 * are still not notified of the new
+				 * qunit/edquot value. In such a case
+				 * we need to notify them with the new
+				 * values to avoid endless EINPROGRESS
+				 * if qunit is equal to the least qunit,
+				 * but lqe_revoke_time is still not set.
+				 */
+				need_notify = lgd->lqeg_arr[idx].lge_qunit_nu ||
+					      lgd->lqeg_arr[idx].lge_edquot_nu;
+			}
+		}
 		mutex_unlock(&lqe_gl->lqe_glbl_data_lock);
+	}
 
+	if (need_reseed || need_notify)
 		qmt_id_lock_notify(qmt, lqe_gl);
-	}
+
 	return need_reseed;
 }
 
diff --git a/lustre/quota/qmt_handler.c b/lustre/quota/qmt_handler.c
index 3446229..8eb2824c 100644
--- a/lustre/quota/qmt_handler.c
+++ b/lustre/quota/qmt_handler.c
@@ -879,6 +879,7 @@ static inline void qmt_lqes_tune_grace(const struct lu_env *env, __u64 now)
  * acquire/release
  * \param qb_usage - is the current space usage on the slave
  * \param repbody - is the quota_body of reply
+ * \param idx - is the index of a slave target
  *
  * \retval 0 : success
  * \retval -EDQUOT : out of quota
@@ -887,7 +888,7 @@ static inline void qmt_lqes_tune_grace(const struct lu_env *env, __u64 now)
  */
 int qmt_dqacq0(const struct lu_env *env, struct qmt_device *qmt,
 	       struct obd_uuid *uuid, __u32 qb_flags, __u64 qb_count,
-	       __u64 qb_usage, struct quota_body *repbody)
+	       __u64 qb_usage, struct quota_body *repbody, int idx)
 {
 	__u64 now, count = 0;
 	struct dt_object *slv_obj = NULL;
@@ -1098,7 +1099,7 @@ out_write:
 	 * size according to the total granted & limits.
 	 */
 	/* clear/set edquot flag and notify slaves via glimpse if needed */
-	qmt_adjust_and_notify(env, qmt, now, qb_flags);
+	qmt_adjust_notify_nu(env, qmt, now, qb_flags, idx);
 out_locked:
 	LQUOTA_DEBUG_LQES(env, "dqacq ends count:%llu ver:%llu rc:%d",
 			  repbody->qb_count, repbody->qb_slv_ver, rc);
@@ -1267,7 +1268,8 @@ static int qmt_dqacq(const struct lu_env *env, struct lu_device *ld,
 		RETURN(rc);
 
 	rc = qmt_dqacq0(env, qmt, uuid, qbody->qb_flags,
-			qbody->qb_count, qbody->qb_usage, repbody);
+			qbody->qb_count, qbody->qb_usage, repbody,
+			qmt_dom(rtype, stype) ? -1 : idx);
 
 	if (lustre_handle_is_used(&qbody->qb_lockh))
 		/* return current qunit value only to slaves owning an per-ID
diff --git a/lustre/quota/qmt_internal.h b/lustre/quota/qmt_internal.h
index 72b4c22..1474a8a 100644
--- a/lustre/quota/qmt_internal.h
+++ b/lustre/quota/qmt_internal.h
@@ -97,6 +97,10 @@ enum {
 	QMT_STYPE_CNT
 };
 
+#define qmt_dom(rtype, stype) \
+	((rtype == LQUOTA_RES_DT && \
+	  stype == QMT_STYPE_MDT) ? true : false)
+
 enum {
 	/* set while recalc_thread is working */
 	QPI_FLAG_RECALC_OFFSET,
@@ -474,13 +478,13 @@ bool qmt_adjust_qunit(const struct lu_env *, struct lquota_entry *);
 bool qmt_adjust_edquot(struct lquota_entry *, __u64);
 
 #define qmt_adjust_edquot_notify(env, qmt, now, qb_flags) \
-	qmt_adjust_edquot_qunit_notify(env, qmt, now, true, false, qb_flags)
-#define qmt_adjust_qunit_notify(env, qmt, qb_flags) \
-	qmt_adjust_edquot_qunit_notify(env, qmt, 0, false, true, qb_flags)
-#define qmt_adjust_and_notify(env, qmt, now, qb_flags) \
-	qmt_adjust_edquot_qunit_notify(env, qmt, now, true, true, qb_flags)
+	qmt_adjust_edquot_qunit_notify(env, qmt, now, true, \
+				       false, qb_flags, -1)
+#define qmt_adjust_notify_nu(env, qmt, now, qb_flags, idx) \
+	qmt_adjust_edquot_qunit_notify(env, qmt, now, true, \
+				       true, qb_flags, idx)
 bool qmt_adjust_edquot_qunit_notify(const struct lu_env *, struct qmt_device *,
-				    __u64, bool, bool, __u32);
+				    __u64, bool, bool, __u32, int idx);
 bool qmt_revalidate(const struct lu_env *, struct lquota_entry *);
 void qmt_revalidate_lqes(const struct lu_env *, struct qmt_device *, __u32);
 __u64 qmt_alloc_expand(struct lquota_entry *, __u64, __u64);
@@ -514,7 +518,7 @@ int qmt_set_with_lqe(const struct lu_env *env, struct qmt_device *qmt,
 		     struct lquota_entry *lqe, __u64 hard, __u64 soft,
 		     __u64 time, __u32 valid, bool is_default, bool is_updated);
 int qmt_dqacq0(const struct lu_env *, struct qmt_device *, struct obd_uuid *,
-	       __u32, __u64, __u64, struct quota_body *);
+	       __u32, __u64, __u64, struct quota_body *, int);
 int qmt_uuid2idx(struct obd_uuid *, int *);
 
 /* qmt_lock.c */
diff --git a/lustre/quota/qmt_lock.c b/lustre/quota/qmt_lock.c
index ec8a345..c551aa8 100644
--- a/lustre/quota/qmt_lock.c
+++ b/lustre/quota/qmt_lock.c
@@ -89,7 +89,7 @@ int qmt_intent_policy(const struct lu_env *env, struct lu_device *ld,
 	case IT_QUOTA_DQACQ: {
 		struct lquota_entry *lqe;
 		struct ldlm_lock *lock;
-		int idx;
+		int idx, stype;
 
 		if (res->lr_name.name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] == 0)
 			/* acquire on global lock? something is wrong ... */
@@ -104,8 +104,8 @@ int qmt_intent_policy(const struct lu_env *env, struct lu_device *ld,
 			GOTO(out, rc = -ENOLCK);
 		LDLM_LOCK_PUT(lock);
 
-		rc = qmt_uuid2idx(uuid, &idx);
-		if (rc < 0)
+		stype = qmt_uuid2idx(uuid, &idx);
+		if (stype < 0)
 			GOTO(out, rc = -EINVAL);
 
 		/* TODO: it seems we don't need to get lqe from
@@ -115,7 +115,7 @@ int qmt_intent_policy(const struct lu_env *env, struct lu_device *ld,
 		LASSERT(lqe != NULL);
 		lqe_getref(lqe);
 
-		rc = qmt_pool_lqes_lookup(env, qmt, lqe_rtype(lqe), rc,
+		rc = qmt_pool_lqes_lookup(env, qmt, lqe_rtype(lqe), stype,
 					  lqe_qtype(lqe), &reqbody->qb_id,
 					  NULL, idx);
 		if (rc) {
@@ -126,7 +126,8 @@ int qmt_intent_policy(const struct lu_env *env, struct lu_device *ld,
 		/* acquire quota space */
 		rc = qmt_dqacq0(env, qmt, uuid, reqbody->qb_flags,
 				reqbody->qb_count,
-				reqbody->qb_usage, repbody);
+				reqbody->qb_usage, repbody,
+				qmt_dom(lqe_rtype(lqe), stype) ? -1 : idx);
 		lqe_putref(lqe);
 		qti_lqes_fini(env);
 		if (rc)
@@ -275,7 +276,7 @@ static bool qmt_clear_lgeg_arr_nu(struct lquota_entry *lqe, int stype, int idx)
 	/* There is no array to store lge for the case of DOM.
 	 * Ignore it until MDT pools will be ready.
 	 */
-	if (!(lqe_rtype(lqe) == LQUOTA_RES_DT && stype == QMT_STYPE_MDT)) {
+	if (!qmt_dom(lqe_rtype(lqe), stype)) {
 		lqe->lqe_glbl_data->lqeg_arr[idx].lge_qunit_nu = 0;
 		lqe->lqe_glbl_data->lqeg_arr[idx].lge_edquot_nu = 0;
 
@@ -331,7 +332,7 @@ int qmt_lvbo_update(struct lu_device *ld, struct ldlm_resource *res,
 	struct ldlm_lock *lock;
 	struct obd_export *exp;
 	bool need_revoke;
-	int rc = 0, idx;
+	int rc = 0, idx, stype;
 	ENTRY;
 
 	LASSERT(res != NULL);
@@ -381,11 +382,11 @@ int qmt_lvbo_update(struct lu_device *ld, struct ldlm_resource *res,
 		GOTO(out, rc = -EFAULT);
 	}
 
-	rc = qmt_uuid2idx(&exp->exp_client_uuid, &idx);
-	if (rc < 0)
-		GOTO(out_exp, rc);
+	stype = qmt_uuid2idx(&exp->exp_client_uuid, &idx);
+	if (stype < 0)
+		GOTO(out_exp, rc = stype);
 
-	need_revoke = qmt_clear_lgeg_arr_nu(lqe, rc, idx);
+	need_revoke = qmt_clear_lgeg_arr_nu(lqe, stype, idx);
 	if (lvb->lvb_id_rel == 0) {
 		/* nothing to release */
 		if (lvb->lvb_id_may_rel != 0)
@@ -396,12 +397,12 @@ int qmt_lvbo_update(struct lu_device *ld, struct ldlm_resource *res,
 	if (!need_revoke && lvb->lvb_id_rel == 0)
 		GOTO(out_exp, rc = 0);
 
-	rc = qmt_pool_lqes_lookup(env, qmt, lqe_rtype(lqe), rc, lqe_qtype(lqe),
-				  &lqe->lqe_id, NULL, idx);
+	rc = qmt_pool_lqes_lookup(env, qmt, lqe_rtype(lqe), stype,
+				  lqe_qtype(lqe), &lqe->lqe_id, NULL, idx);
 	if (rc)
 		GOTO(out_exp, rc);
 
-	if (need_revoke && qmt_set_revoke(env, lqe, rc, idx) &&
+	if (need_revoke && qmt_set_revoke(env, lqe, stype, idx) &&
 	    lqe->lqe_glbl_data) {
 		qmt_seed_glbe_edquot(env, lqe->lqe_glbl_data);
 		qmt_id_lock_notify(qmt, lqe);
@@ -414,7 +415,8 @@ int qmt_lvbo_update(struct lu_device *ld, struct ldlm_resource *res,
 		/* release quota space */
 		rc = qmt_dqacq0(env, qmt, &exp->exp_client_uuid,
 				QUOTA_DQACQ_FL_REL, lvb->lvb_id_rel,
-				0, &qti->qti_body);
+				0, &qti->qti_body,
+				qmt_dom(lqe_rtype(lqe), stype) ? -1 : idx);
 		if (rc || qti->qti_body.qb_count != lvb->lvb_id_rel)
 			LQUOTA_ERROR(lqe, "failed to release quota space on glimpse %llu!=%llu : rc = %d\n",
 				     qti->qti_body.qb_count, lvb->lvb_id_rel, rc);
@@ -637,7 +639,7 @@ void qmt_setup_id_desc(struct ldlm_lock *lock, union ldlm_gl_desc *desc,
 	LASSERT(stype >= 0);
 
 	/* DOM case - set global lqe settings */
-	if (lqe_rtype(lqe) == LQUOTA_RES_DT && stype == QMT_STYPE_MDT) {
+	if (qmt_dom(lqe_rtype(lqe), stype)) {
 		edquot = lqe->lqe_edquot;
 		qunit = lqe->lqe_qunit;
 	} else {
@@ -844,7 +846,7 @@ static int qmt_id_lock_cb(struct ldlm_lock *lock, struct lquota_entry *lqe)
 	/* Quota pools support only OSTs, despite MDTs also could be registered
 	 * as LQUOTA_RES_DT devices(DOM).
 	 */
-	if (lqe_rtype(lqe) == LQUOTA_RES_DT && stype == QMT_STYPE_MDT)
+	if (qmt_dom(lqe_rtype(lqe), stype))
 		return 1;
 	else
 		return lgd->lqeg_arr[idx].lge_edquot_nu ||
diff --git a/lustre/quota/qmt_pool.c b/lustre/quota/qmt_pool.c
index 97aefb1..decd399d 100644
--- a/lustre/quota/qmt_pool.c
+++ b/lustre/quota/qmt_pool.c
@@ -812,7 +812,7 @@ int qmt_pool_lqes_lookup(const struct lu_env *env,
 	/* Until MDT pools are not emplemented, all MDTs belong to
 	 * global pool, thus lookup lqes only from global pool.
 	 */
-	if (rtype == LQUOTA_RES_DT && stype == QMT_STYPE_MDT)
+	if (qmt_dom(rtype, stype))
 		idx = -1;
 
 	qti_pools_init(env);
diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh
index 386178a..f6023f3 100755
--- a/lustre/tests/sanity-quota.sh
+++ b/lustre/tests/sanity-quota.sh
@@ -5835,6 +5835,43 @@ test_84()
 }
 run_test 84 "Reset quota should fix the insane granted quota"
 
+test_85()
+{
+	local limit=3 # 3M
+	local qpool="qpool1"
+	local qpool2="qpool2"
+	local tfile1="$DIR/$tdir/$tfile-0"
+
+	(( OSTCOUNT >= 2 )) || skip "needs >= 2 OSTs"
+	mds_supports_qp
+	setup_quota_test || error "setup quota failed with $?"
+
+	# enable ost quota
+	set_ost_qtype $QTYPE || error "enable ost quota failed"
+
+	$LFS setquota -u $TSTUSR -b 0 -B 50T -i 0 -I 0 $DIR ||
+		error "set user quota failed"
+
+	pool_add $qpool || error "pool_add failed"
+	pool_add_targets $qpool 0 1 ||
+		error "pool_add_targets failed"
+
+	pool_add $qpool2 || error "pool_add failed"
+	pool_add_targets $qpool2 0 1 ||
+		error "pool_add_targets failed"
+
+	$LFS setstripe -p $qpool $DIR/$tdir || error "cannot set stripe"
+	$LFS setquota -u $TSTUSR -B 30M --pool $qpool $DIR ||
+		error "set user quota failed"
+	$LFS setquota -u $TSTUSR -B ${limit}M --pool $qpool2 $DIR ||
+		error "set user quota failed"
+
+	# don't care about the returned value. Just check we don't hang on write.
+	$RUNAS $DD of=$tfile1 count=10
+	return 0
+}
+run_test 85 "do not hang at write with the least_qunit"
+
 quota_fini()
 {
 	do_nodes $(comma_list $(nodes_list)) \
-- 
1.8.3.1
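
Illustration (outside the patch): the fix relies on a "notify until
acknowledged" pattern -- lge_qunit_nu/lge_edquot_nu stay set until a
glimpse actually reaches the slave, so an update that raced with lock
enqueue is resent from a later dqacq request instead of being lost.
The C sketch below is a minimal standalone model of that pattern; all
names in it (slave_state, glimpse, notify_if_needed) are hypothetical
and are not the Lustre API.

#include <stdbool.h>
#include <stdio.h>

/* Per-slave state, loosely mirroring lge_qunit_nu ("not updated"):
 * the flag stays set until the slave has seen the current qunit. */
struct slave_state {
	unsigned long long qunit;	/* last value the slave received */
	bool nu;			/* update still pending */
	bool has_lock;			/* glimpses reach only lock holders */
};

/* Try to push a new value; this fails while no per-ID lock has been
 * granted yet -- the window described in the commit message. */
static bool glimpse(struct slave_state *s, unsigned long long qunit)
{
	if (!s->has_lock)
		return false;	/* "no granted locks to send glimpse" */
	s->qunit = qunit;
	s->nu = false;		/* successful delivery clears the flag */
	return true;
}

/* Called when the value changes and, as in the fixed qmt_dqacq0()
 * path, on every later request while the flag is still set. */
static void notify_if_needed(struct slave_state *s, unsigned long long qunit)
{
	if (s->nu && !glimpse(s, qunit))
		printf("glimpse skipped, will retry on next request\n");
}

int main(void)
{
	struct slave_state s = { .qunit = 4096, .nu = false,
				 .has_lock = false };

	s.nu = true;			/* qunit shrank before any lock was granted */
	notify_if_needed(&s, 1024);	/* lost: no granted lock yet */

	s.has_lock = true;		/* per-ID lock granted later */
	notify_if_needed(&s, 1024);	/* retried: slave finally updated */
	printf("slave qunit=%llu nu=%d\n", s.qunit, (int)s.nu);
	return 0;
}

The design point mirrored from qmt_adjust_edquot_qunit_notify() is that
the pending flag is cleared by delivery, not by the attempt, so a
glimpse that found no granted locks is naturally retried and
lqe_revoke_time can eventually be set, ending the -115 loop.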