From: Niu Yawei
Date: Tue, 16 Oct 2012 02:48:03 +0000 (-0400)
Subject: LU-2153 quota: several fixes for reintegration
X-Git-Tag: 2.3.55~44
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=984f4ce51fd38caaf0bd2b706a130f7f17c51638

LU-2153 quota: several fixes for reintegration

- On the master side, never delete the id entry from the global/slave
  index; otherwise the deleted entries will not be transferred during
  reintegration. test_7a is improved to cover this change;
- When starting the reintegration thread, abort if there are any pending
  updates and retry reintegration later;
- Set rq_no_retry_einprogress for quota requests;
- When the master receives a quota acquire request for an ID that is not
  enforced, return -ESRCH to the slave instead of -EINPROGRESS;
- Check free inodes in test_2;

Signed-off-by: Niu Yawei
Change-Id: I64037f6aff6be686250272eda53c027bf5ba47c2
Reviewed-on: http://review.whamcloud.com/4275
Tested-by: Hudson
Tested-by: Maloo
Reviewed-by: Johann Lombardi
---

diff --git a/libcfs/include/libcfs/list.h b/libcfs/include/libcfs/list.h
index f07b1b9..27de832 100644
--- a/libcfs/include/libcfs/list.h
+++ b/libcfs/include/libcfs/list.h
@@ -35,6 +35,8 @@ typedef struct list_head cfs_list_t;
 	list_for_each_entry(pos, head, member)
 #define cfs_list_for_each_entry_reverse(pos, head, member) \
 	list_for_each_entry_reverse(pos, head, member)
+#define cfs_list_for_each_entry_safe_reverse(pos, n, head, member) \
+	list_for_each_entry_safe_reverse(pos, n, head, member)
 #define cfs_list_for_each_entry_safe(pos, n, head, member) \
 	list_for_each_entry_safe(pos, n, head, member)
 #ifdef list_for_each_entry_safe_from
diff --git a/lustre/quota/qmt_entry.c b/lustre/quota/qmt_entry.c
index 59a0653..fa47f52 100644
--- a/lustre/quota/qmt_entry.c
+++ b/lustre/quota/qmt_entry.c
@@ -261,21 +261,16 @@ int qmt_glb_write(const struct lu_env *env, struct thandle *th,

 	LQUOTA_DEBUG(lqe, "write glb");

-	if (!lqe->lqe_enforced && lqe->lqe_granted == 0 &&
-	    lqe->lqe_id.qid_uid != 0) {
-		/* quota isn't enforced any more for this entry and there is no
-		 * more space granted to slaves, let's just remove the entry
-		 * from the index */
-		rec = NULL;
-	} else {
-		rec = &qti->qti_glb_rec;
+	/* never delete the entry even when the id isn't enforced and
+	 * no quota is granted, otherwise this entry will not be
+	 * synced to the slave during reintegration. */
+	rec = &qti->qti_glb_rec;

-		/* fill global index with updated quota settings */
-		rec->qbr_granted = lqe->lqe_granted;
-		rec->qbr_hardlimit = lqe->lqe_hardlimit;
-		rec->qbr_softlimit = lqe->lqe_softlimit;
-		rec->qbr_time = lqe->lqe_gracetime;
-	}
+	/* fill global index with updated quota settings */
+	rec->qbr_granted = lqe->lqe_granted;
+	rec->qbr_hardlimit = lqe->lqe_hardlimit;
+	rec->qbr_softlimit = lqe->lqe_softlimit;
+	rec->qbr_time = lqe->lqe_gracetime;

 	/* write new quota settings */
 	rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
@@ -372,16 +367,12 @@ int qmt_slv_write(const struct lu_env *env, struct thandle *th,
 	LQUOTA_DEBUG(lqe, "write slv "DFID" granted:"LPU64,
 		     PFID(lu_object_fid(&slv_obj->do_lu)), granted);

-	if (granted == 0) {
-		/* this slave does not own any quota space for this ID any more,
-		 * so let's just remove the entry from the index */
-		rec = NULL;
-	} else {
-		rec = &qti->qti_slv_rec;
+	/* never delete the entry, otherwise it'll not be transferred
+	 * to the slave during reintegration. */
+	rec = &qti->qti_slv_rec;

-		/* updated space granted to this slave */
-		rec->qsr_granted = granted;
-	}
+	/* updated space granted to this slave */
+	rec->qsr_granted = granted;

 	/* write new granted space */
 	rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
diff --git a/lustre/quota/qmt_handler.c b/lustre/quota/qmt_handler.c
index dc3f280..94a64d9 100644
--- a/lustre/quota/qmt_handler.c
+++ b/lustre/quota/qmt_handler.c
@@ -502,7 +502,7 @@ int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
 	 * receive the change yet. Just return EINPROGRESS until the slave gets
 	 * notified. */
 	if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
-		GOTO(out_locked, rc = -EINPROGRESS);
+		GOTO(out_locked, rc = -ESRCH);

 	/* recompute qunit in case it was never initialized */
 	qmt_revalidate(env, lqe);
diff --git a/lustre/quota/qsd_reint.c b/lustre/quota/qsd_reint.c
index 85c97cb..261d5dc 100644
--- a/lustre/quota/qsd_reint.c
+++ b/lustre/quota/qsd_reint.c
@@ -558,6 +558,72 @@ void qsd_stop_reint_thread(struct qsd_qtype_info *qqi)
 	}
 }

+static int qsd_entry_iter_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+			     cfs_hlist_node_t *hnode, void *data)
+{
+	struct lquota_entry *lqe;
+	int *pending = (int *)data;
+
+	lqe = cfs_hlist_entry(hnode, struct lquota_entry, lqe_hash);
+	LASSERT(atomic_read(&lqe->lqe_ref) > 0);
+
+	lqe_read_lock(lqe);
+	*pending += lqe->lqe_pending_req;
+	lqe_read_unlock(lqe);
+
+	return 0;
+}
+
+static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
+{
+	struct qsd_instance *qsd = qqi->qqi_qsd;
+	struct qsd_upd_rec *upd;
+	struct lquota_entry *lqe, *n;
+	int dqacq = 0;
+	bool updates = false;
+	ENTRY;
+
+	/* any pending quota adjust? */
+	cfs_spin_lock(&qsd->qsd_adjust_lock);
+	cfs_list_for_each_entry_safe(lqe, n, &qsd->qsd_adjust_list, lqe_link) {
+		if (lqe2qqi(lqe) == qqi) {
+			cfs_list_del_init(&lqe->lqe_link);
+			lqe_putref(lqe);
+		}
+	}
+	cfs_spin_unlock(&qsd->qsd_adjust_lock);
+
+	/* any pending updates? */
+	cfs_read_lock(&qsd->qsd_lock);
+	cfs_list_for_each_entry(upd, &qsd->qsd_upd_list, qur_link) {
+		if (upd->qur_qqi == qqi) {
+			cfs_read_unlock(&qsd->qsd_lock);
+			CDEBUG(D_QUOTA, "%s: pending %s updates for type:%d.\n",
+			       qsd->qsd_svname,
+			       upd->qur_global ? "global" : "slave",
+			       qqi->qqi_qtype);
+			GOTO(out, updates = true);
+		}
+	}
+	cfs_read_unlock(&qsd->qsd_lock);
+
+	/* any pending quota request? */
+	cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
+			       &dqacq);
+	if (dqacq) {
+		CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
+		       qsd->qsd_svname, qqi->qqi_qtype);
+		updates = true;
+	}
+	EXIT;
+out:
+	if (updates)
+		CERROR("%s: Delaying reintegration for qtype:%d until pending "
+		       "updates are flushed.\n",
+		       qsd->qsd_svname, qqi->qqi_qtype);
+	return updates;
+}
+
 int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
 {
 	struct ptlrpc_thread *thread = &qqi->qqi_reint_thread;
@@ -582,6 +648,16 @@ int qsd_start_reint_thread(struct qsd_qtype_info *qqi)

 	cfs_write_unlock(&qsd->qsd_lock);

+	/* there could be some unfinished global or index entry updates
+	 * (very unlikely); to avoid them messing up the reint
+	 * procedure, just return and try to re-start reint later. */
+	if (qsd_pending_updates(qqi)) {
+		cfs_write_lock(&qsd->qsd_lock);
+		qqi->qqi_reint = 0;
+		cfs_write_unlock(&qsd->qsd_lock);
+		RETURN(0);
+	}
+
 	rc = cfs_create_thread(qsd_reint_main, (void *)qqi, 0);
 	if (rc < 0) {
 		thread_set_flags(thread, SVC_STOPPED);
diff --git a/lustre/quota/qsd_request.c b/lustre/quota/qsd_request.c
index 9697bbe..e316814 100644
--- a/lustre/quota/qsd_request.c
+++ b/lustre/quota/qsd_request.c
@@ -110,6 +110,7 @@ int qsd_send_dqacq(const struct lu_env *env, struct obd_export *exp,
 		GOTO(out, rc = -ENOMEM);

 	req->rq_no_resend = req->rq_no_delay = 1;
+	req->rq_no_retry_einprogress = 1;
 	rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, QUOTA_DQACQ);
 	if (rc) {
 		ptlrpc_request_free(req);
@@ -227,6 +228,7 @@ int qsd_intent_lock(const struct lu_env *env, struct obd_export *exp,
 	if (req == NULL)
 		GOTO(out, rc = -ENOMEM);

+	req->rq_no_retry_einprogress = 1;
 	rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
 	if (rc) {
 		ptlrpc_request_free(req);
diff --git a/lustre/quota/qsd_writeback.c b/lustre/quota/qsd_writeback.c
index 546bd37..360f694 100644
--- a/lustre/quota/qsd_writeback.c
+++ b/lustre/quota/qsd_writeback.c
@@ -103,13 +103,25 @@ static void qsd_upd_add(struct qsd_instance *qsd, struct qsd_upd_rec *upd)
 /* must hold the qsd_lock */
 static void qsd_add_deferred(cfs_list_t *list, struct qsd_upd_rec *upd)
 {
-	struct qsd_upd_rec *tmp;
+	struct qsd_upd_rec *tmp, *n;

 	/* Sort the updates in ascending order */
-	cfs_list_for_each_entry_reverse(tmp, list, qur_link) {
-
-		LASSERTF(upd->qur_ver != tmp->qur_ver, "ver:"LPU64"\n",
-			 upd->qur_ver);
+	cfs_list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
+
+		/* There could be some legacy records with a duplicated
+		 * version. Imagine the following scenario: the slave received
+		 * a global glimpse and queued a record in the deferred list,
+		 * then the master crashed and rolled back to an earlier
+		 * version, so the version of the queued record conflicts
+		 * with later updates. Just delete the legacy record in
+		 * such a case. */
+		if (upd->qur_ver == tmp->qur_ver) {
+			LASSERT(tmp->qur_lqe);
+			LQUOTA_ERROR(tmp->qur_lqe, "Found a conflicting record "
+				     "with ver:"LPU64"", tmp->qur_ver);
+			cfs_list_del_init(&tmp->qur_link);
+			qsd_upd_free(tmp);
+		}

 		if (upd->qur_ver < tmp->qur_ver) {
 			continue;
diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh
index 7ac5f48..3708dbb 100644
--- a/lustre/tests/sanity-quota.sh
+++ b/lustre/tests/sanity-quota.sh
@@ -455,6 +455,12 @@ test_2() {

 	[ "$SLOW" = "no" ] && LIMIT=1024 # 1k inodes

+	local FREE_INODES=$(lfs df -i | grep "filesystem summary" | \
+			awk '{print $5}')
+	[ $FREE_INODES -lt $LIMIT ] &&
+		skip "not enough free inodes $FREE_INODES required $LIMIT" &&
+		return
+
 	setup_quota_test
 	trap cleanup_quota_test EXIT

@@ -937,6 +943,25 @@ test_7a() {
 	$RUNAS $DD of=$TESTFILE count=$((LIMIT + 1)) oflag=sync &&
 		quota_error u $TSTUSR "write success, but expect EDQUOT"

+	rm -f $TESTFILE
+	wait_delete_completed
+	sync_all_data || true
+	sleep 3
+
+	echo "Stop ost1..."
+	stop ost1
+
+	$LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+
+	echo "Start ost1..."
+	start ost1 $(ostdevname 1) $OST_MOUNT_OPTS
+
+	wait_ost_reint "ug" || error "reintegration failed"
+
+	# hardlimit should be cleared on slave during reintegration
+	$RUNAS $DD of=$TESTFILE count=$((LIMIT + 1)) oflag=sync ||
+		quota_error u $TSTUSR "write error, but expect success"
+
 	cleanup_quota_test
 	resetquota -u $TSTUSR
 }
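
The two tests touched by this patch can be exercised on their own against an
already configured Lustre test environment. The snippet below is only a
hedged example: ONLY is the usual test-framework selector for sanity-quota.sh,
but the exact environment setup (config file, node and device names) is an
assumption that depends on the local test rig.

	# run only the quota tests updated by this patch (test_2 and test_7a);
	# assumes test-framework.sh and the cluster config are already in place
	cd lustre/tests
	ONLY="2 7a" bash sanity-quota.sh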