Whamcloud - gitweb
LU-2153 quota: several fixes for reintegration
author Niu Yawei <niu@whamcloud.com>
Tue, 16 Oct 2012 02:48:03 +0000 (22:48 -0400)
committer Oleg Drokin <green@whamcloud.com>
Fri, 2 Nov 2012 19:28:16 +0000 (15:28 -0400)
- On the master side, never delete the id entry from the global/slave
  index; otherwise the deleted entries will not be transferred during
  reintegration. test_7a is extended to cover this change;
- When starting the reintegration thread, if there are any pending
  updates, abort and try to start the reintegration later;
- Set rq_no_retry_einprogress for quota requests;
- When the master finds a quota acquire request for an ID that is not
  enforced, return -ESRCH to the slave instead of -EINPROGRESS
  (sketched below);
- Check the number of free inodes in test_2;
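
A condensed view of the master/slave sides of the -ESRCH change,
assembled from the qmt_handler.c and qsd_request.c hunks below
(surrounding error handling elided):

    /* master (qmt_dqacq0): quota is no longer enforced for this ID and
     * the request is not a release, so tell the slave explicitly
     * instead of making it wait on -EINPROGRESS */
    if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
            GOTO(out_locked, rc = -ESRCH);

    /* slave (qsd_send_dqacq / qsd_intent_lock): do not let ptlrpc
     * resend the quota request automatically on -EINPROGRESS */
    req->rq_no_retry_einprogress = 1;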

Signed-off-by: Niu Yawei <niu@whamcloud.com>
Change-Id: I64037f6aff6be686250272eda53c027bf5ba47c2
Reviewed-on: http://review.whamcloud.com/4275
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Johann Lombardi <johann.lombardi@intel.com>
libcfs/include/libcfs/list.h
lustre/quota/qmt_entry.c
lustre/quota/qmt_handler.c
lustre/quota/qsd_reint.c
lustre/quota/qsd_request.c
lustre/quota/qsd_writeback.c
lustre/tests/sanity-quota.sh

libcfs/include/libcfs/list.h
index f07b1b9..27de832 100644 (file)
@@ -35,6 +35,8 @@ typedef struct list_head cfs_list_t;
         list_for_each_entry(pos, head, member)
 #define cfs_list_for_each_entry_reverse(pos, head, member) \
         list_for_each_entry_reverse(pos, head, member)
+#define cfs_list_for_each_entry_safe_reverse(pos, n, head, member) \
+       list_for_each_entry_safe_reverse(pos, n, head, member)
 #define cfs_list_for_each_entry_safe(pos, n, head, member) \
         list_for_each_entry_safe(pos, n, head, member)
 #ifdef list_for_each_entry_safe_from
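
The wrapper added above exposes the kernel's
list_for_each_entry_safe_reverse() helper under the libcfs naming
scheme; it is used by qsd_add_deferred() further down, which may unlink
the entry it is currently visiting while walking a list from tail to
head. A minimal usage sketch (struct foo, some_list and the freeing
call are made up for illustration):

    struct foo {
            int             f_val;
            cfs_list_t      f_link;         /* linked on some_list */
    };

    struct foo *pos, *next;

    /* 'next' caches the previous entry, so 'pos' can be unlinked and
     * freed without breaking the reverse traversal */
    cfs_list_for_each_entry_safe_reverse(pos, next, &some_list, f_link) {
            if (pos->f_val == 0) {
                    cfs_list_del_init(&pos->f_link);
                    OBD_FREE_PTR(pos);
            }
    }
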
lustre/quota/qmt_entry.c
index 59a0653..fa47f52 100644 (file)
@@ -261,21 +261,16 @@ int qmt_glb_write(const struct lu_env *env, struct thandle *th,
 
        LQUOTA_DEBUG(lqe, "write glb");
 
-       if (!lqe->lqe_enforced && lqe->lqe_granted == 0 &&
-           lqe->lqe_id.qid_uid != 0) {
-               /* quota isn't enforced any more for this entry and there is no
-                * more space granted to slaves, let's just remove the entry
-                * from the index */
-               rec = NULL;
-       } else {
-               rec = &qti->qti_glb_rec;
+       /* never delete the entry even when the id isn't enforced and
+        * no quota is granted; otherwise this entry will not be
+        * synced to the slave during reintegration. */
+       rec = &qti->qti_glb_rec;
 
-               /* fill global index with updated quota settings */
-               rec->qbr_granted   = lqe->lqe_granted;
-               rec->qbr_hardlimit = lqe->lqe_hardlimit;
-               rec->qbr_softlimit = lqe->lqe_softlimit;
-               rec->qbr_time      = lqe->lqe_gracetime;
-       }
+       /* fill global index with updated quota settings */
+       rec->qbr_granted   = lqe->lqe_granted;
+       rec->qbr_hardlimit = lqe->lqe_hardlimit;
+       rec->qbr_softlimit = lqe->lqe_softlimit;
+       rec->qbr_time      = lqe->lqe_gracetime;
 
        /* write new quota settings */
        rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
@@ -372,16 +367,12 @@ int qmt_slv_write(const struct lu_env *env, struct thandle *th,
        LQUOTA_DEBUG(lqe, "write slv "DFID" granted:"LPU64,
                     PFID(lu_object_fid(&slv_obj->do_lu)), granted);
 
-       if (granted == 0) {
-               /* this slave does not own any quota space for this ID any more,
-                * so let's just remove the entry from the index */
-               rec = NULL;
-       } else {
-               rec = &qti->qti_slv_rec;
+       /* never delete the entry; otherwise it will not be transferred
+        * to the slave during reintegration. */
+       rec = &qti->qti_slv_rec;
 
-               /* updated space granted to this slave */
-               rec->qsr_granted = granted;
-       }
+       /* updated space granted to this slave */
+       rec->qsr_granted = granted;
 
        /* write new granted space */
        rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
lustre/quota/qmt_handler.c
index dc3f280..94a64d9 100644 (file)
@@ -502,7 +502,7 @@ int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
         * receive the change yet. Just return EINPROGRESS until the slave gets
         * notified. */
        if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
-               GOTO(out_locked, rc = -EINPROGRESS);
+               GOTO(out_locked, rc = -ESRCH);
 
        /* recompute qunit in case it was never initialized */
        qmt_revalidate(env, lqe);
lustre/quota/qsd_reint.c
index 85c97cb..261d5dc 100644 (file)
@@ -558,6 +558,72 @@ void qsd_stop_reint_thread(struct qsd_qtype_info *qqi)
        }
 }
 
+static int qsd_entry_iter_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+                            cfs_hlist_node_t *hnode, void *data)
+{
+       struct lquota_entry     *lqe;
+       int                     *pending = (int *)data;
+
+       lqe = cfs_hlist_entry(hnode, struct lquota_entry, lqe_hash);
+       LASSERT(atomic_read(&lqe->lqe_ref) > 0);
+
+       lqe_read_lock(lqe);
+       *pending += lqe->lqe_pending_req;
+       lqe_read_unlock(lqe);
+
+       return 0;
+}
+
+static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
+{
+       struct qsd_instance     *qsd = qqi->qqi_qsd;
+       struct qsd_upd_rec      *upd;
+       struct lquota_entry     *lqe, *n;
+       int                      dqacq = 0;
+       bool                     updates = false;
+       ENTRY;
+
+       /* any pending quota adjust? */
+       cfs_spin_lock(&qsd->qsd_adjust_lock);
+       cfs_list_for_each_entry_safe(lqe, n, &qsd->qsd_adjust_list, lqe_link) {
+               if (lqe2qqi(lqe) == qqi) {
+                       cfs_list_del_init(&lqe->lqe_link);
+                       lqe_putref(lqe);
+               }
+       }
+       cfs_spin_unlock(&qsd->qsd_adjust_lock);
+
+       /* any pending updates? */
+       cfs_read_lock(&qsd->qsd_lock);
+       cfs_list_for_each_entry(upd, &qsd->qsd_upd_list, qur_link) {
+               if (upd->qur_qqi == qqi) {
+                       cfs_read_unlock(&qsd->qsd_lock);
+                       CDEBUG(D_QUOTA, "%s: pending %s updates for type:%d.\n",
+                              qsd->qsd_svname,
+                              upd->qur_global ? "global" : "slave",
+                              qqi->qqi_qtype);
+                       GOTO(out, updates = true);
+               }
+       }
+       cfs_read_unlock(&qsd->qsd_lock);
+
+       /* any pending quota request? */
+       cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
+                              &dqacq);
+       if (dqacq) {
+               CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
+                      qsd->qsd_svname, qqi->qqi_qtype);
+               updates = true;
+       }
+       EXIT;
+out:
+       if (updates)
+               CERROR("%s: Delaying reintegration for qtype:%d until pending "
+                      "updates are flushed.\n",
+                      qsd->qsd_svname, qqi->qqi_qtype);
+       return updates;
+}
+
 int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
 {
        struct ptlrpc_thread    *thread = &qqi->qqi_reint_thread;
@@ -582,6 +648,16 @@ int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
 
        cfs_write_unlock(&qsd->qsd_lock);
 
+       /* there could be some unfinished global or index entry updates
+        * (very unlikely); to avoid them messing up the reint
+        * procedure, we just return and try to restart reint later. */
+       if (qsd_pending_updates(qqi)) {
+               cfs_write_lock(&qsd->qsd_lock);
+               qqi->qqi_reint = 0;
+               cfs_write_unlock(&qsd->qsd_lock);
+               RETURN(0);
+       }
+
        rc = cfs_create_thread(qsd_reint_main, (void *)qqi, 0);
        if (rc < 0) {
                thread_set_flags(thread, SVC_STOPPED);
lustre/quota/qsd_request.c
index 9697bbe..e316814 100644 (file)
@@ -110,6 +110,7 @@ int qsd_send_dqacq(const struct lu_env *env, struct obd_export *exp,
                GOTO(out, rc = -ENOMEM);
 
        req->rq_no_resend = req->rq_no_delay = 1;
+       req->rq_no_retry_einprogress = 1;
        rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, QUOTA_DQACQ);
        if (rc) {
                ptlrpc_request_free(req);
@@ -227,6 +228,7 @@ int qsd_intent_lock(const struct lu_env *env, struct obd_export *exp,
        if (req == NULL)
                GOTO(out, rc = -ENOMEM);
 
+       req->rq_no_retry_einprogress = 1;
        rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
        if (rc) {
                ptlrpc_request_free(req);
lustre/quota/qsd_writeback.c
index 546bd37..360f694 100644 (file)
@@ -103,13 +103,25 @@ static void qsd_upd_add(struct qsd_instance *qsd, struct qsd_upd_rec *upd)
 /* must hold the qsd_lock */
 static void qsd_add_deferred(cfs_list_t *list, struct qsd_upd_rec *upd)
 {
-       struct qsd_upd_rec      *tmp;
+       struct qsd_upd_rec      *tmp, *n;
 
        /* Sort the updates in ascending order */
-       cfs_list_for_each_entry_reverse(tmp, list, qur_link) {
-
-               LASSERTF(upd->qur_ver != tmp->qur_ver, "ver:"LPU64"\n",
-                        upd->qur_ver);
+       cfs_list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
+
+               /* There could be some legacy records with duplicated
+                * versions. Imagine the following scenario: the slave received
+                * a global glimpse and queued a record in the deferred list,
+                * then the master crashed and rolled back to an earlier
+                * version, so the version of the queued record conflicts with
+                * later updates. We should just delete the legacy record in
+                * such a case. */
+               if (upd->qur_ver == tmp->qur_ver) {
+                       LASSERT(tmp->qur_lqe);
+                       LQUOTA_ERROR(tmp->qur_lqe, "Found a conflict record "
+                                    "with ver:"LPU64"", tmp->qur_ver);
+                       cfs_list_del_init(&tmp->qur_link);
+                       qsd_upd_free(tmp);
+               }
 
                if (upd->qur_ver < tmp->qur_ver) {
                        continue;
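
To make the rollback scenario above concrete: suppose the deferred list
holds records with versions 5, 7 and 9, then the master rolls back and
later re-sends an update that is again stamped with version 9; the
stale version-9 record must be freed before the new one is linked in,
keeping the list strictly ascending. A small self-contained userspace
model of that insertion rule (plain C, not Lustre code; the version
numbers and singly-linked list are made up for illustration):

    #include <stdio.h>
    #include <stdlib.h>

    struct rec {
            unsigned long long       ver;
            struct rec              *next;
    };

    /* insert 'ver' keeping the list in ascending version order; a
     * pre-existing record carrying the same version (the post-rollback
     * duplicate) is dropped first */
    static void add_deferred(struct rec **head, unsigned long long ver)
    {
            struct rec **pp = head, *cur, *upd;

            while ((cur = *pp) != NULL && cur->ver < ver)
                    pp = &cur->next;

            if (cur != NULL && cur->ver == ver) {
                    *pp = cur->next;
                    free(cur);              /* stale duplicate */
            }

            upd = malloc(sizeof(*upd));     /* allocation check omitted */
            upd->ver = ver;
            upd->next = *pp;
            *pp = upd;
    }

    int main(void)
    {
            struct rec *head = NULL, *r;
            unsigned long long in[] = { 5, 9, 7, 9 };   /* last 9 replays */
            unsigned int i;

            for (i = 0; i < sizeof(in) / sizeof(in[0]); i++)
                    add_deferred(&head, in[i]);

            for (r = head; r != NULL; r = r->next)
                    printf("%llu ", r->ver);            /* prints: 5 7 9 */
            printf("\n");
            return 0;
    }
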
lustre/tests/sanity-quota.sh
index 7ac5f48..3708dbb 100644 (file)
@@ -455,6 +455,12 @@ test_2() {
 
        [ "$SLOW" = "no" ] && LIMIT=1024 # 1k inodes
 
+       local FREE_INODES=$(lfs df -i | grep "filesystem summary" | \
+                               awk '{print $5}')
+       [ $FREE_INODES -lt $LIMIT ] &&
+               skip "not enough free inodes $FREE_INODES required $LIMIT" &&
+               return
+
        setup_quota_test
        trap cleanup_quota_test EXIT
 
@@ -937,6 +943,25 @@ test_7a() {
        $RUNAS $DD of=$TESTFILE count=$((LIMIT + 1)) oflag=sync &&
                quota_error u $TSTUSR "write success, but expect EDQUOT"
 
+       rm -f $TESTFILE
+       wait_delete_completed
+       sync_all_data || true
+       sleep 3
+
+       echo "Stop ost1..."
+       stop ost1
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+
+       echo "Start ost1..."
+       start ost1 $(ostdevname 1) $OST_MOUNT_OPTS
+
+       wait_ost_reint "ug" || error "reintegration failed"
+
+       # hardlimit should be cleared on slave during reintegration
+       $RUNAS $DD of=$TESTFILE count=$((LIMIT + 1)) oflag=sync ||
+               quota_error u $TSTUSR "write error, but expect success"
+
        cleanup_quota_test
        resetquota -u $TSTUSR
 }