Whamcloud - gitweb
LU-2153 quota: several fixes for reintegration
author Niu Yawei <niu@whamcloud.com>
Tue, 16 Oct 2012 02:48:03 +0000 (22:48 -0400)
committer Oleg Drokin <green@whamcloud.com>
Fri, 2 Nov 2012 19:28:16 +0000 (15:28 -0400)
- On the master side, never delete the id entry from the global/slave
  index; otherwise the deleted entries will not be transferred during
  reintegration. test_7a is extended to cover this change;
- When starting the reintegration thread, if there are any pending
  updates, abort and try to start the reintegration later;
- Set rq_no_retry_einprogress for quota requests;
- When the master finds a quota acquire request for an ID that is not
  enforced, return -ESRCH to the slave instead of -EINPROGRESS
  (sketched below);
- Check the number of free inodes in test_2;
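
A condensed view of the master/slave sides of the -ESRCH change,
assembled from the qmt_handler.c and qsd_request.c hunks below
(surrounding error handling elided):

    /* master (qmt_dqacq0): quota is no longer enforced for this ID and
     * the request is not a release, so tell the slave explicitly
     * instead of making it wait on -EINPROGRESS */
    if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
            GOTO(out_locked, rc = -ESRCH);

    /* slave (qsd_send_dqacq / qsd_intent_lock): do not let ptlrpc
     * resend the quota request automatically on -EINPROGRESS */
    req->rq_no_retry_einprogress = 1;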

Signed-off-by: Niu Yawei <niu@whamcloud.com>
Change-Id: I64037f6aff6be686250272eda53c027bf5ba47c2
Reviewed-on: http://review.whamcloud.com/4275
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Johann Lombardi <johann.lombardi@intel.com>
libcfs/include/libcfs/list.h
lustre/quota/qmt_entry.c
lustre/quota/qmt_handler.c
lustre/quota/qsd_reint.c
lustre/quota/qsd_request.c
lustre/quota/qsd_writeback.c
lustre/tests/sanity-quota.sh

libcfs/include/libcfs/list.h
index f07b1b9..27de832 100644 (file)
@@ -35,6 +35,8 @@ typedef struct list_head cfs_list_t;
         list_for_each_entry(pos, head, member)
 #define cfs_list_for_each_entry_reverse(pos, head, member) \
         list_for_each_entry_reverse(pos, head, member)
+#define cfs_list_for_each_entry_safe_reverse(pos, n, head, member) \
+       list_for_each_entry_safe_reverse(pos, n, head, member)
 #define cfs_list_for_each_entry_safe(pos, n, head, member) \
         list_for_each_entry_safe(pos, n, head, member)
 #ifdef list_for_each_entry_safe_from
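
The wrapper added above exposes the kernel's
list_for_each_entry_safe_reverse() helper under the libcfs naming
scheme; it is used by qsd_add_deferred() further down, which may unlink
the entry it is currently visiting while walking a list from tail to
head. A minimal usage sketch (struct foo, some_list and the freeing
call are made up for illustration):

    struct foo {
            int             f_val;
            cfs_list_t      f_link;         /* linked on some_list */
    };

    struct foo *pos, *next;

    /* 'next' caches the previous entry, so 'pos' can be unlinked and
     * freed without breaking the reverse traversal */
    cfs_list_for_each_entry_safe_reverse(pos, next, &some_list, f_link) {
            if (pos->f_val == 0) {
                    cfs_list_del_init(&pos->f_link);
                    OBD_FREE_PTR(pos);
            }
    }
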
lustre/quota/qmt_entry.c
index 59a0653..fa47f52 100644 (file)
@@ -261,21 +261,16 @@ int qmt_glb_write(const struct lu_env *env, struct thandle *th,
 
        LQUOTA_DEBUG(lqe, "write glb");
 
-       if (!lqe->lqe_enforced && lqe->lqe_granted == 0 &&
-           lqe->lqe_id.qid_uid != 0) {
-               /* quota isn't enforced any more for this entry and there is no
-                * more space granted to slaves, let's just remove the entry
-                * from the index */
-               rec = NULL;
-       } else {
-               rec = &qti->qti_glb_rec;
+       /* never delete the entry even when the id isn't enforced and
+        * no quota is granted; otherwise this entry will not be
+        * synced to the slave during reintegration. */
+       rec = &qti->qti_glb_rec;
 
-               /* fill global index with updated quota settings */
-               rec->qbr_granted   = lqe->lqe_granted;
-               rec->qbr_hardlimit = lqe->lqe_hardlimit;
-               rec->qbr_softlimit = lqe->lqe_softlimit;
-               rec->qbr_time      = lqe->lqe_gracetime;
-       }
+       /* fill global index with updated quota settings */
+       rec->qbr_granted   = lqe->lqe_granted;
+       rec->qbr_hardlimit = lqe->lqe_hardlimit;
+       rec->qbr_softlimit = lqe->lqe_softlimit;
+       rec->qbr_time      = lqe->lqe_gracetime;
 
        /* write new quota settings */
        rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
@@ -372,16 +367,12 @@ int qmt_slv_write(const struct lu_env *env, struct thandle *th,
        LQUOTA_DEBUG(lqe, "write slv "DFID" granted:"LPU64,
                     PFID(lu_object_fid(&slv_obj->do_lu)), granted);
 
-       if (granted == 0) {
-               /* this slave does not own any quota space for this ID any more,
-                * so let's just remove the entry from the index */
-               rec = NULL;
-       } else {
-               rec = &qti->qti_slv_rec;
+       /* never delete the entry; otherwise it will not be transferred
+        * to the slave during reintegration. */
+       rec = &qti->qti_slv_rec;
 
-               /* updated space granted to this slave */
-               rec->qsr_granted = granted;
-       }
+       /* updated space granted to this slave */
+       rec->qsr_granted = granted;
 
        /* write new granted space */
        rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
lustre/quota/qmt_handler.c
index dc3f280..94a64d9 100644 (file)
@@ -502,7 +502,7 @@ int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
         * receive the change yet. Just return EINPROGRESS until the slave gets
         * notified. */
        if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
-               GOTO(out_locked, rc = -EINPROGRESS);
+               GOTO(out_locked, rc = -ESRCH);
 
        /* recompute qunit in case it was never initialized */
        qmt_revalidate(env, lqe);
lustre/quota/qsd_reint.c
index 85c97cb..261d5dc 100644 (file)
@@ -558,6 +558,72 @@ void qsd_stop_reint_thread(struct qsd_qtype_info *qqi)
        }
 }
 
+static int qsd_entry_iter_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+                            cfs_hlist_node_t *hnode, void *data)
+{
+       struct lquota_entry     *lqe;
+       int                     *pending = (int *)data;
+
+       lqe = cfs_hlist_entry(hnode, struct lquota_entry, lqe_hash);
+       LASSERT(atomic_read(&lqe->lqe_ref) > 0);
+
+       lqe_read_lock(lqe);
+       *pending += lqe->lqe_pending_req;
+       lqe_read_unlock(lqe);
+
+       return 0;
+}
+
+static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
+{
+       struct qsd_instance     *qsd = qqi->qqi_qsd;
+       struct qsd_upd_rec      *upd;
+       struct lquota_entry     *lqe, *n;
+       int                      dqacq = 0;
+       bool                     updates = false;
+       ENTRY;
+
+       /* any pending quota adjust? */
+       cfs_spin_lock(&qsd->qsd_adjust_lock);
+       cfs_list_for_each_entry_safe(lqe, n, &qsd->qsd_adjust_list, lqe_link) {
+               if (lqe2qqi(lqe) == qqi) {
+                       cfs_list_del_init(&lqe->lqe_link);
+                       lqe_putref(lqe);
+               }
+       }
+       cfs_spin_unlock(&qsd->qsd_adjust_lock);
+
+       /* any pending updates? */
+       cfs_read_lock(&qsd->qsd_lock);
+       cfs_list_for_each_entry(upd, &qsd->qsd_upd_list, qur_link) {
+               if (upd->qur_qqi == qqi) {
+                       cfs_read_unlock(&qsd->qsd_lock);
+                       CDEBUG(D_QUOTA, "%s: pending %s updates for type:%d.\n",
+                              qsd->qsd_svname,
+                              upd->qur_global ? "global" : "slave",
+                              qqi->qqi_qtype);
+                       GOTO(out, updates = true);
+               }
+       }
+       cfs_read_unlock(&qsd->qsd_lock);
+
+       /* any pending quota request? */
+       cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
+                              &dqacq);
+       if (dqacq) {
+               CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
+                      qsd->qsd_svname, qqi->qqi_qtype);
+               updates = true;
+       }
+       EXIT;
+out:
+       if (updates)
+               CERROR("%s: Delaying reintegration for qtype:%d until pending "
+                      "updates are flushed.\n",
+                      qsd->qsd_svname, qqi->qqi_qtype);
+       return updates;
+}
+
 int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
 {
        struct ptlrpc_thread    *thread = &qqi->qqi_reint_thread;
@@ -582,6 +648,16 @@ int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
 
        cfs_write_unlock(&qsd->qsd_lock);
 
+       /* there could be some unfinished global or index entry updates
+        * (very unlikely); to avoid them messing up the reint
+        * procedure, we just return and try to restart reint later. */
+       if (qsd_pending_updates(qqi)) {
+               cfs_write_lock(&qsd->qsd_lock);
+               qqi->qqi_reint = 0;
+               cfs_write_unlock(&qsd->qsd_lock);
+               RETURN(0);
+       }
+
        rc = cfs_create_thread(qsd_reint_main, (void *)qqi, 0);
        if (rc < 0) {
                thread_set_flags(thread, SVC_STOPPED);
lustre/quota/qsd_request.c
index 9697bbe..e316814 100644 (file)
@@ -110,6 +110,7 @@ int qsd_send_dqacq(const struct lu_env *env, struct obd_export *exp,
                GOTO(out, rc = -ENOMEM);
 
        req->rq_no_resend = req->rq_no_delay = 1;
+       req->rq_no_retry_einprogress = 1;
        rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, QUOTA_DQACQ);
        if (rc) {
                ptlrpc_request_free(req);
@@ -227,6 +228,7 @@ int qsd_intent_lock(const struct lu_env *env, struct obd_export *exp,
        if (req == NULL)
                GOTO(out, rc = -ENOMEM);
 
+       req->rq_no_retry_einprogress = 1;
        rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
        if (rc) {
                ptlrpc_request_free(req);
lustre/quota/qsd_writeback.c
index 546bd37..360f694 100644 (file)
@@ -103,13 +103,25 @@ static void qsd_upd_add(struct qsd_instance *qsd, struct qsd_upd_rec *upd)
 /* must hold the qsd_lock */
 static void qsd_add_deferred(cfs_list_t *list, struct qsd_upd_rec *upd)
 {
-       struct qsd_upd_rec      *tmp;
+       struct qsd_upd_rec      *tmp, *n;
 
        /* Sort the updates in ascending order */
-       cfs_list_for_each_entry_reverse(tmp, list, qur_link) {
-
-               LASSERTF(upd->qur_ver != tmp->qur_ver, "ver:"LPU64"\n",
-                        upd->qur_ver);
+       cfs_list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
+
+               /* There could be some legacy records with duplicated
+                * versions. Imagine the following scenario: the slave received
+                * a global glimpse and queued a record in the deferred list,
+                * then the master crashed and rolled back to an earlier
+                * version, so the version of the queued record conflicts with
+                * later updates. We should just delete the legacy record in
+                * such a case. */
+               if (upd->qur_ver == tmp->qur_ver) {
+                       LASSERT(tmp->qur_lqe);
+                       LQUOTA_ERROR(tmp->qur_lqe, "Found a conflict record "
+                                    "with ver:"LPU64"", tmp->qur_ver);
+                       cfs_list_del_init(&tmp->qur_link);
+                       qsd_upd_free(tmp);
+               }
 
                if (upd->qur_ver < tmp->qur_ver) {
                        continue;
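
To make the rollback scenario above concrete: suppose the deferred list
holds records with versions 5, 7 and 9, then the master rolls back and
later re-sends an update that is again stamped with version 9; the
stale version-9 record must be freed before the new one is linked in,
keeping the list strictly ascending. A small self-contained userspace
model of that insertion rule (plain C, not Lustre code; the version
numbers and singly-linked list are made up for illustration):

    #include <stdio.h>
    #include <stdlib.h>

    struct rec {
            unsigned long long       ver;
            struct rec              *next;
    };

    /* insert 'ver' keeping the list in ascending version order; a
     * pre-existing record carrying the same version (the post-rollback
     * duplicate) is dropped first */
    static void add_deferred(struct rec **head, unsigned long long ver)
    {
            struct rec **pp = head, *cur, *upd;

            while ((cur = *pp) != NULL && cur->ver < ver)
                    pp = &cur->next;

            if (cur != NULL && cur->ver == ver) {
                    *pp = cur->next;
                    free(cur);              /* stale duplicate */
            }

            upd = malloc(sizeof(*upd));     /* allocation check omitted */
            upd->ver = ver;
            upd->next = *pp;
            *pp = upd;
    }

    int main(void)
    {
            struct rec *head = NULL, *r;
            unsigned long long in[] = { 5, 9, 7, 9 };   /* last 9 replays */
            unsigned int i;

            for (i = 0; i < sizeof(in) / sizeof(in[0]); i++)
                    add_deferred(&head, in[i]);

            for (r = head; r != NULL; r = r->next)
                    printf("%llu ", r->ver);            /* prints: 5 7 9 */
            printf("\n");
            return 0;
    }
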
lustre/tests/sanity-quota.sh
index 7ac5f48..3708dbb 100644 (file)
@@ -455,6 +455,12 @@ test_2() {
 
        [ "$SLOW" = "no" ] && LIMIT=1024 # 1k inodes
 
+       local FREE_INODES=$(lfs df -i | grep "filesystem summary" | \
+                               awk '{print $5}')
+       [ $FREE_INODES -lt $LIMIT ] &&
+               skip "not enough free inodes $FREE_INODES required $LIMIT" &&
+               return
+
        setup_quota_test
        trap cleanup_quota_test EXIT
 
@@ -937,6 +943,25 @@ test_7a() {
        $RUNAS $DD of=$TESTFILE count=$((LIMIT + 1)) oflag=sync &&
                quota_error u $TSTUSR "write success, but expect EDQUOT"
 
+       rm -f $TESTFILE
+       wait_delete_completed
+       sync_all_data || true
+       sleep 3
+
+       echo "Stop ost1..."
+       stop ost1
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+
+       echo "Start ost1..."
+       start ost1 $(ostdevname 1) $OST_MOUNT_OPTS
+
+       wait_ost_reint "ug" || error "reintegration failed"
+
+       # hardlimit should be cleared on slave during reintegration
+       $RUNAS $DD of=$TESTFILE count=$((LIMIT + 1)) oflag=sync ||
+               quota_error u $TSTUSR "write error, but expect success"
+
        cleanup_quota_test
        resetquota -u $TSTUSR
 }