Whamcloud - gitweb
LU-1493 quota: extra release caused by race
authorNiu Yawei <niu@whamcloud.com>
Mon, 11 Jun 2012 10:55:32 +0000 (03:55 -0700)
committerAndreas Dilger <adilger@whamcloud.com>
Fri, 22 Jun 2012 15:56:45 +0000 (11:56 -0400)
There is a race between the check_cur_qunit() and the
dqacq_completion(): check_cur_qunit() read hardlimit
and calculate how much quota need be acquired/released
based on the hardlimit, however, the hardlimit can be
changed by the dqacq_completion() at anytime. So that
could result in extra quota acquire/release when there
is inflight dqacq.

In general, such extra dqacq dosen't bring fatal error,
unless an extra release is going to release more than
'hardlimit' quota.

To minimize the code changes (anyway, it'll be totally
rewritten in the new quota design), we just do one more
check here to avoid the extra release which could bring
fatal error. A better solution could be calculating the
qd_count here and removing the lqs_blk/ino_rec stuff.

Signed-off-by: Niu Yawei <niu@whamcloud.com>
Change-Id: I0ad5ff0f32e39f32872c201ad1d545fbd9d1a57d
Reviewed-on: http://review.whamcloud.com/3074
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Johann Lombardi <johann@whamcloud.com>
Reviewed-by: Fan Yong <yong.fan@whamcloud.com>
lustre/quota/quota_context.c

index 50a14f1..7d92270 100644 (file)
@@ -925,6 +925,56 @@ revoke_lqs_rec(struct lustre_qunit_size *lqs, struct qunit_data *qdata, int opc)
         cfs_spin_unlock(&lqs->lqs_lock);
 }
 
+static int verify_cur_qunit(struct obd_device *obd,
+                           struct lustre_quota_ctxt *qctxt,
+                           struct qunit_data *qdata, int opc)
+{
+       struct obd_quotactl *qctl;
+       __u64 limit;
+       int ret = 0;
+       ENTRY;
+
+       /* extra quota acquire can be tolerated. */
+       if (opc == QUOTA_DQACQ)
+               RETURN(ret);
+
+       OBD_ALLOC_PTR(qctl);
+       if (qctl == NULL) {
+               CERROR("Fail to allocate mem!\n");
+               RETURN(-ENOMEM);
+       }
+
+       qctl->qc_cmd = Q_GETQUOTA;
+       qctl->qc_id = qdata->qd_id;
+       qctl->qc_type = QDATA_IS_GRP(qdata);
+       ret = fsfilt_quotactl(obd, qctxt->lqc_sb, qctl);
+       if (ret) {
+               /* -ESRCH means no limit */
+               CDEBUG(ret == -ESRCH ? D_QUOTA : D_ERROR,
+                      "Can't get quota usage! rc:%d\n", ret);
+               GOTO(out, ret);
+       }
+
+       if (QDATA_IS_BLK(qdata))
+               limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
+       else
+               limit = qctl->qc_dqblk.dqb_ihardlimit;
+
+       if (limit <= qdata->qd_count) {
+               CDEBUG(D_QUOTA, "drop extra release. id(%u), flag(%u), "
+                      "type(%c), isblk(%c), count("LPU64"), "
+                      "qd_qunit("LPU64"), hardlimit("LPU64").\n",
+                      qdata->qd_id, qdata->qd_flags,
+                      QDATA_IS_GRP(qdata) ? 'g' : 'u',
+                      QDATA_IS_BLK(qdata) ? 'b' : 'i',
+                      qdata->qd_count, qdata->qd_qunit, limit);
+               ret = -EAGAIN;
+       }
+out:
+       OBD_FREE_PTR(qctl);
+       RETURN(ret);
+}
+
 static int
 schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
                struct qunit_data *qdata, int opc, int wait,
@@ -984,6 +1034,41 @@ schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
         /* this is for quota_search_lqs */
         lqs_putref(lqs);
 
+       /* LU-1493
+        *
+        * There is a race between the check_cur_qunit() and the
+        * dqacq_completion(): check_cur_qunit() read hardlimit
+        * and calculate how much quota need be acquired/released
+        * based on the hardlimit, however, the hardlimit can be
+        * changed by the dqacq_completion() at anytime. So that
+        * could result in extra quota acquire/release when there
+        * is inflight dqacq.
+        *
+        * In general, such extra dqacq dosen't bring fatal error,
+        * unless an extra release is going to release more than
+        * 'hardlimit' quota.
+        *
+        * To minimize the code changes (anyway, it'll be totally
+        * rewritten in the new quota design), we just do one more
+        * check here to avoid the extra release which could bring
+        * fatal error. A better solution could be calculating the
+        * qd_count here and removing the lqs_blk/ino_rec stuff.
+        */
+       rc = verify_cur_qunit(obd, qctxt, qdata, opc);
+       if (rc) {
+               cfs_spin_lock(&qunit_hash_lock);
+               remove_qunit_nolock(qunit);
+               cfs_spin_unlock(&qunit_hash_lock);
+
+               compute_lqs_after_removing_qunit(qunit);
+               /* this is for qunit_get() */
+               qunit_put(qunit);
+               /* this for alloc_qunit() */
+               qunit_put(qunit);
+               /* drop this extra release silently */
+               RETURN(0);
+       }
+
         QDATA_DEBUG(qdata, "obd(%s): send %s quota req\n",
                     obd->obd_name, (opc == QUOTA_DQACQ) ? "acq" : "rel");
         /* master is going to dqacq/dqrel from itself */