From: Niu Yawei Date: Mon, 11 Jun 2012 10:55:32 +0000 (-0700) Subject: LU-1493 quota: extra release caused by race X-Git-Tag: 2.2.58~20 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=b916544be6a57296aa2c4e55c02c45ff7495a77c LU-1493 quota: extra release caused by race There is a race between the check_cur_qunit() and the dqacq_completion(): check_cur_qunit() read hardlimit and calculate how much quota need be acquired/released based on the hardlimit, however, the hardlimit can be changed by the dqacq_completion() at anytime. So that could result in extra quota acquire/release when there is inflight dqacq. In general, such extra dqacq dosen't bring fatal error, unless an extra release is going to release more than 'hardlimit' quota. To minimize the code changes (anyway, it'll be totally rewritten in the new quota design), we just do one more check here to avoid the extra release which could bring fatal error. A better solution could be calculating the qd_count here and removing the lqs_blk/ino_rec stuff. Signed-off-by: Niu Yawei Change-Id: I0ad5ff0f32e39f32872c201ad1d545fbd9d1a57d Reviewed-on: http://review.whamcloud.com/3074 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Johann Lombardi Reviewed-by: Fan Yong --- diff --git a/lustre/quota/quota_context.c b/lustre/quota/quota_context.c index 50a14f1..7d92270 100644 --- a/lustre/quota/quota_context.c +++ b/lustre/quota/quota_context.c @@ -925,6 +925,56 @@ revoke_lqs_rec(struct lustre_qunit_size *lqs, struct qunit_data *qdata, int opc) cfs_spin_unlock(&lqs->lqs_lock); } +static int verify_cur_qunit(struct obd_device *obd, + struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata, int opc) +{ + struct obd_quotactl *qctl; + __u64 limit; + int ret = 0; + ENTRY; + + /* extra quota acquire can be tolerated. */ + if (opc == QUOTA_DQACQ) + RETURN(ret); + + OBD_ALLOC_PTR(qctl); + if (qctl == NULL) { + CERROR("Fail to allocate mem!\n"); + RETURN(-ENOMEM); + } + + qctl->qc_cmd = Q_GETQUOTA; + qctl->qc_id = qdata->qd_id; + qctl->qc_type = QDATA_IS_GRP(qdata); + ret = fsfilt_quotactl(obd, qctxt->lqc_sb, qctl); + if (ret) { + /* -ESRCH means no limit */ + CDEBUG(ret == -ESRCH ? D_QUOTA : D_ERROR, + "Can't get quota usage! rc:%d\n", ret); + GOTO(out, ret); + } + + if (QDATA_IS_BLK(qdata)) + limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS; + else + limit = qctl->qc_dqblk.dqb_ihardlimit; + + if (limit <= qdata->qd_count) { + CDEBUG(D_QUOTA, "drop extra release. id(%u), flag(%u), " + "type(%c), isblk(%c), count("LPU64"), " + "qd_qunit("LPU64"), hardlimit("LPU64").\n", + qdata->qd_id, qdata->qd_flags, + QDATA_IS_GRP(qdata) ? 'g' : 'u', + QDATA_IS_BLK(qdata) ? 'b' : 'i', + qdata->qd_count, qdata->qd_qunit, limit); + ret = -EAGAIN; + } +out: + OBD_FREE_PTR(qctl); + RETURN(ret); +} + static int schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, int opc, int wait, @@ -984,6 +1034,41 @@ schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, /* this is for quota_search_lqs */ lqs_putref(lqs); + /* LU-1493 + * + * There is a race between the check_cur_qunit() and the + * dqacq_completion(): check_cur_qunit() read hardlimit + * and calculate how much quota need be acquired/released + * based on the hardlimit, however, the hardlimit can be + * changed by the dqacq_completion() at anytime. So that + * could result in extra quota acquire/release when there + * is inflight dqacq. + * + * In general, such extra dqacq dosen't bring fatal error, + * unless an extra release is going to release more than + * 'hardlimit' quota. + * + * To minimize the code changes (anyway, it'll be totally + * rewritten in the new quota design), we just do one more + * check here to avoid the extra release which could bring + * fatal error. A better solution could be calculating the + * qd_count here and removing the lqs_blk/ino_rec stuff. + */ + rc = verify_cur_qunit(obd, qctxt, qdata, opc); + if (rc) { + cfs_spin_lock(&qunit_hash_lock); + remove_qunit_nolock(qunit); + cfs_spin_unlock(&qunit_hash_lock); + + compute_lqs_after_removing_qunit(qunit); + /* this is for qunit_get() */ + qunit_put(qunit); + /* this for alloc_qunit() */ + qunit_put(qunit); + /* drop this extra release silently */ + RETURN(0); + } + QDATA_DEBUG(qdata, "obd(%s): send %s quota req\n", obd->obd_name, (opc == QUOTA_DQACQ) ? "acq" : "rel"); /* master is going to dqacq/dqrel from itself */