list_for_each_entry(pos, head, member)
#define cfs_list_for_each_entry_reverse(pos, head, member) \
list_for_each_entry_reverse(pos, head, member)
+#define cfs_list_for_each_entry_safe_reverse(pos, n, head, member) \
+ list_for_each_entry_safe_reverse(pos, n, head, member)
#define cfs_list_for_each_entry_safe(pos, n, head, member) \
list_for_each_entry_safe(pos, n, head, member)
#ifdef list_for_each_entry_safe_from
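The new wrapper exists because qsd_add_deferred() below now unlinks stale records while walking the deferred list tail-first; the plain reverse iterator would step through freed memory. A minimal userspace mock of the idea (hypothetical types and macro, not the kernel's list.h):

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *prev, *next; int ver; };

/* mirrors list_for_each_entry_safe_reverse: 'n' is fetched before the
 * body runs, so the body may unlink and free 'pos' */
#define for_each_safe_reverse(pos, n, head)                              \
	for ((pos) = (head)->prev, (n) = (pos)->prev; (pos) != (head);   \
	     (pos) = (n), (n) = (pos)->prev)

int main(void)
{
	struct node head = { &head, &head, 0 }, *pos, *n;
	int i;

	for (i = 1; i <= 4; i++) {              /* build list 1 2 3 4 */
		struct node *e = malloc(sizeof(*e));
		e->ver = i;
		e->prev = head.prev;
		e->next = &head;
		head.prev->next = e;
		head.prev = e;
	}

	for_each_safe_reverse(pos, n, &head) {
		if (pos->ver == 3) {            /* unlink + free mid-walk */
			pos->prev->next = pos->next;
			pos->next->prev = pos->prev;
			free(pos);
		}
	}

	for_each_safe_reverse(pos, n, &head)
		printf("%d ", pos->ver);        /* prints: 4 2 1 */
	printf("\n");
	return 0;
}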
LQUOTA_DEBUG(lqe, "write glb");
- if (!lqe->lqe_enforced && lqe->lqe_granted == 0 &&
- lqe->lqe_id.qid_uid != 0) {
- /* quota isn't enforced any more for this entry and there is no
- * more space granted to slaves, let's just remove the entry
- * from the index */
- rec = NULL;
- } else {
- rec = &qti->qti_glb_rec;
+ /* never delete the entry even when the id isn't enforced and
+ * no quota is granted; otherwise, the entry will not be
+ * synced to the slave during reintegration. */
+ rec = &qti->qti_glb_rec;
- /* fill global index with updated quota settings */
- rec->qbr_granted = lqe->lqe_granted;
- rec->qbr_hardlimit = lqe->lqe_hardlimit;
- rec->qbr_softlimit = lqe->lqe_softlimit;
- rec->qbr_time = lqe->lqe_gracetime;
- }
+ /* fill global index with updated quota settings */
+ rec->qbr_granted = lqe->lqe_granted;
+ rec->qbr_hardlimit = lqe->lqe_hardlimit;
+ rec->qbr_softlimit = lqe->lqe_softlimit;
+ rec->qbr_time = lqe->lqe_gracetime;
/* write new quota settings */
rc = lquota_disk_write(env, th, LQE_GLB_OBJ(lqe), &lqe->lqe_id,
LQUOTA_DEBUG(lqe, "write slv "DFID" granted:"LPU64,
PFID(lu_object_fid(&slv_obj->do_lu)), granted);
- if (granted == 0) {
- /* this slave does not own any quota space for this ID any more,
- * so let's just remove the entry from the index */
- rec = NULL;
- } else {
- rec = &qti->qti_slv_rec;
+ /* never delete the entry; otherwise, it will not be transferred
+ * to the slave during reintegration. */
+ rec = &qti->qti_slv_rec;
- /* updated space granted to this slave */
- rec->qsr_granted = granted;
- }
+ /* updated space granted to this slave */
+ rec->qsr_granted = granted;
/* write new granted space */
rc = lquota_disk_write(env, th, slv_obj, &lqe->lqe_id,
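Both write paths now share one rule: keep the index key alive and write a record even when every field is zero, so a slave scanning the index during reintegration still sees the id and resets its local copy. A toy sketch of the difference, using a hypothetical in-memory index in place of the lquota_disk API:

#include <stdio.h>

/* toy index: one slot per id; present[] mimics whether the key exists */
struct toy_rec { unsigned long long granted, hard, soft; };
static struct toy_rec idx[8];
static int present[8];

/* old behaviour: drop the key once nothing is granted or enforced */
static void write_old(int id, const struct toy_rec *r)
{
	if (r->granted == 0 && r->hard == 0 && r->soft == 0) {
		present[id] = 0;                /* entry vanishes */
	} else {
		idx[id] = *r;
		present[id] = 1;
	}
}

/* new behaviour: always keep the key so an index scan still finds it */
static void write_new(int id, const struct toy_rec *r)
{
	idx[id] = *r;                           /* possibly all zeroes */
	present[id] = 1;
}

int main(void)
{
	struct toy_rec zero = { 0, 0, 0 };

	write_old(5, &zero);
	printf("old: id 5 seen by reintegration scan? %s\n",
	       present[5] ? "yes" : "no");      /* no: limits never reset */

	write_new(5, &zero);
	printf("new: id 5 seen by reintegration scan? %s\n",
	       present[5] ? "yes" : "no");      /* yes: zeroes get synced */
	return 0;
}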
- * receive the change yet. Just return EINPROGRESS until the slave gets
- * notified. */
+ * receive the change yet. Just return ESRCH until the slave gets
+ * notified. */
if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
- GOTO(out_locked, rc = -EINPROGRESS);
+ GOTO(out_locked, rc = -ESRCH);
/* recompute qunit in case it was never initialized */
qmt_revalidate(env, lqe);
}
}
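The errno swap matters for the requester's retry policy: -EINPROGRESS is treated as transient and retried, while -ESRCH (the conventional quotactl answer for "no quota for this id") tells the requester to stop asking. A hedged sketch of that distinction (hypothetical helper, not the actual qsd logic):

#include <errno.h>
#include <stdio.h>

/* hypothetical helper: decide whether a quota-acquire reply warrants a
 * resend; only -EINPROGRESS is transient */
static int should_resend(int rc)
{
	if (rc == -EINPROGRESS)
		return 1;	/* slave not yet notified: try again later */
	return 0;		/* 0, -ESRCH or a hard error: stop asking */
}

int main(void)
{
	printf("EINPROGRESS -> resend=%d\n", should_resend(-EINPROGRESS));
	printf("ESRCH       -> resend=%d\n", should_resend(-ESRCH));
	return 0;
}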
+static int qsd_entry_iter_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+ cfs_hlist_node_t *hnode, void *data)
+{
+ struct lquota_entry *lqe;
+ int *pending = (int *)data;
+
+ lqe = cfs_hlist_entry(hnode, struct lquota_entry, lqe_hash);
+ LASSERT(atomic_read(&lqe->lqe_ref) > 0);
+
+ lqe_read_lock(lqe);
+ *pending += lqe->lqe_pending_req;
+ lqe_read_unlock(lqe);
+
+ return 0;
+}
+
+static bool qsd_pending_updates(struct qsd_qtype_info *qqi)
+{
+ struct qsd_instance *qsd = qqi->qqi_qsd;
+ struct qsd_upd_rec *upd;
+ struct lquota_entry *lqe, *n;
+ int dqacq = 0;
+ bool updates = false;
+ ENTRY;
+
+ /* any pending quota adjust? */
+ cfs_spin_lock(&qsd->qsd_adjust_lock);
+ cfs_list_for_each_entry_safe(lqe, n, &qsd->qsd_adjust_list, lqe_link) {
+ if (lqe2qqi(lqe) == qqi) {
+ cfs_list_del_init(&lqe->lqe_link);
+ lqe_putref(lqe);
+ }
+ }
+ cfs_spin_unlock(&qsd->qsd_adjust_lock);
+
+ /* any pending updates? */
+ cfs_read_lock(&qsd->qsd_lock);
+ cfs_list_for_each_entry(upd, &qsd->qsd_upd_list, qur_link) {
+ if (upd->qur_qqi == qqi) {
+ cfs_read_unlock(&qsd->qsd_lock);
+ CDEBUG(D_QUOTA, "%s: pending %s updates for type:%d.\n",
+ qsd->qsd_svname,
+ upd->qur_global ? "global" : "slave",
+ qqi->qqi_qtype);
+ GOTO(out, updates = true);
+ }
+ }
+ cfs_read_unlock(&qsd->qsd_lock);
+
+ /* any pending quota request? */
+ cfs_hash_for_each_safe(qqi->qqi_site->lqs_hash, qsd_entry_iter_cb,
+ &dqacq);
+ if (dqacq) {
+ CDEBUG(D_QUOTA, "%s: pending dqacq for type:%d.\n",
+ qsd->qsd_svname, qqi->qqi_qtype);
+ updates = true;
+ }
+ EXIT;
+out:
+ if (updates)
+ CERROR("%s: Delaying reintegration for qtype:%d until pending "
+ "updates are flushed.\n",
+ qsd->qsd_svname, qqi->qqi_qtype);
+ return updates;
+}
+
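qsd_entry_iter_cb() follows the usual iterate-and-accumulate contract: fold per-entry state into *data under the entry lock and return 0 so the walk continues. A standalone sketch of that contract with a hypothetical table in place of cfs_hash (in this mock a non-zero return aborts the walk; whether cfs_hash honors that depends on the iterator variant):

#include <stdio.h>

struct entry { int pending; };

typedef int (*iter_cb_t)(struct entry *e, void *data);

/* toy "hash walk": calls cb on every entry until one returns non-zero */
static int table_for_each(struct entry *tab, int n, iter_cb_t cb, void *data)
{
	int i, rc;

	for (i = 0; i < n; i++) {
		rc = cb(&tab[i], data);
		if (rc)
			return rc;
	}
	return 0;
}

static int sum_pending(struct entry *e, void *data)
{
	*(int *)data += e->pending;	/* accumulate, never abort */
	return 0;
}

int main(void)
{
	struct entry tab[] = { {2}, {0}, {3} };
	int pending = 0;

	table_for_each(tab, 3, sum_pending, &pending);
	printf("pending requests: %d\n", pending);	/* 5 */
	return 0;
}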
int qsd_start_reint_thread(struct qsd_qtype_info *qqi)
{
struct ptlrpc_thread *thread = &qqi->qqi_reint_thread;
cfs_write_unlock(&qsd->qsd_lock);
+ /* there could be some unfinished global or index entry updates
+ * (very unlikely), to avoid them messing up with the reint
+ * procedure, we just return and try to re-start reint later. */
+ if (qsd_pending_updates(qqi)) {
+ cfs_write_lock(&qsd->qsd_lock);
+ qqi->qqi_reint = 0;
+ cfs_write_unlock(&qsd->qsd_lock);
+ RETURN(0);
+ }
+
rc = cfs_create_thread(qsd_reint_main, (void *)qqi, 0);
if (rc < 0) {
thread_set_flags(thread, SVC_STOPPED);
GOTO(out, rc = -ENOMEM);
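The early return relies on a bail-and-retry-later pattern: clear the in-progress flag under the write lock and return success, trusting the next trigger to attempt the start again. A hypothetical userspace sketch of that pattern using pthreads instead of cfs locks:

#include <stdio.h>
#include <pthread.h>

struct state {
	pthread_rwlock_t lock;
	int starting;
	int pending;		/* stands in for qsd_pending_updates() */
};

static int try_start(struct state *s)
{
	pthread_rwlock_wrlock(&s->lock);
	if (s->starting) {		/* someone else won the race */
		pthread_rwlock_unlock(&s->lock);
		return 0;
	}
	s->starting = 1;
	pthread_rwlock_unlock(&s->lock);

	if (s->pending) {		/* unfinished updates: defer */
		pthread_rwlock_wrlock(&s->lock);
		s->starting = 0;	/* let a later trigger retry */
		pthread_rwlock_unlock(&s->lock);
		return 0;		/* not an error */
	}

	printf("starting reint worker\n");
	return 0;
}

int main(void)
{
	struct state s = { PTHREAD_RWLOCK_INITIALIZER, 0, 1 };

	try_start(&s);			/* deferred: pending == 1 */
	s.pending = 0;
	try_start(&s);			/* proceeds */
	return 0;
}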
req->rq_no_resend = req->rq_no_delay = 1;
+ req->rq_no_retry_einprogress = 1;
rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, QUOTA_DQACQ);
if (rc) {
ptlrpc_request_free(req);
if (req == NULL)
GOTO(out, rc = -ENOMEM);
+ req->rq_no_retry_einprogress = 1;
rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
if (rc) {
ptlrpc_request_free(req);
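Setting rq_no_retry_einprogress opts these requests out of ptlrpc's automatic resend of -EINPROGRESS replies, leaving retry scheduling to the quota code itself. A toy model of such a per-request opt-out (hypothetical types, not the ptlrpc internals):

#include <errno.h>
#include <stdio.h>

struct toy_req {
	int no_retry_einprogress;	/* mirrors rq_no_retry_einprogress */
};

/* fake transport: replies -EINPROGRESS twice, then succeeds */
static int transport_send(int *attempts)
{
	return ++(*attempts) <= 2 ? -EINPROGRESS : 0;
}

/* hypothetical resend loop: automatic EINPROGRESS retries can be opted
 * out of per request, leaving the retry policy to the caller */
static int send_req(struct toy_req *req, int *attempts)
{
	int rc;

	do {
		rc = transport_send(attempts);
	} while (rc == -EINPROGRESS && !req->no_retry_einprogress);

	return rc;
}

int main(void)
{
	struct toy_req req = { 1 };
	int attempts = 0;
	int rc = send_req(&req, &attempts);

	/* caller sees -EINPROGRESS after one attempt and may reschedule */
	printf("rc=%d attempts=%d\n", rc, attempts);
	return 0;
}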
/* must hold the qsd_lock */
static void qsd_add_deferred(cfs_list_t *list, struct qsd_upd_rec *upd)
{
- struct qsd_upd_rec *tmp;
+ struct qsd_upd_rec *tmp, *n;
/* Sort the updates in ascending order */
- cfs_list_for_each_entry_reverse(tmp, list, qur_link) {
-
- LASSERTF(upd->qur_ver != tmp->qur_ver, "ver:"LPU64"\n",
- upd->qur_ver);
+ cfs_list_for_each_entry_safe_reverse(tmp, n, list, qur_link) {
+
+ /* There could be some legacy records which have a duplicated
+ * version. Imagine the following scenario: the slave received a
+ * global glimpse and queued a record in the deferred list, then
+ * the master crashed and rolled back to an earlier version, so
+ * the version of the queued record conflicts with later updates.
+ * We should just delete the legacy record in such a case. */
+ if (upd->qur_ver == tmp->qur_ver) {
+ LASSERT(tmp->qur_lqe);
+ LQUOTA_ERROR(tmp->qur_lqe, "Found a conflict record "
+ "with ver:"LPU64"", tmp->qur_ver);
+ cfs_list_del_init(&tmp->qur_link);
+ qsd_upd_free(tmp);
+ }
if (upd->qur_ver < tmp->qur_ver) {
continue;
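The net effect of the hunk is a sorted insert that first purges any stale record carrying the incoming version. The same logic on a plain sorted singly linked list (illustrative mock, not the cfs_list code):

#include <stdio.h>
#include <stdlib.h>

struct rec { unsigned long long ver; struct rec *next; };

/* toy version of the dedup-then-insert logic: keep the list sorted by
 * version and drop any stale record that carries the same version as
 * the incoming one (the master may have rolled back and reused it) */
static void add_sorted(struct rec **head, struct rec *upd)
{
	struct rec **p = head;

	while (*p && (*p)->ver < upd->ver)
		p = &(*p)->next;

	if (*p && (*p)->ver == upd->ver) {	/* legacy duplicate */
		struct rec *stale = *p;
		*p = stale->next;
		free(stale);
	}

	upd->next = *p;
	*p = upd;
}

static struct rec *mk(unsigned long long v)
{
	struct rec *r = malloc(sizeof(*r));
	r->ver = v;
	r->next = NULL;
	return r;
}

int main(void)
{
	struct rec *head = NULL, *r;

	add_sorted(&head, mk(3));
	add_sorted(&head, mk(1));
	add_sorted(&head, mk(3));	/* replaces the stale ver 3 */
	add_sorted(&head, mk(2));

	for (r = head; r; r = r->next)
		printf("%llu ", r->ver);	/* 1 2 3 */
	printf("\n");
	return 0;
}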
[ "$SLOW" = "no" ] && LIMIT=1024 # 1k inodes
+ local FREE_INODES=$(lfs df -i | grep "filesystem summary" | \
+ awk '{print $5}')
+ [ $FREE_INODES -lt $LIMIT ] &&
+ skip "not enough free inodes $FREE_INODES required $LIMIT" &&
+ return
+
setup_quota_test
trap cleanup_quota_test EXIT
$RUNAS $DD of=$TESTFILE count=$((LIMIT + 1)) oflag=sync &&
quota_error u $TSTUSR "write success, but expect EDQUOT"
+ rm -f $TESTFILE
+ wait_delete_completed
+ sync_all_data || true
+ sleep 3
+
+ echo "Stop ost1..."
+ stop ost1
+
+ $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+
+ echo "Start ost1..."
+ start ost1 $(ostdevname 1) $OST_MOUNT_OPTS
+
+ wait_ost_reint "ug" || error "reintegration failed"
+
+ # the hard limit should be cleared on the slave during reintegration
+ $RUNAS $DD of=$TESTFILE count=$((LIMIT + 1)) oflag=sync ||
+ quota_error u $TSTUSR "write error, but expect success"
+
cleanup_quota_test
resetquota -u $TSTUSR
}