From 9680dabde70a9ec5f211e26d7c2f2988f296f455 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Thu, 11 Oct 2012 15:55:58 +0200 Subject: [PATCH] LU-2147 quota: several fixes to reintegration procedure This patch gathers several fixes/improvements to the quota reintegration procedure: - do not set rq_no_resend & rq_no_delay for IT_QUOTA_CONN, so that the reintegration thread waits until the master is available instead of being repeatedly stopped and restarted - add a procfs tunable to force reintegration; it can be useful for testing, but also for fixing things at a customer site when a bug was hit during reintegration. - when transferring indexes, the per-page header wasn't swabbed; it is now swabbed when needed - on index transfer, the hash value isn't sent any more (unlike on orion_quota) since we now use II_FL_NOHASH. As a consequence, qsd_reint_entries() shouldn't take the hash size into account when parsing a page containing key/record pairs. This patch also: - quiets many common messages which aren't real errors and shouldn't make it to the console - fixes a bug in qmt_adjust_edquot(), which did not correctly check whether the revoke timeout has elapsed - changes test_6 to use a larger quota limit, to prevent the edquot flag from being set by the QMT and causing the test to fail. - removes temporary code from setup_quota() in test-framework.sh (t-f) now that the new quota code is fully landed. 
- re-enables ost-pool tests 23a & 23b Signed-off-by: Johann Lombardi Change-Id: I9af9a025faa1ef173810df647b93307e2139c6f9 Reviewed-on: http://review.whamcloud.com/4253 Tested-by: Hudson Reviewed-by: Niu Yawei Tested-by: Maloo Reviewed-by: Fan Yong Reviewed-by: Oleg Drokin --- lustre/include/lustre/lustre_idl.h | 1 + lustre/obdclass/obd_mount.c | 8 +++++-- lustre/osp/osp_dev.c | 2 +- lustre/ptlrpc/niobuf.c | 2 +- lustre/ptlrpc/pack_generic.c | 9 +++++++ lustre/quota/qmt_entry.c | 14 +++++++---- lustre/quota/qsd_entry.c | 2 +- lustre/quota/qsd_handler.c | 33 ++++++++++++------------- lustre/quota/qsd_internal.h | 2 +- lustre/quota/qsd_lib.c | 49 +++++++++++++++++++++++++++++++++++++- lustre/quota/qsd_reint.c | 38 ++++++++++++++++++++++++----- lustre/quota/qsd_request.c | 4 +++- lustre/quota/qsd_writeback.c | 5 +++- lustre/tests/ost-pools.sh | 6 ----- lustre/tests/sanity-quota.sh | 8 ++++--- lustre/tests/test-framework.sh | 4 ---- 16 files changed, 137 insertions(+), 50 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 508b52e..5d80e49 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -3099,6 +3099,7 @@ struct lu_idxpage { * For the time being, we only support fixed-size key & record. 
*/ char lip_entries[0]; }; +extern void lustre_swab_lip_header(struct lu_idxpage *lip); #define LIP_HDR_SIZE (offsetof(struct lu_idxpage, lip_entries)) diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 865b451..325acaa 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -2106,8 +2106,12 @@ static void server_put_super(struct super_block *sb) /* disconnect the osp-on-ost first to drain off the inflight request */ if (IS_OST(lsi) || IS_MDT(lsi)) { - if (lustre_disconnect_osp(sb) < 0) - CERROR("%s: Fail to disconnect osp-on-ost!\n", tmpname); + int rc; + + rc = lustre_disconnect_osp(sb); + if (rc && rc != -ETIMEDOUT) + CERROR("%s: failed to disconnect osp-on-ost (rc=%d)!\n", + tmpname, rc); } /* Stop the target */ diff --git a/lustre/osp/osp_dev.c b/lustre/osp/osp_dev.c index e99b663..c9d9639 100644 --- a/lustre/osp/osp_dev.c +++ b/lustre/osp/osp_dev.c @@ -212,7 +212,7 @@ int osp_disconnect(struct osp_device *d) (void)ptlrpc_pinger_del_import(imp); rc = ptlrpc_disconnect_import(imp, 0); - if (rc) + if (rc && rc != -ETIMEDOUT) CERROR("%s: can't disconnect: rc = %d\n", d->opd_obd->obd_name, rc); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index cefa639..4233689 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -565,7 +565,7 @@ int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult) if (req->rq_status != -ENOSPC && req->rq_status != -EACCES && req->rq_status != -EPERM && req->rq_status != -ENOENT && - req->rq_status != -EINPROGRESS) + req->rq_status != -EINPROGRESS && req->rq_status != -EDQUOT) req->rq_type = PTL_RPC_MSG_ERR; rc = ptlrpc_send_reply(req, may_be_difficult); diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index d6e9a66..1983a99 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2038,6 +2038,15 @@ void lustre_swab_idx_info(struct idx_info *ii) __swab16s(&ii->ii_recsize); } +void 
lustre_swab_lip_header(struct lu_idxpage *lip) +{ + /* swab header */ + __swab32s(&lip->lip_magic); + __swab16s(&lip->lip_flags); + __swab16s(&lip->lip_nr); +} +EXPORT_SYMBOL(lustre_swab_lip_header); + void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr) { __swab32s (&rr->rr_opcode); diff --git a/lustre/quota/qmt_entry.c b/lustre/quota/qmt_entry.c index 588440a..59a0653 100644 --- a/lustre/quota/qmt_entry.c +++ b/lustre/quota/qmt_entry.c @@ -118,14 +118,14 @@ static void qmt_lqe_debug(struct lquota_entry *lqe, void *arg, libcfs_debug_vmsg2(msgdata, fmt, args, "qmt:%s pool:%d-%s id:"LPU64" enforced:%d hard:"LPU64 " soft:"LPU64" granted:"LPU64" time:"LPU64" qunit:" - LPU64" edquot:%d revoke:"LPU64"\n", + LPU64" edquot:%d may_rel:"LPU64" revoke:"LPU64"\n", pool->qpi_qmt->qmt_svname, pool->qpi_key & 0x0000ffff, RES_NAME(pool->qpi_key >> 16), lqe->lqe_id.qid_uid, lqe->lqe_enforced, lqe->lqe_hardlimit, lqe->lqe_softlimit, lqe->lqe_granted, lqe->lqe_gracetime, - lqe->lqe_qunit, lqe->lqe_edquot, + lqe->lqe_qunit, lqe->lqe_edquot, lqe->lqe_may_rel, lqe->lqe_revoke_time); } @@ -423,6 +423,7 @@ int qmt_validate_limits(struct lquota_entry *lqe, __u64 hard, __u64 soft) void qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now) { struct qmt_pool_info *pool = lqe2qpi(lqe); + ENTRY; if (!lqe->lqe_enforced) RETURN_EXIT; @@ -441,9 +442,13 @@ void qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now) * some quota space */ RETURN_EXIT; + if (lqe->lqe_revoke_time == 0) + /* least qunit value not sent to all slaves yet */ + RETURN_EXIT; + if (lqe->lqe_may_rel != 0 && - cfs_time_beforeq_64(lqe->lqe_revoke_time, - cfs_time_shift_64(-QMT_REBA_TIMEOUT))) + cfs_time_before_64(cfs_time_shift_64(-QMT_REBA_TIMEOUT), + lqe->lqe_revoke_time)) /* Let's give more time to slave to release space */ RETURN_EXIT; @@ -473,6 +478,7 @@ void qmt_adjust_edquot(struct lquota_entry *lqe, __u64 now) /* let's notify slave by issuing glimpse on per-ID lock. 
* the rebalance thread will take care of this */ qmt_id_lock_notify(pool->qpi_qmt, lqe); + EXIT; } /* diff --git a/lustre/quota/qsd_entry.c b/lustre/quota/qsd_entry.c index 0970a42..993c1e3 100644 --- a/lustre/quota/qsd_entry.c +++ b/lustre/quota/qsd_entry.c @@ -330,7 +330,7 @@ int qsd_update_lqe(const struct lu_env *env, struct lquota_entry *lqe, glb_rec->qbr_softlimit) ? true : false; LQUOTA_DEBUG(lqe, "updating global index hardlimit: "LPU64", " - "softlimit: "LPU64"\n", glb_rec->qbr_hardlimit, + "softlimit: "LPU64, glb_rec->qbr_hardlimit, glb_rec->qbr_softlimit); } else { struct lquota_slv_rec *slv_rec = (struct lquota_slv_rec *)rec; diff --git a/lustre/quota/qsd_handler.c b/lustre/quota/qsd_handler.c index b1ec4a0..40c8cba 100644 --- a/lustre/quota/qsd_handler.c +++ b/lustre/quota/qsd_handler.c @@ -158,16 +158,19 @@ static void qsd_dqacq_completion(const struct lu_env *env, lqe_write_lock(lqe); - if (ret != 0 && ret != -EDQUOT && ret != -EINPROGRESS) { - LQUOTA_ERROR(lqe, "DQACQ failed with %d, op:%x", ret, - reqbody->qb_flags); - GOTO(out, ret); - } + LQUOTA_DEBUG(lqe, "DQACQ returned %d, flags:%x", ret, + reqbody->qb_flags); /* despite -EDQUOT & -EINPROGRESS errors, the master might still * grant us back quota space to adjust quota overrun */ - - LQUOTA_DEBUG(lqe, "DQACQ returned %d", ret); + if (ret != 0 && ret != -EDQUOT && ret != -EINPROGRESS) { + if (ret != -ETIMEDOUT && ret != -ENOTCONN && + ret != -ESHUTDOWN && ret != -EAGAIN) + /* print errors only if return code is unexpected */ + LQUOTA_ERROR(lqe, "DQACQ failed with %d, flags:%x", ret, + reqbody->qb_flags); + GOTO(out, ret); + } /* Set the lqe_lockh */ if (lustre_handle_is_used(lockh) && @@ -736,9 +739,8 @@ int qsd_op_begin(const struct lu_env *env, struct qsd_instance *qsd, struct lquota_trans *trans, struct lquota_id_info *qi, int *flags) { - struct qsd_qtype_info *qqi; - int i, rc; - bool found = false; + int i, rc; + bool found = false; ENTRY; if (unlikely(qsd == NULL)) @@ -757,15 +759,10 @@ int 
qsd_op_begin(const struct lu_env *env, struct qsd_instance *qsd, (qsd->qsd_is_md && qi->lqi_is_blk)) RETURN(0); - qqi = qsd->qsd_type_array[qi->lqi_type]; - /* ignore quota enforcement request when: * - quota isn't enforced for this quota type - * or - we failed to access the accounting object for this quota type - * or - the space to acquire is null * or - the user/group is root */ - if (!qsd_type_enabled(qsd, qi->lqi_type) || qqi->qqi_acct_obj == NULL || - qi->lqi_id.qid_uid == 0) + if (!qsd_type_enabled(qsd, qi->lqi_type) || qi->lqi_id.qid_uid == 0) RETURN(0); LASSERTF(trans->lqt_id_cnt <= QUOTA_MAX_TRANSIDS, "id_cnt=%d", @@ -795,8 +792,8 @@ int qsd_op_begin(const struct lu_env *env, struct qsd_instance *qsd, } /* manage quota enforcement for this ID */ - rc = qsd_op_begin0(env, qqi, &trans->lqt_ids[i], qi->lqi_space, flags); - + rc = qsd_op_begin0(env, qsd->qsd_type_array[qi->lqi_type], + &trans->lqt_ids[i], qi->lqi_space, flags); RETURN(rc); } EXPORT_SYMBOL(qsd_op_begin); diff --git a/lustre/quota/qsd_internal.h b/lustre/quota/qsd_internal.h index 2a84bf6..4b8cde5 100644 --- a/lustre/quota/qsd_internal.h +++ b/lustre/quota/qsd_internal.h @@ -318,7 +318,7 @@ static inline void qsd_set_qunit(struct lquota_entry *lqe, __u64 qunit) lqe->lqe_nopreacq = false; } -#define QSD_WB_INTERVAL 15 /* 15 seconds */ +#define QSD_WB_INTERVAL 60 /* 60 seconds */ /* qsd_entry.c */ extern struct lquota_entry_operations qsd_lqe_ops; diff --git a/lustre/quota/qsd_lib.c b/lustre/quota/qsd_lib.c index 277791b..5a8c7b0 100644 --- a/lustre/quota/qsd_lib.c +++ b/lustre/quota/qsd_lib.c @@ -122,6 +122,7 @@ static int lprocfs_qsd_rd_enabled(char *page, char **start, off_t off, { struct qsd_instance *qsd = (struct qsd_instance *)data; char enabled[5]; + LASSERT(qsd != NULL); memset(enabled, 0, sizeof(enabled)); @@ -135,9 +136,48 @@ static int lprocfs_qsd_rd_enabled(char *page, char **start, off_t off, return snprintf(page, count, "%s\n", enabled); } +/* force reintegration procedure to be 
executed. + * Used for test/debugging purpose */ +static int lprocfs_qsd_wr_force_reint(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct qsd_instance *qsd = (struct qsd_instance *)data; + int rc = 0, qtype; + + LASSERT(qsd != NULL); + + cfs_write_lock(&qsd->qsd_lock); + if (qsd->qsd_stopping) { + /* don't mess up with shutdown procedure, it is already + * complicated enough */ + rc = -ESHUTDOWN; + } else if (!qsd->qsd_prepared) { + rc = -EAGAIN; + } else { + /* mark all indexes as stale */ + for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) { + qsd->qsd_type_array[qtype]->qqi_glb_uptodate = false; + qsd->qsd_type_array[qtype]->qqi_slv_uptodate = false; + } + } + cfs_write_unlock(&qsd->qsd_lock); + + if (rc) + return rc; + + /* kick off reintegration */ + for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) { + rc = qsd_start_reint_thread(qsd->qsd_type_array[qtype]); + if (rc) + break; + } + return rc == 0 ? count : rc; +} + static struct lprocfs_vars lprocfs_quota_qsd_vars[] = { { "info", lprocfs_qsd_rd_state, 0, 0}, { "enabled", lprocfs_qsd_rd_enabled, 0, 0}, + { "force_reint", 0, lprocfs_qsd_wr_force_reint, 0}, { NULL } }; @@ -365,6 +405,9 @@ void qsd_fini(const struct lu_env *env, struct qsd_instance *qsd) int qtype; ENTRY; + if (unlikely(qsd == NULL)) + RETURN_EXIT; + CDEBUG(D_QUOTA, "%s: initiating QSD shutdown\n", qsd->qsd_svname); cfs_write_lock(&qsd->qsd_lock); qsd->qsd_stopping = true; @@ -547,7 +590,8 @@ int qsd_prepare(const struct lu_env *env, struct qsd_instance *qsd) int qtype, rc = 0; ENTRY; - LASSERT(qsd != NULL); + if (unlikely(qsd == NULL)) + RETURN(0); cfs_read_lock(&qsd->qsd_lock); if (qsd->qsd_prepared) { @@ -647,6 +691,9 @@ int qsd_start(const struct lu_env *env, struct qsd_instance *qsd) int type, rc = 0; ENTRY; + if (unlikely(qsd == NULL)) + RETURN(0); + cfs_write_lock(&qsd->qsd_lock); if (!qsd->qsd_prepared) { CERROR("%s: can't start qsd instance since it was properly " diff --git 
a/lustre/quota/qsd_reint.c b/lustre/quota/qsd_reint.c index 040b885..936117e 100644 --- a/lustre/quota/qsd_reint.c +++ b/lustre/quota/qsd_reint.c @@ -52,9 +52,10 @@ static void qsd_reint_completion(const struct lu_env *env, ENTRY; if (rc) { - CERROR("%s: failed to enqueue global quota lock, glb " - "fid:"DFID", rc:%d\n", qsd->qsd_svname, - PFID(&req_qbody->qb_fid), rc); + CDEBUG_LIMIT(rc != -EAGAIN ? D_ERROR : D_QUOTA, + "%s: failed to enqueue global quota lock, glb fid:" + DFID", rc:%d\n", qsd->qsd_svname, + PFID(&req_qbody->qb_fid), rc); RETURN_EXIT; } @@ -82,6 +83,8 @@ static int qsd_reint_qid(const struct lu_env *env, struct qsd_qtype_info *qqi, if (IS_ERR(lqe)) RETURN(PTR_ERR(lqe)); + LQUOTA_DEBUG(lqe, "reintegrating entry"); + rc = qsd_update_lqe(env, lqe, global, rec); if (rc) GOTO(out, rc); @@ -99,25 +102,48 @@ static int qsd_reint_entries(const struct lu_env *env, unsigned int npages, bool need_swab) { struct qsd_thread_info *qti = qsd_info(env); + struct qsd_instance *qsd = qqi->qqi_qsd; union lquota_id *qid = &qti->qti_id; int i, j, k, size; int rc = 0; ENTRY; + CDEBUG(D_QUOTA, "%s: processing %d pages for %s index\n", + qsd->qsd_svname, npages, global ? "global" : "slave"); + /* sanity check on the record size */ if ((global && ii->ii_recsize != sizeof(struct lquota_glb_rec)) || (!global && ii->ii_recsize != sizeof(struct lquota_slv_rec))) { - CERROR("Invalid record size:%d, global:%s\n", - ii->ii_recsize, global ? "true" : "false"); + CERROR("%s: invalid record size (%d) for %s index\n", + qsd->qsd_svname, ii->ii_recsize, + global ? 
"global" : "slave"); RETURN(-EINVAL); } - size = ii->ii_recsize + ii->ii_keysize + sizeof(__u64); + size = ii->ii_recsize + ii->ii_keysize; for (i = 0; i < npages; i++) { union lu_page *lip = cfs_kmap(pages[i]); for (j = 0; j < LU_PAGE_COUNT; j++) { + if (need_swab) + /* swab header */ + lustre_swab_lip_header(&lip->lp_idx); + + if (lip->lp_idx.lip_magic != LIP_MAGIC) { + CERROR("%s: invalid magic (%x != %x) for page " + "%d/%d while transferring %s index\n", + qsd->qsd_svname, lip->lp_idx.lip_magic, + LIP_MAGIC, i + 1, npages, + global ? "global" : "slave"); + GOTO(out, rc = -EINVAL); + } + + CDEBUG(D_QUOTA, "%s: processing page %d/%d with %d " + "entries for %s index\n", qsd->qsd_svname, i + 1, + npages, lip->lp_idx.lip_nr, + global ? "global" : "slave"); + for (k = 0; k < lip->lp_idx.lip_nr; k++) { char *entry; diff --git a/lustre/quota/qsd_request.c b/lustre/quota/qsd_request.c index fa98b56..a3992eb 100644 --- a/lustre/quota/qsd_request.c +++ b/lustre/quota/qsd_request.c @@ -227,7 +227,6 @@ int qsd_intent_lock(const struct lu_env *env, struct obd_export *exp, if (req == NULL) GOTO(out, rc = -ENOMEM); - req->rq_no_resend = req->rq_no_delay = 1; rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); if (rc) { ptlrpc_request_free(req); @@ -296,6 +295,9 @@ int qsd_intent_lock(const struct lu_env *env, struct obd_export *exp, case IT_QUOTA_DQACQ: /* grab reference on lqe for new lock */ lqe_getref((struct lquota_entry *)arg); + /* all acquire/release request are sent with no_resend and + * no_delay flag */ + req->rq_no_resend = req->rq_no_delay = 1; break; default: break; diff --git a/lustre/quota/qsd_writeback.c b/lustre/quota/qsd_writeback.c index cafadd6..6eb8931 100644 --- a/lustre/quota/qsd_writeback.c +++ b/lustre/quota/qsd_writeback.c @@ -367,7 +367,10 @@ static bool qsd_job_pending(struct qsd_instance *qsd, cfs_list_t *upd, if (!qsd_type_enabled(qsd, qtype)) continue; - if (!qqi->qqi_glb_uptodate || !qqi->qqi_slv_uptodate) + if ((!qqi->qqi_glb_uptodate || 
!qqi->qqi_slv_uptodate) && + !qqi->qqi_reint) + /* global or slave index not up to date and reint + * thread not running */ *uptodate = false; } diff --git a/lustre/tests/ost-pools.sh b/lustre/tests/ost-pools.sh index 3f924e4..c9c3f22 100644 --- a/lustre/tests/ost-pools.sh +++ b/lustre/tests/ost-pools.sh @@ -1159,9 +1159,6 @@ test_22() { run_test 22 "Simultaneous manipulation of a pool" test_23a() { - # XXX remove this once all quota code landed - skip_env "quota isn't functional" && return - set_cleanup_trap local POOL_ROOT=${POOL_ROOT:-$DIR/$tdir} [[ $OSTCOUNT -le 1 ]] && skip_env "Need at least 2 OSTs" && return @@ -1228,9 +1225,6 @@ test_23a() { run_test 23a "OST pools and quota" test_23b() { - # XXX remove this once all quota code landed - skip_env "quota isn't functional" && return - set_cleanup_trap local POOL_ROOT=${POOL_ROOT:-$DIR/$tdir} [[ $OSTCOUNT -le 1 ]] && skip_env "Need at least 2 OSTs" && return 0 diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index 09806a5..d77800b 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -802,6 +802,8 @@ run_test 5 "Chown & chgrp successfully even out of block/file quota" # test dropping acquire request on master test_6() { + local LIMIT=3 # 3M + setup_quota_test trap cleanup_quota_test EXIT @@ -824,7 +826,7 @@ test_6() { chown $TSTUSR2.$TSTUSR2 $TESTFILE2 # cache per-ID lock for $TSTUSR on slave - $LFS setquota -u $TSTUSR -b 0 -B 2M -i 0 -I 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B ${LIMIT}M -i 0 -I 0 $DIR $RUNAS $DD of=$TESTFILE count=1 || error "write $TESTFILE failure, expect success" $RUNAS2 $DD of=$TESTFILE2 count=1 || @@ -837,12 +839,12 @@ test_6() { lustre_fail mds 0x513 601 # write to un-enforced ID ($TSTUSR2) should succeed - $RUNAS2 $DD of=$TESTFILE2 count=1 seek=1 oflag=sync conv=notrunc || + $RUNAS2 $DD of=$TESTFILE2 count=$LIMIT seek=1 oflag=sync conv=notrunc || error "write failure, expect success" # write to enforced ID ($TSTUSR) in background, 
exceeding limit # to make sure DQACQ is sent - $RUNAS $DD of=$TESTFILE count=2 seek=1 oflag=sync conv=notrunc & + $RUNAS $DD of=$TESTFILE count=$LIMIT seek=1 oflag=sync conv=notrunc & DDPID=$! echo "Sleep for $TIMEOUT" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 9a1bcbd..1bdf5fe 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -1155,10 +1155,6 @@ setup_quota(){ return fi - # XXX remove it once all quota code landed - echo "skip quota setup" - return - local mntpt=$1 # save old quota type & set new quota type -- 1.8.3.1