From: Andrew Perepechko Date: Thu, 11 Feb 2010 19:48:35 +0000 (+0300) Subject: b=21919 Optimize quota_ctl operations by sending requests in parallel X-Git-Tag: v1_8_2_50~10 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=5fcf73343d65c41b4b44e1feb69fd01dbdf52c3c;p=fs%2Flustre-release.git b=21919 Optimize quota_ctl operations by sending requests in parallel i=Johann Lombardi i=ZhiYong Tian Based on a patch from Joseph Herring (LLNL). Send MDS->OST quota_ctl requests in parallel, do not resend. Compiled from two attachments in the ticket. --- diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index ee166cd..6727305 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -473,7 +473,6 @@ extern int lprocfs_rd_num_exports(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count, int *eof, void *data); - struct adaptive_timeout; extern int lprocfs_at_hist_helper(char *page, int count, int rc, struct adaptive_timeout *at); @@ -486,11 +485,6 @@ extern int lprocfs_wr_evict_client(struct file *file, const char *buffer, extern int lprocfs_wr_ping(struct file *file, const char *buffer, unsigned long count, void *data); -extern int lprocfs_rd_quota_resend_count(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_wr_quota_resend_count(struct file *file, const char *buffer, - unsigned long count, void *data); - /* Statfs helpers */ extern int lprocfs_rd_blksize(char *page, char **start, off_t off, int count, int *eof, void *data); @@ -507,7 +501,6 @@ extern int lprocfs_rd_filesfree(char *page, char **start, off_t off, extern int lprocfs_rd_filegroups(char *page, char **start, off_t off, int count, int *eof, void *data); - extern int lprocfs_write_helper(const char *buffer, unsigned long count, int *val); extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count, diff --git a/lustre/include/obd.h b/lustre/include/obd.h index ff1988b..00a2bec 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -404,7 +404,6 @@ struct filter_obd { #define MDC_MAX_RIF_DEFAULT 8 #define MDC_MAX_RIF_MAX 512 -#define CLIENT_QUOTA_DEFAULT_RESENDS 10 struct mdc_rpc_lock; struct obd_import; @@ -545,8 +544,6 @@ struct client_obd { struct lu_client_seq *cl_seq; atomic_t cl_resends; /* resend count */ - atomic_t cl_quota_resends; /* quota related resend count */ - /* Cache of triples */ struct lustre_cache *cl_cache; obd_lock_cancel_cb cl_ext_lock_cancel_cb; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 1aaa7a0..16efa09 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -286,7 +286,6 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; #endif atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); - atomic_set(&cli->cl_quota_resends, CLIENT_QUOTA_DEFAULT_RESENDS); /* This value may be changed at connect time in ptlrpc_connect_interpret. */ diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index 63ef292..46aae45 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -75,7 +75,6 @@ static int mdc_wr_max_rpcs_in_flight(struct file *file, const char *buffer, return count; } - static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "ping", 0, lprocfs_wr_ping, 0, 0, 0222 }, @@ -91,8 +90,6 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight, mdc_wr_max_rpcs_in_flight, 0 }, - { "quota_resend_count", lprocfs_rd_quota_resend_count, - lprocfs_wr_quota_resend_count, 0}, { "timeouts", lprocfs_rd_timeouts, 0, 0 }, { "import", lprocfs_rd_import, 0, 0 }, { "state", lprocfs_rd_state, 0, 0 }, diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 6506bd7..d0609e2 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -919,33 +919,6 @@ int lprocfs_at_hist_helper(char *page, int count, int rc, return rc; } -int lprocfs_rd_quota_resend_count(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct obd_device *obd = data; - - return snprintf(page, count, "%u\n", - atomic_read(&obd->u.cli.cl_quota_resends)); -} - -int lprocfs_wr_quota_resend_count(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - int val, rc; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val < 0) - return -EINVAL; - - atomic_set(&obd->u.cli.cl_quota_resends, val); - - return count; -} - /* See also ptlrpc_lprocfs_rd_timeouts */ int lprocfs_rd_timeouts(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -2316,8 +2289,6 @@ EXPORT_SYMBOL(lprocfs_rd_kbytesfree); EXPORT_SYMBOL(lprocfs_rd_kbytesavail); EXPORT_SYMBOL(lprocfs_rd_filestotal); EXPORT_SYMBOL(lprocfs_rd_filesfree); -EXPORT_SYMBOL(lprocfs_rd_quota_resend_count); -EXPORT_SYMBOL(lprocfs_wr_quota_resend_count); EXPORT_SYMBOL(lprocfs_write_helper); EXPORT_SYMBOL(lprocfs_write_frac_helper); diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 9ca2cbc..37d2ec8 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -541,8 +541,6 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = { { "checksums", osc_rd_checksum, osc_wr_checksum, 0 }, { "checksum_type", osc_rd_checksum_type, osc_wd_checksum_type, 0 }, { "resend_count", osc_rd_resend_count, osc_wr_resend_count, 0}, - { "quota_resend_count", lprocfs_rd_quota_resend_count, - lprocfs_wr_quota_resend_count, 0}, { "timeouts", lprocfs_rd_timeouts, 0, 0 }, { "import", lprocfs_rd_import, 0, 0 }, { "state", lprocfs_rd_state, 0, 0 }, diff --git a/lustre/quota/quota_ctl.c b/lustre/quota/quota_ctl.c index 5652b3d..518ccbd 100644 --- a/lustre/quota/quota_ctl.c +++ b/lustre/quota/quota_ctl.c @@ -344,7 +344,7 @@ int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) struct ptlrpc_request *req; struct obd_quotactl *oqc; __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*oqctl) }; - int ver, opc, rc, resends = 0; + int ver, opc, rc; ENTRY; if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME)) { @@ -357,8 +357,6 @@ int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) RETURN(-EINVAL); } -restart_request: - req = ptlrpc_prep_req(class_exp2cliimp(exp), ver, opc, 2, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -389,17 +387,40 @@ restart_request: EXIT; out: ptlrpc_req_finished(req); + return rc; +} + +struct lov_getquota_set_arg { + __u64 curspace; + __u64 bhardlimit; +}; + +static int lov_getquota_interpret(struct ptlrpc_request_set *rqset, void *data, int rc) +{ + struct lov_getquota_set_arg *set_arg = data; + struct ptlrpc_request *req; + struct list_head *pos; + struct obd_quotactl *oqc; + + list_for_each(pos, &rqset->set_requests) { + req = list_entry(pos, struct ptlrpc_request, rq_set_chain); + + if (req->rq_status) + continue; - if (client_quota_recoverable_error(rc)) { - resends++; - if (!client_quota_should_resend(resends, &exp->exp_obd->u.cli)) { - CERROR("too many resend retries, returning error " - "(cmd = %d, id = %u, type = %d)\n", - oqctl->qc_cmd, oqctl->qc_id, oqctl->qc_type); - RETURN(-EIO); + oqc = NULL; + if (req->rq_repmsg) + oqc = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*oqc), + lustre_swab_obd_quotactl); + + if (oqc == NULL) { + CERROR("Can't unpack obd_quotactl\n"); + rc = -EPROTO; + continue; } - goto restart_request; + set_arg->curspace += oqc->qc_dqblk.dqb_curspace; + set_arg->bhardlimit += oqc->qc_dqblk.dqb_bhardlimit; } return rc; @@ -409,9 +430,12 @@ int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) { struct obd_device *obd = class_exp2obd(exp); struct lov_obd *lov = &obd->u.lov; - __u64 curspace = 0; - __u64 bhardlimit = 0; - int i, rc = 0; + int i, rc = 0, rc1; + struct lov_getquota_set_arg set_arg = { 0 }; + struct obd_export *ltd_exp; + struct ptlrpc_request_set *rqset; + __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*oqctl) }; + ENTRY; if (oqctl->qc_cmd != LUSTRE_Q_QUOTAON && @@ -421,12 +445,18 @@ int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) oqctl->qc_cmd != LUSTRE_Q_SETQUOTA && oqctl->qc_cmd != Q_FINVALIDATE) { CERROR("bad quota opc %x for lov obd", oqctl->qc_cmd); - RETURN(-EFAULT); + RETURN(-EINVAL); } + rqset = ptlrpc_prep_set(); + if (rqset == NULL) + RETURN(-ENOMEM); + obd_getref(obd); + for (i = 0; i < lov->desc.ld_tgt_count; i++) { - int err; + struct ptlrpc_request *req; + struct obd_quotactl *oqc; if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) { if (oqctl->qc_cmd == Q_GETOQUOTA) { @@ -438,23 +468,40 @@ int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) continue; } - err = obd_quotactl(lov->lov_tgts[i]->ltd_exp, oqctl); - if (err) { - if (lov->lov_tgts[i]->ltd_active && !rc) - rc = err; - continue; - } + ltd_exp = lov->lov_tgts[i]->ltd_exp; - if (oqctl->qc_cmd == Q_GETOQUOTA) { - curspace += oqctl->qc_dqblk.dqb_curspace; - bhardlimit += oqctl->qc_dqblk.dqb_bhardlimit; + req = ptlrpc_prep_req(class_exp2cliimp(ltd_exp), + LUSTRE_OST_VERSION, + OST_QUOTACTL, 2, size, NULL); + if (!req) { + obd_putref(obd); + GOTO(out, rc = -ENOMEM); } + + oqc = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*oqctl)); + *oqc = *oqctl; + + ptlrpc_req_set_repsize(req, 2, size); + ptlrpc_at_set_req_timeout(req); + req->rq_no_resend = 1; + req->rq_no_delay = 1; + + ptlrpc_set_add_req(rqset, req); } + obd_putref(obd); if (oqctl->qc_cmd == Q_GETOQUOTA) { - oqctl->qc_dqblk.dqb_curspace = curspace; - oqctl->qc_dqblk.dqb_bhardlimit = bhardlimit; + rqset->set_interpret = lov_getquota_interpret; + rqset->set_arg = &set_arg; } + rc1 = ptlrpc_set_wait(rqset); + rc = rc1 ? rc1 : rc; + +out: + ptlrpc_set_destroy(rqset); + oqctl->qc_dqblk.dqb_curspace = set_arg.curspace; + oqctl->qc_dqblk.dqb_bhardlimit = set_arg.bhardlimit; + RETURN(rc); } diff --git a/lustre/quota/quota_internal.h b/lustre/quota/quota_internal.h index 0ec25f9..9709506 100644 --- a/lustre/quota/quota_internal.h +++ b/lustre/quota/quota_internal.h @@ -202,18 +202,6 @@ int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl); int client_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl); int lov_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl); int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk); - -static inline int client_quota_recoverable_error(int rc) -{ - return (rc == -ETIMEDOUT || rc == -EAGAIN); -} - -static inline int client_quota_should_resend(int resend, struct client_obd *cli) -{ - return atomic_read(&cli->cl_quota_resends) ? - atomic_read(&cli->cl_quota_resends) > resend : 1; -} - int generic_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl, int global); #endif diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index fc056c9..f54df0f 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -60,7 +60,7 @@ unset ENABLE_QUOTA remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0 -[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21 29" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21" QUOTALOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} @@ -2058,7 +2058,6 @@ test_29() local newtimeo=10 # the default ptlrpc AT value local oldtimeo local pid - local resends if at_is_enabled; then oldtimeo=$(at_max_get client) @@ -2068,16 +2067,14 @@ test_29() lctl set_param timeout=$newtimeo fi - resends=$(lctl get_param -n mdc.${FSNAME}-*.quota_resend_count | head -1) - #define OBD_FAIL_MDS_QUOTACTL_NET 0x12e lustre_fail mds 0x12e $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I 0 $DIR & pid=$! # 1.25 * at_max + 5 + net_latency - echo "sleeping for $(((newtimeo * 9 / 4 + 5) * resends)) seconds" - sleep $(((newtimeo * 9 / 4 + 5) * resends)) + echo "sleeping for $((newtimeo * 9 / 4 + 5)) seconds" + sleep $((newtimeo * 9 / 4 + 5)) ps -p $pid && error "lfs hadn't finished by timeout" wait $pid && error "succeeded, but should have failed"