From: anserper Date: Thu, 23 Jul 2009 10:14:41 +0000 (+0000) Subject: b=19778 X-Git-Tag: v1_8_2_01~1^2~222 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=fab524e33654a6ee4d43dffce80462b92500d667;p=fs%2Flustre-release.git b=19778 a=24644 i=Alexey Lyashkov i=ZhiYong Tian do not loop infinitely in client_quota_ctl, retry several times after timeout --- diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 175ddc1..cb1dab5 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -473,6 +473,7 @@ extern int lprocfs_rd_num_exports(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count, int *eof, void *data); + struct adaptive_timeout; extern int lprocfs_at_hist_helper(char *page, int count, int rc, struct adaptive_timeout *at); @@ -485,6 +486,11 @@ extern int lprocfs_wr_evict_client(struct file *file, const char *buffer, extern int lprocfs_wr_ping(struct file *file, const char *buffer, unsigned long count, void *data); +extern int lprocfs_rd_quota_resend_count(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_wr_quota_resend_count(struct file *file, const char *buffer, + unsigned long count, void *data); + /* Statfs helpers */ extern int lprocfs_rd_blksize(char *page, char **start, off_t off, int count, int *eof, void *data); @@ -501,6 +507,7 @@ extern int lprocfs_rd_filesfree(char *page, char **start, off_t off, extern int lprocfs_rd_filegroups(char *page, char **start, off_t off, int count, int *eof, void *data); + extern int lprocfs_write_helper(const char *buffer, unsigned long count, int *val); extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count, diff --git a/lustre/include/obd.h b/lustre/include/obd.h index a2bc1fa..235ac08 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -393,6 +393,7 @@ struct filter_obd { #define MDC_MAX_RIF_DEFAULT 8 #define MDC_MAX_RIF_MAX 512 +#define CLIENT_QUOTA_DEFAULT_RESENDS 10 struct mdc_rpc_lock; struct obd_import; @@ -502,6 +503,8 @@ struct client_obd { struct lu_client_seq *cl_seq; atomic_t cl_resends; /* resend count */ + atomic_t cl_quota_resends; /* quota related resend count */ + /* Cache of triples */ struct lustre_cache *cl_cache; obd_lock_cancel_cb cl_ext_lock_cancel_cb; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 78fc799..7459e77 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -286,6 +286,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; #endif atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); + atomic_set(&cli->cl_quota_resends, CLIENT_QUOTA_DEFAULT_RESENDS); /* This value may be changed at connect time in ptlrpc_connect_interpret. */ diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index 46aae45..63ef292 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -75,6 +75,7 @@ static int mdc_wr_max_rpcs_in_flight(struct file *file, const char *buffer, return count; } + static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "ping", 0, lprocfs_wr_ping, 0, 0, 0222 }, @@ -90,6 +91,8 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight, mdc_wr_max_rpcs_in_flight, 0 }, + { "quota_resend_count", lprocfs_rd_quota_resend_count, + lprocfs_wr_quota_resend_count, 0}, { "timeouts", lprocfs_rd_timeouts, 0, 0 }, { "import", lprocfs_rd_import, 0, 0 }, { "state", lprocfs_rd_state, 0, 0 }, diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 1f99bf9..bb9ebb7 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -910,6 +910,33 @@ int lprocfs_at_hist_helper(char *page, int count, int rc, return rc; } +int lprocfs_rd_quota_resend_count(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + + return snprintf(page, count, "%u\n", + atomic_read(&obd->u.cli.cl_quota_resends)); +} + +int lprocfs_wr_quota_resend_count(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 0) + return -EINVAL; + + atomic_set(&obd->u.cli.cl_quota_resends, val); + + return count; +} + /* See also ptlrpc_lprocfs_rd_timeouts */ int lprocfs_rd_timeouts(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -2279,6 +2306,8 @@ EXPORT_SYMBOL(lprocfs_rd_kbytesfree); EXPORT_SYMBOL(lprocfs_rd_kbytesavail); EXPORT_SYMBOL(lprocfs_rd_filestotal); EXPORT_SYMBOL(lprocfs_rd_filesfree); +EXPORT_SYMBOL(lprocfs_rd_quota_resend_count); +EXPORT_SYMBOL(lprocfs_wr_quota_resend_count); EXPORT_SYMBOL(lprocfs_write_helper); EXPORT_SYMBOL(lprocfs_write_frac_helper); diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 37d2ec8..9ca2cbc 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -541,6 +541,8 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = { { "checksums", osc_rd_checksum, osc_wr_checksum, 0 }, { "checksum_type", osc_rd_checksum_type, osc_wd_checksum_type, 0 }, { "resend_count", osc_rd_resend_count, osc_wr_resend_count, 0}, + { "quota_resend_count", lprocfs_rd_quota_resend_count, + lprocfs_wr_quota_resend_count, 0}, { "timeouts", lprocfs_rd_timeouts, 0, 0 }, { "import", lprocfs_rd_import, 0, 0 }, { "state", lprocfs_rd_state, 0, 0 }, diff --git a/lustre/quota/quota_ctl.c b/lustre/quota/quota_ctl.c index 5397b94..4a4261c 100644 --- a/lustre/quota/quota_ctl.c +++ b/lustre/quota/quota_ctl.c @@ -346,7 +346,7 @@ int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) struct ptlrpc_request *req; struct obd_quotactl *oqc; __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*oqctl) }; - int ver, opc, rc; + int ver, opc, rc, resends = 0; ENTRY; if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME)) { @@ -359,6 +359,8 @@ int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) RETURN(-EINVAL); } +restart_request: + req = ptlrpc_prep_req(class_exp2cliimp(exp), ver, opc, 2, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -367,6 +369,8 @@ int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) *oqc = *oqctl; ptlrpc_req_set_repsize(req, 2, size); + ptlrpc_at_set_req_timeout(req); + req->rq_no_resend = 1; rc = ptlrpc_queue_wait(req); if (rc) { @@ -387,6 +391,19 @@ int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) EXIT; out: ptlrpc_req_finished(req); + + if (client_quota_recoverable_error(rc)) { + resends++; + if (!client_quota_should_resend(resends, &exp->exp_obd->u.cli)) { + CERROR("too many resend retries, returning error " + "(cmd = %d, id = %u, type = %d)\n", + oqctl->qc_cmd, oqctl->qc_id, oqctl->qc_type); + RETURN(-EIO); + } + + goto restart_request; + } + return rc; } diff --git a/lustre/quota/quota_internal.h b/lustre/quota/quota_internal.h index 4f17e88..bf07ecf 100644 --- a/lustre/quota/quota_internal.h +++ b/lustre/quota/quota_internal.h @@ -200,4 +200,16 @@ int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl); int client_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl); int lov_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl); int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk); + +static inline int client_quota_recoverable_error(int rc) +{ + return (rc == -ETIMEDOUT || rc == -EAGAIN); +} + +static inline int client_quota_should_resend(int resend, struct client_obd *cli) +{ + return atomic_read(&cli->cl_quota_resends) ? + atomic_read(&cli->cl_quota_resends) > resend : 1; +} + #endif diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index 8981512..102f27e 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -53,7 +53,7 @@ DIRECTIO=${DIRECTIO:-$LUSTRE/tests/directio} remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0 -[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21 29" QUOTALOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} @@ -2158,6 +2158,45 @@ test_28() { } run_test_with_stat 28 "test for consistency for qunit when setquota (18574) ===========" +test_29() +{ + local BLK_LIMIT=$((100 * 1024 * 1024)) # 100G + local timeout + local pid + local resends + + if at_is_enabled; then + timeout=$(at_max_get client) + at_max_set 10 client + else + timeout=$(lctl get_param -n timeout) + lctl set_param timeout=10 + fi + + resends=$(lctl get_param -n mdc.${FSNAME}-*.quota_resend_count | head -1) + + #define OBD_FAIL_MDS_QUOTACTL_NET 0x12e + lustre_fail mds 0x12e + + $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I 0 $DIR & pid=$! + + echo "sleeping for $((10 * resends + 5)) seconds" + sleep $((10 * resends + 5)) + ps -p $pid && error "lfs hadn't finished by timeout" + wait $pid && error "succeeded, but should have failed" + + lustre_fail mds 0 + + if at_is_enabled; then + at_max_set $timeout client + else + lctl set_param timeout=$timeout + fi + + resetquota -u $TSTUSR +} +run_test_with_stat 29 "unhandled quotactls must not hang lustre client (19778) ========" + # turn off quota test_99() {