From: tianzy Date: Mon, 14 Sep 2009 06:31:59 +0000 (+0000) Subject: Branch HEAD X-Git-Tag: v1_9_270~66 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=e7f3c2955a426bbe3e39b32208056b9ab8667190 Branch HEAD port att24108 and att24644 of bz19778 to HEAD. b=19778 i=johann i=tianzy i=shadow --- diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 15c590e..25e4b98 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -520,6 +520,11 @@ extern int lprocfs_wr_evict_client(struct file *file, const char *buffer, extern int lprocfs_wr_ping(struct file *file, const char *buffer, unsigned long count, void *data); +extern int lprocfs_rd_quota_resend_count(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_wr_quota_resend_count(struct file *file, const char *buffer, + unsigned long count, void *data); + /* Statfs helpers */ extern int lprocfs_rd_blksize(char *page, char **start, off_t off, int count, int *eof, void *data); diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 2b7396b..9eb9c76 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -314,6 +314,7 @@ struct filter_obd { struct semaphore fo_init_lock; /* group initialization lock */ int fo_committed_group; +#define CLIENT_QUOTA_DEFAULT_RESENDS 10 spinlock_t fo_objidlock; /* protect fo_lastobjid */ @@ -511,6 +512,7 @@ struct client_obd { struct lu_client_seq *cl_seq; atomic_t cl_resends; /* resend count */ + atomic_t cl_quota_resends; /* quota related resend count */ }; #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index d59c14b..54683be 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -303,6 +303,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; #endif atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); + atomic_set(&cli->cl_quota_resends, CLIENT_QUOTA_DEFAULT_RESENDS); /* This value may be changed at connect time in ptlrpc_connect_interpret. */ diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index f55ab46..c5cefe4 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -147,6 +147,8 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight, mdc_wr_max_rpcs_in_flight, 0 }, + { "quota_resend_count", lprocfs_rd_quota_resend_count, + lprocfs_wr_quota_resend_count, 0}, { "timeouts", lprocfs_rd_timeouts, 0, 0 }, { "import", lprocfs_rd_import, 0, 0 }, { "state", lprocfs_rd_state, 0, 0 }, diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index b1224aa..71da72a 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -923,6 +923,33 @@ int lprocfs_at_hist_helper(char *page, int count, int rc, return rc; } +int lprocfs_rd_quota_resend_count(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + + return snprintf(page, count, "%u\n", + atomic_read(&obd->u.cli.cl_quota_resends)); +} + +int lprocfs_wr_quota_resend_count(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 0) + return -EINVAL; + + atomic_set(&obd->u.cli.cl_quota_resends, val); + + return count; +} + /* See also ptlrpc_lprocfs_rd_timeouts */ int lprocfs_rd_timeouts(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -2533,6 +2560,8 @@ EXPORT_SYMBOL(lprocfs_rd_kbytesfree); EXPORT_SYMBOL(lprocfs_rd_kbytesavail); EXPORT_SYMBOL(lprocfs_rd_filestotal); EXPORT_SYMBOL(lprocfs_rd_filesfree); +EXPORT_SYMBOL(lprocfs_rd_quota_resend_count); +EXPORT_SYMBOL(lprocfs_wr_quota_resend_count); EXPORT_SYMBOL(lprocfs_write_helper); EXPORT_SYMBOL(lprocfs_write_frac_helper); diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 962cbd9..35ad7a7 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -614,6 +614,8 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = { { "checksums", osc_rd_checksum, osc_wr_checksum, 0 }, { "checksum_type", osc_rd_checksum_type, osc_wd_checksum_type, 0 }, { "resend_count", osc_rd_resend_count, osc_wr_resend_count, 0}, + { "quota_resend_count", lprocfs_rd_quota_resend_count, + lprocfs_wr_quota_resend_count, 0}, { "timeouts", lprocfs_rd_timeouts, 0, 0 }, { "contention_seconds", osc_rd_contention_seconds, osc_wr_contention_seconds, 0 }, diff --git a/lustre/quota/quota_ctl.c b/lustre/quota/quota_ctl.c index 75ab78d..8891c9d 100644 --- a/lustre/quota/quota_ctl.c +++ b/lustre/quota/quota_ctl.c @@ -298,7 +298,7 @@ int client_quota_ctl(struct obd_device *unused, struct obd_export *exp, struct ptlrpc_request *req; struct obd_quotactl *oqc; const struct req_format *rf; - int ver, opc, rc; + int ver, opc, rc, resends = 0; ENTRY; if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME)) { @@ -313,6 +313,8 @@ int client_quota_ctl(struct obd_device *unused, struct obd_export *exp, RETURN(-EINVAL); } +restart_request: + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), rf, ver, opc); if (req == NULL) RETURN(-ENOMEM); @@ -321,6 +323,8 @@ int client_quota_ctl(struct obd_device *unused, struct obd_export *exp, *oqc = *oqctl; ptlrpc_request_set_replen(req); + ptlrpc_at_set_req_timeout(req); + req->rq_no_resend = 1; rc = ptlrpc_queue_wait(req); if (rc) { @@ -341,6 +345,17 @@ int client_quota_ctl(struct obd_device *unused, struct obd_export *exp, EXIT; out: ptlrpc_req_finished(req); + + if (client_quota_recoverable_error(rc)) { + resends++; + if (!client_quota_should_resend(resends, &exp->exp_obd->u.cli)) { + CERROR("too many resend retries, returning error\n"); + RETURN(-EIO); + } + + goto restart_request; + } + return rc; } diff --git a/lustre/quota/quota_internal.h b/lustre/quota/quota_internal.h index a6db1ec..fa645b8 100644 --- a/lustre/quota/quota_internal.h +++ b/lustre/quota/quota_internal.h @@ -211,4 +211,16 @@ int lmv_quota_check(struct obd_device *unused, struct obd_export *exp, int lov_quota_check(struct obd_device *unused, struct obd_export *exp, struct obd_quotactl *oqctl); int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk); + +static inline int client_quota_recoverable_error(int rc) +{ + return (rc == -ETIMEDOUT || rc == -EAGAIN); +} + +static inline int client_quota_should_resend(int resend, struct client_obd *cli) +{ + return atomic_read(&cli->cl_quota_resends) ? + atomic_read(&cli->cl_quota_resends) > resend : 1; +} + #endif diff --git a/lustre/quota/quota_master.c b/lustre/quota/quota_master.c index 2001ced..4c68085 100644 --- a/lustre/quota/quota_master.c +++ b/lustre/quota/quota_master.c @@ -1576,13 +1576,14 @@ int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) up(&dquot->dq_sem); lustre_dqput(dquot); + up(&mds->mds_qonoff_sem); /* the usages in admin quota file is inaccurate */ dqblk->dqb_curinodes = 0; dqblk->dqb_curspace = 0; rc = mds_get_space(obd, oqctl); EXIT; - + return rc; out: up(&mds->mds_qonoff_sem); return rc; diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index 3375133..5b8cf28 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -57,7 +57,7 @@ unset ENABLE_QUOTA remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0 -[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21 29" QUOTALOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} @@ -2075,6 +2075,45 @@ test_30() } run_test_with_stat 30 "hard limit updates should not reset grace times ================" +test_29() +{ + local BLK_LIMIT=$((100 * 1024 * 1024)) # 100G + local timeout + local pid + local resends + + if at_is_enabled; then + timeout=$(at_max_get client) + at_max_set 10 client + else + timeout=$(lctl get_param -n timeout) + lctl set_param timeout=10 + fi + + resends=$(lctl get_param -n mdc.${FSNAME}-*.quota_resend_count | head -1) + + #define OBD_FAIL_MDS_QUOTACTL_NET 0x12e + lustre_fail mds 0x12e + + $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I 0 $DIR & pid=$! + + echo "sleeping for $((10 * resends + 5)) seconds" + sleep $((10 * resends + 5)) + ps -p $pid && error "lfs hadn't finished by timeout" + wait $pid && error "succeeded, but should have failed" + + lustre_fail mds 0 + + if at_is_enabled; then + at_max_set $timeout client + else + lctl set_param timeout=$timeout + fi + + resetquota -u $TSTUSR +} +run_test_with_stat 29 "unhandled quotactls must not hang lustre client (19778) ========" + # turn off quota quota_fini() {