Whamcloud - gitweb
Branch HEAD
authortianzy <tianzy>
Mon, 14 Sep 2009 06:31:59 +0000 (06:31 +0000)
committertianzy <tianzy>
Mon, 14 Sep 2009 06:31:59 +0000 (06:31 +0000)
port att24108 and att24644 of bz19778 to HEAD.
b=19778
i=johann
i=tianzy
i=shadow

lustre/include/lprocfs_status.h
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/mdc/lproc_mdc.c
lustre/obdclass/lprocfs_status.c
lustre/osc/lproc_osc.c
lustre/quota/quota_ctl.c
lustre/quota/quota_internal.h
lustre/quota/quota_master.c
lustre/tests/sanity-quota.sh

index 15c590e..25e4b98 100644 (file)
@@ -520,6 +520,11 @@ extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
 extern int lprocfs_wr_ping(struct file *file, const char *buffer,
                            unsigned long count, void *data);
 
+extern int lprocfs_rd_quota_resend_count(char *page, char **start, off_t off,
+                                         int count, int *eof, void *data);
+extern int lprocfs_wr_quota_resend_count(struct file *file, const char *buffer,
+                                         unsigned long count, void *data);
+
 /* Statfs helpers */
 extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
                               int count, int *eof, void *data);
index 2b7396b..9eb9c76 100644 (file)
@@ -314,6 +314,7 @@ struct filter_obd {
         struct semaphore     fo_init_lock;      /* group initialization lock */
         int                  fo_committed_group;
 
+#define CLIENT_QUOTA_DEFAULT_RESENDS 10
 
         spinlock_t           fo_objidlock;      /* protect fo_lastobjid */
 
@@ -511,6 +512,7 @@ struct client_obd {
         struct lu_client_seq    *cl_seq;
 
         atomic_t                 cl_resends; /* resend count */
+        atomic_t                 cl_quota_resends; /* quota related resend count */
 };
 #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
 
index d59c14b..54683be 100644 (file)
@@ -303,6 +303,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
         cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
 #endif
         atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
+        atomic_set(&cli->cl_quota_resends, CLIENT_QUOTA_DEFAULT_RESENDS);
 
         /* This value may be changed at connect time in
            ptlrpc_connect_interpret. */
index f55ab46..c5cefe4 100644 (file)
@@ -147,6 +147,8 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = {
         { "mds_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
         { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight,
                                 mdc_wr_max_rpcs_in_flight, 0 },
+        { "quota_resend_count",  lprocfs_rd_quota_resend_count,
+                                 lprocfs_wr_quota_resend_count, 0},
         { "timeouts",        lprocfs_rd_timeouts,    0, 0 },
         { "import",          lprocfs_rd_import,      0, 0 },
         { "state",           lprocfs_rd_state,       0, 0 },
index b1224aa..71da72a 100644 (file)
@@ -923,6 +923,33 @@ int lprocfs_at_hist_helper(char *page, int count, int rc,
         return rc;
 }
 
+int lprocfs_rd_quota_resend_count(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data)
+{       /* proc read handler: prints cl_quota_resends as "<n>\n" into @page */
+        struct obd_device *obd = data; /* @data is used as the obd_device */
+        /* @start, @off and @eof are not touched; returns snprintf()'s result */
+        return snprintf(page, count, "%u\n",
+                        atomic_read(&obd->u.cli.cl_quota_resends));
+}
+
+int lprocfs_wr_quota_resend_count(struct file *file, const char *buffer,
+                                  unsigned long count, void *data)
+{       /* proc write handler: parses an int and stores it in cl_quota_resends */
+        struct obd_device *obd = data; /* @data is used as the obd_device */
+        int val, rc;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc; /* hand the helper's nonzero result to the caller */
+
+        if (val < 0) /* negatives are refused; 0 is stored like any value */
+               return -EINVAL;
+
+        atomic_set(&obd->u.cli.cl_quota_resends, val);
+
+        return count; /* report the whole write as consumed; @file is unused */
+}
+
 /* See also ptlrpc_lprocfs_rd_timeouts */
 int lprocfs_rd_timeouts(char *page, char **start, off_t off, int count,
                         int *eof, void *data)
@@ -2533,6 +2560,8 @@ EXPORT_SYMBOL(lprocfs_rd_kbytesfree);
 EXPORT_SYMBOL(lprocfs_rd_kbytesavail);
 EXPORT_SYMBOL(lprocfs_rd_filestotal);
 EXPORT_SYMBOL(lprocfs_rd_filesfree);
+EXPORT_SYMBOL(lprocfs_rd_quota_resend_count);
+EXPORT_SYMBOL(lprocfs_wr_quota_resend_count);
 
 EXPORT_SYMBOL(lprocfs_write_helper);
 EXPORT_SYMBOL(lprocfs_write_frac_helper);
index 962cbd9..35ad7a7 100644 (file)
@@ -614,6 +614,8 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = {
         { "checksums",       osc_rd_checksum, osc_wr_checksum, 0 },
         { "checksum_type",   osc_rd_checksum_type, osc_wd_checksum_type, 0 },
         { "resend_count",    osc_rd_resend_count, osc_wr_resend_count, 0},
+        { "quota_resend_count",  lprocfs_rd_quota_resend_count,
+                                 lprocfs_wr_quota_resend_count, 0},
         { "timeouts",        lprocfs_rd_timeouts,      0, 0 },
         { "contention_seconds", osc_rd_contention_seconds,
                                 osc_wr_contention_seconds, 0 },
index 75ab78d..8891c9d 100644 (file)
@@ -298,7 +298,7 @@ int client_quota_ctl(struct obd_device *unused, struct obd_export *exp,
         struct ptlrpc_request   *req;
         struct obd_quotactl     *oqc;
         const struct req_format *rf;
-        int                      ver, opc, rc;
+        int                      ver, opc, rc, resends = 0;
         ENTRY;
 
         if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME)) {
@@ -313,6 +313,8 @@ int client_quota_ctl(struct obd_device *unused, struct obd_export *exp,
                 RETURN(-EINVAL);
         }
 
+restart_request:
+
         req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), rf, ver, opc);
         if (req == NULL)
                 RETURN(-ENOMEM);
@@ -321,6 +323,8 @@ int client_quota_ctl(struct obd_device *unused, struct obd_export *exp,
         *oqc = *oqctl;
 
         ptlrpc_request_set_replen(req);
+        ptlrpc_at_set_req_timeout(req);
+        req->rq_no_resend = 1;
 
         rc = ptlrpc_queue_wait(req);
         if (rc) {
@@ -341,6 +345,17 @@ int client_quota_ctl(struct obd_device *unused, struct obd_export *exp,
         EXIT;
 out:
         ptlrpc_req_finished(req);
+
+        if (client_quota_recoverable_error(rc)) {
+                resends++;
+                if (!client_quota_should_resend(resends, &exp->exp_obd->u.cli)) {
+                        CERROR("too many resend retries, returning error\n");
+                        RETURN(-EIO);
+                }
+
+                goto restart_request;
+        }
+
         return rc;
 }
 
index a6db1ec..fa645b8 100644 (file)
@@ -211,4 +211,16 @@ int lmv_quota_check(struct obd_device *unused, struct obd_export *exp,
 int lov_quota_check(struct obd_device *unused, struct obd_export *exp,
                     struct obd_quotactl *oqctl);
 int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
+
+static inline int client_quota_recoverable_error(int rc)
+{       /* nonzero only for -ETIMEDOUT and -EAGAIN; any other rc yields 0 */
+        return (rc == -ETIMEDOUT || rc == -EAGAIN);
+}
+
+static inline int client_quota_should_resend(int resend, struct client_obd *cli)
+{       /* nonzero = retry allowed after @resend tries; a limit of 0 never stops */
+        int limit = atomic_read(&cli->cl_quota_resends); /* one snapshot only */
+        return limit == 0 || limit > resend;
+}
+
 #endif
index 2001ced..4c68085 100644 (file)
@@ -1576,13 +1576,14 @@ int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
         up(&dquot->dq_sem);
 
         lustre_dqput(dquot);
+        up(&mds->mds_qonoff_sem);
 
         /* the usages in admin quota file is inaccurate */
         dqblk->dqb_curinodes = 0;
         dqblk->dqb_curspace = 0;
         rc = mds_get_space(obd, oqctl);
         EXIT;
-
+        return rc;
 out:
         up(&mds->mds_qonoff_sem);
         return rc;
index 3375133..5b8cf28 100644 (file)
@@ -57,7 +57,7 @@ unset ENABLE_QUOTA
 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
 
-[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21"
+[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21 29"
 
 QUOTALOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log}
 
@@ -2075,6 +2075,45 @@ test_30()
 }
 run_test_with_stat 30 "hard limit updates should not reset grace times ================"
 
+test_29()
+{
+        local BLK_LIMIT=$((100 * 1024 * 1024)) # 100G if -B is in KB (TODO confirm)
+        local timeout
+        local pid
+        local resends
+        # Save the current client timeout and shrink it to 10 for this test.
+        if at_is_enabled; then
+                timeout=$(at_max_get client)
+                at_max_set 10 client
+        else
+                timeout=$(lctl get_param -n timeout)
+                lctl set_param timeout=10
+        fi
+        # Only the first matching mdc device's resend limit is used below.
+        resends=$(lctl get_param -n mdc.${FSNAME}-*.quota_resend_count | head -1)
+
+        #define OBD_FAIL_MDS_QUOTACTL_NET 0x12e
+        lustre_fail mds 0x12e
+        # Run setquota in the background so its run time can be bounded.
+        $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I 0 $DIR & pid=$!
+
+        echo "sleeping for $((10 * resends + 5)) seconds"
+        sleep $((10 * resends + 5)) # 10 (timeout set above) per resend, plus 5
+        ps -p $pid && error "lfs hadn't finished by timeout"
+        wait $pid && error "succeeded, but should have failed"
+        # Cleanup. NOTE(review): skipped if error above aborts the test -- confirm.
+        lustre_fail mds 0
+
+        if at_is_enabled; then
+                at_max_set $timeout client
+        else
+                lctl set_param timeout=$timeout
+        fi
+
+        resetquota -u $TSTUSR
+}
+run_test_with_stat 29 "unhandled quotactls must not hang lustre client (19778) ========"
+
 # turn off quota
 quota_fini()
 {