Whamcloud - gitweb
Branch b1_6
author bwzhou <bwzhou>
Mon, 24 Dec 2007 03:20:03 +0000 (03:20 +0000)
committer bwzhou <bwzhou>
Mon, 24 Dec 2007 03:20:03 +0000 (03:20 +0000)
b=13079
r=adilger, deen

set an upper limit for ost recovery period

lustre/include/lprocfs_status.h
lustre/include/obd.h
lustre/include/obd_support.h
lustre/ldlm/ldlm_lib.c
lustre/mds/mds_fs.c
lustre/obdclass/lprocfs_status.c
lustre/obdfilter/filter.c
lustre/obdfilter/lproc_obdfilter.c

index 9bfe431..711788c 100644 (file)
@@ -516,6 +516,16 @@ struct file_operations name##_fops = {                                     \
 struct ptlrpc_request;
 extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
 
+#ifdef CRAY_XT3
+/* lprocfs_status.c: read recovery max time bz13079 */
+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data);
+
+/* lprocfs_status.c: write recovery max time bz13079 */
+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
+                                    unsigned long count, void *data);
+#endif
+
 #else
 /* LPROCFS is not defined */
 static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
index 6af8734..ee38516 100644 (file)
@@ -815,8 +815,11 @@ struct obd_device {
         cfs_timer_t                      obd_recovery_timer;
         struct list_head                 obd_recovery_queue;
         struct list_head                 obd_delayed_reply_queue;
-        time_t                           obd_recovery_start;
-        time_t                           obd_recovery_end; /* for lprocfs_status */
+        time_t                           obd_recovery_start; /* seconds */
+        time_t                           obd_recovery_end; /* seconds, for lprocfs_status */
+#ifdef CRAY_XT3
+        time_t                           obd_recovery_max_time; /* seconds, bz13079 */
+#endif
         int                              obd_recovery_timeout;
 
         union {
index 09c2359..eaa1db8 100644 (file)
@@ -54,6 +54,9 @@ extern unsigned int obd_alloc_fail_rate;
 /* Timeout definitions */
 #define OBD_TIMEOUT_DEFAULT 100
 #define LDLM_TIMEOUT_DEFAULT 20
+#ifdef CRAY_XT3
+ #define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */
+#endif
 /* Time to wait for all clients to reconnect during recovery */
 /* Should be very conservative; must catch the first reconnect after reboot */
 #define OBD_RECOVERY_FACTOR (5 / 2) /* times obd_timeout */
index 01b886b..68308df 100644 (file)
@@ -1102,6 +1102,12 @@ static void reset_recovery_timer(struct obd_device *obd, int mintime)
         }
         /* Track the client's largest expected replay time */
         obd->obd_recovery_timeout = max(mintime, obd->obd_recovery_timeout);
+#ifdef CRAY_XT3
+        if(cfs_time_current_sec() + obd->obd_recovery_timeout >
+           obd->obd_recovery_start + obd->obd_recovery_max_time)
+                obd->obd_recovery_timeout = obd->obd_recovery_start + 
+                        obd->obd_recovery_max_time - cfs_time_current_sec();
+#endif
         obd->obd_recovery_end = cfs_time_current_sec() +
                 obd->obd_recovery_timeout;
         cfs_timer_arm(&obd->obd_recovery_timer, 
index b91d624..d3209d3 100644 (file)
@@ -463,6 +463,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                 obd->obd_recovery_start = 0;
                 obd->obd_recovery_end = 0;
                 obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
+#ifdef CRAY_XT3
+                /* bz13079: this won't be changed for mds */
+                obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME;
+#endif
         }
 
         mds->mds_mount_count = mount_count + 1;
index ff46954..2fb7598 100644 (file)
@@ -1677,6 +1677,35 @@ out:
 }
 EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
 
+#ifdef CRAY_XT3
+/*
+ * procfs read handler for "recovery_maxtime" (bz13079): prints the device's
+ * obd_recovery_max_time, in seconds, as a decimal number followed by a
+ * newline.
+ *
+ * \a page and \a count are the output buffer and its size.  \a data is cast
+ * to a struct obd_device pointer and must not be NULL.  \a start and \a off
+ * are not used.
+ *
+ * Returns the snprintf() result.
+ */
+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        LASSERT(obd != NULL);
+
+        /* The whole value is produced in this one call; nothing follows. */
+        *eof = 1;
+
+        /* time_t need not be unsigned long; cast so the argument matches %lu */
+        return snprintf(page, count, "%lu\n",
+                        (unsigned long)obd->obd_recovery_max_time);
+}
+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_maxtime);
+
+/*
+ * procfs write handler for "recovery_maxtime" (bz13079): parses an integer
+ * from \a buffer with lprocfs_write_helper() and stores it in the device's
+ * obd_recovery_max_time (seconds).
+ *
+ * \a data is cast to a struct obd_device pointer and must not be NULL.
+ * \a file is not used.
+ *
+ * Returns \a count on success, the non-zero result of lprocfs_write_helper()
+ * if parsing fails, or -EINVAL if the parsed value is negative.
+ */
+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
+                                    unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* reset_recovery_timer() caps the recovery deadline at
+         * obd_recovery_start + obd_recovery_max_time; a negative limit
+         * would put that cap before recovery started. */
+        if (val < 0)
+                return -EINVAL;
+
+        obd->obd_recovery_max_time = val;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime);
+#endif /* CRAY_XT3 */
+
 EXPORT_SYMBOL(lprocfs_register);
 EXPORT_SYMBOL(lprocfs_srch);
 EXPORT_SYMBOL(lprocfs_remove);
index 09b059f..146a08e 100644 (file)
@@ -836,6 +836,10 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                 obd->obd_recovery_start = 0;
                 obd->obd_recovery_end = 0;
                 obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
+#ifdef CRAY_XT3
+                /* b13079: this should be set to desired value for ost */
+                obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME;
+#endif
         }
 
 out:
index 17e73ef..5d09fa7 100644 (file)
@@ -192,6 +192,10 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "tot_pending",  lprocfs_filter_rd_tot_pending, 0, 0 },
         { "tot_granted",  lprocfs_filter_rd_tot_granted, 0, 0 },
         { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
+#ifdef CRAY_XT3
+        { "recovery_maxtime", lprocfs_obd_rd_recovery_maxtime,
+                              lprocfs_obd_wr_recovery_maxtime, 0},
+#endif
         { "evict_client", 0, lprocfs_wr_evict_client, 0,
                                 &lprocfs_evict_client_fops},
         { "num_exports",  lprocfs_rd_num_exports,   0, 0 },