Add hard and soft time limits for server recovery.
i=andrew.perepechko
i=Hongchao.zhang
.BI md_stripe_cache_size
Sets the stripe cache size for server side disk with a striped raid
configuration.
+.TP
+.BI recovery_time_soft= timeout
+Allow 'timeout' seconds for clients to reconnect for recovery after a server
+crash. This timeout will be incrementally extended if it is about to expire
+and the server is still handling new connections from recoverable clients.
+The default soft recovery timeout is set to 300 seconds (5 minutes).
+.TP
+.BI recovery_time_hard= timeout
+The server will be allowed to incrementally extend its timeout up to a hard
+maximum of 'timeout' seconds. The default hard recovery timeout is set to
+900 seconds (15 minutes).
.SH EXAMPLES
.TP
.B mount -t lustre cfs21@tcp0:/testfs /mnt/myfilesystem
struct ptlrpc_request;
extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
-/* lprocfs_status.c: read recovery max time bz13079 */
-int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
- int count, int *eof, void *data);
-
-/* lprocfs_status.c: write recovery max time bz13079 */
-int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
- unsigned long count, void *data);
+/* lprocfs_status.c: read/write soft and hard recovery time limits */
+int lprocfs_obd_rd_recovery_time_soft(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+int lprocfs_obd_wr_recovery_time_soft(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+int lprocfs_obd_rd_recovery_time_hard(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+int lprocfs_obd_wr_recovery_time_hard(struct file *file, const char *buffer,
+ unsigned long count, void *data);
/* all quota proc functions */
extern int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count,
__u32 lmd_flags; /* lustre mount flags */
int lmd_mgs_failnodes; /* mgs failover node count */
int lmd_exclude_count;
+ int lmd_recovery_time_soft;
+ int lmd_recovery_time_hard;
char *lmd_dev; /* device name */
char *lmd_profile; /* client only */
char *lmd_mgssec; /* sptlrpc flavor to mgs */
int lustre_common_put_super(struct super_block *sb);
+struct lustre_mount_info *server_find_mount_locked(const char *name);
struct lustre_mount_info *server_get_mount(const char *name);
struct lustre_mount_info *server_get_mount_2(const char *name);
int server_put_mount(const char *name, struct vfsmount *mnt);
cfs_timer_t obd_recovery_timer;
time_t obd_recovery_start; /* seconds */
time_t obd_recovery_end; /* seconds, for lprocfs_status */
- time_t obd_recovery_max_time; /* seconds, bz13079 */
+ time_t obd_recovery_time_hard;
int obd_recovery_timeout;
/* new recovery stuff from CMD2 */
#define OBD_TIMEOUT_DEFAULT 100
#define LDLM_TIMEOUT_DEFAULT 20
#define MDS_LDLM_TIMEOUT_DEFAULT 6
-/* Time to wait for all clients to reconnect during recovery */
+/* Time to wait for all clients to reconnect during recovery (hard limit) */
+#define OBD_RECOVERY_TIME_HARD (obd_timeout * 9)
+/* Time to wait for all clients to reconnect during recovery (soft limit) */
/* Should be very conservative; must catch the first reconnect after reboot */
-#define OBD_RECOVERY_FACTOR (3) /* times obd_timeout */
+#define OBD_RECOVERY_TIME_SOFT (obd_timeout * 3)
/* Change recovery-small 26b time if you change this */
#define PING_INTERVAL max(obd_timeout / 4, 1U)
/* Client may skip 1 ping; we must wait at least 2.5. But for multiple
else if (!extend && (duration > obd->obd_recovery_timeout))
/* Track the client's largest expected replay time */
obd->obd_recovery_timeout = duration;
-#ifdef CRAY_XT3
- /*
- * If total recovery time already exceed the
- * obd_recovery_max_time, then CRAY XT3 will
- * abort the recovery
- */
- if(obd->obd_recovery_timeout > obd->obd_recovery_max_time)
- obd->obd_recovery_timeout = obd->obd_recovery_max_time;
-#endif
+
+ /* Clamp to obd_recovery_time_hard; exceeding the hard limit should not happen */
+ if (obd->obd_recovery_timeout > obd->obd_recovery_time_hard)
+ obd->obd_recovery_timeout = obd->obd_recovery_time_hard;
+
obd->obd_recovery_end = obd->obd_recovery_start +
obd->obd_recovery_timeout;
if (!cfs_timer_is_armed(&obd->obd_recovery_timer) ||
}
CDEBUG(D_HA, "%s: starting recovery timer\n", obd->obd_name);
obd->obd_recovery_start = cfs_time_current_sec();
- /* minimum */
- obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
cfs_spin_unlock_bh(&obd->obd_processing_task_lock);
reset_recovery_timer(obd, obd->obd_recovery_timeout, 0);
delta = (jiffies - delta) / CFS_HZ;
CDEBUG(D_INFO,"4: recovery completed in %lus - %d/%d reqs/locks\n",
delta, obd->obd_replayed_requests, obd->obd_replayed_locks);
- if (delta > obd_timeout * OBD_RECOVERY_FACTOR) {
+ if (delta > OBD_RECOVERY_TIME_SOFT) {
CWARN("too long recovery - read logs\n");
libcfs_debug_dumplog();
}
obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
obd->obd_recovery_start = 0;
obd->obd_recovery_end = 0;
- obd->obd_recovery_timeout = OBD_RECOVERY_FACTOR * obd_timeout;
- /* bz13079: this should be set to desired value for ost but not for mds */
- obd->obd_recovery_max_time = OBD_RECOVERY_MAX_TIME;
+
+ /* both values may already have been set from mount data */
+ if (obd->obd_recovery_timeout == 0)
+ obd->obd_recovery_timeout = OBD_RECOVERY_TIME_SOFT;
+ if (obd->obd_recovery_time_hard == 0)
+ obd->obd_recovery_time_hard = OBD_RECOVERY_TIME_HARD;
cfs_timer_init(&obd->obd_recovery_timer, target_recovery_expired, obd);
target_start_recovery_thread(lut, handler);
}
CERROR("CMD Operation not allowed in IOP mode\n");
GOTO(err_lmi, rc = -EINVAL);
}
+ /* Read recovery timeouts */
+ if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_soft)
+ obd->obd_recovery_timeout =
+ lsi->lsi_lmd->lmd_recovery_time_soft;
+
+ if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_hard)
+ obd->obd_recovery_time_hard =
+ lsi->lsi_lmd->lmd_recovery_time_hard;
}
cfs_rwlock_init(&m->mdt_sptlrpc_lock);
}
EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
-int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+/*
+ * lprocfs read handler for the soft recovery time limit.
+ *
+ * Formats the obd device's obd_recovery_timeout into \a page as a decimal
+ * integer followed by a newline, writing at most \a count bytes.
+ * \a data is the obd device; \a start, \a off and \a eof are unused here.
+ *
+ * Returns the snprintf() result.
+ */
+int lprocfs_obd_rd_recovery_time_soft(char *page, char **start, off_t off,
+                                      int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        int len;
+
+        LASSERT(obd != NULL);
+
+        len = snprintf(page, count, "%d\n", obd->obd_recovery_timeout);
+        return len;
+}
+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_time_soft);
+
+/*
+ * lprocfs write handler for the soft recovery time limit.
+ *
+ * Parses an integer from \a buffer with lprocfs_write_helper() and stores it
+ * in the obd device's obd_recovery_timeout. \a data is the obd device;
+ * \a file is unused here.
+ *
+ * Returns \a count on success, the lprocfs_write_helper() error code if
+ * parsing fails, or -EINVAL if the parsed value is negative.
+ */
+int lprocfs_obd_wr_recovery_time_soft(struct file *file, const char *buffer,
+                                      unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* a negative timeout is meaningless: obd_recovery_end is computed
+         * as obd_recovery_start + obd_recovery_timeout */
+        if (val < 0)
+                return -EINVAL;
+
+        obd->obd_recovery_timeout = val;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_obd_wr_recovery_time_soft);
+
+int lprocfs_obd_rd_recovery_time_hard(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
{
struct obd_device *obd = data;
LASSERT(obd != NULL);
- return snprintf(page, count, "%lu\n", obd->obd_recovery_max_time);
+ return snprintf(page, count, "%lu\n", obd->obd_recovery_time_hard);
}
-EXPORT_SYMBOL(lprocfs_obd_rd_recovery_maxtime);
+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_time_hard);
-int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
- unsigned long count, void *data)
+int lprocfs_obd_wr_recovery_time_hard(struct file *file, const char *buffer,
+ unsigned long count, void *data)
{
struct obd_device *obd = data;
int val, rc;
if (rc)
return rc;
- obd->obd_recovery_max_time = val;
+ obd->obd_recovery_time_hard = val;
return count;
}
-EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime);
-
+EXPORT_SYMBOL(lprocfs_obd_wr_recovery_time_hard);
EXPORT_SYMBOL(lprocfs_register);
EXPORT_SYMBOL(lprocfs_srch);
}
lsi->lsi_lmd->lmd_exclude_count = 0;
+ lsi->lsi_lmd->lmd_recovery_time_soft = 0;
+ lsi->lsi_lmd->lmd_recovery_time_hard = 0;
s2lsi_nocast(sb) = lsi;
/* we take 1 extra ref for our setup */
cfs_atomic_set(&lsi->lsi_mounts, 1);
RETURN(rc);
}
-#if 0
static void lmd_print(struct lustre_mount_data *lmd)
{
int i;
PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
+
if (lmd->lmd_opts)
PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
+
+ if (lmd->lmd_recovery_time_soft)
+ PRINT_CMD(PRINT_MASK, "recovery time soft: %d\n",
+ lmd->lmd_recovery_time_soft);
+
+ if (lmd->lmd_recovery_time_hard)
+ PRINT_CMD(PRINT_MASK, "recovery time hard: %d\n",
+ lmd->lmd_recovery_time_hard);
+
for (i = 0; i < lmd->lmd_exclude_count; i++) {
PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i,
lmd->lmd_exclude[i]);
}
}
-#endif
/* Is this server on the exclusion list */
int lustre_check_exclusion(struct super_block *sb, char *svname)
s1 = options;
while (*s1) {
int clear = 0;
+ int time_min = 2 * (CONNECTION_SWITCH_MAX +
+ 2 * INITIAL_CONNECT_TIMEOUT);
+
/* Skip whitespace and extra commas */
while (*s1 == ' ' || *s1 == ',')
s1++;
if (strncmp(s1, "abort_recov", 11) == 0) {
lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
clear++;
+ } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
+ lmd->lmd_recovery_time_soft = max_t(int,
+ simple_strtoul(s1 + 19, NULL, 10), time_min);
+ clear++;
+ } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
+ lmd->lmd_recovery_time_hard = max_t(int,
+ simple_strtoul(s1 + 19, NULL, 10), time_min);
+ clear++;
} else if (strncmp(s1, "nosvc", 5) == 0) {
lmd->lmd_flags |= LMD_FLG_NOSVC;
clear++;
strcpy(lmd->lmd_opts, options);
}
+ lmd_print(lmd);
lmd->lmd_magic = LMD_MAGIC;
RETURN(rc);
struct lustre_sb_info *lsi = s2lsi(lmi->lmi_sb);
mnt = lmi->lmi_mnt;
obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
+
+ /* get recovery timeouts from mount data */
+ if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_soft)
+ obd->obd_recovery_timeout =
+ lsi->lsi_lmd->lmd_recovery_time_soft;
+ if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_hard)
+ obd->obd_recovery_time_hard =
+ lsi->lsi_lmd->lmd_recovery_time_hard;
} else {
/* old path - used by lctl */
CERROR("Using old MDS mount method\n");
{ "tot_pending", lprocfs_filter_rd_tot_pending, 0, 0 },
{ "tot_granted", lprocfs_filter_rd_tot_granted, 0, 0 },
{ "hash_stats", lprocfs_obd_rd_hash, 0, 0 },
- { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
- { "recovery_maxtime", lprocfs_obd_rd_recovery_maxtime,
- lprocfs_obd_wr_recovery_maxtime, 0},
+ { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
+ { "recovery_time_soft", lprocfs_obd_rd_recovery_time_soft,
+ lprocfs_obd_wr_recovery_time_soft, 0},
+ { "recovery_time_hard", lprocfs_obd_rd_recovery_time_hard,
+ lprocfs_obd_wr_recovery_time_hard, 0},
{ "evict_client", 0, lprocfs_wr_evict_client, 0,
&lprocfs_evict_client_fops},
{ "num_exports", lprocfs_rd_num_exports, 0, 0 },