From 4e10baf258ecfd8acde1f426f9dc1a21477bdccc Mon Sep 17 00:00:00 2001 From: nathan Date: Mon, 26 Feb 2007 19:11:33 +0000 Subject: [PATCH] b=10894 r=adilger Clarify watchdog timer units. Add ability to change MDT ldlm timeout. --- lnet/libcfs/watchdog.c | 11 ++++++----- lustre/include/lustre_net.h | 2 +- lustre/include/obd_support.h | 2 ++ lustre/mds/handler.c | 3 ++- lustre/mgs/mgs_handler.c | 2 ++ lustre/obdclass/class_obd.c | 2 +- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/lnet/libcfs/watchdog.c b/lnet/libcfs/watchdog.c index 3000e8f..22754af 100644 --- a/lnet/libcfs/watchdog.c +++ b/lnet/libcfs/watchdog.c @@ -36,7 +36,7 @@ struct lc_watchdog { void *lcw_data; pid_t lcw_pid; - int lcw_time; /* time until watchdog fires, in ms */ + cfs_duration_t lcw_time; /* time until watchdog fires, jiffies */ enum { LC_WATCHDOG_DISABLED, @@ -126,8 +126,8 @@ static void lcw_cb(unsigned long data) /* NB this warning should appear on the console, but may not get into * the logs since we're running in a softirq handler */ - CWARN("Watchdog triggered for pid %d: it was inactive for %ldms\n", - (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time) * 1000); + CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n", + (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time)); lcw_dump(lcw); spin_lock_bh(&lcw_pending_timers_lock); @@ -197,8 +197,9 @@ static int lcw_dispatch_main(void *data) list_del_init(&lcw->lcw_list); spin_unlock_bh(&lcw_pending_timers_lock); - CDEBUG(D_INFO, "found lcw for pid %d: inactive for %ldms\n", - (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time) * 1000); + CDEBUG(D_INFO, "found lcw for pid %d: inactive for " + "%lds\n", (int)lcw->lcw_pid, + cfs_duration_sec(lcw->lcw_time)); if (lcw->lcw_state != LC_WATCHDOG_DISABLED) lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data); diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 5835442..e1fa8e8 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -120,7 +120,7 @@ #define MDS_MAXREPSIZE max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56) #define MGS_THREADS_AUTO_MIN 2 -#define MGS_THREADS_AUTO_MAX 128 +#define MGS_THREADS_AUTO_MAX 32 #define MGS_NBUFS (64 * smp_num_cpus) #define MGS_BUFSIZE (8 * 1024) #define MGS_MAXREQSIZE (8 * 1024) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 6cdcc9a..1b31dd4 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -35,6 +35,7 @@ extern unsigned int obd_dump_on_eviction; extern unsigned int obd_timeout; /* seconds */ #define PING_INTERVAL max(obd_timeout / 4, 1U) #define RECONNECT_INTERVAL max(obd_timeout / 10, 10U) +#define LDLM_TIMEOUT_DEFAULT 20 extern unsigned int ldlm_timeout; extern unsigned int obd_health_check_timeout; extern unsigned int obd_sync_filter; @@ -188,6 +189,7 @@ extern int obd_race_state; #define OBD_FAIL_MGS_ALL_REPLY_NET 0x902 #define OBD_FAIL_MGC_PROCESS_LOG 0x903 #define OBD_FAIL_MGS_SLOW_REQUEST_NET 0x904 +#define OBD_FAIL_MGS_SLOW_TARGET_REG 0x905 #define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00 diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 5f59ab3..8cc1aba 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -2053,7 +2053,8 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) obd->obd_replayable ? "enabled" : "disabled"); } - ldlm_timeout = 6; + if (ldlm_timeout == LDLM_TIMEOUT_DEFAULT) + ldlm_timeout = 6; RETURN(0); diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index e161ad3..e6ba9be 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -382,6 +382,8 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) obd->obd_name, lockrc); } + OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_TARGET_REG, 10); + /* Log writing contention is handled by the fsdb_sem */ if (mti->mti_flags & LDD_F_WRITECONF) { diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 21732e6..c2af448 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -64,7 +64,7 @@ unsigned int obd_debug_peer_on_timeout; unsigned int obd_dump_on_timeout; unsigned int obd_dump_on_eviction; unsigned int obd_timeout = 100; /* seconds */ -unsigned int ldlm_timeout = 20; /* seconds */ +unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */ unsigned int obd_health_check_timeout = 120; /* seconds */ unsigned int obd_max_dirty_pages = 256; atomic_t obd_dirty_pages; -- 1.8.3.1