From d2f2601baff3fb8add503ecab932c20c26f9a118 Mon Sep 17 00:00:00 2001 From: isaac Date: Tue, 24 Feb 2009 03:36:18 +0000 Subject: [PATCH] b=16034,i=nic: - Change ptllnd timeout and watchdog timers. --- lnet/ChangeLog | 6 ++++++ lnet/klnds/ptllnd/ptllnd.c | 1 + lnet/klnds/ptllnd/ptllnd.h | 13 ++++++++++++ lnet/klnds/ptllnd/ptllnd_cb.c | 17 ++++++++++++++++ lnet/klnds/ptllnd/ptllnd_modparams.c | 18 ++++++++++++++++- lnet/klnds/ptllnd/ptllnd_peer.c | 6 +++++- lnet/klnds/ptllnd/ptllnd_ptltrace.c | 38 +++++++++++++++--------------------- lnet/klnds/ptllnd/ptllnd_rx_buf.c | 6 ++++-- lnet/klnds/ptllnd/ptllnd_tx.c | 1 + 9 files changed, 80 insertions(+), 26 deletions(-) diff --git a/lnet/ChangeLog b/lnet/ChangeLog index e70bd23..7b1cf84 100644 --- a/lnet/ChangeLog +++ b/lnet/ChangeLog @@ -18,6 +18,12 @@ Description: Details : Severity : normal +Bugzilla : 16034 +Description: Change ptllnd timeout and watchdog timers +Details : Add ptltrace_on_fail tunable and bump ptllnd timeout to match + Portals wire timeout. + +Severity : normal Bugzilla : 16186 Description: One down Lustre FS hangs ALL mounted Lustre filesystems Details : Shared routing enhancements - peer health detection. diff --git a/lnet/klnds/ptllnd/ptllnd.c b/lnet/klnds/ptllnd/ptllnd.c index 760c8ba..b148fe5 100755 --- a/lnet/klnds/ptllnd/ptllnd.c +++ b/lnet/klnds/ptllnd/ptllnd.c @@ -658,6 +658,7 @@ kptllnd_startup (lnet_ni_t *ni) */ rwlock_init(&kptllnd_data.kptl_peer_rw_lock); init_waitqueue_head(&kptllnd_data.kptl_watchdog_waitq); + atomic_set(&kptllnd_data.kptl_needs_ptltrace, 0); INIT_LIST_HEAD(&kptllnd_data.kptl_closing_peers); INIT_LIST_HEAD(&kptllnd_data.kptl_zombie_peers); diff --git a/lnet/klnds/ptllnd/ptllnd.h b/lnet/klnds/ptllnd/ptllnd.h index 8b25d0f..96bae2d 100755 --- a/lnet/klnds/ptllnd/ptllnd.h +++ b/lnet/klnds/ptllnd/ptllnd.h @@ -117,6 +117,7 @@ typedef struct int *kptl_ack_puts; /* make portals ack PUTs */ #ifdef CRAY_XT3 int *kptl_ptltrace_on_timeout; /* dump pltrace on timeout? 
*/ + int *kptl_ptltrace_on_fail; /* dump ptltrace on PTL_NAL_FAILED? */ char **kptl_ptltrace_basename; /* ptltrace dump file basename */ #endif #ifdef PJK_DEBUGGING @@ -282,6 +283,7 @@ struct kptl_data struct list_head kptl_sched_rxbq; /* rxb requiring reposting */ wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */ + atomic_t kptl_needs_ptltrace; /* watchdog thread to dump ptltrace */ kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */ cfs_mem_cache_t* kptl_rx_cache; /* rx descripter cache */ @@ -336,6 +338,17 @@ kptllnd_lnet2ptlnid(lnet_nid_t lnet_nid) #endif } +static inline void +kptllnd_schedule_ptltrace_dump (void) +{ +#ifdef CRAY_XT3 + if (*kptllnd_tunables.kptl_ptltrace_on_fail) { + atomic_inc(&kptllnd_data.kptl_needs_ptltrace); + wake_up(&kptllnd_data.kptl_watchdog_waitq); + } +#endif +} + int kptllnd_startup(lnet_ni_t *ni); void kptllnd_shutdown(lnet_ni_t *ni); int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); diff --git a/lnet/klnds/ptllnd/ptllnd_cb.c b/lnet/klnds/ptllnd/ptllnd_cb.c index d3227fb..8acf9d0 100644 --- a/lnet/klnds/ptllnd/ptllnd_cb.c +++ b/lnet/klnds/ptllnd/ptllnd_cb.c @@ -301,6 +301,7 @@ kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type, kptllnd_peer_close(peer, -EIO); /* Everything (including this RDMA) queued on the peer will * be completed with failure */ + kptllnd_schedule_ptltrace_dump(); } return 0; @@ -687,6 +688,22 @@ kptllnd_watchdog(void *arg) /* threads shut down in phase 2 after all peers have been destroyed */ while (kptllnd_data.kptl_shutdown < 2) { + /* Check whether a ptltrace dump has been requested. + * Yes, this is blatant hijacking of this thread: + * we can't dump directly from tx or rx callbacks as that deadlocks portals + * and takes out the node. + */ + + if (atomic_read(&kptllnd_data.kptl_needs_ptltrace)) { +#ifdef CRAY_XT3 + kptllnd_dump_ptltrace(); + /* we only dump once, no matter how many pending */ + atomic_set(&kptllnd_data.kptl_needs_ptltrace, 0); +#else + LBUG(); +#endif + } + 
timeout = (int)(deadline - jiffies); if (timeout <= 0) { diff --git a/lnet/klnds/ptllnd/ptllnd_modparams.c b/lnet/klnds/ptllnd/ptllnd_modparams.c index b63580d..bb54d32 100644 --- a/lnet/klnds/ptllnd/ptllnd_modparams.c +++ b/lnet/klnds/ptllnd/ptllnd_modparams.c @@ -57,7 +57,8 @@ static int checksum = 0; CFS_MODULE_PARM(checksum, "i", int, 0644, "set non-zero to enable message (not RDMA) checksums"); -static int timeout = 50; +/* NB 250 is the Cray Portals wire timeout */ +static int timeout = 250; CFS_MODULE_PARM(timeout, "i", int, 0644, "timeout (seconds)"); @@ -106,6 +107,10 @@ static int ptltrace_on_timeout = 0; CFS_MODULE_PARM(ptltrace_on_timeout, "i", int, 0644, "dump ptltrace on timeout"); +static int ptltrace_on_fail = 1; +CFS_MODULE_PARM(ptltrace_on_fail, "i", int, 0644, + "dump ptltrace on Portals failure"); + static char *ptltrace_basename = "/tmp/lnet-ptltrace"; CFS_MODULE_PARM(ptltrace_basename, "s", charp, 0644, "ptltrace dump file basename"); @@ -135,6 +140,7 @@ kptl_tunables_t kptllnd_tunables = { .kptl_ack_puts = &ack_puts, #ifdef CRAY_XT3 .kptl_ptltrace_on_timeout = &ptltrace_on_timeout, + .kptl_ptltrace_on_fail = &ptltrace_on_fail, .kptl_ptltrace_basename = &ptltrace_basename, #endif #ifdef PJK_DEBUGGING @@ -174,6 +180,7 @@ enum { KPTLLND_RESHEDULE_LOOPS, KPTLLND_ACK_PUTS, KPTLLND_TRACETIMEOUT, + KPTLLND_TRACEFAIL, KPTLLND_TRACEBASENAME, KPTLLND_SIMULATION_BITMAP }; @@ -194,6 +201,7 @@ enum { #define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED #define KPTLLND_ACK_PUTS CTL_UNNUMBERED #define KPTLLND_TRACETIMEOUT CTL_UNNUMBERED +#define KPTLLND_TRACEFAIL CTL_UNNUMBERED #define KPTLLND_TRACEBASENAME CTL_UNNUMBERED #define KPTLLND_SIMULATION_BITMAP CTL_UNNUMBERED #endif @@ -321,6 +329,14 @@ static cfs_sysctl_table_t kptllnd_ctl_table[] = { .proc_handler = &proc_dointvec }, { + .ctl_name = KPTLLND_TRACEFAIL, + .procname = "ptltrace_on_fail", + .data = &ptltrace_on_fail, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { 
.ctl_name = KPTLLND_TRACEBASENAME, .procname = "ptltrace_basename", .data = ptltrace_basename_space, diff --git a/lnet/klnds/ptllnd/ptllnd_peer.c b/lnet/klnds/ptllnd/ptllnd_peer.c index 1a5c383..c92fe6c 100644 --- a/lnet/klnds/ptllnd/ptllnd_peer.c +++ b/lnet/klnds/ptllnd/ptllnd_peer.c @@ -768,6 +768,7 @@ kptllnd_peer_check_sends (kptl_peer_t *peer) /* Nuke everything (including tx we were trying) */ kptllnd_peer_close(peer, -EIO); kptllnd_tx_decref(tx); + kptllnd_schedule_ptltrace_dump(); } kptl_tx_t * @@ -892,7 +893,10 @@ kptllnd_peer_check_bucket (int idx, int stamp) (tx->tx_tposted == 0) ? 0UL : (jiffies - tx->tx_tposted), *kptllnd_tunables.kptl_timeout); - kptllnd_dump_ptltrace(); +#ifdef CRAY_XT3 + if (*kptllnd_tunables.kptl_ptltrace_on_timeout) + kptllnd_dump_ptltrace(); +#endif kptllnd_tx_decref(tx); diff --git a/lnet/klnds/ptllnd/ptllnd_ptltrace.c b/lnet/klnds/ptllnd/ptllnd_ptltrace.c index ff98d5e..e85ab5a 100644 --- a/lnet/klnds/ptllnd/ptllnd_ptltrace.c +++ b/lnet/klnds/ptllnd/ptllnd_ptltrace.c @@ -38,7 +38,7 @@ #ifdef CRAY_XT3 static struct semaphore ptltrace_mutex; -static struct semaphore ptltrace_signal; +static cfs_waitq_t ptltrace_debug_ctlwq; void kptllnd_ptltrace_to_file(char *filename) @@ -136,7 +136,7 @@ kptllnd_dump_ptltrace_thread(void *arg) { static char fname[1024]; - libcfs_daemonize("ptltracedump"); + libcfs_daemonize("kpt_ptltrace_dump"); /* serialise with other instances of me */ mutex_down(&ptltrace_mutex); @@ -150,8 +150,7 @@ kptllnd_dump_ptltrace_thread(void *arg) mutex_up(&ptltrace_mutex); /* unblock my creator */ - mutex_up(&ptltrace_signal); - + cfs_waitq_signal(&ptltrace_debug_ctlwq); return 0; } @@ -159,9 +158,13 @@ void kptllnd_dump_ptltrace(void) { int rc; + cfs_waitlink_t wait; + ENTRY; - if (!*kptllnd_tunables.kptl_ptltrace_on_timeout) - return; + /* taken from libcfs_debug_dumplog */ + cfs_waitlink_init(&wait); + set_current_state(TASK_INTERRUPTIBLE); + cfs_waitq_add(&ptltrace_debug_ctlwq, &wait); rc = 
cfs_kernel_thread(kptllnd_dump_ptltrace_thread, (void *)(long)cfs_curproc_pid(), @@ -169,28 +172,19 @@ kptllnd_dump_ptltrace(void) if (rc < 0) { CERROR("Error %d starting ptltrace dump thread\n", rc); } else { - /* block until thread completes */ - mutex_down(&ptltrace_signal); + cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE); } -} -void -kptllnd_init_ptltrace(void) -{ - init_mutex(&ptltrace_mutex); - init_mutex_locked(&ptltrace_signal); -} - -#else - -void -kptllnd_dump_ptltrace(void) -{ + /* teardown if kernel_thread() failed */ + cfs_waitq_del(&ptltrace_debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); + EXIT; } void kptllnd_init_ptltrace(void) { + cfs_waitq_init(&ptltrace_debug_ctlwq); + init_mutex(&ptltrace_mutex); } - #endif diff --git a/lnet/klnds/ptllnd/ptllnd_rx_buf.c b/lnet/klnds/ptllnd/ptllnd_rx_buf.c index f7bf2d3..5c28881 100644 --- a/lnet/klnds/ptllnd/ptllnd_rx_buf.c +++ b/lnet/klnds/ptllnd/ptllnd_rx_buf.c @@ -430,7 +430,7 @@ kptllnd_rx_buffer_callback (ptl_event_t *ev) kptllnd_evtype2str(ev->type), ev->type, rxb, kptllnd_errtype2str(ev->ni_fail_type), ev->ni_fail_type, unlinked); - + kptllnd_schedule_ptltrace_dump(); } else if (ev->type == PTL_EVENT_PUT_END && !rxbp->rxbp_shutdown) { @@ -532,10 +532,12 @@ kptllnd_nak (kptl_rx_t *rx) *kptllnd_tunables.kptl_portal, 0, LNET_MSG_MATCHBITS, 0, 0); - if (rc != PTL_OK) + if (rc != PTL_OK) { CWARN("Can't NAK %s: put failed %s(%d)\n", kptllnd_ptlid2str(rx->rx_initiator), kptllnd_errtype2str(rc), rc); + kptllnd_schedule_ptltrace_dump(); + } } void diff --git a/lnet/klnds/ptllnd/ptllnd_tx.c b/lnet/klnds/ptllnd/ptllnd_tx.c index 4043f8f..96e350b 100644 --- a/lnet/klnds/ptllnd/ptllnd_tx.c +++ b/lnet/klnds/ptllnd/ptllnd_tx.c @@ -492,6 +492,7 @@ kptllnd_tx_callback(ptl_event_t *ev) ev->ni_fail_type, unlinked); tx->tx_status = -EIO; kptllnd_peer_close(peer, -EIO); + kptllnd_schedule_ptltrace_dump(); } if (!unlinked) -- 1.8.3.1