- Change ptllnd timeout and watchdog timers.
Details :
Severity : normal
+Bugzilla : 16034
+Description: Change ptllnd timeout and watchdog timers
+Details : Add ptltrace_on_fail tunable (dump ptltrace on PTL_NAL_FAILED) and
+ bump the ptllnd timeout to 250 seconds to match the Portals wire
+ timeout.
+
+Severity : normal
Bugzilla : 16186
Description: One down Lustre FS hangs ALL mounted Lustre filesystems
Details : Shared routing enhancements - peer health detection.
*/
rwlock_init(&kptllnd_data.kptl_peer_rw_lock);
init_waitqueue_head(&kptllnd_data.kptl_watchdog_waitq);
+ atomic_set(&kptllnd_data.kptl_needs_ptltrace, 0);
INIT_LIST_HEAD(&kptllnd_data.kptl_closing_peers);
INIT_LIST_HEAD(&kptllnd_data.kptl_zombie_peers);
int *kptl_ack_puts; /* make portals ack PUTs */
#ifdef CRAY_XT3
int *kptl_ptltrace_on_timeout; /* dump pltrace on timeout? */
+ int *kptl_ptltrace_on_fail; /* dump ptltrace on PTL_NAL_FAILED? */
char **kptl_ptltrace_basename; /* ptltrace dump file basename */
#endif
#ifdef PJK_DEBUGGING
struct list_head kptl_sched_rxbq; /* rxb requiring reposting */
wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */
+ atomic_t kptl_needs_ptltrace; /* watchdog thread to dump ptltrace */
kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */
cfs_mem_cache_t* kptl_rx_cache; /* rx descripter cache */
#endif
}
+static inline void
+kptllnd_schedule_ptltrace_dump (void)
+{
+#ifdef CRAY_XT3
+ if (*kptllnd_tunables.kptl_ptltrace_on_fail) {
+ atomic_inc(&kptllnd_data.kptl_needs_ptltrace);
+ wake_up(&kptllnd_data.kptl_watchdog_waitq);
+ }
+#endif
+}
+
int kptllnd_startup(lnet_ni_t *ni);
void kptllnd_shutdown(lnet_ni_t *ni);
int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
kptllnd_peer_close(peer, -EIO);
/* Everything (including this RDMA) queued on the peer will
* be completed with failure */
+ kptllnd_schedule_ptltrace_dump();
}
return 0;
/* threads shut down in phase 2 after all peers have been destroyed */
while (kptllnd_data.kptl_shutdown < 2) {
+ /* Check whether a ptltrace dump has been requested.
+ * Yes, this is blatant hijacking of this thread, but we can't
+ * dump directly from the tx or rx callbacks: doing so deadlocks
+ * Portals and takes out the node.
+ */
+
+ if (atomic_read(&kptllnd_data.kptl_needs_ptltrace)) {
+#ifdef CRAY_XT3
+ kptllnd_dump_ptltrace();
+ /* dump only once, no matter how many requests are pending */
+ atomic_set(&kptllnd_data.kptl_needs_ptltrace, 0);
+#else
+ LBUG();
+#endif
+ }
+
timeout = (int)(deadline - jiffies);
if (timeout <= 0) {
CFS_MODULE_PARM(checksum, "i", int, 0644,
"set non-zero to enable message (not RDMA) checksums");
-static int timeout = 50;
+/* NB 250 is the Cray Portals wire timeout */
+static int timeout = 250;
CFS_MODULE_PARM(timeout, "i", int, 0644,
"timeout (seconds)");
CFS_MODULE_PARM(ptltrace_on_timeout, "i", int, 0644,
"dump ptltrace on timeout");
+static int ptltrace_on_fail = 1;
+CFS_MODULE_PARM(ptltrace_on_fail, "i", int, 0644,
+ "dump ptltrace on Portals failure");
+
static char *ptltrace_basename = "/tmp/lnet-ptltrace";
CFS_MODULE_PARM(ptltrace_basename, "s", charp, 0644,
"ptltrace dump file basename");
.kptl_ack_puts = &ack_puts,
#ifdef CRAY_XT3
.kptl_ptltrace_on_timeout = &ptltrace_on_timeout,
+ .kptl_ptltrace_on_fail = &ptltrace_on_fail,
.kptl_ptltrace_basename = &ptltrace_basename,
#endif
#ifdef PJK_DEBUGGING
KPTLLND_RESHEDULE_LOOPS,
KPTLLND_ACK_PUTS,
KPTLLND_TRACETIMEOUT,
+ KPTLLND_TRACEFAIL,
KPTLLND_TRACEBASENAME,
KPTLLND_SIMULATION_BITMAP
};
#define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED
#define KPTLLND_ACK_PUTS CTL_UNNUMBERED
#define KPTLLND_TRACETIMEOUT CTL_UNNUMBERED
+#define KPTLLND_TRACEFAIL CTL_UNNUMBERED
#define KPTLLND_TRACEBASENAME CTL_UNNUMBERED
#define KPTLLND_SIMULATION_BITMAP CTL_UNNUMBERED
#endif
.proc_handler = &proc_dointvec
},
{
+ .ctl_name = KPTLLND_TRACEFAIL,
+ .procname = "ptltrace_on_fail",
+ .data = &ptltrace_on_fail,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
.ctl_name = KPTLLND_TRACEBASENAME,
.procname = "ptltrace_basename",
.data = ptltrace_basename_space,
/* Nuke everything (including tx we were trying) */
kptllnd_peer_close(peer, -EIO);
kptllnd_tx_decref(tx);
+ kptllnd_schedule_ptltrace_dump();
}
kptl_tx_t *
(tx->tx_tposted == 0) ? 0UL : (jiffies - tx->tx_tposted),
*kptllnd_tunables.kptl_timeout);
- kptllnd_dump_ptltrace();
+#ifdef CRAY_XT3
+ if (*kptllnd_tunables.kptl_ptltrace_on_timeout)
+ kptllnd_dump_ptltrace();
+#endif
kptllnd_tx_decref(tx);
#ifdef CRAY_XT3
static struct semaphore ptltrace_mutex;
-static struct semaphore ptltrace_signal;
+static cfs_waitq_t ptltrace_debug_ctlwq;
void
kptllnd_ptltrace_to_file(char *filename)
{
static char fname[1024];
- libcfs_daemonize("ptltracedump");
+ libcfs_daemonize("kpt_ptltrace_dump");
/* serialise with other instances of me */
mutex_down(&ptltrace_mutex);
mutex_up(&ptltrace_mutex);
/* unblock my creator */
- mutex_up(&ptltrace_signal);
-
+ cfs_waitq_signal(&ptltrace_debug_ctlwq);
return 0;
}
kptllnd_dump_ptltrace(void)
{
int rc;
+ cfs_waitlink_t wait;
+ ENTRY;
- if (!*kptllnd_tunables.kptl_ptltrace_on_timeout)
- return;
+ /* same wait/wakeup pattern as libcfs_debug_dumplog: queue
+ * ourselves on the waitq before starting the dump thread */
+ cfs_waitlink_init(&wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+ cfs_waitq_add(&ptltrace_debug_ctlwq, &wait);
rc = cfs_kernel_thread(kptllnd_dump_ptltrace_thread,
(void *)(long)cfs_curproc_pid(),
if (rc < 0) {
CERROR("Error %d starting ptltrace dump thread\n", rc);
} else {
- /* block until thread completes */
- mutex_down(&ptltrace_signal);
+ cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE);
}
-}
-void
-kptllnd_init_ptltrace(void)
-{
- init_mutex(&ptltrace_mutex);
- init_mutex_locked(&ptltrace_signal);
-}
-
-#else
-
-void
-kptllnd_dump_ptltrace(void)
-{
+ /* always tear down, including when cfs_kernel_thread() failed */
+ cfs_waitq_del(&ptltrace_debug_ctlwq, &wait);
+ set_current_state(TASK_RUNNING);
+ EXIT;
}
void
kptllnd_init_ptltrace(void)
{
+ cfs_waitq_init(&ptltrace_debug_ctlwq);
+ init_mutex(&ptltrace_mutex);
}
-
#endif
kptllnd_evtype2str(ev->type), ev->type, rxb,
kptllnd_errtype2str(ev->ni_fail_type),
ev->ni_fail_type, unlinked);
-
+ kptllnd_schedule_ptltrace_dump();
} else if (ev->type == PTL_EVENT_PUT_END &&
!rxbp->rxbp_shutdown) {
*kptllnd_tunables.kptl_portal, 0,
LNET_MSG_MATCHBITS, 0, 0);
- if (rc != PTL_OK)
+ if (rc != PTL_OK) {
CWARN("Can't NAK %s: put failed %s(%d)\n",
kptllnd_ptlid2str(rx->rx_initiator),
kptllnd_errtype2str(rc), rc);
+ kptllnd_schedule_ptltrace_dump();
+ }
}
void
ev->ni_fail_type, unlinked);
tx->tx_status = -EIO;
kptllnd_peer_close(peer, -EIO);
+ kptllnd_schedule_ptltrace_dump();
}
if (!unlinked)