Whamcloud - gitweb
b=16034,i=nic:
author isaac <isaac>
Tue, 24 Feb 2009 03:36:18 +0000 (03:36 +0000)
committer isaac <isaac>
Tue, 24 Feb 2009 03:36:18 +0000 (03:36 +0000)
- Change ptllnd timeout and watchdog timers.

lnet/ChangeLog
lnet/klnds/ptllnd/ptllnd.c
lnet/klnds/ptllnd/ptllnd.h
lnet/klnds/ptllnd/ptllnd_cb.c
lnet/klnds/ptllnd/ptllnd_modparams.c
lnet/klnds/ptllnd/ptllnd_peer.c
lnet/klnds/ptllnd/ptllnd_ptltrace.c
lnet/klnds/ptllnd/ptllnd_rx_buf.c
lnet/klnds/ptllnd/ptllnd_tx.c

index e70bd23..7b1cf84 100644 (file)
@@ -18,6 +18,12 @@ Description:
 Details    :
 
 Severity   : normal
+Bugzilla   : 16034
+Description: Change ptllnd timeout and watchdog timers
+Details    : Add the ptltrace_on_fail module parameter and bump the ptllnd
+             timeout default to 250s to match the Cray Portals wire timeout.
+
+Severity   : normal
 Bugzilla   : 16186
 Description: One down Lustre FS hangs ALL mounted Lustre filesystems
 Details    : Shared routing enhancements - peer health detection.
index 760c8ba..b148fe5 100755 (executable)
@@ -658,6 +658,7 @@ kptllnd_startup (lnet_ni_t *ni)
          */
         rwlock_init(&kptllnd_data.kptl_peer_rw_lock);
         init_waitqueue_head(&kptllnd_data.kptl_watchdog_waitq);
+        atomic_set(&kptllnd_data.kptl_needs_ptltrace, 0);
         INIT_LIST_HEAD(&kptllnd_data.kptl_closing_peers);
         INIT_LIST_HEAD(&kptllnd_data.kptl_zombie_peers);
 
index 8b25d0f..96bae2d 100755 (executable)
@@ -117,6 +117,7 @@ typedef struct
         int             *kptl_ack_puts;         /* make portals ack PUTs */
 #ifdef CRAY_XT3
         int             *kptl_ptltrace_on_timeout; /* dump pltrace on timeout? */
+        int             *kptl_ptltrace_on_fail;    /* dump ptltrace on PTL_NAL_FAILED? */
         char           **kptl_ptltrace_basename;  /* ptltrace dump file basename */
 #endif
 #ifdef PJK_DEBUGGING
@@ -282,6 +283,7 @@ struct kptl_data
         struct list_head        kptl_sched_rxbq;       /* rxb requiring reposting */
 
         wait_queue_head_t       kptl_watchdog_waitq;   /* watchdog sleeps here */
+        atomic_t                kptl_needs_ptltrace;   /* watchdog thread to dump ptltrace */
 
         kptl_rx_buffer_pool_t   kptl_rx_buffer_pool;   /* rx buffer pool */
         cfs_mem_cache_t*        kptl_rx_cache;         /* rx descripter cache */
@@ -336,6 +338,17 @@ kptllnd_lnet2ptlnid(lnet_nid_t lnet_nid)
 #endif
 }
 
+static inline void
+kptllnd_schedule_ptltrace_dump (void)
+{
+#ifdef CRAY_XT3
+        if (*kptllnd_tunables.kptl_ptltrace_on_fail) {
+                atomic_inc(&kptllnd_data.kptl_needs_ptltrace);
+                wake_up(&kptllnd_data.kptl_watchdog_waitq);
+        }
+#endif
+}
+
 int  kptllnd_startup(lnet_ni_t *ni);
 void kptllnd_shutdown(lnet_ni_t *ni);
 int  kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
index d3227fb..8acf9d0 100644 (file)
@@ -301,6 +301,7 @@ kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
                 kptllnd_peer_close(peer, -EIO);
                 /* Everything (including this RDMA) queued on the peer will
                  * be completed with failure */
+                kptllnd_schedule_ptltrace_dump();
         }
 
         return 0;
@@ -687,6 +688,22 @@ kptllnd_watchdog(void *arg)
         /* threads shut down in phase 2 after all peers have been destroyed */
         while (kptllnd_data.kptl_shutdown < 2) {
 
+                /* Check whether a ptltrace dump has been requested.
+                 * Yes, this is blatant hijacking of this thread: we can't
+                 * dump directly from tx or rx callbacks, as that deadlocks
+                 * Portals and takes out the node.
+                 */
+
+                if (atomic_read(&kptllnd_data.kptl_needs_ptltrace)) {
+#ifdef CRAY_XT3
+                        kptllnd_dump_ptltrace();
+                        /* we only dump once, no matter how many pending */
+                        atomic_set(&kptllnd_data.kptl_needs_ptltrace, 0);
+#else
+                        LBUG();
+#endif
+                }
+
                 timeout = (int)(deadline - jiffies);
 
                 if (timeout <= 0) {
index b63580d..bb54d32 100644 (file)
@@ -57,7 +57,8 @@ static int checksum = 0;
 CFS_MODULE_PARM(checksum, "i", int, 0644,
                 "set non-zero to enable message (not RDMA) checksums");
 
-static int timeout = 50;
+/* NB 250 is the Cray Portals wire timeout */
+static int timeout = 250;
 CFS_MODULE_PARM(timeout, "i", int, 0644,
                 "timeout (seconds)");
 
@@ -106,6 +107,10 @@ static int ptltrace_on_timeout = 0;
 CFS_MODULE_PARM(ptltrace_on_timeout, "i", int, 0644,
                 "dump ptltrace on timeout");
 
+static int ptltrace_on_fail = 1;
+CFS_MODULE_PARM(ptltrace_on_fail, "i", int, 0644,
+                "dump ptltrace on Portals failure");
+
 static char *ptltrace_basename = "/tmp/lnet-ptltrace";
 CFS_MODULE_PARM(ptltrace_basename, "s", charp, 0644,
                 "ptltrace dump file basename");
@@ -135,6 +140,7 @@ kptl_tunables_t kptllnd_tunables = {
         .kptl_ack_puts               = &ack_puts,
 #ifdef CRAY_XT3
         .kptl_ptltrace_on_timeout    = &ptltrace_on_timeout,
+        .kptl_ptltrace_on_fail       = &ptltrace_on_fail,
         .kptl_ptltrace_basename      = &ptltrace_basename,
 #endif
 #ifdef PJK_DEBUGGING
@@ -174,6 +180,7 @@ enum {
         KPTLLND_RESHEDULE_LOOPS,
         KPTLLND_ACK_PUTS,
         KPTLLND_TRACETIMEOUT,
+        KPTLLND_TRACEFAIL,
         KPTLLND_TRACEBASENAME,
         KPTLLND_SIMULATION_BITMAP
 };
@@ -194,6 +201,7 @@ enum {
 #define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED
 #define KPTLLND_ACK_PUTS        CTL_UNNUMBERED
 #define KPTLLND_TRACETIMEOUT    CTL_UNNUMBERED
+#define KPTLLND_TRACEFAIL       CTL_UNNUMBERED
 #define KPTLLND_TRACEBASENAME   CTL_UNNUMBERED
 #define KPTLLND_SIMULATION_BITMAP CTL_UNNUMBERED
 #endif
@@ -321,6 +329,14 @@ static cfs_sysctl_table_t kptllnd_ctl_table[] = {
                 .proc_handler = &proc_dointvec
         },
         {
+                .ctl_name = KPTLLND_TRACEFAIL,
+                .procname = "ptltrace_on_fail",
+                .data     = &ptltrace_on_fail,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        },
+        {
                 .ctl_name = KPTLLND_TRACEBASENAME,
                 .procname = "ptltrace_basename",
                 .data     = ptltrace_basename_space,
index 1a5c383..c92fe6c 100644 (file)
@@ -768,6 +768,7 @@ kptllnd_peer_check_sends (kptl_peer_t *peer)
         /* Nuke everything (including tx we were trying) */
         kptllnd_peer_close(peer, -EIO);
         kptllnd_tx_decref(tx);
+        kptllnd_schedule_ptltrace_dump();
 }
 
 kptl_tx_t *
@@ -892,7 +893,10 @@ kptllnd_peer_check_bucket (int idx, int stamp)
                        (tx->tx_tposted == 0) ? 0UL : (jiffies - tx->tx_tposted),
                        *kptllnd_tunables.kptl_timeout);
 
-                kptllnd_dump_ptltrace();
+#ifdef CRAY_XT3
+                if (*kptllnd_tunables.kptl_ptltrace_on_timeout)
+                        kptllnd_dump_ptltrace();
+#endif
 
                 kptllnd_tx_decref(tx);
 
index ff98d5e..e85ab5a 100644 (file)
@@ -38,7 +38,7 @@
 
 #ifdef CRAY_XT3
 static struct semaphore   ptltrace_mutex;
-static struct semaphore   ptltrace_signal;
+static cfs_waitq_t        ptltrace_debug_ctlwq;
 
 void
 kptllnd_ptltrace_to_file(char *filename)
@@ -136,7 +136,7 @@ kptllnd_dump_ptltrace_thread(void *arg)
 {
         static char fname[1024];
 
-        libcfs_daemonize("ptltracedump");
+        libcfs_daemonize("kpt_ptltrace_dump");
 
         /* serialise with other instances of me */
         mutex_down(&ptltrace_mutex);
@@ -150,8 +150,7 @@ kptllnd_dump_ptltrace_thread(void *arg)
         mutex_up(&ptltrace_mutex);
 
         /* unblock my creator */
-        mutex_up(&ptltrace_signal);
-        
+        cfs_waitq_signal(&ptltrace_debug_ctlwq);
         return 0;
 }
 
@@ -159,9 +158,13 @@ void
 kptllnd_dump_ptltrace(void)
 {
         int            rc;     
+        cfs_waitlink_t wait;
+        ENTRY;
 
-        if (!*kptllnd_tunables.kptl_ptltrace_on_timeout)
-                return;
+        /* taken from libcfs_debug_dumplog */
+        cfs_waitlink_init(&wait);
+        set_current_state(TASK_INTERRUPTIBLE);
+        cfs_waitq_add(&ptltrace_debug_ctlwq, &wait);
 
         rc = cfs_kernel_thread(kptllnd_dump_ptltrace_thread,
                                (void *)(long)cfs_curproc_pid(),
@@ -169,28 +172,19 @@ kptllnd_dump_ptltrace(void)
         if (rc < 0) {
                 CERROR("Error %d starting ptltrace dump thread\n", rc);
         } else {
-                /* block until thread completes */
-                mutex_down(&ptltrace_signal);
+                cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE);
         }
-}
 
-void
-kptllnd_init_ptltrace(void)
-{
-        init_mutex(&ptltrace_mutex);
-        init_mutex_locked(&ptltrace_signal);
-}
-
-#else
-
-void
-kptllnd_dump_ptltrace(void)
-{
+        /* teardown (needed even if kernel_thread() failed) */
+        cfs_waitq_del(&ptltrace_debug_ctlwq, &wait);
+        set_current_state(TASK_RUNNING);
+        EXIT;
 }
 
 void
 kptllnd_init_ptltrace(void)
 {
+        cfs_waitq_init(&ptltrace_debug_ctlwq);
+        init_mutex(&ptltrace_mutex);
 }
-
 #endif
index f7bf2d3..5c28881 100644 (file)
@@ -430,7 +430,7 @@ kptllnd_rx_buffer_callback (ptl_event_t *ev)
                        kptllnd_evtype2str(ev->type), ev->type, rxb,
                        kptllnd_errtype2str(ev->ni_fail_type),
                        ev->ni_fail_type, unlinked);
-
+                kptllnd_schedule_ptltrace_dump();
         } else if (ev->type == PTL_EVENT_PUT_END &&
                    !rxbp->rxbp_shutdown) {
 
@@ -532,10 +532,12 @@ kptllnd_nak (kptl_rx_t *rx)
                     *kptllnd_tunables.kptl_portal, 0,
                     LNET_MSG_MATCHBITS, 0, 0);
 
-        if (rc != PTL_OK)
+        if (rc != PTL_OK) {
                 CWARN("Can't NAK %s: put failed %s(%d)\n",
                       kptllnd_ptlid2str(rx->rx_initiator),
                       kptllnd_errtype2str(rc), rc);
+                kptllnd_schedule_ptltrace_dump();
+        }
 }
 
 void
index 4043f8f..96e350b 100644 (file)
@@ -492,6 +492,7 @@ kptllnd_tx_callback(ptl_event_t *ev)
                        ev->ni_fail_type, unlinked);
                 tx->tx_status = -EIO; 
                 kptllnd_peer_close(peer, -EIO);
+                kptllnd_schedule_ptltrace_dump();
         }
 
         if (!unlinked)