Whamcloud - gitweb
* ghastly hack to serialise RapkSetRiParams with comms scheduler thread
author eeb <eeb>
Fri, 11 Feb 2005 20:22:44 +0000 (20:22 +0000)
committer eeb <eeb>
Fri, 11 Feb 2005 20:22:44 +0000 (20:22 +0000)
*   Ranal "working" @ 1.2GBytes/sec single channel, 2.4GBytes/sec double

lnet/klnds/ralnd/ralnd.c
lnet/klnds/ralnd/ralnd.h
lnet/klnds/ralnd/ralnd_cb.c

index 35f436e..014b4c6 100644 (file)
@@ -22,6 +22,8 @@
  */
 #include "ranal.h"
 
+static int        kranal_devids[] = {RAPK_MAIN_DEVICE_ID,
+                                     RAPK_EXPANSION_DEVICE_ID};
 
 nal_t                   kranal_api;
 ptl_handle_ni_t         kranal_ni;
@@ -533,7 +535,17 @@ int
 kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq,
                        __u32 peer_ip, int peer_port)
 {
-        RAP_RETURN    rrc;
+        kra_device_t  *dev = conn->rac_device;
+        unsigned long  flags;
+        RAP_RETURN     rrc;
+
+        /* tell scheduler to release the setri_mutex... */
+        spin_lock_irqsave(&dev->rad_lock, flags);
+        dev->rad_setri_please++;
+        wake_up(&dev->rad_waitq);
+        spin_unlock_irqrestore(&dev->rad_lock, flags);
+        /* ...and grab it */
+        down(&dev->rad_setri_mutex);
 
         rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
         if (rrc != RAP_SUCCESS) {
@@ -542,6 +554,14 @@ kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq,
                 return -EPROTO;
         }
 
+        /* release the setri_mutex... */
+        up(&dev->rad_setri_mutex);
+        /* ...and tell scheduler we're all done */
+        spin_lock_irqsave(&dev->rad_lock, flags);
+        dev->rad_setri_please--;
+        wake_up(&dev->rad_waitq);
+        spin_unlock_irqrestore(&dev->rad_lock, flags);
+        
         conn->rac_peerstamp = connreq->racr_peerstamp;
         conn->rac_peer_connstamp = connreq->racr_connstamp;
         conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout);
@@ -894,6 +914,9 @@ kranal_conn_handshake (struct socket *sock, kra_peer_t *peer)
         if (nstale != 0)
                 CWARN("Closed %d stale conns to "LPX64"\n", nstale, peer_nid);
 
+        CDEBUG(D_WARNING, "New connection to "LPX64" on devid[%d] = %d\n",
+               peer_nid, conn->rac_device->rad_idx, conn->rac_device->rad_id);
+
         /* Ensure conn gets checked.  Transmits may have been queued and an
          * FMA event may have happened before it got in the cq hash table */
         kranal_schedule_conn(conn);
@@ -1720,6 +1743,11 @@ kranal_device_init(int id, kra_device_t *dev)
         const int         total_ntx = RANAL_NTX + RANAL_NTX_NBLK;
         RAP_RETURN        rrc;
 
+        /* The awful serialise RapkSetRiParams with the device scheduler
+         * work-around! */
+        dev->rad_setri_please = 0;
+        init_MUTEX(&dev->rad_setri_mutex);
+
         dev->rad_id = id;
         rrc = RapkGetDeviceByIndex(id, kranal_device_callback,
                                    &dev->rad_handle);
@@ -1893,8 +1921,6 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                     ptl_ni_limits_t *requested_limits,
                     ptl_ni_limits_t *actual_limits)
 {
-        static int        device_ids[] = {RAPK_MAIN_DEVICE_ID,
-                                          RAPK_EXPANSION_DEVICE_ID};
         struct timeval    tv;
         ptl_process_id_t  process_id;
         int               pkmem = atomic_read(&portal_kmemory);
@@ -2012,11 +2038,14 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 }
         }
 
-        LASSERT(kranal_data.kra_ndevs == 0);
-        for (i = 0; i < sizeof(device_ids)/sizeof(device_ids[0]); i++) {
+        LASSERT (kranal_data.kra_ndevs == 0);
+
+        for (i = 0; i < sizeof(kranal_devids)/sizeof(kranal_devids[0]); i++) {
+                LASSERT (i < RANAL_MAXDEVS);
+
                 dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
 
-                rc = kranal_device_init(device_ids[i], dev);
+                rc = kranal_device_init(kranal_devids[i], dev);
                 if (rc == 0)
                         kranal_data.kra_ndevs++;
 
index 0843058..5cb1640 100644 (file)
@@ -111,10 +111,11 @@ typedef struct
         int                     rad_idx;        /* index in kra_devices */
         int                     rad_ready;      /* set by device callback */
         struct list_head        rad_connq;      /* connections requiring attention */
-        struct list_head        rad_zombies;    /* connections to free */
         wait_queue_head_t       rad_waitq;      /* scheduler waits here */
         spinlock_t              rad_lock;       /* serialise */
         void                   *rad_scheduler;  /* scheduling thread */
+        int                     rad_setri_please; /* ++ when connd wants to setri */
+        struct semaphore        rad_setri_mutex; /* serialise setri */
 } kra_device_t;
 
 typedef struct
index 38f1b77..fc5ed3f 100644 (file)
@@ -1397,6 +1397,10 @@ kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg,
                 return 0;
 
         case RAP_NOT_DONE:
+                if (time_after_eq(jiffies,
+                                  conn->rac_last_tx + conn->rac_keepalive*HZ))
+                        CDEBUG(D_WARNING, "EAGAIN sending %02x (idle %lu secs)\n",
+                               msg->ram_type, (jiffies - conn->rac_last_tx)/HZ);
                 return -EAGAIN;
         }
 }
@@ -1466,7 +1470,9 @@ kranal_process_fmaq (kra_conn_t *conn)
 
                 if (time_after_eq(jiffies,
                                   conn->rac_last_tx + conn->rac_keepalive * HZ)) {
-                        CDEBUG(D_NET, "sending NOOP (idle)\n");
+                        CDEBUG(D_NET, "sending NOOP -> "LPX64" (%p idle %lu(%ld))\n",
+                               conn->rac_peer->rap_nid, conn,
+                               (jiffies - conn->rac_last_tx)/HZ, conn->rac_keepalive);
                         kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
                         kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
                 }
@@ -1829,12 +1835,14 @@ void
 kranal_complete_closed_conn (kra_conn_t *conn)
 {
         kra_tx_t   *tx;
+        int         nfma;
+        int         nreplies;
 
         LASSERT (conn->rac_state == RANAL_CONN_CLOSED);
         LASSERT (list_empty(&conn->rac_list));
         LASSERT (list_empty(&conn->rac_hashlist));
 
-        while (!list_empty(&conn->rac_fmaq)) {
+        for (nfma = 0; !list_empty(&conn->rac_fmaq); nfma++) {
                 tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list);
 
                 list_del(&tx->tx_list);
@@ -1843,12 +1851,15 @@ kranal_complete_closed_conn (kra_conn_t *conn)
 
         LASSERT (list_empty(&conn->rac_rdmaq));
 
-        while (!list_empty(&conn->rac_replyq)) {
+        for (nreplies = 0; !list_empty(&conn->rac_replyq); nreplies++) {
                 tx = list_entry(conn->rac_replyq.next, kra_tx_t, tx_list);
 
                 list_del(&tx->tx_list);
                 kranal_tx_done(tx, -ECONNABORTED);
         }
+
+        CDEBUG(D_WARNING, "Closed conn %p -> "LPX64": nmsg %d nreplies %d\n",
+               conn, conn->rac_peer->rap_nid, nfma, nreplies);
 }
 
 int
@@ -1868,6 +1879,9 @@ kranal_scheduler (void *arg)
         dev->rad_scheduler = current;
         init_waitqueue_entry(&wait, current);
 
+        /* prevent connd from doing setri until requested */
+        down(&dev->rad_setri_mutex);
+
         spin_lock_irqsave(&dev->rad_lock, flags);
 
         while (!kranal_data.kra_shutdown) {
@@ -1882,6 +1896,19 @@ kranal_scheduler (void *arg)
                         spin_lock_irqsave(&dev->rad_lock, flags);
                 }
 
+                /* Ghastly hack to ensure RapkSetRiParams() serialises with
+                 * other comms */
+                if (dev->rad_setri_please != 0) {
+                        spin_unlock_irqrestore(&dev->rad_lock, flags);
+                        up(&dev->rad_setri_mutex);
+                        
+                        wait_event_interruptible(dev->rad_waitq,
+                                                 dev->rad_setri_please == 0);
+                        
+                        down(&dev->rad_setri_mutex);
+                        spin_lock_irqsave(&dev->rad_lock, flags);
+                }
+                
                 if (dev->rad_ready) {
                         /* Device callback fired since I last checked it */
                         dev->rad_ready = 0;
@@ -1933,6 +1960,7 @@ kranal_scheduler (void *arg)
         }
 
         spin_unlock_irqrestore(&dev->rad_lock, flags);
+        up(&dev->rad_setri_mutex);
 
         dev->rad_scheduler = NULL;
         kranal_thread_fini();