Whamcloud - gitweb
* vibnal fixes
[fs/lustre-release.git] / lnet / klnds / ralnd / ralnd.c
index 35f436e..0da7af4 100644 (file)
@@ -22,6 +22,8 @@
  */
 #include "ranal.h"
 
  */
 #include "ranal.h"
 
+static int        kranal_devids[] = {RAPK_MAIN_DEVICE_ID,
+                                     RAPK_EXPANSION_DEVICE_ID};
 
 nal_t                   kranal_api;
 ptl_handle_ni_t         kranal_ni;
 
 nal_t                   kranal_api;
 ptl_handle_ni_t         kranal_ni;
@@ -533,15 +535,32 @@ int
 kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq,
                        __u32 peer_ip, int peer_port)
 {
 kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq,
                        __u32 peer_ip, int peer_port)
 {
-        RAP_RETURN    rrc;
+        kra_device_t  *dev = conn->rac_device;
+        unsigned long  flags;
+        RAP_RETURN     rrc;
+
+        /* CAVEAT EMPTOR: we're really overloading rac_last_tx + rac_keepalive
+         * to do RapkCompleteSync() timekeeping (see kranal_scheduler). */
+        conn->rac_last_tx = jiffies;
+        conn->rac_keepalive = 0;
+
+        /* Schedule conn on rad_new_conns */
+        kranal_conn_addref(conn);
+        spin_lock_irqsave(&dev->rad_lock, flags);
+        list_add_tail(&conn->rac_schedlist, &dev->rad_new_conns);
+        wake_up(&dev->rad_waitq);
+        spin_unlock_irqrestore(&dev->rad_lock, flags);
 
         rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
         if (rrc != RAP_SUCCESS) {
                 CERROR("Error setting riparams from %u.%u.%u.%u/%d: %d\n",
                        HIPQUAD(peer_ip), peer_port, rrc);
 
         rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
         if (rrc != RAP_SUCCESS) {
                 CERROR("Error setting riparams from %u.%u.%u.%u/%d: %d\n",
                        HIPQUAD(peer_ip), peer_port, rrc);
-                return -EPROTO;
+                return -ECONNABORTED;
         }
 
         }
 
+        /* The scheduler doesn't touch conn, apart from descheduling and
+         * decref'ing it after RapkCompleteSync() returns success, so conn is
+         * all mine */
+
         conn->rac_peerstamp = connreq->racr_peerstamp;
         conn->rac_peer_connstamp = connreq->racr_connstamp;
         conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout);
         conn->rac_peerstamp = connreq->racr_peerstamp;
         conn->rac_peer_connstamp = connreq->racr_connstamp;
         conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout);
@@ -894,6 +913,9 @@ kranal_conn_handshake (struct socket *sock, kra_peer_t *peer)
         if (nstale != 0)
                 CWARN("Closed %d stale conns to "LPX64"\n", nstale, peer_nid);
 
         if (nstale != 0)
                 CWARN("Closed %d stale conns to "LPX64"\n", nstale, peer_nid);
 
+        CDEBUG(D_WARNING, "New connection to "LPX64" on devid[%d] = %d\n",
+               peer_nid, conn->rac_device->rad_idx, conn->rac_device->rad_id);
+
         /* Ensure conn gets checked.  Transmits may have been queued and an
          * FMA event may have happened before it got in the cq hash table */
         kranal_schedule_conn(conn);
         /* Ensure conn gets checked.  Transmits may have been queued and an
          * FMA event may have happened before it got in the cq hash table */
         kranal_schedule_conn(conn);
@@ -1823,13 +1845,19 @@ kranal_api_shutdown (nal_t *nal)
                 break;
         }
 
                 break;
         }
 
+        /* Conn/Peer state all cleaned up BEFORE setting shutdown, so threads
+         * don't have to worry about shutdown races */
+        LASSERT (atomic_read(&kranal_data.kra_nconns) == 0);
+        LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
+        
         /* flag threads to terminate; wake and wait for them to die */
         kranal_data.kra_shutdown = 1;
 
         for (i = 0; i < kranal_data.kra_ndevs; i++) {
                 kra_device_t *dev = &kranal_data.kra_devices[i];
 
         /* flag threads to terminate; wake and wait for them to die */
         kranal_data.kra_shutdown = 1;
 
         for (i = 0; i < kranal_data.kra_ndevs; i++) {
                 kra_device_t *dev = &kranal_data.kra_devices[i];
 
-                LASSERT (list_empty(&dev->rad_connq));
+                LASSERT (list_empty(&dev->rad_ready_conns));
+                LASSERT (list_empty(&dev->rad_new_conns));
 
                 spin_lock_irqsave(&dev->rad_lock, flags);
                 wake_up(&dev->rad_waitq);
 
                 spin_lock_irqsave(&dev->rad_lock, flags);
                 wake_up(&dev->rad_waitq);
@@ -1893,8 +1921,6 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                     ptl_ni_limits_t *requested_limits,
                     ptl_ni_limits_t *actual_limits)
 {
                     ptl_ni_limits_t *requested_limits,
                     ptl_ni_limits_t *actual_limits)
 {
-        static int        device_ids[] = {RAPK_MAIN_DEVICE_ID,
-                                          RAPK_EXPANSION_DEVICE_ID};
         struct timeval    tv;
         ptl_process_id_t  process_id;
         int               pkmem = atomic_read(&portal_kmemory);
         struct timeval    tv;
         ptl_process_id_t  process_id;
         int               pkmem = atomic_read(&portal_kmemory);
@@ -1935,7 +1961,8 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 kra_device_t  *dev = &kranal_data.kra_devices[i];
 
                 dev->rad_idx = i;
                 kra_device_t  *dev = &kranal_data.kra_devices[i];
 
                 dev->rad_idx = i;
-                INIT_LIST_HEAD(&dev->rad_connq);
+                INIT_LIST_HEAD(&dev->rad_ready_conns);
+                INIT_LIST_HEAD(&dev->rad_new_conns);
                 init_waitqueue_head(&dev->rad_waitq);
                 spin_lock_init(&dev->rad_lock);
         }
                 init_waitqueue_head(&dev->rad_waitq);
                 spin_lock_init(&dev->rad_lock);
         }
@@ -2012,14 +2039,25 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 }
         }
 
                 }
         }
 
-        LASSERT(kranal_data.kra_ndevs == 0);
-        for (i = 0; i < sizeof(device_ids)/sizeof(device_ids[0]); i++) {
+        LASSERT (kranal_data.kra_ndevs == 0);
+
+        for (i = 0; i < sizeof(kranal_devids)/sizeof(kranal_devids[0]); i++) {
+                LASSERT (i < RANAL_MAXDEVS);
+
                 dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
 
                 dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
 
-                rc = kranal_device_init(device_ids[i], dev);
+                rc = kranal_device_init(kranal_devids[i], dev);
                 if (rc == 0)
                         kranal_data.kra_ndevs++;
                 if (rc == 0)
                         kranal_data.kra_ndevs++;
-
+        }
+        
+        if (kranal_data.kra_ndevs == 0) {
+                CERROR("Can't initialise any RapidArray devices\n");
+                goto failed;
+        }
+        
+        for (i = 0; i < kranal_data.kra_ndevs; i++) {
+                dev = &kranal_data.kra_devices[i];
                 rc = kranal_thread_start(kranal_scheduler, dev);
                 if (rc != 0) {
                         CERROR("Can't spawn ranal scheduler[%d]: %d\n",
                 rc = kranal_thread_start(kranal_scheduler, dev);
                 if (rc != 0) {
                         CERROR("Can't spawn ranal scheduler[%d]: %d\n",
@@ -2028,9 +2066,6 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
                 }
         }
 
                 }
         }
 
-        if (kranal_data.kra_ndevs == 0)
-                goto failed;
-
         rc = libcfs_nal_cmd_register(RANAL, &kranal_cmd, NULL);
         if (rc != 0) {
                 CERROR("Can't initialise command interface (rc = %d)\n", rc);
         rc = libcfs_nal_cmd_register(RANAL, &kranal_cmd, NULL);
         if (rc != 0) {
                 CERROR("Can't initialise command interface (rc = %d)\n", rc);