* 5602 fix improves checks that NID is set correctly and causes incorrect
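Besides the whitespace cleanups and RapK API updates, the largest functional change in the diff below is to kranal_connd(): the connection daemon now also drains a queue of accepted sockets (kra_connd_acceptq) in addition to the peer connect queue, and only goes to sleep once a full pass over both queues found nothing to do. The following is a minimal userspace sketch of that drain-then-sleep pattern, assuming hypothetical names (work_queue, connd_loop, handle_accept, handle_peer); it is an illustration of the pattern, not code from the patch:

/*
 * Minimal userspace sketch of the drain-then-sleep pattern adopted by
 * kranal_connd() in this patch.  All names here are illustrative only.
 */
#include <pthread.h>
#include <stdbool.h>

struct work_queue {
        pthread_mutex_t lock;
        pthread_cond_t  wakeup;
        int             naccepts;      /* stands in for kra_connd_acceptq */
        int             npeers;        /* stands in for kra_connd_peers */
        bool            shutdown;
};

static void handle_accept(void) { /* think kranal_conn_handshake() */ }
static void handle_peer(void)   { /* think kranal_connect() */ }

static void *connd_loop(void *arg)
{
        struct work_queue *q = arg;

        pthread_mutex_lock(&q->lock);
        while (!q->shutdown) {
                int did_something = 0;

                if (q->naccepts > 0) {         /* drain accepted sockets first */
                        q->naccepts--;
                        pthread_mutex_unlock(&q->lock);
                        handle_accept();       /* long-running work, done unlocked */
                        pthread_mutex_lock(&q->lock);
                        did_something = 1;
                }

                if (q->npeers > 0) {           /* then peers waiting to connect */
                        q->npeers--;
                        pthread_mutex_unlock(&q->lock);
                        handle_peer();
                        pthread_mutex_lock(&q->lock);
                        did_something = 1;
                }

                if (did_something)             /* work was done: re-check both queues */
                        continue;

                /* Both queues empty: sleep until new work or shutdown arrives. */
                pthread_cond_wait(&q->wakeup, &q->lock);
        }
        pthread_mutex_unlock(&q->lock);
        return NULL;
}

The point of the did_something flag is that the thread re-checks both queues after doing any work, so it never sleeps while items queued during the unlocked handshake are still pending.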
[fs/lustre-release.git] lnet/klnds/ralnd/ralnd_cb.c
index a0664be..ff080f4 100644
--- a/lnet/klnds/ralnd/ralnd_cb.c
+++ b/lnet/klnds/ralnd/ralnd_cb.c
@@ -38,7 +38,7 @@ kranal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
 }
 
 void
-kranal_device_callback(RAP_INT32 devid)
+kranal_device_callback(RAP_INT32 devid, RAP_PVOID arg)
 {
         kra_device_t *dev;
         int           i;
@@ -94,7 +94,7 @@ kranal_get_idle_tx (int may_block)
                 /* "normal" descriptor is free */
                 if (!list_empty(&kranal_data.kra_idle_txs)) {
                         tx = list_entry(kranal_data.kra_idle_txs.next,
-                                       kra_tx_t, tx_list);
+                                        kra_tx_t, tx_list);
                         break;
                 }
 
@@ -106,7 +106,7 @@ kranal_get_idle_tx (int may_block)
                         }
 
                         tx = list_entry(kranal_data.kra_idle_nblk_txs.next,
-                                       kra_tx_t, tx_list);
+                                        kra_tx_t, tx_list);
                         break;
                 }
 
@@ -114,7 +114,7 @@ kranal_get_idle_tx (int may_block)
                 spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
 
                 wait_event(kranal_data.kra_idle_tx_waitq,
-                          !list_empty(&kranal_data.kra_idle_txs));
+                           !list_empty(&kranal_data.kra_idle_txs));
         }
 
         if (tx != NULL) {
@@ -241,7 +241,6 @@ kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, ptl_kiov_t *kiov,
         tx->tx_buffer = (void *)((unsigned long)(kiov->kiov_offset + offset));
         
         phys->Address = kranal_page2phys(kiov->kiov_page);
-        phys->Length  = PAGE_SIZE;
         phys++;
 
         resid = nob - (kiov->kiov_len - offset);
@@ -256,18 +255,17 @@ kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, ptl_kiov_t *kiov,
                         /* Can't have gaps */
                         CERROR("Can't make payload contiguous in I/O VM:"
                                "page %d, offset %d, len %d \n", 
-                               phys - tx->tx_phys,
+                               (int)(phys - tx->tx_phys),
                                kiov->kiov_offset, kiov->kiov_len);                        
                         return -EINVAL;
                 }
 
                 if ((phys - tx->tx_phys) == PTL_MD_MAX_IOV) {
-                        CERROR ("payload too big (%d)\n", phys - tx->tx_phys);
+                        CERROR ("payload too big (%d)\n", (int)(phys - tx->tx_phys));
                         return -EMSGSIZE;
                 }
 
                 phys->Address = kranal_page2phys(kiov->kiov_page);
-                phys->Length  = PAGE_SIZE;
                 phys++;
 
                 resid -= PAGE_SIZE;
@@ -312,7 +310,7 @@ kranal_map_buffer (kra_tx_t *tx)
         case RANAL_BUF_PHYS_UNMAPPED:
                 rrc = RapkRegisterPhys(dev->rad_handle,
                                        tx->tx_phys, tx->tx_phys_npages,
-                                       dev->rad_ptag, &tx->tx_map_key);
+                                       &tx->tx_map_key);
                 LASSERT (rrc == RAP_SUCCESS);
                 tx->tx_buftype = RANAL_BUF_PHYS_MAPPED;
                 break;
@@ -320,7 +318,7 @@ kranal_map_buffer (kra_tx_t *tx)
         case RANAL_BUF_VIRT_UNMAPPED:
                 rrc = RapkRegisterMemory(dev->rad_handle,
                                          tx->tx_buffer, tx->tx_nob,
-                                         dev->rad_ptag, &tx->tx_map_key);
+                                         &tx->tx_map_key);
                 LASSERT (rrc == RAP_SUCCESS);
                 tx->tx_buftype = RANAL_BUF_VIRT_MAPPED;
                 break;
@@ -348,7 +346,7 @@ kranal_unmap_buffer (kra_tx_t *tx)
                 dev = tx->tx_conn->rac_device;
                 LASSERT (current == dev->rad_scheduler);
                 rrc = RapkDeregisterMemory(dev->rad_handle, NULL,
-                                           dev->rad_ptag, &tx->tx_map_key);
+                                           &tx->tx_map_key);
                 LASSERT (rrc == RAP_SUCCESS);
                 tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED;
                 break;
@@ -358,7 +356,7 @@ kranal_unmap_buffer (kra_tx_t *tx)
                 dev = tx->tx_conn->rac_device;
                 LASSERT (current == dev->rad_scheduler);
                 rrc = RapkDeregisterMemory(dev->rad_handle, tx->tx_buffer,
-                                           dev->rad_ptag, &tx->tx_map_key);
+                                           &tx->tx_map_key);
                 LASSERT (rrc == RAP_SUCCESS);
                 tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED;
                 break;
@@ -483,7 +481,7 @@ kranal_launch_tx (kra_tx_t *tx, ptl_nid_t nid)
         if (!peer->rap_connecting) {
                 LASSERT (list_empty(&peer->rap_tx_queue));
                 
-                now = CURRENT_TIME;
+                now = CURRENT_SECONDS;
                 if (now < peer->rap_reconnect_time) {
                         write_unlock_irqrestore(g_lock, flags);
                         kranal_tx_done(tx, -EHOSTUNREACH);
@@ -496,7 +494,7 @@ kranal_launch_tx (kra_tx_t *tx, ptl_nid_t nid)
                 spin_lock(&kranal_data.kra_connd_lock);
         
                 list_add_tail(&peer->rap_connd_list,
-                             &kranal_data.kra_connd_peers);
+                              &kranal_data.kra_connd_peers);
                 wake_up(&kranal_data.kra_connd_waitq);
         
                 spin_unlock(&kranal_data.kra_connd_lock);
@@ -559,8 +557,8 @@ kranal_consume_rxmsg (kra_conn_t *conn, void *buffer, int nob)
 
         LASSERT (conn->rac_rxmsg != NULL);
 
-        rrc = RapkFmaCopyToUser(conn->rac_rihandle, buffer,
-                                &nob_received, sizeof(kra_msg_t));
+        rrc = RapkFmaCopyOut(conn->rac_rihandle, buffer,
+                             &nob_received, sizeof(kra_msg_t));
         LASSERT (rrc == RAP_SUCCESS);
 
         conn->rac_rxmsg = NULL;
@@ -585,8 +583,8 @@ kranal_do_send (lib_nal_t    *nal,
                 unsigned int  niov, 
                 struct iovec *iov, 
                 ptl_kiov_t   *kiov,
-                size_t        offset,
-                size_t        nob)
+                int           offset,
+                int           nob)
 {
         kra_conn_t *conn;
         kra_tx_t   *tx;
@@ -594,8 +592,8 @@ kranal_do_send (lib_nal_t    *nal,
 
         /* NB 'private' is different depending on what we're sending.... */
 
-        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
-               " pid %d\n", nob, niov, nid , pid);
+        CDEBUG(D_NET, "sending %d bytes in %d frags to nid:"LPX64" pid %d\n",
+               nob, niov, nid, pid);
 
         LASSERT (nob == 0 || niov > 0);
         LASSERT (niov <= PTL_MD_MAX_IOV);
@@ -625,7 +623,7 @@ kranal_do_send (lib_nal_t    *nal,
                 /* Incoming message consistent with immediate reply? */
                 if (conn->rac_rxmsg->ram_type != RANAL_MSG_GET_REQ) {
                         CERROR("REPLY to "LPX64" bad msg type %x!!!\n",
-                              nid, conn->rac_rxmsg->ram_type);
+                               nid, conn->rac_rxmsg->ram_type);
                         return PTL_FAIL;
                 }
 
@@ -744,32 +742,32 @@ kranal_do_send (lib_nal_t    *nal,
 
 ptl_err_t
 kranal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-            ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-            unsigned int niov, struct iovec *iov,
-            size_t offset, size_t len)
+             ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+             unsigned int niov, struct iovec *iov,
+             size_t offset, size_t len)
 {
         return kranal_do_send(nal, private, cookie,
-                             hdr, type, nid, pid,
-                             niov, iov, NULL,
-                             offset, len);
+                              hdr, type, nid, pid,
+                              niov, iov, NULL,
+                              offset, len);
 }
 
 ptl_err_t
 kranal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, 
-                  ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                  unsigned int niov, ptl_kiov_t *kiov, 
-                  size_t offset, size_t len)
+                   ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+                   unsigned int niov, ptl_kiov_t *kiov, 
+                   size_t offset, size_t len)
 {
         return kranal_do_send(nal, private, cookie,
-                             hdr, type, nid, pid,
-                             niov, NULL, kiov,
-                             offset, len);
+                              hdr, type, nid, pid,
+                              niov, NULL, kiov,
+                              offset, len);
 }
 
 ptl_err_t
-kranal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg,
-               unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov,
-               size_t offset, size_t mlen, size_t rlen)
+kranal_do_recv (lib_nal_t *nal, void *private, lib_msg_t *libmsg,
+                unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov,
+                int offset, int mlen, int rlen)
 {
         kra_conn_t  *conn = private;
         kra_msg_t   *rxmsg = conn->rac_rxmsg;
@@ -859,20 +857,20 @@ kranal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg,
 
 ptl_err_t
 kranal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
-            unsigned int niov, struct iovec *iov, 
-            size_t offset, size_t mlen, size_t rlen)
+             unsigned int niov, struct iovec *iov, 
+             size_t offset, size_t mlen, size_t rlen)
 {
-        return kranal_recvmsg(nal, private, msg, niov, iov, NULL,
-                             offset, mlen, rlen);
+        return kranal_do_recv(nal, private, msg, niov, iov, NULL,
+                              offset, mlen, rlen);
 }
 
 ptl_err_t
 kranal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
-                  unsigned int niov, ptl_kiov_t *kiov, 
-                  size_t offset, size_t mlen, size_t rlen)
+                   unsigned int niov, ptl_kiov_t *kiov, 
+                   size_t offset, size_t mlen, size_t rlen)
 {
-        return kranal_recvmsg(nal, private, msg, niov, NULL, kiov,
-                             offset, mlen, rlen);
+        return kranal_do_recv(nal, private, msg, niov, NULL, kiov,
+                              offset, mlen, rlen);
 }
 
 int
@@ -1027,12 +1025,14 @@ kranal_reaper_check (int idx, unsigned long *min_timeoutp)
 int
 kranal_connd (void *arg)
 {
-       char               name[16];
+        char               name[16];
         wait_queue_t       wait;
         unsigned long      flags;
         kra_peer_t        *peer;
+        kra_acceptsock_t  *ras;
+        int                did_something;
 
-       snprintf(name, sizeof(name), "kranal_connd_%02ld", (long)arg);
+        snprintf(name, sizeof(name), "kranal_connd_%02ld", (long)arg);
         kportal_daemonize(name);
         kportal_blockallsigs();
 
@@ -1041,11 +1041,24 @@ kranal_connd (void *arg)
         spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
 
         while (!kranal_data.kra_shutdown) {
-                /* Safe: kra_shutdown only set when quiescent */
+                did_something = 0;
+                
+                if (!list_empty(&kranal_data.kra_connd_acceptq)) {
+                        ras = list_entry(kranal_data.kra_connd_acceptq.next,
+                                         kra_acceptsock_t, ras_list);
+                        list_del(&ras->ras_list);
+                        spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
 
+                        kranal_conn_handshake(ras->ras_sock, NULL);
+                        kranal_free_acceptsock(ras);
+
+                        spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
+                        did_something = 1;
+                }
+                
                 if (!list_empty(&kranal_data.kra_connd_peers)) {
                         peer = list_entry(kranal_data.kra_connd_peers.next,
-                                         kra_peer_t, rap_connd_list);
+                                          kra_peer_t, rap_connd_list);
                         
                         list_del_init(&peer->rap_connd_list);
                         spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
@@ -1054,9 +1067,12 @@ kranal_connd (void *arg)
                         kranal_peer_decref(peer);
 
                         spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-                       continue;
+                        did_something = 1;
                 }
 
+                if (did_something)
+                        continue;
+
                 set_current_state(TASK_INTERRUPTIBLE);
                 add_wait_queue(&kranal_data.kra_connd_waitq, &wait);
                 
@@ -1155,7 +1171,7 @@ kranal_reaper (void *arg)
                         /* Compute how many table entries to check now so I
                          * get round the whole table fast enough (NB I do
                          * this at fixed intervals of 'p' seconds) */
-                       chunk = conn_entries;
+                        chunk = conn_entries;
                         if (min_timeout > n * p)
                                 chunk = (chunk * n * p) / min_timeout;
                         if (chunk == 0)
@@ -1220,7 +1236,7 @@ kranal_check_rdma_cq (kra_device_t *dev)
         __u32                event_type;
 
         for (;;) {
-                rrc = RapkCQDone(dev->rad_rdma_cq, &cqid, &event_type);
+                rrc = RapkCQDone(dev->rad_rdma_cqh, &cqid, &event_type);
                 if (rrc == RAP_NOT_DONE)
                         return;
 
@@ -1275,7 +1291,7 @@ kranal_check_fma_cq (kra_device_t *dev)
         int                 i;
 
         for (;;) {
-                rrc = RapkCQDone(dev->rad_fma_cq, &cqid, &event_type);
+                rrc = RapkCQDone(dev->rad_fma_cqh, &cqid, &event_type);
                 if (rrc == RAP_NOT_DONE)
                         return;
                 
@@ -1366,8 +1382,8 @@ kranal_process_fmaq (kra_conn_t *conn)
         int           expect_reply;
 
         /* NB 1. kranal_sendmsg() may fail if I'm out of credits right now.
-         *       However I will be rescheduled some by a rad_fma_cq event when
-         *       I eventually get some.
+         *       However I will be rescheduled some by an FMA completion event
+         *       when I eventually get some.
          * NB 2. Sampling rac_state here, races with setting it elsewhere
          *       kranal_close_conn_locked.  But it doesn't matter if I try to
          *       send a "real" message just as I start closing because I'll get
@@ -1780,14 +1796,14 @@ kranal_scheduler (void *arg)
         while (!kranal_data.kra_shutdown) {
                 /* Safe: kra_shutdown only set when quiescent */
                 
-               if (busy_loops++ >= RANAL_RESCHED) {
+                if (busy_loops++ >= RANAL_RESCHED) {
                         spin_unlock_irqrestore(&dev->rad_lock, flags);
 
                         our_cond_resched();
-                       busy_loops = 0;
+                        busy_loops = 0;
 
                         spin_lock_irqsave(&dev->rad_lock, flags);
-               }
+                }
 
                 if (dev->rad_ready) {
                         /* Device callback fired since I last checked it */
@@ -1799,7 +1815,7 @@ kranal_scheduler (void *arg)
 
                         spin_lock_irqsave(&dev->rad_lock, flags);
                 }
-               
+                
                 if (!list_empty(&dev->rad_connq)) {
                         /* Connection needs attention */
                         conn = list_entry(dev->rad_connq.next,