LU-4360 Fix use after free in ksocknal_send
diff --git a/lnet/klnds/ralnd/ralnd.c b/lnet/klnds/ralnd/ralnd.c
index a7b22ea..a6d61e2 100644
@@ -26,6 +26,8 @@
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -292,17 +294,17 @@ kranal_set_conn_uniqueness (kra_conn_t *conn)
 int
 kranal_create_conn(kra_conn_t **connp, kra_device_t *dev)
 {
-        kra_conn_t    *conn;
-        RAP_RETURN     rrc;
+       kra_conn_t    *conn;
+       RAP_RETURN     rrc;
 
-        LASSERT (!cfs_in_interrupt());
-        LIBCFS_ALLOC(conn, sizeof(*conn));
+       LASSERT (!in_interrupt());
+       LIBCFS_ALLOC(conn, sizeof(*conn));
 
-        if (conn == NULL)
-                return -ENOMEM;
+       if (conn == NULL)
+               return -ENOMEM;
 
         memset(conn, 0, sizeof(*conn));
-        cfs_atomic_set(&conn->rac_refcount, 1);
+       atomic_set(&conn->rac_refcount, 1);
         CFS_INIT_LIST_HEAD(&conn->rac_list);
         CFS_INIT_LIST_HEAD(&conn->rac_hashlist);
         CFS_INIT_LIST_HEAD(&conn->rac_schedlist);
@@ -325,7 +327,7 @@ kranal_create_conn(kra_conn_t **connp, kra_device_t *dev)
                 return -ENETDOWN;
         }
 
-        cfs_atomic_inc(&kranal_data.kra_nconns);
+       atomic_inc(&kranal_data.kra_nconns);
         *connp = conn;
         return 0;
 }
@@ -333,81 +335,81 @@ kranal_create_conn(kra_conn_t **connp, kra_device_t *dev)
 void
 kranal_destroy_conn(kra_conn_t *conn)
 {
-        RAP_RETURN         rrc;
-
-        LASSERT (!cfs_in_interrupt());
-        LASSERT (!conn->rac_scheduled);
-        LASSERT (cfs_list_empty(&conn->rac_list));
-        LASSERT (cfs_list_empty(&conn->rac_hashlist));
-        LASSERT (cfs_list_empty(&conn->rac_schedlist));
-        LASSERT (cfs_atomic_read(&conn->rac_refcount) == 0);
-        LASSERT (cfs_list_empty(&conn->rac_fmaq));
-        LASSERT (cfs_list_empty(&conn->rac_rdmaq));
-        LASSERT (cfs_list_empty(&conn->rac_replyq));
-
-        rrc = RapkDestroyRi(conn->rac_device->rad_handle,
-                            conn->rac_rihandle);
-        LASSERT (rrc == RAP_SUCCESS);
-
-        if (conn->rac_peer != NULL)
-                kranal_peer_decref(conn->rac_peer);
-
-        LIBCFS_FREE(conn, sizeof(*conn));
-        cfs_atomic_dec(&kranal_data.kra_nconns);
+       RAP_RETURN         rrc;
+
+       LASSERT (!in_interrupt());
+       LASSERT (!conn->rac_scheduled);
+       LASSERT (cfs_list_empty(&conn->rac_list));
+       LASSERT (cfs_list_empty(&conn->rac_hashlist));
+       LASSERT (cfs_list_empty(&conn->rac_schedlist));
+       LASSERT (atomic_read(&conn->rac_refcount) == 0);
+       LASSERT (cfs_list_empty(&conn->rac_fmaq));
+       LASSERT (cfs_list_empty(&conn->rac_rdmaq));
+       LASSERT (cfs_list_empty(&conn->rac_replyq));
+
+       rrc = RapkDestroyRi(conn->rac_device->rad_handle,
+                           conn->rac_rihandle);
+       LASSERT (rrc == RAP_SUCCESS);
+
+       if (conn->rac_peer != NULL)
+               kranal_peer_decref(conn->rac_peer);
+
+       LIBCFS_FREE(conn, sizeof(*conn));
+       atomic_dec(&kranal_data.kra_nconns);
 }
 
 void
 kranal_terminate_conn_locked (kra_conn_t *conn)
 {
-        LASSERT (!cfs_in_interrupt());
-        LASSERT (conn->rac_state == RANAL_CONN_CLOSING);
-        LASSERT (!cfs_list_empty(&conn->rac_hashlist));
-        LASSERT (cfs_list_empty(&conn->rac_list));
+       LASSERT (!in_interrupt());
+       LASSERT (conn->rac_state == RANAL_CONN_CLOSING);
+       LASSERT (!cfs_list_empty(&conn->rac_hashlist));
+       LASSERT (cfs_list_empty(&conn->rac_list));
 
-        /* Remove from conn hash table: no new callbacks */
-        cfs_list_del_init(&conn->rac_hashlist);
-        kranal_conn_decref(conn);
+       /* Remove from conn hash table: no new callbacks */
+       cfs_list_del_init(&conn->rac_hashlist);
+       kranal_conn_decref(conn);
 
-        conn->rac_state = RANAL_CONN_CLOSED;
+       conn->rac_state = RANAL_CONN_CLOSED;
 
-        /* schedule to clear out all uncompleted comms in context of dev's
-         * scheduler */
-        kranal_schedule_conn(conn);
+       /* schedule to clear out all uncompleted comms in context of dev's
+        * scheduler */
+       kranal_schedule_conn(conn);
 }
 
 void
 kranal_close_conn_locked (kra_conn_t *conn, int error)
 {
-        kra_peer_t        *peer = conn->rac_peer;
+       kra_peer_t        *peer = conn->rac_peer;
 
-        CDEBUG_LIMIT(error == 0 ? D_NET : D_NETERROR,
-                     "closing conn to %s: error %d\n",
-                     libcfs_nid2str(peer->rap_nid), error);
+       CDEBUG_LIMIT(error == 0 ? D_NET : D_NETERROR,
+                    "closing conn to %s: error %d\n",
+                    libcfs_nid2str(peer->rap_nid), error);
 
-        LASSERT (!cfs_in_interrupt());
-        LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED);
-        LASSERT (!cfs_list_empty(&conn->rac_hashlist));
-        LASSERT (!cfs_list_empty(&conn->rac_list));
+       LASSERT (!in_interrupt());
+       LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED);
+       LASSERT (!cfs_list_empty(&conn->rac_hashlist));
+       LASSERT (!cfs_list_empty(&conn->rac_list));
 
-        cfs_list_del_init(&conn->rac_list);
+       cfs_list_del_init(&conn->rac_list);
 
-        if (cfs_list_empty(&peer->rap_conns) &&
-            peer->rap_persistence == 0) {
-                /* Non-persistent peer with no more conns... */
-                kranal_unlink_peer_locked(peer);
-        }
+       if (cfs_list_empty(&peer->rap_conns) &&
+           peer->rap_persistence == 0) {
+               /* Non-persistent peer with no more conns... */
+               kranal_unlink_peer_locked(peer);
+       }
 
-        /* Reset RX timeout to ensure we wait for an incoming CLOSE for the
-         * full timeout.  If we get a CLOSE we know the peer has stopped all
-         * RDMA.  Otherwise if we wait for the full timeout we can also be sure
-         * all RDMA has stopped. */
-        conn->rac_last_rx = jiffies;
-        cfs_mb();
+       /* Reset RX timeout to ensure we wait for an incoming CLOSE for the
+        * full timeout.  If we get a CLOSE we know the peer has stopped all
+        * RDMA.  Otherwise if we wait for the full timeout we can also be sure
+        * all RDMA has stopped. */
+       conn->rac_last_rx = jiffies;
+       smp_mb();
 
-        conn->rac_state = RANAL_CONN_CLOSING;
-        kranal_schedule_conn(conn);             /* schedule sending CLOSE */
+       conn->rac_state = RANAL_CONN_CLOSING;
+       kranal_schedule_conn(conn);             /* schedule sending CLOSE */
 
-        kranal_conn_decref(conn);               /* lose peer's ref */
+       kranal_conn_decref(conn);               /* lose peer's ref */
 }
 
 void
@@ -428,44 +430,44 @@ int
 kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq,
                        __u32 peer_ip, int peer_port)
 {
-        kra_device_t  *dev = conn->rac_device;
-        unsigned long  flags;
-        RAP_RETURN     rrc;
-
-        /* CAVEAT EMPTOR: we're really overloading rac_last_tx + rac_keepalive
-         * to do RapkCompleteSync() timekeeping (see kibnal_scheduler). */
-        conn->rac_last_tx = jiffies;
-        conn->rac_keepalive = 0;
-
-        rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Error setting riparams from %u.%u.%u.%u/%d: %d\n",
-                       HIPQUAD(peer_ip), peer_port, rrc);
-                return -ECONNABORTED;
-        }
-
-        /* Schedule conn on rad_new_conns */
-        kranal_conn_addref(conn);
+       kra_device_t  *dev = conn->rac_device;
+       unsigned long  flags;
+       RAP_RETURN     rrc;
+
+       /* CAVEAT EMPTOR: we're really overloading rac_last_tx + rac_keepalive
+        * to do RapkCompleteSync() timekeeping (see kibnal_scheduler). */
+       conn->rac_last_tx = jiffies;
+       conn->rac_keepalive = 0;
+
+       rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
+       if (rrc != RAP_SUCCESS) {
+               CERROR("Error setting riparams from %u.%u.%u.%u/%d: %d\n",
+                      HIPQUAD(peer_ip), peer_port, rrc);
+               return -ECONNABORTED;
+       }
+
+       /* Schedule conn on rad_new_conns */
+       kranal_conn_addref(conn);
        spin_lock_irqsave(&dev->rad_lock, flags);
-        cfs_list_add_tail(&conn->rac_schedlist, &dev->rad_new_conns);
-        cfs_waitq_signal(&dev->rad_waitq);
+       cfs_list_add_tail(&conn->rac_schedlist, &dev->rad_new_conns);
+       wake_up(&dev->rad_waitq);
        spin_unlock_irqrestore(&dev->rad_lock, flags);
 
-        rrc = RapkWaitToConnect(conn->rac_rihandle);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Error waiting to connect to %u.%u.%u.%u/%d: %d\n",
-                       HIPQUAD(peer_ip), peer_port, rrc);
-                return -ECONNABORTED;
-        }
-
-        /* Scheduler doesn't touch conn apart from to deschedule and decref it
-         * after RapkCompleteSync() return success, so conn is all mine */
-
-        conn->rac_peerstamp = connreq->racr_peerstamp;
-        conn->rac_peer_connstamp = connreq->racr_connstamp;
-        conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout);
-        kranal_update_reaper_timeout(conn->rac_keepalive);
-        return 0;
+       rrc = RapkWaitToConnect(conn->rac_rihandle);
+       if (rrc != RAP_SUCCESS) {
+               CERROR("Error waiting to connect to %u.%u.%u.%u/%d: %d\n",
+                      HIPQUAD(peer_ip), peer_port, rrc);
+               return -ECONNABORTED;
+       }
+
+       /* Scheduler doesn't touch conn apart from to deschedule and decref it
+        * after RapkCompleteSync() return success, so conn is all mine */
+
+       conn->rac_peerstamp = connreq->racr_peerstamp;
+       conn->rac_peer_connstamp = connreq->racr_connstamp;
+       conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout);
+       kranal_update_reaper_timeout(conn->rac_keepalive);
+       return 0;
 }
 
 int
@@ -836,8 +838,7 @@ kranal_connect (kra_peer_t *peer)
                 MIN(peer->rap_reconnect_interval,
                     *kranal_tunables.kra_max_reconnect_interval);
 
-        peer->rap_reconnect_time = jiffies + peer->rap_reconnect_interval *
-                CFS_HZ;
+       peer->rap_reconnect_time = jiffies + peer->rap_reconnect_interval * HZ;
 
         /* Grab all blocked packets while we have the global lock */
         cfs_list_add(&zombies, &peer->rap_tx_queue);
@@ -870,31 +871,31 @@ kranal_free_acceptsock (kra_acceptsock_t *ras)
 int
 kranal_accept (lnet_ni_t *ni, struct socket *sock)
 {
-        kra_acceptsock_t  *ras;
-        int                rc;
-        __u32              peer_ip;
-        int                peer_port;
-        unsigned long      flags;
+       kra_acceptsock_t  *ras;
+       int                rc;
+       __u32              peer_ip;
+       int                peer_port;
+       unsigned long      flags;
 
-        rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
-        LASSERT (rc == 0);                      /* we succeeded before */
+       rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
+       LASSERT (rc == 0);                      /* we succeeded before */
 
-        LIBCFS_ALLOC(ras, sizeof(*ras));
-        if (ras == NULL) {
-                CERROR("ENOMEM allocating connection request from "
-                       "%u.%u.%u.%u\n", HIPQUAD(peer_ip));
-                return -ENOMEM;
-        }
+       LIBCFS_ALLOC(ras, sizeof(*ras));
+       if (ras == NULL) {
+               CERROR("ENOMEM allocating connection request from "
+                      "%u.%u.%u.%u\n", HIPQUAD(peer_ip));
+               return -ENOMEM;
+       }
 
-        ras->ras_sock = sock;
+       ras->ras_sock = sock;
 
        spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
 
-        cfs_list_add_tail(&ras->ras_list, &kranal_data.kra_connd_acceptq);
-        cfs_waitq_signal(&kranal_data.kra_connd_waitq);
+       cfs_list_add_tail(&ras->ras_list, &kranal_data.kra_connd_acceptq);
+       wake_up(&kranal_data.kra_connd_waitq);
 
        spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-        return 0;
+       return 0;
 }
 
 int
@@ -912,7 +913,7 @@ kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid)
         memset(peer, 0, sizeof(*peer));         /* zero flags etc */
 
         peer->rap_nid = nid;
-        cfs_atomic_set(&peer->rap_refcount, 1);     /* 1 ref for caller */
+       atomic_set(&peer->rap_refcount, 1);     /* 1 ref for caller */
 
         CFS_INIT_LIST_HEAD(&peer->rap_list);
         CFS_INIT_LIST_HEAD(&peer->rap_connd_list);
@@ -933,7 +934,7 @@ kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid)
                 return -ESHUTDOWN;
         }
 
-        cfs_atomic_inc(&kranal_data.kra_npeers);
+       atomic_inc(&kranal_data.kra_npeers);
 
        write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
 
@@ -947,7 +948,7 @@ kranal_destroy_peer (kra_peer_t *peer)
         CDEBUG(D_NET, "peer %s %p deleted\n", 
                libcfs_nid2str(peer->rap_nid), peer);
 
-        LASSERT (cfs_atomic_read(&peer->rap_refcount) == 0);
+       LASSERT (atomic_read(&peer->rap_refcount) == 0);
         LASSERT (peer->rap_persistence == 0);
         LASSERT (!kranal_peer_active(peer));
         LASSERT (!peer->rap_connecting);
@@ -961,7 +962,7 @@ kranal_destroy_peer (kra_peer_t *peer)
          * they are destroyed, so we can be assured that _all_ state to do
          * with this peer has been cleaned up when its refcount drops to
          * zero. */
-        cfs_atomic_dec(&kranal_data.kra_npeers);
+       atomic_dec(&kranal_data.kra_npeers);
 }
 
 kra_peer_t *
@@ -983,7 +984,7 @@ kranal_find_peer_locked (lnet_nid_t nid)
 
                 CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
                        peer, libcfs_nid2str(nid), 
-                       cfs_atomic_read(&peer->rap_refcount));
+                      atomic_read(&peer->rap_refcount));
                 return peer;
         }
         return NULL;
@@ -1173,8 +1174,8 @@ kranal_get_conn_by_idx (int index)
                                                       rac_list);
                                 CDEBUG(D_NET, "++conn[%p] -> %s (%d)\n", conn,
                                        libcfs_nid2str(conn->rac_peer->rap_nid),
-                                       cfs_atomic_read(&conn->rac_refcount));
-                                cfs_atomic_inc(&conn->rac_refcount);
+                                      atomic_read(&conn->rac_refcount));
+                               atomic_inc(&conn->rac_refcount);
                                read_unlock(&kranal_data.kra_global_lock);
                                 return conn;
                         }
@@ -1436,7 +1437,7 @@ kranal_shutdown (lnet_ni_t *ni)
         unsigned long flags;
 
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               cfs_atomic_read(&libcfs_kmemory));
+              atomic_read(&libcfs_kmemory));
 
         LASSERT (ni == kranal_data.kra_ni);
         LASSERT (ni->ni_data == &kranal_data);
@@ -1474,11 +1475,11 @@ kranal_shutdown (lnet_ni_t *ni)
 
                 /* Wait for all peers to be freed */
                 i = 2;
-                while (cfs_atomic_read(&kranal_data.kra_npeers) != 0) {
+               while (atomic_read(&kranal_data.kra_npeers) != 0) {
                         i++;
                         CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
                                "waiting for %d peers to close down\n",
-                               cfs_atomic_read(&kranal_data.kra_npeers));
+                              atomic_read(&kranal_data.kra_npeers));
                         cfs_pause(cfs_time_seconds(1));
                 }
                 /* fall through */
@@ -1492,39 +1493,39 @@ kranal_shutdown (lnet_ni_t *ni)
          * while there are still active connds, but these will be temporary
          * since peer creation always fails after the listener has started to
          * shut down. */
-        LASSERT (cfs_atomic_read(&kranal_data.kra_npeers) == 0);
+       LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
         
         /* Flag threads to terminate */
         kranal_data.kra_shutdown = 1;
 
-        for (i = 0; i < kranal_data.kra_ndevs; i++) {
-                kra_device_t *dev = &kranal_data.kra_devices[i];
+       for (i = 0; i < kranal_data.kra_ndevs; i++) {
+               kra_device_t *dev = &kranal_data.kra_devices[i];
 
                spin_lock_irqsave(&dev->rad_lock, flags);
-                cfs_waitq_signal(&dev->rad_waitq);
+               wake_up(&dev->rad_waitq);
                spin_unlock_irqrestore(&dev->rad_lock, flags);
-        }
+       }
 
        spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-        cfs_waitq_broadcast(&kranal_data.kra_reaper_waitq);
+       wake_up_all(&kranal_data.kra_reaper_waitq);
        spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
 
-        LASSERT (cfs_list_empty(&kranal_data.kra_connd_peers));
+       LASSERT (cfs_list_empty(&kranal_data.kra_connd_peers));
        spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-        cfs_waitq_broadcast(&kranal_data.kra_connd_waitq);
+       wake_up_all(&kranal_data.kra_connd_waitq);
        spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
 
         /* Wait for threads to exit */
         i = 2;
-        while (cfs_atomic_read(&kranal_data.kra_nthreads) != 0) {
+       while (atomic_read(&kranal_data.kra_nthreads) != 0) {
                 i++;
                 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
                        "Waiting for %d threads to terminate\n",
-                       cfs_atomic_read(&kranal_data.kra_nthreads));
+                      atomic_read(&kranal_data.kra_nthreads));
                 cfs_pause(cfs_time_seconds(1));
         }
 
-        LASSERT (cfs_atomic_read(&kranal_data.kra_npeers) == 0);
+       LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
         if (kranal_data.kra_peers != NULL) {
                 for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
                         LASSERT (cfs_list_empty(&kranal_data.kra_peers[i]));
@@ -1534,7 +1535,7 @@ kranal_shutdown (lnet_ni_t *ni)
                             kranal_data.kra_peer_hash_size);
         }
 
-        LASSERT (cfs_atomic_read(&kranal_data.kra_nconns) == 0);
+       LASSERT (atomic_read(&kranal_data.kra_nconns) == 0);
         if (kranal_data.kra_conns != NULL) {
                 for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
                         LASSERT (cfs_list_empty(&kranal_data.kra_conns[i]));
@@ -1550,20 +1551,21 @@ kranal_shutdown (lnet_ni_t *ni)
         kranal_free_txdescs(&kranal_data.kra_idle_txs);
 
         CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               cfs_atomic_read(&libcfs_kmemory));
+              atomic_read(&libcfs_kmemory));
 
-        kranal_data.kra_init = RANAL_INIT_NOTHING;
-        PORTAL_MODULE_UNUSE;
+       kranal_data.kra_init = RANAL_INIT_NOTHING;
+       module_put(THIS_MODULE);
 }
 
 int
 kranal_startup (lnet_ni_t *ni)
 {
         struct timeval    tv;
-        int               pkmem = cfs_atomic_read(&libcfs_kmemory);
+       int               pkmem = atomic_read(&libcfs_kmemory);
         int               rc;
         int               i;
         kra_device_t     *dev;
+       char              name[16];
 
         LASSERT (ni->ni_lnd == &the_kralnd);
 
@@ -1593,43 +1595,43 @@ kranal_startup (lnet_ni_t *ni)
         ni->ni_data = &kranal_data;
         kranal_data.kra_ni = ni;
 
-        /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and
-         * a unique (for all time) connstamp so we can uniquely identify
-         * the sender.  The connstamp is an incrementing counter
-         * initialised with seconds + microseconds at startup time.  So we
-         * rely on NOT creating connections more frequently on average than
-         * 1MHz to ensure we don't use old connstamps when we reboot. */
-        cfs_gettimeofday(&tv);
-        kranal_data.kra_connstamp =
-        kranal_data.kra_peerstamp = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+       /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and
+        * a unique (for all time) connstamp so we can uniquely identify
+        * the sender.  The connstamp is an incrementing counter
+        * initialised with seconds + microseconds at startup time.  So we
+        * rely on NOT creating connections more frequently on average than
+        * 1MHz to ensure we don't use old connstamps when we reboot. */
+       do_gettimeofday(&tv);
+       kranal_data.kra_connstamp =
+       kranal_data.kra_peerstamp = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
 
        rwlock_init(&kranal_data.kra_global_lock);
 
-        for (i = 0; i < RANAL_MAXDEVS; i++ ) {
-                kra_device_t  *dev = &kranal_data.kra_devices[i];
+       for (i = 0; i < RANAL_MAXDEVS; i++ ) {
+               kra_device_t  *dev = &kranal_data.kra_devices[i];
 
-                dev->rad_idx = i;
-                CFS_INIT_LIST_HEAD(&dev->rad_ready_conns);
-                CFS_INIT_LIST_HEAD(&dev->rad_new_conns);
-                cfs_waitq_init(&dev->rad_waitq);
+               dev->rad_idx = i;
+               CFS_INIT_LIST_HEAD(&dev->rad_ready_conns);
+               CFS_INIT_LIST_HEAD(&dev->rad_new_conns);
+               init_waitqueue_head(&dev->rad_waitq);
                spin_lock_init(&dev->rad_lock);
-        }
+       }
 
-        kranal_data.kra_new_min_timeout = CFS_MAX_SCHEDULE_TIMEOUT;
-        cfs_waitq_init(&kranal_data.kra_reaper_waitq);
+       kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT;
+       init_waitqueue_head(&kranal_data.kra_reaper_waitq);
        spin_lock_init(&kranal_data.kra_reaper_lock);
 
-        CFS_INIT_LIST_HEAD(&kranal_data.kra_connd_acceptq);
-        CFS_INIT_LIST_HEAD(&kranal_data.kra_connd_peers);
-        cfs_waitq_init(&kranal_data.kra_connd_waitq);
+       CFS_INIT_LIST_HEAD(&kranal_data.kra_connd_acceptq);
+       CFS_INIT_LIST_HEAD(&kranal_data.kra_connd_peers);
+       init_waitqueue_head(&kranal_data.kra_connd_waitq);
        spin_lock_init(&kranal_data.kra_connd_lock);
 
         CFS_INIT_LIST_HEAD(&kranal_data.kra_idle_txs);
        spin_lock_init(&kranal_data.kra_tx_lock);
 
-        /* OK to call kranal_api_shutdown() to cleanup now */
-        kranal_data.kra_init = RANAL_INIT_DATA;
-        PORTAL_MODULE_USE;
+       /* OK to call kranal_api_shutdown() to cleanup now */
+       kranal_data.kra_init = RANAL_INIT_DATA;
+       try_module_get(THIS_MODULE);
 
         kranal_data.kra_peer_hash_size = RANAL_PEER_HASH_SIZE;
         LIBCFS_ALLOC(kranal_data.kra_peers,
@@ -1656,14 +1658,16 @@ kranal_startup (lnet_ni_t *ni)
         if (rc != 0)
                 goto failed;
 
-        rc = kranal_thread_start(kranal_reaper, NULL);
+       rc = kranal_thread_start(kranal_reaper, NULL, "kranal_reaper");
         if (rc != 0) {
                 CERROR("Can't spawn ranal reaper: %d\n", rc);
                 goto failed;
         }
 
         for (i = 0; i < *kranal_tunables.kra_n_connd; i++) {
-                rc = kranal_thread_start(kranal_connd, (void *)(unsigned long)i);
+               snprintf(name, sizeof(name), "kranal_connd_%02ld", i);
+               rc = kranal_thread_start(kranal_connd,
+                                        (void *)(unsigned long)i, name);
                 if (rc != 0) {
                         CERROR("Can't spawn ranal connd[%d]: %d\n",
                                i, rc);
@@ -1689,7 +1693,8 @@ kranal_startup (lnet_ni_t *ni)
         
         for (i = 0; i < kranal_data.kra_ndevs; i++) {
                 dev = &kranal_data.kra_devices[i];
-                rc = kranal_thread_start(kranal_scheduler, dev);
+               snprintf(name, sizeof(name), "kranal_sd_%02d", dev->rad_idx);
+               rc = kranal_thread_start(kranal_scheduler, dev, name);
                 if (rc != 0) {
                         CERROR("Can't spawn ranal scheduler[%d]: %d\n",
                                i, rc);