Whamcloud - gitweb
LU-1346 libcfs: cleanup macros in kp30.h
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd.c
index 06b1d64..c70ceeb 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -29,7 +27,7 @@
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -130,24 +128,24 @@ ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
         CFS_INIT_LIST_HEAD (&peer->ksnp_routes);
         CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue);
         CFS_INIT_LIST_HEAD (&peer->ksnp_zc_req_list);
-        cfs_spin_lock_init(&peer->ksnp_lock);
+       spin_lock_init(&peer->ksnp_lock);
 
-        cfs_spin_lock_bh (&net->ksnn_lock);
+       spin_lock_bh(&net->ksnn_lock);
 
-        if (net->ksnn_shutdown) {
-                cfs_spin_unlock_bh (&net->ksnn_lock);
+       if (net->ksnn_shutdown) {
+               spin_unlock_bh(&net->ksnn_lock);
 
-                LIBCFS_FREE(peer, sizeof(*peer));
-                CERROR("Can't create peer: network shutdown\n");
-                return -ESHUTDOWN;
-        }
+               LIBCFS_FREE(peer, sizeof(*peer));
+               CERROR("Can't create peer: network shutdown\n");
+               return -ESHUTDOWN;
+       }
 
-        net->ksnn_npeers++;
+       net->ksnn_npeers++;
 
-        cfs_spin_unlock_bh (&net->ksnn_lock);
+       spin_unlock_bh(&net->ksnn_lock);
 
-        *peerp = peer;
-        return 0;
+       *peerp = peer;
+       return 0;
 }
 
 void
@@ -171,9 +169,9 @@ ksocknal_destroy_peer (ksock_peer_t *peer)
          * until they are destroyed, so we can be assured that _all_ state to
          * do with this peer has been cleaned up when its refcount drops to
          * zero. */
-        cfs_spin_lock_bh (&net->ksnn_lock);
-        net->ksnn_npeers--;
-        cfs_spin_unlock_bh (&net->ksnn_lock);
+       spin_lock_bh(&net->ksnn_lock);
+       net->ksnn_npeers--;
+       spin_unlock_bh(&net->ksnn_lock);
 }
 
 ksock_peer_t *
@@ -209,11 +207,11 @@ ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
 {
         ksock_peer_t     *peer;
 
-        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
-        peer = ksocknal_find_peer_locked (ni, id);
-        if (peer != NULL)                       /* +1 ref for caller? */
-                ksocknal_peer_addref(peer);
-        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
+       peer = ksocknal_find_peer_locked(ni, id);
+       if (peer != NULL)                       /* +1 ref for caller? */
+               ksocknal_peer_addref(peer);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
 
         return (peer);
 }
@@ -261,7 +259,7 @@ ksocknal_get_peer_info (lnet_ni_t *ni, int index,
         int                j;
         int                rc = -ENOENT;
 
-        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
 
@@ -319,7 +317,7 @@ ksocknal_get_peer_info (lnet_ni_t *ni, int index,
                 }
         }
  out:
-        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
         return (rc);
 }
 
@@ -474,7 +472,7 @@ ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
                 return (-ENOMEM);
         }
 
-        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+       write_lock_bh(&ksocknal_data.ksnd_global_lock);
 
         /* always called with a ref on ni, so shutdown can't have started */
         LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
@@ -506,7 +504,7 @@ ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
                 route2->ksnr_share_count++;
         }
 
-        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+       write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
         return (0);
 }
@@ -578,7 +576,7 @@ ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
         int                i;
         int                rc = -ENOENT;
 
-        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+       write_lock_bh(&ksocknal_data.ksnd_global_lock);
 
         if (id.nid != LNET_NID_ANY)
                 lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
@@ -618,7 +616,7 @@ ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
                 }
         }
 
-        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+       write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
         ksocknal_txlist_done(ni, &zombies, 1);
 
@@ -634,7 +632,7 @@ ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
         cfs_list_t        *ctmp;
         int                i;
 
-        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
                 cfs_list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
@@ -652,49 +650,38 @@ ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
                                 conn = cfs_list_entry (ctmp, ksock_conn_t,
                                                        ksnc_list);
                                 ksocknal_conn_addref(conn);
-                                cfs_read_unlock (&ksocknal_data. \
+                               read_unlock(&ksocknal_data. \
                                                  ksnd_global_lock);
                                 return (conn);
                         }
                 }
         }
 
-        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
         return (NULL);
 }
 
 ksock_sched_t *
-ksocknal_choose_scheduler_locked (unsigned int irq)
+ksocknal_choose_scheduler_locked(unsigned int cpt)
 {
-        ksock_sched_t    *sched;
-        ksock_irqinfo_t  *info;
-        int               i;
-
-        LASSERT (irq < CFS_NR_IRQS);
-        info = &ksocknal_data.ksnd_irqinfo[irq];
-
-        if (irq != 0 &&                         /* hardware NIC */
-            info->ksni_valid) {                 /* already set up */
-                return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]);
-        }
-
-        /* software NIC (irq == 0) || not associated with a scheduler yet.
-         * Choose the CPU with the fewest connections... */
-        sched = &ksocknal_data.ksnd_schedulers[0];
-        for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++)
-                if (sched->kss_nconns >
-                    ksocknal_data.ksnd_schedulers[i].kss_nconns)
-                        sched = &ksocknal_data.ksnd_schedulers[i];
-
-        if (irq != 0) {                         /* Hardware NIC */
-                info->ksni_valid = 1;
-                info->ksni_sched = (unsigned int)(sched - ksocknal_data.ksnd_schedulers);
-
-                /* no overflow... */
-                LASSERT (info->ksni_sched == (unsigned int)(sched - ksocknal_data.ksnd_schedulers));
-        }
-
-        return (sched);
+       struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt];
+       ksock_sched_t           *sched;
+       int                     i;
+
+       LASSERT(info->ksi_nthreads > 0);
+
+       sched = &info->ksi_scheds[0];
+       /*
+        * NB: it's safe so far, but info->ksi_nthreads could be changed
+        * at runtime when we have dynamic LNet configuration, then we
+        * need to take care of this.
+        */
+       for (i = 1; i < info->ksi_nthreads; i++) {
+               if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
+                       sched = &info->ksi_scheds[i];
+       }
+
+       return sched;
 }
 
 int
@@ -704,7 +691,7 @@ ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
         int                i;
         int                nip;
 
-        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         nip = net->ksnn_ninterfaces;
         LASSERT (nip <= LNET_MAX_INTERFACES);
@@ -712,7 +699,7 @@ ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
         /* Only offer interfaces for additional connections if I have 
          * more than one. */
         if (nip < 2) {
-                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+               read_unlock(&ksocknal_data.ksnd_global_lock);
                 return 0;
         }
 
@@ -721,7 +708,7 @@ ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
                 LASSERT (ipaddrs[i] != 0);
         }
 
-        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
         return (nip);
 }
 
@@ -760,7 +747,7 @@ ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
 int
 ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
 {
-        cfs_rwlock_t       *global_lock = &ksocknal_data.ksnd_global_lock;
+       rwlock_t                *global_lock = &ksocknal_data.ksnd_global_lock;
         ksock_net_t        *net = peer->ksnp_ni->ni_data;
         ksock_interface_t  *iface;
         ksock_interface_t  *best_iface;
@@ -782,7 +769,7 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
         /* Also note that I'm not going to return more than n_peerips
          * interfaces, even if I have more myself */
 
-        cfs_write_lock_bh (global_lock);
+       write_lock_bh(global_lock);
 
         LASSERT (n_peerips <= LNET_MAX_INTERFACES);
         LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
@@ -858,7 +845,7 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
         /* Overwrite input peer IP addresses */
         memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
 
-        cfs_write_unlock_bh (global_lock);
+       write_unlock_bh(global_lock);
 
         return (n_ips);
 }
@@ -868,7 +855,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port,
                        __u32 *peer_ipaddrs, int npeer_ipaddrs)
 {
         ksock_route_t       *newroute = NULL;
-        cfs_rwlock_t        *global_lock = &ksocknal_data.ksnd_global_lock;
+       rwlock_t                *global_lock = &ksocknal_data.ksnd_global_lock;
         lnet_ni_t           *ni = peer->ksnp_ni;
         ksock_net_t         *net = ni->ni_data;
         cfs_list_t          *rtmp;
@@ -886,12 +873,12 @@ ksocknal_create_routes(ksock_peer_t *peer, int port,
          * expecting to be dealing with small numbers of interfaces, so the
          * O(n**3)-ness here shouldn't matter */
 
-        cfs_write_lock_bh (global_lock);
+       write_lock_bh(global_lock);
 
         if (net->ksnn_ninterfaces < 2) {
                 /* Only create additional connections 
                  * if I have > 1 interface */
-                cfs_write_unlock_bh (global_lock);
+               write_unlock_bh(global_lock);
                 return;
         }
 
@@ -901,13 +888,13 @@ ksocknal_create_routes(ksock_peer_t *peer, int port,
                 if (newroute != NULL) {
                         newroute->ksnr_ipaddr = peer_ipaddrs[i];
                 } else {
-                        cfs_write_unlock_bh (global_lock);
+                       write_unlock_bh(global_lock);
 
                         newroute = ksocknal_create_route(peer_ipaddrs[i], port);
                         if (newroute == NULL)
                                 return;
 
-                        cfs_write_lock_bh (global_lock);
+                       write_lock_bh(global_lock);
                 }
 
                 if (peer->ksnp_closing) {
@@ -976,7 +963,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port,
                 newroute = NULL;
         }
 
-        cfs_write_unlock_bh (global_lock);
+       write_unlock_bh(global_lock);
         if (newroute != NULL)
                 ksocknal_route_decref(newroute);
 }
@@ -1004,12 +991,12 @@ ksocknal_accept (lnet_ni_t *ni, cfs_socket_t *sock)
         cr->ksncr_ni   = ni;
         cr->ksncr_sock = sock;
 
-        cfs_spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
+       spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
 
         cfs_list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
         cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq);
 
-        cfs_spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
+       spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
         return 0;
 }
 
@@ -1031,7 +1018,7 @@ int
 ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
                       cfs_socket_t *sock, int type)
 {
-        cfs_rwlock_t      *global_lock = &ksocknal_data.ksnd_global_lock;
+       rwlock_t                *global_lock = &ksocknal_data.ksnd_global_lock;
         CFS_LIST_HEAD     (zombies);
         lnet_process_id_t  peerid;
         cfs_list_t        *tmp;
@@ -1042,7 +1029,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         ksock_peer_t      *peer2;
         ksock_sched_t     *sched;
         ksock_hello_msg_t *hello;
-        unsigned int       irq;
+       int                cpt;
         ksock_tx_t        *tx;
         ksock_tx_t        *txtmp;
         int                rc;
@@ -1053,8 +1040,6 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
 
         LASSERT (active == (type != SOCKLND_CONN_NONE));
 
-        irq = ksocknal_lib_sock_irq (sock);
-
         LIBCFS_ALLOC(conn, sizeof(*conn));
         if (conn == NULL) {
                 rc = -ENOMEM;
@@ -1107,9 +1092,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
                 hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
                 peerid = peer->ksnp_id;
 
-                cfs_write_lock_bh(global_lock);
+               write_lock_bh(global_lock);
                 conn->ksnc_proto = peer->ksnp_proto;
-                cfs_write_unlock_bh(global_lock);
+               write_unlock_bh(global_lock);
 
                 if (conn->ksnc_proto == NULL) {
                          conn->ksnc_proto = &ksocknal_protocol_v3x;
@@ -1140,15 +1125,17 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         LASSERT (conn->ksnc_proto != NULL);
         LASSERT (peerid.nid != LNET_NID_ANY);
 
+       cpt = lnet_cpt_of_nid(peerid.nid);
+
         if (active) {
                 ksocknal_peer_addref(peer);
-                cfs_write_lock_bh (global_lock);
+               write_lock_bh(global_lock);
         } else {
                 rc = ksocknal_create_peer(&peer, ni, peerid);
                 if (rc != 0)
                         goto failed_1;
 
-                cfs_write_lock_bh (global_lock);
+               write_lock_bh(global_lock);
 
                 /* called with a ref on ni, so shutdown can't have started */
                 LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
@@ -1275,7 +1262,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         peer->ksnp_send_keepalive = 0;
         peer->ksnp_error = 0;
 
-        sched = ksocknal_choose_scheduler_locked (irq);
+       sched = ksocknal_choose_scheduler_locked(cpt);
         sched->kss_nconns++;
         conn->ksnc_scheduler = sched;
 
@@ -1301,7 +1288,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
                 ksocknal_queue_tx_locked (tx, conn);
         }
 
-        cfs_write_unlock_bh (global_lock);
+       write_unlock_bh(global_lock);
 
         /* We've now got a new connection.  Any errors from here on are just
          * like "normal" comms errors and we close the connection normally.
@@ -1311,14 +1298,12 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
          *        socket callbacks.
          */
 
-        ksocknal_lib_bind_irq (irq);
-
-        CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d"
-               " incarnation:"LPD64" sched[%d]/%d\n",
-               libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
-               HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr),
-               conn->ksnc_port, incarnation,
-               (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
+       CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d"
+              " incarnation:"LPD64" sched[%d:%d]\n",
+              libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
+              HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr),
+              conn->ksnc_port, incarnation, cpt,
+              (int)(sched - &sched->kss_info->ksi_scheds[0]));
 
         if (active) {
                 /* additional routes after interface exchange? */
@@ -1341,7 +1326,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         if (rc == 0)
                 rc = ksocknal_lib_setup_sock(sock);
 
-        cfs_write_lock_bh(global_lock);
+       write_lock_bh(global_lock);
 
         /* NB my callbacks block while I hold ksnd_global_lock */
         ksocknal_lib_set_callback(sock, conn);
@@ -1349,15 +1334,15 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         if (!active)
                 peer->ksnp_accepting--;
 
-        cfs_write_unlock_bh(global_lock);
+       write_unlock_bh(global_lock);
 
         if (rc != 0) {
-                cfs_write_lock_bh(global_lock);
+               write_lock_bh(global_lock);
                 if (!conn->ksnc_closing) {
                         /* could be closed by another thread */
                         ksocknal_close_conn_locked(conn, rc);
                 }
-                cfs_write_unlock_bh(global_lock);
+               write_unlock_bh(global_lock);
         } else if (ksocknal_connsock_addref(conn) == 0) {
                 /* Allow I/O to proceed. */
                 ksocknal_read_callback(conn);
@@ -1378,7 +1363,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
                 ksocknal_unlink_peer_locked(peer);
         }
 
-        cfs_write_unlock_bh (global_lock);
+       write_unlock_bh(global_lock);
 
         if (warn != NULL) {
                 if (rc < 0)
@@ -1398,9 +1383,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
                         ksocknal_send_hello(ni, conn, peerid.nid, hello);
                 }
 
-                cfs_write_lock_bh(global_lock);
+               write_lock_bh(global_lock);
                 peer->ksnp_accepting--;
-                cfs_write_unlock_bh(global_lock);
+               write_unlock_bh(global_lock);
         }
 
         ksocknal_txlist_done(ni, &zombies, 1);
@@ -1479,10 +1464,10 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
                                                 tx_list)
                                 ksocknal_tx_prep(conn, tx);
 
-                        cfs_spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
-                        cfs_list_splice_init(&peer->ksnp_tx_queue,
-                                             &conn->ksnc_tx_queue);
-                        cfs_spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
+                       spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
+                       cfs_list_splice_init(&peer->ksnp_tx_queue,
+                                            &conn->ksnc_tx_queue);
+                       spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
                 }
 
                 peer->ksnp_proto = NULL;        /* renegotiate protocol version */
@@ -1495,13 +1480,13 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
                 }
         }
 
-        cfs_spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
+       spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
 
-        cfs_list_add_tail (&conn->ksnc_list,
-                           &ksocknal_data.ksnd_deathrow_conns);
-        cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
+       cfs_list_add_tail(&conn->ksnc_list,
+                         &ksocknal_data.ksnd_deathrow_conns);
+       cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq);
 
-        cfs_spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
+       spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
 }
 
 void
@@ -1514,7 +1499,7 @@ ksocknal_peer_failed (ksock_peer_t *peer)
          * tell LNET I think the peer is dead if it's to another kernel and
          * there are no connections or connection attempts in existance. */
 
-        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
             cfs_list_empty(&peer->ksnp_conns) &&
@@ -1524,7 +1509,7 @@ ksocknal_peer_failed (ksock_peer_t *peer)
                 last_alive = peer->ksnp_last_alive;
         }
 
-        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
 
         if (notify)
                 lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0,
@@ -1543,7 +1528,7 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn)
          * abort all buffered data */
         LASSERT (conn->ksnc_sock == NULL);
 
-        cfs_spin_lock(&peer->ksnp_lock);
+       spin_lock(&peer->ksnp_lock);
 
         cfs_list_for_each_entry_safe_typed(tx, tmp, &peer->ksnp_zc_req_list,
                                            ksock_tx_t, tx_zc_list) {
@@ -1558,7 +1543,7 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn)
                 cfs_list_add(&tx->tx_zc_list, &zlist);
         }
 
-        cfs_spin_unlock(&peer->ksnp_lock);
+       spin_unlock(&peer->ksnp_lock);
 
         while (!cfs_list_empty(&zlist)) {
                 tx = cfs_list_entry(zlist.next, ksock_tx_t, tx_zc_list);
@@ -1582,7 +1567,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn)
         LASSERT(conn->ksnc_closing);
 
         /* wake up the scheduler to "send" all remaining packets to /dev/null */
-        cfs_spin_lock_bh (&sched->kss_lock);
+       spin_lock_bh(&sched->kss_lock);
 
         /* a closing conn is always ready to tx */
         conn->ksnc_tx_ready = 1;
@@ -1598,10 +1583,10 @@ ksocknal_terminate_conn (ksock_conn_t *conn)
                 cfs_waitq_signal (&sched->kss_waitq);
         }
 
-        cfs_spin_unlock_bh (&sched->kss_lock);
+       spin_unlock_bh(&sched->kss_lock);
 
-        /* serialise with callbacks */
-        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+       /* serialise with callbacks */
+       write_lock_bh(&ksocknal_data.ksnd_global_lock);
 
         ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
 
@@ -1616,7 +1601,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn)
                 peer->ksnp_error = 0;     /* avoid multiple notifications */
         }
 
-        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+       write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
         if (failed)
                 ksocknal_peer_failed(peer);
@@ -1632,15 +1617,15 @@ ksocknal_terminate_conn (ksock_conn_t *conn)
 void
 ksocknal_queue_zombie_conn (ksock_conn_t *conn)
 {
-        /* Queue the conn for the reaper to destroy */
+       /* Queue the conn for the reaper to destroy */
 
-        LASSERT (cfs_atomic_read(&conn->ksnc_conn_refcount) == 0);
-        cfs_spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
+       LASSERT(cfs_atomic_read(&conn->ksnc_conn_refcount) == 0);
+       spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
 
-        cfs_list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
-        cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq);
+       cfs_list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
+       cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq);
 
-        cfs_spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
+       spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
 }
 
 void
@@ -1736,11 +1721,11 @@ ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
         __u32             ipaddr = conn->ksnc_ipaddr;
         int               count;
 
-        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+       write_lock_bh(&ksocknal_data.ksnd_global_lock);
 
         count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
 
-        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+       write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
         return (count);
 }
@@ -1756,7 +1741,7 @@ ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
         int                 i;
         int                 count = 0;
 
-        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+       write_lock_bh(&ksocknal_data.ksnd_global_lock);
 
         if (id.nid != LNET_NID_ANY)
                 lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
@@ -1779,7 +1764,7 @@ ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
                 }
         }
 
-        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+       write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
         /* wildcards always succeed */
         if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
@@ -1818,10 +1803,10 @@ ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
         cfs_time_t         last_alive = 0;
         cfs_time_t         now = cfs_time_current();
         ksock_peer_t      *peer = NULL;
-        cfs_rwlock_t      *glock = &ksocknal_data.ksnd_global_lock;
+       rwlock_t                *glock = &ksocknal_data.ksnd_global_lock;
         lnet_process_id_t  id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID};
 
-        cfs_read_lock(glock);
+       read_lock(glock);
 
         peer = ksocknal_find_peer_locked(ni, id);
         if (peer != NULL) {
@@ -1847,7 +1832,7 @@ ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
                         connect = 0;
         }
 
-        cfs_read_unlock(glock);
+       read_unlock(glock);
 
         if (last_alive != 0)
                 *when = last_alive;
@@ -1862,13 +1847,13 @@ ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
 
         ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
 
-        cfs_write_lock_bh(glock);
+       write_lock_bh(glock);
 
         peer = ksocknal_find_peer_locked(ni, id);
         if (peer != NULL)
                 ksocknal_launch_all_connections_locked(peer);
 
-        cfs_write_unlock_bh(glock);
+       write_unlock_bh(glock);
         return;
 }
 
@@ -1881,7 +1866,7 @@ ksocknal_push_peer (ksock_peer_t *peer)
         ksock_conn_t     *conn;
 
         for (index = 0; ; index++) {
-                cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+               read_lock(&ksocknal_data.ksnd_global_lock);
 
                 i = 0;
                 conn = NULL;
@@ -1895,7 +1880,7 @@ ksocknal_push_peer (ksock_peer_t *peer)
                         }
                 }
 
-                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+               read_unlock(&ksocknal_data.ksnd_global_lock);
 
                 if (conn == NULL)
                         break;
@@ -1917,7 +1902,7 @@ ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
 
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
                 for (j = 0; ; j++) {
-                        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+                       read_lock(&ksocknal_data.ksnd_global_lock);
 
                         index = 0;
                         peer = NULL;
@@ -1940,7 +1925,7 @@ ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
                                 }
                         }
 
-                        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+                       read_unlock(&ksocknal_data.ksnd_global_lock);
 
                         if (peer != NULL) {
                                 rc = 0;
@@ -1971,7 +1956,7 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
             netmask == 0)
                 return (-EINVAL);
 
-        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+       write_lock_bh(&ksocknal_data.ksnd_global_lock);
 
         iface = ksocknal_ip2iface(ni, ipaddress);
         if (iface != NULL) {
@@ -2011,7 +1996,7 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
                 /* NB only new connections will pay attention to the new interface! */
         }
 
-        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+       write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
         return (rc);
 }
@@ -2069,7 +2054,7 @@ ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
         int                i;
         int                j;
 
-        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+       write_lock_bh(&ksocknal_data.ksnd_global_lock);
 
         for (i = 0; i < net->ksnn_ninterfaces; i++) {
                 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
@@ -2100,7 +2085,7 @@ ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
                 }
         }
 
-        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+       write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
         return (rc);
 }
@@ -2117,7 +2102,7 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                 ksock_net_t       *net = ni->ni_data;
                 ksock_interface_t *iface;
 
-                cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+               read_lock(&ksocknal_data.ksnd_global_lock);
 
                 if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
                         rc = -ENOENT;
@@ -2131,7 +2116,7 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                         data->ioc_u32[3] = iface->ksni_nroutes;
                 }
 
-                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+               read_unlock(&ksocknal_data.ksnd_global_lock);
                 return rc;
         }
 
@@ -2198,8 +2183,7 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                 data->ioc_u32[1] = conn->ksnc_port;
                 data->ioc_u32[2] = conn->ksnc_myipaddr;
                 data->ioc_u32[3] = conn->ksnc_type;
-                data->ioc_u32[4] = (__u32)(conn->ksnc_scheduler -
-                                   ksocknal_data.ksnd_schedulers);
+               data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
                 data->ioc_u32[5] = rxmem;
                 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
                 ksocknal_conn_decref(conn);
@@ -2238,39 +2222,51 @@ ksocknal_free_buffers (void)
 {
         LASSERT (cfs_atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
 
-        if (ksocknal_data.ksnd_schedulers != NULL)
-                LIBCFS_FREE (ksocknal_data.ksnd_schedulers,
-                             sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
+       if (ksocknal_data.ksnd_sched_info != NULL) {
+               struct ksock_sched_info *info;
+               int                     i;
+
+               cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
+                       if (info->ksi_scheds != NULL) {
+                               LIBCFS_FREE(info->ksi_scheds,
+                                           info->ksi_nthreads_max *
+                                           sizeof(info->ksi_scheds[0]));
+                       }
+               }
+               cfs_percpt_free(ksocknal_data.ksnd_sched_info);
+       }
 
         LIBCFS_FREE (ksocknal_data.ksnd_peers,
                      sizeof (cfs_list_t) *
                      ksocknal_data.ksnd_peer_hash_size);
 
-        cfs_spin_lock(&ksocknal_data.ksnd_tx_lock);
+       spin_lock(&ksocknal_data.ksnd_tx_lock);
 
-        if (!cfs_list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
-                cfs_list_t        zlist;
-                ksock_tx_t       *tx;
+       if (!cfs_list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
+               cfs_list_t      zlist;
+               ksock_tx_t      *tx;
 
-                cfs_list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
-                cfs_list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
-                cfs_spin_unlock(&ksocknal_data.ksnd_tx_lock);
+               cfs_list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
+               cfs_list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
+               spin_unlock(&ksocknal_data.ksnd_tx_lock);
 
-                while(!cfs_list_empty(&zlist)) {
-                        tx = cfs_list_entry(zlist.next, ksock_tx_t, tx_list);
-                        cfs_list_del(&tx->tx_list);
-                        LIBCFS_FREE(tx, tx->tx_desc_size);
-                }
-        } else {
-                cfs_spin_unlock(&ksocknal_data.ksnd_tx_lock);
-        }
+               while (!cfs_list_empty(&zlist)) {
+                       tx = cfs_list_entry(zlist.next, ksock_tx_t, tx_list);
+                       cfs_list_del(&tx->tx_list);
+                       LIBCFS_FREE(tx, tx->tx_desc_size);
+               }
+       } else {
+               spin_unlock(&ksocknal_data.ksnd_tx_lock);
+       }
 }
 
 void
-ksocknal_base_shutdown (void)
+ksocknal_base_shutdown(void)
 {
-        ksock_sched_t *sched;
-        int            i;
+       struct ksock_sched_info *info;
+       ksock_sched_t           *sched;
+       int                     i;
+       int                     j;
 
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
                cfs_atomic_read (&libcfs_kmemory));
@@ -2286,46 +2282,63 @@ ksocknal_base_shutdown (void)
                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
                         LASSERT (cfs_list_empty (&ksocknal_data.ksnd_peers[i]));
                 }
+
+               LASSERT(cfs_list_empty(&ksocknal_data.ksnd_nets));
                 LASSERT (cfs_list_empty (&ksocknal_data.ksnd_enomem_conns));
                 LASSERT (cfs_list_empty (&ksocknal_data.ksnd_zombie_conns));
                 LASSERT (cfs_list_empty (&ksocknal_data.ksnd_connd_connreqs));
                 LASSERT (cfs_list_empty (&ksocknal_data.ksnd_connd_routes));
 
-                if (ksocknal_data.ksnd_schedulers != NULL)
-                        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                                ksock_sched_t *kss =
-                                        &ksocknal_data.ksnd_schedulers[i];
-
-                                LASSERT (cfs_list_empty (&kss->kss_tx_conns));
-                                LASSERT (cfs_list_empty (&kss->kss_rx_conns));
-                                LASSERT (cfs_list_empty (&kss-> \
-                                                         kss_zombie_noop_txs));
-                                LASSERT (kss->kss_nconns == 0);
-                        }
-
-                /* flag threads to terminate; wake and wait for them to die */
-                ksocknal_data.ksnd_shuttingdown = 1;
-                cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq);
-                cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq);
-
-                if (ksocknal_data.ksnd_schedulers != NULL)
-                        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                                sched = &ksocknal_data.ksnd_schedulers[i];
-                                cfs_waitq_broadcast(&sched->kss_waitq);
-                        }
+               if (ksocknal_data.ksnd_sched_info != NULL) {
+                       cfs_percpt_for_each(info, i,
+                                           ksocknal_data.ksnd_sched_info) {
+                               if (info->ksi_scheds == NULL)
+                                       continue;
+
+                               for (j = 0; j < info->ksi_nthreads_max; j++) {
+
+                                       sched = &info->ksi_scheds[j];
+                                       LASSERT(cfs_list_empty(&sched->\
+                                                              kss_tx_conns));
+                                       LASSERT(cfs_list_empty(&sched->\
+                                                              kss_rx_conns));
+                                       LASSERT(cfs_list_empty(&sched-> \
+                                                 kss_zombie_noop_txs));
+                                       LASSERT(sched->kss_nconns == 0);
+                               }
+                       }
+               }
+
+               /* flag threads to terminate; wake and wait for them to die */
+               ksocknal_data.ksnd_shuttingdown = 1;
+               cfs_waitq_broadcast(&ksocknal_data.ksnd_connd_waitq);
+               cfs_waitq_broadcast(&ksocknal_data.ksnd_reaper_waitq);
+
+               if (ksocknal_data.ksnd_sched_info != NULL) {
+                       cfs_percpt_for_each(info, i,
+                                           ksocknal_data.ksnd_sched_info) {
+                               if (info->ksi_scheds == NULL)
+                                       continue;
+
+                               for (j = 0; j < info->ksi_nthreads_max; j++) {
+                                       sched = &info->ksi_scheds[j];
+                                       cfs_waitq_broadcast(&sched->kss_waitq);
+                               }
+                       }
+               }
 
                 i = 4;
-                cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+               read_lock(&ksocknal_data.ksnd_global_lock);
                 while (ksocknal_data.ksnd_nthreads != 0) {
                         i++;
                         CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
                                "waiting for %d threads to terminate\n",
                                 ksocknal_data.ksnd_nthreads);
-                        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+                       read_unlock(&ksocknal_data.ksnd_global_lock);
                         cfs_pause(cfs_time_seconds(1));
-                        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+                       read_lock(&ksocknal_data.ksnd_global_lock);
                 }
-                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+               read_unlock(&ksocknal_data.ksnd_global_lock);
 
                 ksocknal_free_buffers();
 
@@ -2355,10 +2368,11 @@ ksocknal_new_incarnation (void)
 }
 
 int
-ksocknal_base_startup (void)
+ksocknal_base_startup(void)
 {
-        int               rc;
-        int               i;
+       struct ksock_sched_info *info;
+       int                     rc;
+       int                     i;
 
         LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
         LASSERT (ksocknal_data.ksnd_nnets == 0);
@@ -2375,53 +2389,65 @@ ksocknal_base_startup (void)
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
                 CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
 
-        cfs_rwlock_init(&ksocknal_data.ksnd_global_lock);
+       rwlock_init(&ksocknal_data.ksnd_global_lock);
+       CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
 
-        cfs_spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
+       spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
         cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq);
 
-        cfs_spin_lock_init (&ksocknal_data.ksnd_connd_lock);
+       spin_lock_init(&ksocknal_data.ksnd_connd_lock);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes);
         cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq);
 
-        cfs_spin_lock_init (&ksocknal_data.ksnd_tx_lock);
+       spin_lock_init(&ksocknal_data.ksnd_tx_lock);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs);
 
-        /* NB memset above zeros whole of ksocknal_data, including
-         * ksocknal_data.ksnd_irqinfo[all].ksni_valid */
+       /* NB memset above zeros whole of ksocknal_data */
 
-        /* flag lists/ptrs/locks initialised */
-        ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
-        PORTAL_MODULE_USE;
+       /* flag lists/ptrs/locks initialised */
+       ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
+       PORTAL_MODULE_USE;
 
-        ksocknal_data.ksnd_nschedulers = ksocknal_nsched();
-        LIBCFS_ALLOC(ksocknal_data.ksnd_schedulers,
-                     sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
-        if (ksocknal_data.ksnd_schedulers == NULL)
-                goto failed;
+       ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
+                                                        sizeof(*info));
+       if (ksocknal_data.ksnd_sched_info == NULL)
+               goto failed;
 
-        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
+       cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
+               ksock_sched_t   *sched;
+               int             nthrs;
 
-                cfs_spin_lock_init (&kss->kss_lock);
-                CFS_INIT_LIST_HEAD (&kss->kss_rx_conns);
-                CFS_INIT_LIST_HEAD (&kss->kss_tx_conns);
-                CFS_INIT_LIST_HEAD (&kss->kss_zombie_noop_txs);
-                cfs_waitq_init (&kss->kss_waitq);
-        }
+               nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
+               if (*ksocknal_tunables.ksnd_nscheds > 0) {
+                       nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
+               } else {
+                       /* max to half of CPUs, assume another half should be
+                        * reserved for upper layer modules */
+                       nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
+               }
 
-        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                rc = ksocknal_thread_start (ksocknal_scheduler,
-                                            &ksocknal_data.ksnd_schedulers[i]);
-                if (rc != 0) {
-                        CERROR("Can't spawn socknal scheduler[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
+               info->ksi_nthreads_max = nthrs;
+               info->ksi_cpt = i;
+
+               LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i,
+                                info->ksi_nthreads_max * sizeof(*sched));
+               if (info->ksi_scheds == NULL)
+                       goto failed;
+
+               for (; nthrs > 0; nthrs--) {
+                       sched = &info->ksi_scheds[nthrs - 1];
+
+                       sched->kss_info = info;
+                       spin_lock_init(&sched->kss_lock);
+                       CFS_INIT_LIST_HEAD(&sched->kss_rx_conns);
+                       CFS_INIT_LIST_HEAD(&sched->kss_tx_conns);
+                       CFS_INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
+                       cfs_waitq_init(&sched->kss_waitq);
+               }
         }
 
         ksocknal_data.ksnd_connd_starting         = 0;
@@ -2439,22 +2465,25 @@ ksocknal_base_startup (void)
         }
 
         for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
-                cfs_spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-                ksocknal_data.ksnd_connd_starting++;
-                cfs_spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
-
-                rc = ksocknal_thread_start (ksocknal_connd,
-                                            (void *)((ulong_ptr_t)i));
-                if (rc != 0) {
-                        cfs_spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
-                        ksocknal_data.ksnd_connd_starting--;
-                        cfs_spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+               char name[16];
+               spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+               ksocknal_data.ksnd_connd_starting++;
+               spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+
+
+               snprintf(name, sizeof(name), "socknal_cd%02d", i);
+               rc = ksocknal_thread_start(ksocknal_connd,
+                                          (void *)((ulong_ptr_t)i), name);
+               if (rc != 0) {
+                       spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+                       ksocknal_data.ksnd_connd_starting--;
+                       spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
                         CERROR("Can't spawn socknal connd: %d\n", rc);
                         goto failed;
                 }
         }
 
-        rc = ksocknal_thread_start (ksocknal_reaper, NULL);
+       rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
         if (rc != 0) {
                 CERROR ("Can't spawn socknal reaper: %d\n", rc);
                 goto failed;
@@ -2473,11 +2502,11 @@ ksocknal_base_startup (void)
 void
 ksocknal_debug_peerhash (lnet_ni_t *ni)
 {
-        ksock_peer_t     *peer = NULL;
-        cfs_list_t       *tmp;
-        int               i;
+       ksock_peer_t    *peer = NULL;
+       cfs_list_t      *tmp;
+       int             i;
 
-        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
                 cfs_list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
@@ -2520,7 +2549,7 @@ ksocknal_debug_peerhash (lnet_ni_t *ni)
                 }
         }
 
-        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
         return;
 }
 
@@ -2537,37 +2566,38 @@ ksocknal_shutdown (lnet_ni_t *ni)
         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
         LASSERT(ksocknal_data.ksnd_nnets > 0);
 
-        cfs_spin_lock_bh (&net->ksnn_lock);
-        net->ksnn_shutdown = 1;                 /* prevent new peers */
-        cfs_spin_unlock_bh (&net->ksnn_lock);
+       spin_lock_bh(&net->ksnn_lock);
+       net->ksnn_shutdown = 1;                 /* prevent new peers */
+       spin_unlock_bh(&net->ksnn_lock);
 
-        /* Delete all peers */
-        ksocknal_del_peer(ni, anyid, 0);
+       /* Delete all peers */
+       ksocknal_del_peer(ni, anyid, 0);
 
-        /* Wait for all peer state to clean up */
-        i = 2;
-        cfs_spin_lock_bh (&net->ksnn_lock);
-        while (net->ksnn_npeers != 0) {
-                cfs_spin_unlock_bh (&net->ksnn_lock);
+       /* Wait for all peer state to clean up */
+       i = 2;
+       spin_lock_bh(&net->ksnn_lock);
+       while (net->ksnn_npeers != 0) {
+               spin_unlock_bh(&net->ksnn_lock);
 
-                i++;
-                CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                       "waiting for %d peers to disconnect\n",
-                       net->ksnn_npeers);
-                cfs_pause(cfs_time_seconds(1));
+               i++;
+               CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+                      "waiting for %d peers to disconnect\n",
+                      net->ksnn_npeers);
+               cfs_pause(cfs_time_seconds(1));
 
-                ksocknal_debug_peerhash(ni);
+               ksocknal_debug_peerhash(ni);
 
-                cfs_spin_lock_bh (&net->ksnn_lock);
-        }
-        cfs_spin_unlock_bh (&net->ksnn_lock);
+               spin_lock_bh(&net->ksnn_lock);
+       }
+       spin_unlock_bh(&net->ksnn_lock);
 
         for (i = 0; i < net->ksnn_ninterfaces; i++) {
                 LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
                 LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
         }
 
-        LIBCFS_FREE(net, sizeof(*net));
+       cfs_list_del(&net->ksnn_list);
+       LIBCFS_FREE(net, sizeof(*net));
 
         ksocknal_data.ksnd_nnets--;
         if (ksocknal_data.ksnd_nnets == 0)
@@ -2618,6 +2648,8 @@ ksocknal_enumerate_interfaces(ksock_net_t *net)
 
                 net->ksnn_interfaces[j].ksni_ipaddr = ip;
                 net->ksnn_interfaces[j].ksni_netmask = mask;
+               strncpy(&net->ksnn_interfaces[j].ksni_name[0],
+                       names[i], IFNAMSIZ);
                 j++;
         }
 
@@ -2630,6 +2662,120 @@ ksocknal_enumerate_interfaces(ksock_net_t *net)
 }
 
 int
+ksocknal_search_new_ipif(ksock_net_t *net)
+{
+       int     new_ipif = 0;
+       int     i;
+
+       for (i = 0; i < net->ksnn_ninterfaces; i++) {
+               char            *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
+               char            *colon = strchr(ifnam, ':');
+               int             found  = 0;
+               ksock_net_t     *tmp;
+               int             j;
+
+               if (colon != NULL) /* ignore alias device */
+                       *colon = 0;
+
+               cfs_list_for_each_entry(tmp, &ksocknal_data.ksnd_nets,
+                                       ksnn_list) {
+                       for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
+                               char *ifnam2 = &tmp->ksnn_interfaces[j].\
+                                            ksni_name[0];
+                               char *colon2 = strchr(ifnam2, ':');
+
+                               if (colon2 != NULL)
+                                       *colon2 = 0;
+
+                               found = strcmp(ifnam, ifnam2) == 0;
+                               if (colon2 != NULL)
+                                       *colon2 = ':';
+                       }
+                       if (found)
+                               break;
+               }
+
+               new_ipif += !found;
+               if (colon != NULL)
+                       *colon = ':';
+       }
+
+       return new_ipif;
+}
+
+int
+ksocknal_start_schedulers(struct ksock_sched_info *info)
+{
+       int     nthrs;
+       int     rc = 0;
+       int     i;
+
+       if (info->ksi_nthreads == 0) {
+               if (*ksocknal_tunables.ksnd_nscheds > 0) {
+                       nthrs = info->ksi_nthreads_max;
+               } else {
+                       nthrs = cfs_cpt_weight(lnet_cpt_table(),
+                                              info->ksi_cpt);
+                       nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
+                       nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
+               }
+               nthrs = min(nthrs, info->ksi_nthreads_max);
+       } else {
+               LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
+               /* increase two threads if there is new interface */
+               nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
+       }
+
+       for (i = 0; i < nthrs; i++) {
+               long            id;
+               char            name[20];
+               ksock_sched_t   *sched;
+               id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
+               sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
+               snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
+                        info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
+
+               rc = ksocknal_thread_start(ksocknal_scheduler,
+                                          (void *)id, name);
+               if (rc == 0)
+                       continue;
+
+               CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
+                      info->ksi_cpt, info->ksi_nthreads + i, rc);
+               break;
+       }
+
+       info->ksi_nthreads += i;
+       return rc;
+}
+
+int
+ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts)
+{
+       int     newif = ksocknal_search_new_ipif(net);
+       int     rc;
+       int     i;
+
+       LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
+
+       for (i = 0; i < ncpts; i++) {
+               struct ksock_sched_info *info;
+               int cpt = (cpts == NULL) ? i : cpts[i];
+
+               LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
+               info = ksocknal_data.ksnd_sched_info[cpt];
+
+               if (!newif && info->ksi_nthreads > 0)
+                       continue;
+
+               rc = ksocknal_start_schedulers(info);
+               if (rc != 0)
+                       return rc;
+       }
+       return 0;
+}
+
+int
 ksocknal_startup (lnet_ni_t *ni)
 {
         ksock_net_t  *net;
@@ -2648,8 +2794,7 @@ ksocknal_startup (lnet_ni_t *ni)
         if (net == NULL)
                 goto fail_0;
 
-        memset(net, 0, sizeof(*net));
-        cfs_spin_lock_init(&net->ksnn_lock);
+       spin_lock_init(&net->ksnn_lock);
         net->ksnn_incarnation = ksocknal_new_incarnation();
         ni->ni_data = net;
         ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
@@ -2686,12 +2831,21 @@ ksocknal_startup (lnet_ni_t *ni)
                                        ni->ni_interfaces[i]);
                                 goto fail_1;
                         }
-                }
-                net->ksnn_ninterfaces = i;
-        }
 
-        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
-                                net->ksnn_interfaces[0].ksni_ipaddr);
+                       strncpy(&net->ksnn_interfaces[i].ksni_name[0],
+                               ni->ni_interfaces[i], IFNAMSIZ);
+               }
+               net->ksnn_ninterfaces = i;
+       }
+
+       /* call it before add it to ksocknal_data.ksnd_nets */
+       rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
+       if (rc != 0)
+               goto fail_1;
+
+       ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
+                               net->ksnn_interfaces[0].ksni_ipaddr);
+       cfs_list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
 
         ksocknal_data.ksnd_nnets++;