Whamcloud - gitweb
* moved #defines in qswnal.h around to separate fixed constants
author: eeb <eeb>
Thu, 2 Jun 2005 07:09:25 +0000 (07:09 +0000)
committer: eeb <eeb>
Thu, 2 Jun 2005 07:09:25 +0000 (07:09 +0000)
     from ones that can be set via modparams

*    placeholder vibnal for newconfig; still need to get the IPoIB
     IP address somehow.

lnet/klnds/qswlnd/qswlnd.h
lnet/klnds/viblnd/Makefile.in
lnet/klnds/viblnd/viblnd.c
lnet/klnds/viblnd/viblnd.h
lnet/klnds/viblnd/viblnd_cb.c
lnet/klnds/viblnd/viblnd_modparams.c [new file with mode: 0644]

index 767fce8..8269d29 100644 (file)
@@ -92,9 +92,6 @@ typedef unsigned long kqsw_csum_t;
  * Performance Tuning defines
  * NB no mention of PAGE_SIZE for interoperability
  */
-#define KQSW_MAXPAYLOAD                 PTL_MTU
-#define KQSW_SMALLPAYLOAD               ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */
-
 #define KQSW_TX_MAXCONTIG               (1<<10) /* largest payload that gets made contiguous on transmit */
 
 #define KQSW_NTXMSGS                    8       /* # normal transmit messages */
@@ -106,11 +103,14 @@ typedef unsigned long kqsw_csum_t;
 #define KQSW_NRXMSGS_SMALL              256     /* # small receive buffers */
 #define KQSW_EP_ENVELOPES_SMALL         2048    /* # small ep envelopes */
 
-#define KQSW_RESCHED                    100     /* # busy loops that forces scheduler to yield */
-
 #define KQSW_OPTIMIZED_GETS             1       /* optimize gets >= this size */
 #define KQSW_OPTIMIZED_PUTS            (32<<10) /* optimize puts >= this size */
 
+/* fixed constants */
+#define KQSW_MAXPAYLOAD                 PTL_MTU
+#define KQSW_SMALLPAYLOAD               ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */
+#define KQSW_RESCHED                    100     /* # busy loops that forces scheduler to yield */
+
 /*
  * derived constants
  */
index 5287e70..5fe1630 100644 (file)
@@ -1,5 +1,5 @@
 MODULES := kvibnal
-kvibnal-objs := vibnal.o vibnal_cb.o
+kvibnal-objs := vibnal.o vibnal_cb.o vibnal_modparams.o
 
 EXTRA_POST_CFLAGS := @VIBCPPFLAGS@
 
index 32e1ff1..0486ca9 100644 (file)
@@ -36,25 +36,6 @@ ptl_nal_t               kibnal_nal = {
 };
 
 kib_data_t              kibnal_data;
-kib_tunables_t          kibnal_tunables;
-
-#ifdef CONFIG_SYSCTL
-#define IBNAL_SYSCTL             202
-
-#define IBNAL_SYSCTL_TIMEOUT     1
-
-static ctl_table kibnal_ctl_table[] = {
-        {IBNAL_SYSCTL_TIMEOUT, "timeout", 
-         &kibnal_tunables.kib_io_timeout, sizeof (int),
-         0644, NULL, &proc_dointvec},
-        { 0 }
-};
-
-static ctl_table kibnal_top_ctl_table[] = {
-        {IBNAL_SYSCTL, "vibnal", NULL, 0, 0555, kibnal_ctl_table},
-        { 0 }
-};
-#endif
 
 void vibnal_assert_wire_constants (void)
 {
@@ -181,13 +162,6 @@ void vibnal_assert_wire_constants (void)
         CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.completion) == 12);
 }
 
-void
-kibnal_pause(int ticks)
-{
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        schedule_timeout(ticks);
-}
-
 __u32 
 kibnal_cksum (void *ptr, int nob)
 {
@@ -225,10 +199,11 @@ kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid,
         msg->ibm_dstnid   = dstnid;
         msg->ibm_dststamp = dststamp;
         msg->ibm_seq      = seq;
-#if IBNAL_CKSUM
-        /* NB ibm_cksum zero while computing cksum */
-        msg->ibm_cksum    = kibnal_cksum(msg, msg->ibm_nob);
-#endif
+
+        if (*kibnal_tunables.kib_cksum) {
+                /* NB ibm_cksum zero while computing cksum */
+                msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
+        }
 }
 
 int
@@ -421,103 +396,75 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob)
 }
 
 int
-kibnal_set_mynid(ptl_nid_t nid)
+kibnal_start_listener (ptl_ni_t *ni)
 {
-        static cm_listen_data_t info;           /* protected by kib_nid_mutex */
+        static cm_listen_data_t info;
 
-        ptl_ni_t        *ni = kibnal_data.kib_ni;
-        int              rc;
         cm_return_t      cmrc;
 
-        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, ni->ni_nid);
+        LASSERT (kibnal_data.kib_listen_handle == NULL);
 
-        down (&kibnal_data.kib_nid_mutex);
-
-        if (nid == ni->ni_nid) {
-                /* no change of NID */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
+        kibnal_data.kib_listen_handle = 
+                cm_create_cep(cm_cep_transp_rc);
+        if (kibnal_data.kib_listen_handle == NULL) {
+                CERROR ("Can't create listen CEP\n");
+                return -ENOMEM;
         }
 
-        CDEBUG(D_NET, "NID "LPX64"("LPX64")\n", ni->ni_nid, nid);
+        CDEBUG(D_NET, "Created CEP %p for listening\n", 
+               kibnal_data.kib_listen_handle);
 
-        if (kibnal_data.kib_listen_handle != NULL) {
-                cmrc = cm_cancel(kibnal_data.kib_listen_handle);
-                if (cmrc != cm_stat_success)
-                        CERROR ("Error %d stopping listener\n", cmrc);
+        memset(&info, 0, sizeof(info));
+        info.listen_addr.end_pt.sid = 
+                (__u64)(*kibnal_tunables.kib_service_number);
 
-                kibnal_pause(HZ/10);            /* ensure no more callbacks */
+        cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
+                         kibnal_listen_callback, NULL);
+        if (cmrc == cm_stat_success)
+                return 0;
         
-                cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
-                if (cmrc != vv_return_ok)
-                        CERROR ("Error %d destroying CEP\n", cmrc);
-
-                kibnal_data.kib_listen_handle = NULL;
-        }
-
-        /* Change NID.  NB queued passive connection requests (if any) will be
-         * rejected with an incorrect destination NID */
-        ni->ni_nid = nid;
-        kibnal_data.kib_incarnation++;
-        mb();
-
-        /* Delete all existing peers and their connections after new
-         * NID/incarnation set to ensure no old connections in our brave
-         * new world. */
-        kibnal_del_peer (PTL_NID_ANY, 0);
-
-        if (ni->ni_nid != PTL_NID_ANY) {    /* got a new NID to install */
-                kibnal_data.kib_listen_handle = 
-                        cm_create_cep(cm_cep_transp_rc);
-                if (kibnal_data.kib_listen_handle == NULL) {
-                        CERROR ("Can't create listen CEP\n");
-                        rc = -ENOMEM;
-                        goto failed_0;
-                }
+        CERROR ("cm_listen error: %d\n", cmrc);
 
-                CDEBUG(D_NET, "Created CEP %p for listening\n", 
-                       kibnal_data.kib_listen_handle);
+        cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
+        LASSERT (cmrc == cm_stat_success);
 
-                memset(&info, 0, sizeof(info));
-                info.listen_addr.end_pt.sid = kibnal_data.kib_svc_id;
+        kibnal_data.kib_listen_handle = NULL;
+        return -EINVAL;
+}
 
-                cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
-                                 kibnal_listen_callback, NULL);
-                if (cmrc != 0) {
-                        CERROR ("cm_listen error: %d\n", cmrc);
-                        rc = -EINVAL;
-                        goto failed_1;
-                }
-        }
+void
+kibnal_stop_listener(ptl_ni_t *ni)
+{
+        cm_return_t      cmrc;
 
-        up (&kibnal_data.kib_nid_mutex);
-        return (0);
+        LASSERT (kibnal_data.kib_listen_handle != NULL);
+        
+        cmrc = cm_cancel(kibnal_data.kib_listen_handle);
+        if (cmrc != cm_stat_success)
+                CERROR ("Error %d stopping listener\n", cmrc);
 
- failed_1:
+        libcfs_pause(cfs_time_seconds(1)/10);   /* ensure no more callbacks */
+        
         cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
-        LASSERT (cmrc == cm_stat_success);
+        if (cmrc != vv_return_ok)
+                CERROR ("Error %d destroying CEP\n", cmrc);
+
         kibnal_data.kib_listen_handle = NULL;
- failed_0:
-        ni->ni_nid = PTL_NID_ANY;
-        kibnal_data.kib_incarnation++;
-        mb();
-        kibnal_del_peer (PTL_NID_ANY, 0);
-        up (&kibnal_data.kib_nid_mutex);
-        return rc;
 }
 
-kib_peer_t *
-kibnal_create_peer (ptl_nid_t nid)
+int
+kibnal_create_peer (kib_peer_t **peerp, ptl_nid_t nid)
 {
-        kib_peer_t *peer;
+        kib_peer_t     *peer;
+        unsigned long   flags;
+        int             rc;
 
         LASSERT (nid != PTL_NID_ANY);
 
         PORTAL_ALLOC(peer, sizeof (*peer));
         if (peer == NULL) {
                 CERROR("Canot allocate perr\n");
-                return (NULL);
+                return -ENOMEM;
         }
 
         memset(peer, 0, sizeof(*peer));         /* zero flags etc */
@@ -530,20 +477,39 @@ kibnal_create_peer (ptl_nid_t nid)
         INIT_LIST_HEAD (&peer->ibp_tx_queue);
 
         peer->ibp_reconnect_time = jiffies;
-        peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
+        peer->ibp_reconnect_interval = 
+                *kibnal_tunables.kib_min_reconnect_interval * HZ;
 
-        atomic_inc (&kibnal_data.kib_npeers);
-        if (atomic_read(&kibnal_data.kib_npeers) <= IBNAL_CONCURRENT_PEERS)
-                return peer;
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
         
-        CERROR("Too many peers: CQ will overflow\n");
-        kibnal_peer_decref(peer);
-        return NULL;
+        if (kibnal_data.kib_npeers < 
+            *kibnal_tunables.kib_concurrent_peers) {
+                rc = -EOVERFLOW;        /* !! but at least it distinguishes */
+        } else if (kibnal_data.kib_listen_handle == NULL) {
+                rc = -ESHUTDOWN;        /* shutdown has started */
+        } else {
+                rc = 0;
+                kibnal_data.kib_npeers++;
+        }
+        
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
+        
+        if (rc != 0) {
+                CERROR("Can't create peer: %s\n", 
+                       (rc == -ESHUTDOWN) ? "shutting down" : 
+                       "too many peers");
+                PORTAL_FREE(peer, sizeof(*peer));
+        } else {
+                *peerp = peer;
+        }
+        
+        return rc;
 }
 
 void
 kibnal_destroy_peer (kib_peer_t *peer)
 {
+        unsigned long flags;
 
         LASSERT (atomic_read (&peer->ibp_refcount) == 0);
         LASSERT (peer->ibp_persistence == 0);
@@ -554,18 +520,22 @@ kibnal_destroy_peer (kib_peer_t *peer)
         
         PORTAL_FREE (peer, sizeof (*peer));
 
+        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+
         /* NB a peer's connections keep a reference on their peer until
          * they are destroyed, so we can be assured that _all_ state to do
          * with this peer has been cleaned up when its refcount drops to
          * zero. */
-        atomic_dec (&kibnal_data.kib_npeers);
+        kibnal_data.kib_npeers--;
+
+        write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
 }
 
-/* the caller is responsible for accounting for the additional reference
- * that this creates */
 kib_peer_t *
 kibnal_find_peer_locked (ptl_nid_t nid)
 {
+        /* the caller is responsible for accounting the additional reference
+         * that this creates */
         struct list_head *peer_list = kibnal_nid2peerlist (nid);
         struct list_head *tmp;
         kib_peer_t       *peer;
@@ -643,15 +613,16 @@ kibnal_add_persistent_peer (ptl_nid_t nid, __u32 ip)
         kib_peer_t        *peer;
         kib_peer_t        *peer2;
         unsigned long      flags;
+        int                rc;
 
         CDEBUG(D_NET, LPX64"@%08x\n", nid, ip);
         
         if (nid == PTL_NID_ANY)
                 return (-EINVAL);
 
-        peer = kibnal_create_peer (nid);
-        if (peer == NULL)
-                return (-ENOMEM);
+        rc = kibnal_create_peer(&peer, nid);
+        if (rc != 0)
+                return rc;
 
         write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
@@ -1176,7 +1147,7 @@ kibnal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg)
 
         LASSERT (ni == kibnal_data.kib_ni);
 
-        switch(data->ioc_command) {
+        switch(cmd) {
         case IOC_PORTAL_GET_PEER: {
                 ptl_nid_t   nid = 0;
                 __u32       ip = 0;
@@ -1187,7 +1158,7 @@ kibnal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg)
                 data->ioc_nid    = nid;
                 data->ioc_count  = share_count;
                 data->ioc_u32[0] = ip;
-                data->ioc_u32[1] = IBNAL_SERVICE_NUMBER; /* port */
+                data->ioc_u32[1] = *kibnal_tunables.kib_service_number; /* port */
                 break;
         }
         case IOC_PORTAL_ADD_PEER: {
@@ -1216,10 +1187,14 @@ kibnal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg)
                 break;
         }
         case IOC_PORTAL_REGISTER_MYNID: {
-                if (data->ioc_nid == PTL_NID_ANY)
+                if (ni->ni_nid == data->ioc_nid) {
+                        rc = 0;
+                } else {
+                        CERROR("obsolete IOC_PORTAL_REGISTER_MYNID: %s(%s)\n",
+                               libcfs_nid2str(data->ioc_nid),
+                               libcfs_nid2str(ni->ni_nid));
                         rc = -EINVAL;
-                else
-                        rc = kibnal_set_mynid (data->ioc_nid);
+                }
                 break;
         }
         }
@@ -1331,14 +1306,14 @@ kibnal_alloc_tx_descs (void)
         int    i;
         
         PORTAL_ALLOC (kibnal_data.kib_tx_descs,
-                      IBNAL_TX_MSGS * sizeof(kib_tx_t));
+                      IBNAL_TX_MSGS() * sizeof(kib_tx_t));
         if (kibnal_data.kib_tx_descs == NULL)
                 return -ENOMEM;
         
         memset(kibnal_data.kib_tx_descs, 0,
-               IBNAL_TX_MSGS * sizeof(kib_tx_t));
+               IBNAL_TX_MSGS() * sizeof(kib_tx_t));
 
-        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
                 kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
 
                 PORTAL_ALLOC(tx->tx_wrq, 
@@ -1371,7 +1346,7 @@ kibnal_free_tx_descs (void)
         if (kibnal_data.kib_tx_descs == NULL)
                 return;
 
-        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
                 kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
 
                 if (tx->tx_wrq != NULL)
@@ -1391,7 +1366,7 @@ kibnal_free_tx_descs (void)
         }
 
         PORTAL_FREE(kibnal_data.kib_tx_descs,
-                    IBNAL_TX_MSGS * sizeof(kib_tx_t));
+                    IBNAL_TX_MSGS() * sizeof(kib_tx_t));
 }
 
 int
@@ -1412,15 +1387,15 @@ kibnal_setup_tx_descs (void)
         /* No fancy arithmetic when we do the buffer calculations */
         CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
 
-        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, IBNAL_TX_MSG_PAGES, 
-                                0);
+        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, 
+                                IBNAL_TX_MSG_PAGES(), 0);
         if (rc != 0)
                 return (rc);
 
         /* ignored for the whole_mem case */
         vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
 
-        for (i = 0; i < IBNAL_TX_MSGS; i++) {
+        for (i = 0; i < IBNAL_TX_MSGS(); i++) {
                 page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
                 tx = &kibnal_data.kib_tx_descs[i];
 
@@ -1445,7 +1420,7 @@ kibnal_setup_tx_descs (void)
 #else
                 tx->tx_vaddr = vaddr;
 #endif
-                tx->tx_isnblk = (i >= IBNAL_NTX);
+                tx->tx_isnblk = (i >= *kibnal_tunables.kib_ntx);
                 tx->tx_mapped = KIB_TX_UNMAPPED;
 
                 CDEBUG(D_NET, "Tx[%d] %p->%p[%x:"LPX64"]\n", i, tx, 
@@ -1459,7 +1434,7 @@ kibnal_setup_tx_descs (void)
                                   &kibnal_data.kib_idle_txs);
 
                 vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES);
+                LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES());
 
                 page_offset += IBNAL_MSG_SIZE;
                 LASSERT (page_offset <= PAGE_SIZE);
@@ -1467,7 +1442,7 @@ kibnal_setup_tx_descs (void)
                 if (page_offset == PAGE_SIZE) {
                         page_offset = 0;
                         ipage++;
-                        LASSERT (ipage <= IBNAL_TX_MSG_PAGES);
+                        LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
                 }
         }
         
@@ -1477,8 +1452,9 @@ kibnal_setup_tx_descs (void)
 void
 kibnal_shutdown (ptl_ni_t *ni)
 {
-        int         i;
-        vv_return_t vvrc;
+        unsigned long flags;
+        int           i;
+        vv_return_t   vvrc;
 
         LASSERT (ni == kibnal_data.kib_ni);
         LASSERT (ni->ni_data == &kibnal_data);
@@ -1489,20 +1465,28 @@ kibnal_shutdown (ptl_ni_t *ni)
         switch (kibnal_data.kib_init) {
 
         case IBNAL_INIT_ALL:
-                /* resetting my NID removes my listener and nukes all current
-                 * peers and their connections */
-                kibnal_set_mynid (PTL_NID_ANY);
+                /* stop accepting connections and prevent new peers */
+                kibnal_stop_listener(ni);
+
+                /* nuke all existing peers */
+                kibnal_del_peer(PTL_NID_ANY);
 
                 /* Wait for all peer state to clean up */
                 i = 2;
-                while (atomic_read (&kibnal_data.kib_npeers) != 0) {
+                write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
+                while (kibnal_data.kib_npeers != 0) {
+                        write_unlock_irqrestore(&kibnal_data.kib_global_lock, 
+                                                flags);
                         i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */
                                "waiting for %d peers to disconnect\n",
-                               atomic_read (&kibnal_data.kib_npeers));
+                               kibnal_data.kib_npeers);
                         set_current_state (TASK_UNINTERRUPTIBLE);
                         schedule_timeout (HZ);
+
+                        write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
                 }
+                write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
                 /* fall through */
 
         case IBNAL_INIT_CQ:
@@ -1539,7 +1523,7 @@ kibnal_shutdown (ptl_ni_t *ni)
                 /* fall through */
 
         case IBNAL_INIT_DATA:
-                LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
+                LASSERT (kibnal_data.kib_npeers == 0);
                 LASSERT (kibnal_data.kib_peers != NULL);
                 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
                         LASSERT (list_empty (&kibnal_data.kib_peers[i]));
@@ -1610,6 +1594,8 @@ kibnal_startup (ptl_ni_t *ni)
                 CERROR("Explicit interface config not supported\n");
                 return PTL_FAIL;
         }
+
+#warning discover IPoIB IP address here
         
         PORTAL_MODULE_USE;
         memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
@@ -1619,9 +1605,6 @@ kibnal_startup (ptl_ni_t *ni)
         
         do_gettimeofday(&tv);
         kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-        kibnal_data.kib_svc_id = IBNAL_SERVICE_NUMBER;
-
-        init_MUTEX (&kibnal_data.kib_nid_mutex);
 
         rwlock_init(&kibnal_data.kib_global_lock);
 
@@ -1801,10 +1784,11 @@ kibnal_startup (ptl_ni_t *ni)
         /* flag TX descs initialised */
         kibnal_data.kib_init = IBNAL_INIT_TXD;
         /*****************************************************/
+
         {
                 uint32_t nentries;
 
-                vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
+                vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
                                     kibnal_cq_callback, 
                                     NULL, /* context */
                                     &kibnal_data.kib_cq, &nentries);
@@ -1816,9 +1800,9 @@ kibnal_startup (ptl_ni_t *ni)
                 /* flag CQ initialised */
                 kibnal_data.kib_init = IBNAL_INIT_CQ;
 
-                if (nentries < IBNAL_CQ_ENTRIES) {
+                if (nentries < IBNAL_CQ_ENTRIES()) {
                         CERROR ("CQ only has %d entries, need %d\n", 
-                                nentries, IBNAL_CQ_ENTRIES);
+                                nentries, IBNAL_CQ_ENTRIES());
                         goto failed;
                 }
 
@@ -1830,6 +1814,12 @@ kibnal_startup (ptl_ni_t *ni)
                         goto failed;
                 }
         }
+
+        rc = kibnal_start_listener(ni);
+        if (rc != 0) {
+                CERROR("Can't start listener: %d\n", rc);
+                goto failed;
+        }
         
         /* flag everything initialised */
         kibnal_data.kib_init = IBNAL_INIT_ALL;
@@ -1849,11 +1839,8 @@ kibnal_startup (ptl_ni_t *ni)
 void __exit
 kibnal_module_fini (void)
 {
-#ifdef CONFIG_SYSCTL
-        if (kibnal_tunables.kib_sysctl != NULL)
-                unregister_sysctl_table (kibnal_tunables.kib_sysctl);
-#endif
         ptl_unregister_nal(&kibnal_nal);
+        kibnal_tunables_fini();
 }
 
 int __init
@@ -1871,21 +1858,14 @@ kibnal_module_init (void)
                   <= IBNAL_MSG_SIZE);
         CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
                   <= IBNAL_MSG_SIZE);
-        
-        /* the following must be sizeof(int) for proc_dointvec() */
-        CLASSERT (sizeof (kibnal_tunables.kib_io_timeout) == sizeof (int));
 
-        /* Initialise dynamic tunables to defaults once only */
-        kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT;
+        rc = kibnal_tunables_init();
+        if (rc != 0)
+                return rc;
 
         ptl_register_nal(&kibnal_nal);
-        
-#ifdef CONFIG_SYSCTL
-        /* Press on regardless even if registering sysctl doesn't work */
-        kibnal_tunables.kib_sysctl = 
-                register_sysctl_table (kibnal_top_ctl_table, 0);
-#endif
-        return (0);
+
+        return 0;
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
index f3fbacd..bc1790c 100644 (file)
 # define IBNAL_N_SCHED      1                   /* # schedulers */
 #endif
 
+#define IBNAL_WHOLE_MEM  1
+#if !IBNAL_WHOLE_MEM
+# error "incompatible with voltaire adaptor-tavor (REGISTER_RAM_IN_ONE_PHY_MR)"
+#endif
+
+/* defaults for modparams/tunables */
+#define IBNAL_SERVICE_NUMBER         0x11b9a2   /* Fixed service number */
+#define IBNAL_MIN_RECONNECT_INTERVAL 1          /* first failed connection retry... */
+#define IBNAL_MAX_RECONNECT_INTERVAL 60         /* ...exponentially increasing to this */
+#define IBNAL_CONCURRENT_PEERS       1024       /* # nodes all talking at once to me */
+#define IBNAL_CKSUM                  0          /* checksum kib_msg_t? */
+#define IBNAL_TIMEOUT                50         /* default comms timeout (seconds) */
+#define IBNAL_NTX                    64         /* # tx descs */
+#define IBNAL_NTX_NBLK               128        /* # reserved tx descs */
+
+/* tunables fixed at compile time */
+#define IBNAL_PEER_HASH_SIZE         101        /* # peer lists */
+#define IBNAL_RESCHED                100        /* # scheduler loops before reschedule */
+#define IBNAL_MSG_QUEUE_SIZE         8          /* # messages/RDMAs in-flight */
+#define IBNAL_CREDIT_HIGHWATER       7          /* when to eagerly return credits */
+#define IBNAL_MSG_SIZE              (4<<10)     /* max size of queued messages (inc hdr) */
+
 /* sdp-connection.c */
 #define IBNAL_QKEY               0
 #define IBNAL_PKEY               0xffff
 #define IBNAL_ARB_INITIATOR_DEPTH 0
 #define IBNAL_ARB_RESP_RES        0
 #define IBNAL_FAILOVER_ACCEPTED   0
-#define IBNAL_SERVICE_NUMBER      0x11b9a2      /* Fixed service number */
-
-#define IBNAL_MIN_RECONNECT_INTERVAL HZ         /* first failed connection retry... */
-#define IBNAL_MAX_RECONNECT_INTERVAL (60*HZ)    /* ...exponentially increasing to this */
-
-#define IBNAL_MSG_SIZE           (4<<10)        /* max size of queued messages (inc hdr) */
-
-#define IBNAL_MSG_QUEUE_SIZE      8             /* # messages/RDMAs in-flight */
-#define IBNAL_CREDIT_HIGHWATER    7             /* when to eagerly return credits */
-
-#define IBNAL_NTX                 64            /* # tx descs */
-#define IBNAL_NTX_NBLK            128           /* # reserved tx descs */
-/* reduced from 256 to ensure we register < 255 pages per region.  
- * this can change if we register all memory. */
-
-#define IBNAL_PEER_HASH_SIZE      101           /* # peer lists */
-
-#define IBNAL_RESCHED             100           /* # scheduler loops before reschedule */
-
-#define IBNAL_CONCURRENT_PEERS    1000          /* # nodes all talking at once to me */
-
-#define IBNAL_RDMA_BASE  0x0eeb0000
-#define IBNAL_CKSUM      0
-#define IBNAL_WHOLE_MEM  1
-#if !IBNAL_WHOLE_MEM
-# error "incompatible with voltaire adaptor-tavor (REGISTER_RAM_IN_ONE_PHY_MR)"
-#endif
-
-/* default vals for runtime tunables */
-#define IBNAL_IO_TIMEOUT          50            /* default comms timeout (seconds) */
 
 /************************/
 /* derived constants... */
 
 /* TX messages (shared by all connections) */
-#define IBNAL_TX_MSGS       (IBNAL_NTX + IBNAL_NTX_NBLK)
-#define IBNAL_TX_MSG_BYTES  (IBNAL_TX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_TX_MSG_PAGES  ((IBNAL_TX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
+#define IBNAL_TX_MSGS()       (*kibnal_tunables.kib_ntx +       \
+                               *kibnal_tunables.kib_ntx_nblk)
+#define IBNAL_TX_MSG_BYTES()  (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE)
+#define IBNAL_TX_MSG_PAGES()  ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)
 
 #if IBNAL_WHOLE_MEM
 # define IBNAL_MAX_RDMA_FRAGS PTL_MD_MAX_IOV
 #else
+# define IBNAL_RDMA_BASE      0x0eeb0000
 # define IBNAL_MAX_RDMA_FRAGS 1
 #endif
 
 /* RX messages (per connection) */
-#define IBNAL_RX_MSGS       IBNAL_MSG_QUEUE_SIZE
-#define IBNAL_RX_MSG_BYTES  (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_RX_MSG_PAGES  ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
+#define IBNAL_RX_MSGS         IBNAL_MSG_QUEUE_SIZE
+#define IBNAL_RX_MSG_BYTES    (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)
+#define IBNAL_RX_MSG_PAGES    ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
 
-#define IBNAL_CQ_ENTRIES  (IBNAL_TX_MSGS * (1 + IBNAL_MAX_RDMA_FRAGS) + \
-                           IBNAL_RX_MSGS * IBNAL_CONCURRENT_PEERS)
+#define IBNAL_CQ_ENTRIES()    (IBNAL_TX_MSGS() * (1 + IBNAL_MAX_RDMA_FRAGS) +           \
+                               IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers)
 
 typedef struct
 {
-        int               kib_io_timeout;       /* comms timeout (seconds) */
+        unsigned int     *kib_service_number;   /* IB service number */
+        int              *kib_min_reconnect_interval; /* first failed connection retry... */
+        int              *kib_max_reconnect_interval; /* ...exponentially increasing to this */
+        int              *kib_concurrent_peers; /* max # nodes all talking to me */
+        int              *kib_cksum;            /* checksum kib_msg_t? */
+        int              *kib_timeout;          /* comms timeout (seconds) */
+        int              *kib_ntx;              /* # tx descs */
+        int              *kib_ntx_nblk;         /* # reserved tx descs */
+
         struct ctl_table_header *kib_sysctl;    /* sysctl interface */
 } kib_tunables_t;
 
@@ -201,11 +203,9 @@ typedef struct
         atomic_t          kib_nthreads;         /* # live threads */
         ptl_ni_t         *kib_ni;               /* _the_ nal instance */
 
-        __u64             kib_svc_id;           /* service number I listen on */
         vv_gid_t          kib_port_gid;         /* device/port GID */
         vv_p_key_t        kib_port_pkey;        /* device/port pkey */
         
-        struct semaphore  kib_nid_mutex;        /* serialise NID ops */
         cm_cep_handle_t   kib_listen_handle;    /* IB listen handle */
 
         rwlock_t          kib_global_lock;      /* stabilize peer/conn ops */
@@ -215,7 +215,7 @@ typedef struct
         
         struct list_head *kib_peers;            /* hash table of all my known peers */
         int               kib_peer_hash_size;   /* size of kib_peers */
-        atomic_t          kib_npeers;           /* # peers extant */
+        int               kib_npeers;           /* # peers extant */
         atomic_t          kib_nconns;           /* # connections extant */
 
         void             *kib_connd;            /* the connd task (serialisation assertions) */
@@ -434,10 +434,11 @@ ptl_err_t kibnal_recv_pages(ptl_ni_t *ni, void *private,
 extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob);
 extern void kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid,
                             __u64 dststamp, __u64 seq);
-extern int kibnal_unpack_msg(kib_msg_t *msg, int nob);
-extern kib_peer_t *kibnal_create_peer(ptl_nid_t nid);
+extern int  kibnal_unpack_msg(kib_msg_t *msg, int nob);
+extern int  kibnal_create_peer(kib_peer_t **peerp, ptl_nid_t nid);
 extern void kibnal_destroy_peer(kib_peer_t *peer);
-extern int kibnal_del_peer(ptl_nid_t nid);
+extern int  kibnal_add_persistent_peer (ptl_nid_t nid, __u32 ip);
+extern int  kibnal_del_peer(ptl_nid_t nid);
 extern kib_peer_t *kibnal_find_peer_locked(ptl_nid_t nid);
 extern void kibnal_unlink_peer_locked(kib_peer_t *peer);
 extern int  kibnal_close_stale_conns_locked(kib_peer_t *peer,
@@ -445,7 +446,7 @@ extern int  kibnal_close_stale_conns_locked(kib_peer_t *peer,
 extern kib_conn_t *kibnal_create_conn(cm_cep_handle_t cep);
 extern void kibnal_listen_callback(cm_cep_handle_t cep, cm_conn_data_t *info, void *arg);
 
-extern int kibnal_alloc_pages(kib_pages_t **pp, int npages, int access);
+extern int  kibnal_alloc_pages(kib_pages_t **pp, int npages, int access);
 extern void kibnal_free_pages(kib_pages_t *p);
 
 extern void kibnal_check_sends(kib_conn_t *conn);
@@ -460,10 +461,11 @@ extern int  kibnal_set_qp_state(kib_conn_t *conn, vv_qp_state_t new_state);
 extern void kibnal_async_callback(vv_event_record_t ev);
 extern void kibnal_cq_callback(unsigned long context);
 extern void kibnal_passive_connreq(kib_pcreq_t *pcr, int reject);
-extern void kibnal_pause(int ticks);
 extern void kibnal_queue_tx(kib_tx_t *tx, kib_conn_t *conn);
 extern int  kibnal_init_rdma(kib_tx_t *tx, int type, int nob,
                              kib_rdma_desc_t *dstrd, __u64 dstcookie);
+extern int  kibnal_tunables_init(void);
+extern void kibnal_tunables_fini(void);
 
 static inline int
 wrq_signals_completion (vv_wr_t *wrq)
@@ -545,7 +547,7 @@ kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
                 LASSERT (tx->tx_msg->ibm_type == IBNAL_MSG_PUT_DONE);
         }
         tx->tx_queued = 1;
-        tx->tx_deadline = jiffies + kibnal_tunables.kib_io_timeout * HZ;
+        tx->tx_deadline = jiffies + (*kibnal_tunables.kib_timeout * HZ);
         list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
 }
 
index 6cea29c..c55671f 100644 (file)
@@ -528,6 +528,8 @@ kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page,
          * network address, given how it maps all phys mem into 1 region */
         addr = kibnal_page2phys(page) + page_offset + PAGE_OFFSET;
 
+        /* NB this relies entirely on there being a single region for the whole
+         * of memory, since "high" memory will wrap in the (void *) cast! */
         vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca, 
                                     (void *)((unsigned long)addr),
                                     len, &mem_h, &l_key, &r_key);
@@ -576,7 +578,7 @@ kibnal_kvaddr_to_page (unsigned long vaddr)
 #if CONFIG_HIGHMEM
         if (vaddr >= PKMAP_BASE &&
             vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
-                /* No highmem pages only used for bulk (kiov) I/O */
+                /* Highmem pages only used for bulk (kiov) I/O */
                 CERROR("find page for address in highmem\n");
                 LBUG();
         }
@@ -693,6 +695,7 @@ kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd,
                      int niov, struct iovec *iov, int offset, int nob)
                  
 {
+#error  "check this thoroughly before enabling"
         /* active if I'm sending */
         int         active = ((access & vv_acc_r_mem_write) == 0);
         void       *vaddr;
@@ -742,6 +745,7 @@ kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd,
                       vv_access_con_bit_mask_t access,
                       int nkiov, ptl_kiov_t *kiov, int offset, int nob)
 {
+#error  "check this thoroughly before enabling"
         /* active if I'm sending */
         int            active = ((access & vv_acc_r_mem_write) == 0);
         vv_return_t    vvrc;
@@ -1225,6 +1229,8 @@ kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid)
         kib_conn_t      *conn;
         unsigned long    flags;
         rwlock_t        *g_lock = &kibnal_data.kib_global_lock;
+        int              retry;
+        int              rc;
 
         /* If I get here, I've committed to send, so I complete the tx with
          * failure on any problems */
@@ -1232,38 +1238,51 @@ kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid)
         LASSERT (tx->tx_conn == NULL);          /* only set when assigned a conn */
         LASSERT (tx->tx_nwrq > 0);              /* work items have been set up */
 
-        read_lock_irqsave(g_lock, flags);
+        for (retry = 0; ; retry = 1) {
+                read_lock_irqsave(g_lock, flags);
         
-        peer = kibnal_find_peer_locked (nid);
-        if (peer == NULL) {
-                read_unlock_irqrestore(g_lock, flags);
-                tx->tx_status = -EHOSTUNREACH;
-                tx->tx_waiting = 0;
-                kibnal_tx_done (tx);
-                return;
-        }
+                peer = kibnal_find_peer_locked (nid);
+                if (peer != NULL) {
+                        conn = kibnal_find_conn_locked (peer);
+                        if (conn != NULL) {
+                                kibnal_conn_addref(conn); /* 1 ref for me... */
+                                read_unlock_irqrestore(g_lock, flags);
 
-        conn = kibnal_find_conn_locked (peer);
-        if (conn != NULL) {
-                kibnal_conn_addref(conn);       /* 1 ref for me... */
-                read_unlock_irqrestore(g_lock, flags);
+                                kibnal_queue_tx (tx, conn);
+                                kibnal_conn_decref(conn); /* ...to here */
+                                return;
+                        }
+                }
                 
-                kibnal_queue_tx (tx, conn);
-                kibnal_conn_decref(conn);       /* ...to here */
-                return;
-        }
-        
-        /* Making one or more connections; I'll need a write lock... */
-        read_unlock(g_lock);
-        write_lock(g_lock);
+                /* Making one or more connections; I'll need a write lock... */
+                read_unlock(g_lock);
+                write_lock(g_lock);
+
+                peer = kibnal_find_peer_locked (nid);
+                if (peer != NULL)
+                        break;
 
-        peer = kibnal_find_peer_locked (nid);
-        if (peer == NULL) {
                 write_unlock_irqrestore(g_lock, flags);
-                tx->tx_status = -EHOSTUNREACH;
-                tx->tx_waiting = 0;
-                kibnal_tx_done (tx);
-                return;
+
+                if (retry) {
+                        CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
+
+                        tx->tx_status = -EHOSTUNREACH;
+                        tx->tx_waiting = 0;
+                        kibnal_tx_done (tx);
+                        return;
+                }
+
+                rc = kibnal_add_persistent_peer(nid, PTL_NIDADDR(nid));
+                if (rc != 0) {
+                        CERROR("Can't add peer %s: %d\n",
+                               libcfs_nid2str(nid), rc);
+                        
+                        tx->tx_status = -EHOSTUNREACH;
+                        tx->tx_waiting = 0;
+                        kibnal_tx_done (tx);
+                        return;
+                }
         }
 
         conn = kibnal_find_conn_locked (peer);
@@ -1888,7 +1907,6 @@ kibnal_peer_connect_failed (kib_peer_t *peer, int active)
         /* Only the connd creates conns => single threaded */
         LASSERT (!in_interrupt());
         LASSERT (current == kibnal_data.kib_connd);
-        LASSERT (peer->ibp_reconnect_interval >= IBNAL_MIN_RECONNECT_INTERVAL);
 
         write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
 
@@ -1909,8 +1927,9 @@ kibnal_peer_connect_failed (kib_peer_t *peer, int active)
                 /* Say when active connection can be re-attempted */
                 peer->ibp_reconnect_time = jiffies + peer->ibp_reconnect_interval;
                 /* Increase reconnection interval */
-                peer->ibp_reconnect_interval = MIN (peer->ibp_reconnect_interval * 2,
-                                                    IBNAL_MAX_RECONNECT_INTERVAL);
+                peer->ibp_reconnect_interval = 
+                        MIN (peer->ibp_reconnect_interval * 2,
+                             *kibnal_tunables.kib_max_reconnect_interval * HZ);
         
                 /* Take peer's blocked transmits to complete with error */
                 list_add(&zombies, &peer->ibp_tx_queue);
@@ -1983,7 +2002,7 @@ kibnal_connreq_done(kib_conn_t *conn, int active, int status)
                 case IBNAL_CONN_ACTIVE_CONNECT:
                         LASSERT (active);
                         cm_cancel(conn->ibc_cep);
-                        kibnal_pause(HZ/10);
+                        libcfs_pause(cfs_time_seconds(1)/10);
                         /* cm_connect() failed immediately or
                          * callback returned failure */
                         break;
@@ -2062,7 +2081,8 @@ kibnal_connreq_done(kib_conn_t *conn, int active, int status)
         list_del_init(&peer->ibp_tx_queue);
         
         /* reset reconnect interval for next attempt */
-        peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
+        peer->ibp_reconnect_interval = 
+                *kibnal_tunables.kib_min_reconnect_interval * HZ;
         write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
 
         /* Schedule blocked txs */
@@ -2221,9 +2241,9 @@ kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq)
         LASSERT (!in_interrupt());
         LASSERT (current == kibnal_data.kib_connd);
 
-        if (cmreq->sid != IBNAL_SERVICE_NUMBER) {
+        if (cmreq->sid != (__u64)(*kibnal_tunables.kib_service_number)) {
                 CERROR(LPX64" != IBNAL_SERVICE_NUMBER("LPX64")\n",
-                       cmreq->sid, (__u64)IBNAL_SERVICE_NUMBER);
+                       cmreq->sid, (__u64)(*kibnal_tunables.kib_service_number));
                 goto reject;
         }
 
@@ -2277,8 +2297,8 @@ kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq)
         }
         
         /* assume 'rxmsg.ibm_srcnid' is a new peer */
-        tmp_peer = kibnal_create_peer (rxmsg.ibm_srcnid);
-        if (tmp_peer == NULL) {
+        rc = kibnal_create_peer (&tmp_peer, rxmsg.ibm_srcnid);
+        if (rc != 0) {
                 CERROR("Can't create tmp peer for "LPX64"\n", rxmsg.ibm_srcnid);
                 kibnal_conn_decref(conn);
                 conn = NULL;
@@ -2443,7 +2463,7 @@ kibnal_connect_conn (kib_conn_t *conn)
 
         memset(&cmreq, 0, sizeof(cmreq));
         
-        cmreq.sid = IBNAL_SERVICE_NUMBER;
+        cmreq.sid = (__u64)(*kibnal_tunables.kib_service_number);
 
         cmreq.cep_data.ca_guid              = kibnal_data.kib_hca_attrs.guid;
         cmreq.cep_data.qpn                  = cv->cv_local_qpn;
@@ -2957,7 +2977,7 @@ kibnal_disconnect_conn (kib_conn_t *conn)
         write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
 
         cm_cancel(conn->ibc_cep);
-        kibnal_pause(HZ/10);
+        libcfs_pause(cfs_time_seconds(1)/10);
 
         if (!conn->ibc_disconnect)              /* CM callback will never happen now */
                 kibnal_conn_decref(conn);
@@ -3087,9 +3107,9 @@ kibnal_connd (void *arg)
                          * connection within (n+1)/n times the timeout
                          * interval. */
 
-                        if (kibnal_tunables.kib_io_timeout > n * p)
+                        if (*kibnal_tunables.kib_timeout > n * p)
                                 chunk = (chunk * n * p) / 
-                                        kibnal_tunables.kib_io_timeout;
+                                        *kibnal_tunables.kib_timeout;
                         if (chunk == 0)
                                 chunk = 1;
 
diff --git a/lnet/klnds/viblnd/viblnd_modparams.c b/lnet/klnds/viblnd/viblnd_modparams.c
new file mode 100644 (file)
index 0000000..b084d48
--- /dev/null
@@ -0,0 +1,133 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *   Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "vibnal.h"
+
+/* Module parameters.
+ *
+ * Each tunable is a file-local int initialised from the corresponding
+ * IBNAL_* default and declared with CFS_MODULE_PARM together with a short
+ * description string.  The octal argument (0444 / 0644) is presumably the
+ * access mode of the exported parameter, i.e. read-only vs. writable
+ * after load -- confirm against the CFS_MODULE_PARM definition. */
+static int service_number = IBNAL_SERVICE_NUMBER;
+CFS_MODULE_PARM(service_number, "i", int, 0444,
+                "IB service number");
+
+static int min_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
+CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
+               "minimum connection retry interval (seconds)");
+
+static int max_reconnect_interval = IBNAL_MAX_RECONNECT_INTERVAL;
+CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
+               "maximum connection retry interval (seconds)");
+
+static int concurrent_peers = IBNAL_CONCURRENT_PEERS;
+CFS_MODULE_PARM(concurrent_peers, "i", int, 0444,
+               "maximum number of peers that may connect");
+
+static int cksum = IBNAL_CKSUM;
+CFS_MODULE_PARM(cksum, "i", int, 0644,
+               "set non-zero to enable message (not RDMA) checksums");
+
+static int timeout = IBNAL_TIMEOUT;
+CFS_MODULE_PARM(timeout, "i", int, 0644,
+               "timeout (seconds)");
+
+static int ntx = IBNAL_NTX;
+CFS_MODULE_PARM(ntx, "i", int, 0444,
+               "# of 'normal' message descriptors");
+
+static int ntx_nblk = IBNAL_NTX_NBLK;
+CFS_MODULE_PARM(ntx_nblk, "i", int, 0444,
+               "# of 'reserved' message descriptors");
+
+/* The tunables as seen by the rest of the module.  Every field holds the
+ * ADDRESS of one of the file-local parameter variables in this file, so
+ * users must dereference it to read the value, e.g.
+ * *kibnal_tunables.kib_timeout. */
+kib_tunables_t kibnal_tunables = {
+        .kib_service_number         = &service_number,
+        .kib_min_reconnect_interval = &min_reconnect_interval,
+        .kib_max_reconnect_interval = &max_reconnect_interval,
+        .kib_concurrent_peers       = &concurrent_peers,
+        .kib_cksum                  = &cksum,
+        .kib_timeout                = &timeout,
+        .kib_ntx                    = &ntx,
+        .kib_ntx_nblk               = &ntx_nblk,
+};
+
+#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
+/* sysctl entries, one per parameter variable in this file, used only when
+ * CONFIG_SYSCTL is set and CFS_SYSFS_MODULE_PARM is not.
+ *
+ * Each initializer is positional: a numeric id, the entry name, the
+ * address of the int it exposes, its size, an octal mode matching the one
+ * given to CFS_MODULE_PARM, NULL, and &proc_dointvec.
+ * NOTE(review): the positions presumably map to ctl_name, procname, data,
+ * maxlen, mode, child and proc_handler -- confirm against the kernel's
+ * ctl_table declaration.  The table ends with a zeroed entry. */
+static ctl_table kibnal_ctl_table[] = {
+       {1, "service_number", &service_number, 
+        sizeof(int), 0444, NULL, &proc_dointvec},
+       {2, "min_reconnect_interval", &min_reconnect_interval, 
+        sizeof(int), 0644, NULL, &proc_dointvec},
+       {3, "max_reconnect_interval", &max_reconnect_interval, 
+        sizeof(int), 0644, NULL, &proc_dointvec},
+       {4, "concurrent_peers", &concurrent_peers, 
+        sizeof(int), 0444, NULL, &proc_dointvec},
+       {5, "cksum", &cksum, 
+        sizeof(int), 0644, NULL, &proc_dointvec},
+       {6, "timeout", &timeout, 
+        sizeof(int), 0644, NULL, &proc_dointvec},
+       {7, "ntx", &ntx, 
+        sizeof(int), 0444, NULL, &proc_dointvec},
+       {8, "ntx_nblk", &ntx_nblk, 
+        sizeof(int), 0444, NULL, &proc_dointvec},
+       {0}
+};
+
+/* Root table handed to register_sysctl_table(): a single "vibnal" entry
+ * (id 203, mode 0555) that refers to kibnal_ctl_table -- presumably as its
+ * child directory contents; confirm against the ctl_table declaration. */
+static ctl_table kibnal_top_ctl_table[] = {
+       {203, "vibnal", NULL, 0, 0555, kibnal_ctl_table},
+       {0}
+};
+
+/* Register kibnal_top_ctl_table with register_sysctl_table() and remember
+ * the returned handle in kibnal_tunables.kib_sysctl.
+ *
+ * A failed registration is only warned about; the function always
+ * returns 0.  The parameter list is spelled (void) so this definition
+ * matches the prototype declared in the header. */
+int
+kibnal_tunables_init (void)
+{
+       kibnal_tunables.kib_sysctl =
+               register_sysctl_table(kibnal_top_ctl_table, 0);
+
+       if (kibnal_tunables.kib_sysctl == NULL)
+               CWARN("Can't setup /proc tunables\n");
+
+       return 0;
+}
+
+/* Unregister the sysctl table recorded in kibnal_tunables.kib_sysctl.
+ * Does nothing when that handle is NULL (e.g. registration failed).
+ * The parameter list is spelled (void) to match the header prototype. */
+void
+kibnal_tunables_fini (void)
+{
+       if (kibnal_tunables.kib_sysctl != NULL)
+               unregister_sysctl_table(kibnal_tunables.kib_sysctl);
+}
+
+#else
+
+/* Variant built when the sysctl table above is compiled out: there is
+ * nothing to register, so just report success.  The parameter list is
+ * spelled (void) to match the header prototype. */
+int
+kibnal_tunables_init (void)
+{
+       return 0;
+}
+
+/* Variant built when the sysctl table above is compiled out: nothing was
+ * registered, so there is nothing to tear down.  The parameter list is
+ * spelled (void) to match the header prototype. */
+void
+kibnal_tunables_fini (void)
+{
+}
+
+#endif
+       
+               
+               
+
+       
+