Whamcloud - gitweb
Update from b_hd_newconfig (b1_4)
authorgreen <green>
Mon, 5 Dec 2005 12:37:10 +0000 (12:37 +0000)
committergreen <green>
Mon, 5 Dec 2005 12:37:10 +0000 (12:37 +0000)
59 files changed:
lnet/ChangeLog
lnet/autoconf/lustre-lnet.m4
lnet/include/libcfs/darwin/kp30.h
lnet/include/libcfs/linux/kp30.h
lnet/include/libcfs/linux/linux-fs.h
lnet/include/libcfs/list.h
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/linux/lib-lnet.h
lnet/include/lnet/lnetctl.h
lnet/include/lnet/types.h
lnet/klnds/gmlnd/gmlnd_cb.c
lnet/klnds/gmlnd/gmlnd_comm.c
lnet/klnds/gmlnd/gmlnd_utils.c
lnet/klnds/iiblnd/autoMakefile.am
lnet/klnds/iiblnd/iiblnd.c
lnet/klnds/iiblnd/iiblnd.h
lnet/klnds/iiblnd/iiblnd_cb.c
lnet/klnds/openiblnd/autoMakefile.am
lnet/klnds/openiblnd/openiblnd.c
lnet/klnds/openiblnd/openiblnd.h
lnet/klnds/openiblnd/openiblnd_cb.c
lnet/klnds/ptllnd/autoMakefile.am
lnet/klnds/ptllnd/ptllnd.h
lnet/klnds/ptllnd/ptllnd_cb.c
lnet/klnds/ptllnd/ptllnd_rx_buf.c
lnet/klnds/qswlnd/autoMakefile.am
lnet/klnds/ralnd/autoMakefile.am
lnet/klnds/ralnd/ralnd.c
lnet/klnds/ralnd/ralnd.h
lnet/klnds/ralnd/ralnd_cb.c
lnet/klnds/socklnd/autoMakefile.am
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/klnds/socklnd/socklnd_modparams.c
lnet/klnds/viblnd/autoMakefile.am
lnet/klnds/viblnd/viblnd.h
lnet/klnds/viblnd/viblnd_cb.c
lnet/libcfs/autoMakefile.am
lnet/libcfs/debug.c
lnet/libcfs/linux/linux-proc.c
lnet/libcfs/module.c
lnet/libcfs/tracefile.c
lnet/lnet/api-ni.c
lnet/lnet/autoMakefile.am
lnet/lnet/router.c
lnet/tests/autoMakefile.am
lnet/ulnds/ptllnd/ptllnd.c
lnet/ulnds/ptllnd/ptllnd.h
lnet/ulnds/ptllnd/ptllnd_cb.c
lnet/ulnds/socklnd/connection.c
lnet/ulnds/socklnd/connection.h
lnet/ulnds/socklnd/procapi.c
lnet/ulnds/socklnd/procbridge.h
lnet/ulnds/socklnd/tcplnd.c
lnet/utils/Makefile.am
lnet/utils/debug.c
lnet/utils/l_ioctl.c
lnet/utils/portals.c

index 582ac96..44c216f 100644 (file)
           (LNDS) for the supported network fabrics have also been created
           for this new infrastructure.
        
+2005-08-08  Cluster File Systems, Inc. <info@clusterfs.com>
+       * version 1.4.4
+       * bug fixes
+
+Severity   : major
+Frequency  : rare (large Voltaire clusters only)
+Bugzilla   : 6993
+Description: the default number of reserved transmit descriptors was too low
+            for some large clusters
+Details    : As a workaround, the number was increased.  A proper fix includes
+            a run-time tunable.
+
+2005-06-02  Cluster File Systems, Inc. <info@clusterfs.com>
+       * version 1.4.3
+       * bug fixes
+
+Severity   : major
+Frequency  : occasional (large-scale events, cluster reboot, network failure)
+Bugzilla   : 6411
+Description: too many error messages on console obscure actual problem and
+            can slow down/panic server, or cause recovery to fail repeatedly
+Details    : enable rate-limiting of console error messages, and some messages
+            that were console errors now only go to the kernel log
+
+Severity   : enhancement
+Bugzilla   : 1693
+Description: add /proc/sys/portals/catastrophe entry which will report if
+            that node has previously LBUGged
+
 2005-04-06  Cluster File Systems, Inc. <info@clusterfs.com>
        * bugs
        - update gmnal to use PTL_MTU, fix module refcounting (b=5786)
index bd8d455..bda8459 100644 (file)
@@ -62,11 +62,11 @@ else
                #include <linux/sched.h>
        ],[
                struct task_struct t;
-               #ifdef CPU_ARRAY_SIZE
-               cpumask_t m;
-               #else
-               unsigned long m;
-               #endif
+               #if HAVE_CPUMASK_T
+               cpumask_t     m;
+               #else
+               unsigned long m;
+               #endif
                set_cpus_allowed(&t, m);
        ],[
                AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support])
@@ -534,27 +534,6 @@ if test -n "$VIBLND"; then
        ],[
                AC_MSG_RESULT([no])
        ])
-       AC_MSG_CHECKING([if page_to_phys() must avoid sign extension])
-       LB_LINUX_TRY_COMPILE([
-               #include <linux/kernel.h>
-               #include <linux/mm.h>
-               #include <linux/unistd.h>
-               #include <asm/system.h>
-               #include <asm/io.h>
-       ],[
-               struct page p;
-
-               switch (42) {
-               case 0:
-               case (sizeof(typeof(page_to_phys(&p))) < 8):
-                       break;
-               }
-       ],[
-               AC_MSG_RESULT([yes])
-               VIBCPPFLAGS="$VIBCPPFLAGS -DIBNAL_32BIT_PAGE2PHYS=1"
-       ],[
-               AC_MSG_RESULT([no])
-       ])
        EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
 fi
 AC_SUBST(VIBCPPFLAGS)
@@ -704,6 +683,8 @@ fi
 #
 AC_DEFUN([LN_PROG_LINUX],
 [LN_CONFIG_ZEROCOPY
+LN_FUNC_CPU_ONLINE
+LN_TYPE_CPUMASK_T
 LN_CONFIG_AFFINITY
 LN_CONFIG_QUADRICS
 LN_CONFIG_GM
@@ -715,8 +696,6 @@ LN_CONFIG_PTLLND
 
 LN_STRUCT_PAGE_LIST
 LN_STRUCT_SIGHAND
-LN_FUNC_CPU_ONLINE
-LN_TYPE_CPUMASK_T
 LN_FUNC_SHOW_TASK
 ])
 
@@ -815,9 +794,9 @@ if test "$enable_libpthread" = "yes" ; then
                [ENABLE_LIBPTHREAD="yes"],
                [ENABLE_LIBPTHREAD="no"])
        if test "$ENABLE_LIBPTHREAD" = "yes" ; then
-               AC_MSG_RESULT([no libpthread is found])
-       else
                AC_MSG_RESULT([$ENABLE_LIBPTHREAD])
+       else
+               AC_MSG_RESULT([no libpthread is found])
        fi
 else
        AC_MSG_RESULT([no (disabled explicitly)])
@@ -855,7 +834,9 @@ if test x$enable_liblustre = xyes ; then
                        CAP_LIBS="-lcap"
                        AC_DEFINE([HAVE_LIBCAP], 1, [use libcap])
                ],
-               [CAP_LIBS=""])
+               [
+                       CAP_LIBS=""
+               ])
        AC_SUBST(CAP_LIBS)
 
        if test "$ENABLE_LIBPTHREAD" = "yes" ; then
index 7188a4a..4b2e94f 100644 (file)
@@ -31,6 +31,7 @@
 #else
 #define LASSERT_SPIN_LOCKED(lock) do {} while(0)
 #endif
+#define LASSERT_SEM_LOCKED(sem) do {} while(0) /* XXX */
 
 #define LBUG_WITH_LOC(file, func, line)    do {libcfs_catastrophe = 1;} while(0)
 
index 600226c..017ca73 100644 (file)
@@ -89,6 +89,7 @@ static inline void our_cond_resched(void)
 #else
 #define LASSERT_SPIN_LOCKED(lock) do {} while(0)
 #endif
+#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0)
 
 #ifdef __arch_um__
 #define LBUG_WITH_LOC(file, func, line)                                 \
@@ -172,7 +173,7 @@ do {                                                                    \
 #else  /* !__KERNEL__ */
 # include <stdio.h>
 # include <stdlib.h>
-#ifdef CRAY_XT3
+#if CRAY_XT3
 # include <ioctl.h>
 #elif defined(__CYGWIN__)
 # include <cygwin-ioctl.h>
@@ -322,6 +323,7 @@ extern int  lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
 #endif
 
 #if (defined(__x86_64__) && defined(__KERNEL__))
+/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */
 # define LPU64 "%Lu"
 # define LPD64 "%Ld"
 # define LPX64 "%#Lx"
index d7fd1cf..b046999 100644 (file)
@@ -66,7 +66,6 @@ typedef struct file_lock cfs_flock_t;
 #define CFS_FLOCK_SET_END(fl, end)          do { (fl)->fl_end = (end); } while(0)
 
 ssize_t cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset);
-
 #endif
 
 #endif
index 7bd40d6..badbd69 100644 (file)
@@ -221,6 +221,9 @@ static inline void list_splice_init(struct list_head *list,
        for (pos = (head)->next, n = pos->next; pos != (head); \
                pos = n, n = pos->next)
 
+#define hlist_head     list_head
+#define hlist_node     list_head
+
 #endif /* __linux__*/
 
 #ifndef list_for_each_prev
index 124999a..3f4e93c 100644 (file)
@@ -174,6 +174,9 @@ lnet_msg_alloc (void)
         if (msg != NULL) {
                 /* NULL pointers, clear flags etc */
                 memset (msg, 0, sizeof (*msg));
+#if CRAY_XT3
+                msg->msg_ev.uid = LNET_UID_ANY;
+#endif
         }
         return(msg);
 }
@@ -275,6 +278,9 @@ lnet_msg_alloc(void)
         if (msg != NULL) {
                 /* NULL pointers, clear flags etc */
                 memset (msg, 0, sizeof (*msg));
+#if CRAY_XT3
+                msg->msg_ev.uid = LNET_UID_ANY;
+#endif
         }
         return (msg);
 }
index 81ab97a..8494198 100644 (file)
 #ifdef __KERNEL__
 # include <asm/page.h>
 # include <linux/string.h>
-#else
+# include <asm/io.h>
+# include <libcfs/kp30.h>
+
+static inline __u64
+lnet_page2phys (struct page *p)
+{
+        /* Return the physical address of page 'p' as an unsigned 64 bit
+         * value, whatever width page_to_phys() returns on this platform.
+         * The switch is on a sizeof() expression, i.e. a compile-time
+         * constant, so the compiler optimizer will elide unused branches */
+
+        switch (sizeof(typeof(page_to_phys(p)))) {
+        case 4:
+                /* page_to_phys returns a 32 bit physical address.  This must
+                 * be a 32 bit machine with <= 4G memory and we must ensure we
+                 * don't sign extend when converting to 64 bits; casting to an
+                 * unsigned type first makes the widening zero-extend. */
+                return (unsigned long)page_to_phys(p);
+
+        case 8:
+                /* page_to_phys returns a 64 bit physical address, which
+                 * already fits the return type without adjustment */
+                return page_to_phys(p);
+                
+        default:
+                LBUG();         /* neither 4 nor 8 bytes: unhandled width */
+                return 0;
+        }
+}
+
+#else  /* __KERNEL__ */
 # include <libcfs/list.h>
 # include <string.h>
-#ifdef HAVE_LIBPTHREAD
-# include <pthread.h>
-#endif
+# ifdef HAVE_LIBPTHREAD
+#  include <pthread.h>
+# endif
 #endif
 
-#endif
+#endif /* __LNET_LINUX_LIB_LNET_H__ */
index b14e484..4ff635e 100644 (file)
@@ -40,6 +40,7 @@
 
 int ptl_initialize(int argc, char **argv);
 int jt_ptl_network(int argc, char **argv);
+int jt_ptl_get_nids(__u64 **nid_list);
 int jt_ptl_list_nids(int argc, char **argv);
 int jt_ptl_which_nid(int argc, char **argv);
 int jt_ptl_print_interfaces(int argc, char **argv);
index 34c97b3..c043ee2 100644 (file)
@@ -9,6 +9,11 @@ typedef __u32 lnet_pid_t;
 #define LNET_NID_ANY      ((lnet_nid_t) -1)
 #define LNET_PID_ANY      ((lnet_pid_t) -1)
 
+#if CRAY_XT3
+typedef __u32 lnet_uid_t;
+#define LNET_UID_ANY      ((lnet_uid_t) -1)
+#endif
+
 #define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
 #define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
 
@@ -110,6 +115,9 @@ typedef struct {
         lnet_event_kind_t   type;
        lnet_process_id_t   target;
         lnet_process_id_t   initiator;
+#if CRAY_XT3
+       lnet_uid_t          uid;
+#endif
         unsigned int        pt_index;
         __u64               match_bits;
         unsigned int        rlength;
index 209ae4a..9a46978 100644 (file)
@@ -71,8 +71,6 @@ gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
         lnet_hdr_t       *hdr= &lntmsg->msg_hdr;
         int               type = lntmsg->msg_type;
         lnet_process_id_t target = lntmsg->msg_target;
-        int               target_is_router = lntmsg->msg_target_is_router;
-        int               routing = lntmsg->msg_routing;
         unsigned int      niov = lntmsg->msg_niov;
         struct iovec     *iov = lntmsg->msg_iov;
         lnet_kiov_t      *kiov = lntmsg->msg_kiov;
index a4e19f0..217bed9 100644 (file)
@@ -149,7 +149,6 @@ gmnal_tx_done(gmnal_tx_t *tx, int rc)
 {
        gmnal_ni_t *gmni = tx->tx_gmni;
         int         wake_sched = 0;
-        int         wake_idle = 0;
         
         LASSERT(tx->tx_lntmsg == NULL);
 
index 3bf4493..e70b185 100644 (file)
@@ -52,14 +52,16 @@ gmnal_alloc_netbuf_pages (gmnal_ni_t *gmni, gmnal_netbuf_t *nb, int npages)
 
                 CDEBUG(D_NET,"[%3d] page %p, phys "LPX64", @ "LPX64"\n",
                        i, nb->nb_kiov[i].kiov_page, 
-                       (__u64)page_to_phys(nb->nb_kiov[i].kiov_page),
+                       lnet_page2phys(nb->nb_kiov[i].kiov_page),
                        gmni->gmni_netaddr_base);
 
-                gmrc = gm_register_memory_ex_phys(gmni->gmni_port,
-                                                  page_to_phys(nb->nb_kiov[i].kiov_page),
-                                                  PAGE_SIZE,
-                                                  gmni->gmni_netaddr_base);
-                CDEBUG(D_NET,"[%3d] page %p: %d\n", i, nb->nb_kiov[i].kiov_page, gmrc);
+                gmrc = gm_register_memory_ex_phys(
+                        gmni->gmni_port,
+                        lnet_page2phys(nb->nb_kiov[i].kiov_page),
+                        PAGE_SIZE,
+                        gmni->gmni_netaddr_base);
+                CDEBUG(D_NET,"[%3d] page %p: %d\n", 
+                       i, nb->nb_kiov[i].kiov_page, gmrc);
 
                 if (gmrc != GM_SUCCESS) {
                         CERROR("Can't map page: %d(%s)\n", gmrc,
index 016e333..d08d079 100644 (file)
@@ -9,5 +9,5 @@ modulenet_DATA = kiiblnd$(KMODEXT)
 endif
 endif
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
 DIST_SOURCES = $(kiiblnd-objs:%.o=%.c) iiblnd.h
index ff04b78..3a947c5 100644 (file)
@@ -1089,7 +1089,7 @@ kibnal_create_conn (lnet_nid_t nid)
                              page_offset);
 
                 rx->rx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
-                                 kibnal_page2phys(page) + page_offset;
+                                 lnet_page2phys(page) + page_offset;
                 
                 page_offset += IBNAL_MSG_SIZE;
                 LASSERT (page_offset <= PAGE_SIZE);
@@ -1529,7 +1529,7 @@ kibnal_setup_tx_descs (void)
                                             page_offset);
 
                 tx->tx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
-                                 kibnal_page2phys(page) + page_offset;
+                                 lnet_page2phys(page) + page_offset;
 
                 CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n", 
                        i, tx, tx->tx_msg, tx->tx_hca_msg);
index 1b281a3..69761c9 100644 (file)
@@ -600,13 +600,6 @@ kibnal_show_rdma_attr (kib_conn_t *conn)
 }
 #endif
 
-static inline __u64
-kibnal_page2phys (struct page *p)
-{
-        return page_to_phys(p);
-}
-
-
 /* CAVEAT EMPTOR: We rely on tx/rx descriptor alignment to allow us to use the
  * lowest 2 bits of the work request id to stash the work item type (the op
  * field is not valid when the wc completes in error). */
index ee6b4e2..a4b21a2 100644 (file)
@@ -536,7 +536,7 @@ kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page,
 
         frag->rf_nob  = len;
         frag->rf_addr = kibnal_data.kib_whole_mem.md_addr +
-                        kibnal_page2phys(page) + page_offset;
+                        lnet_page2phys(page) + page_offset;
 
         CDEBUG(D_NET,"map key %x frag [%d]["LPX64" for %d]\n", 
                rd->rd_key, rd->rd_nfrag, frag->rf_addr, frag->rf_nob);
@@ -717,7 +717,7 @@ kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
                         return -EFAULT;
                 }
 
-                tx->tx_pages[npages++] = kibnal_page2phys(page);
+                tx->tx_pages[npages++] = lnet_page2phys(page);
 
                 fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
                 vaddr += fragnob;
@@ -771,7 +771,7 @@ kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
                         return -EINVAL;
                 }
 
-                tx->tx_pages[npages++] = kibnal_page2phys(kiov->kiov_page);
+                tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page);
                 resid -= kiov->kiov_len;
                 kiov++;
                 nkiov--;
@@ -837,7 +837,7 @@ kibnal_check_sends (kib_conn_t *conn)
                 if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) {
                         list_del (&tx->tx_list);
                         tx->tx_queued = 0;
-                        tx->tx_status -ECONNABORTED;
+                        tx->tx_status = -ECONNABORTED;
                         tx->tx_waiting = 0;
                         done = (tx->tx_sending == 0);
                         if (!done)
index 0124f85..b4e0fb7 100644 (file)
@@ -9,5 +9,5 @@ modulenet_DATA = kopeniblnd$(KMODEXT)
 endif
 endif
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
 DIST_SOURCES = $(kopeniblnd-objs:%.o=%.c) openiblnd.h
index 7a9da5e..a7c4bcd 100644 (file)
@@ -300,7 +300,6 @@ kibnal_handle_svcqry (struct socket *sock)
         __u32                peer_ip;
         unsigned int         peer_port;
         kib_msg_t           *msg;
-        __u32                magic;
         __u64                srcnid;
         __u64                srcstamp;
         int                  rc;
@@ -406,7 +405,6 @@ int
 kibnal_accept(lnet_ni_t *ni, struct socket *sock)
 {
         kib_acceptsock_t  *as;
-        int                rc;
         unsigned long      flags;
 
         LIBCFS_ALLOC(as, sizeof(*as));
@@ -539,8 +537,6 @@ kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
 void
 kibnal_destroy_peer (kib_peer_t *peer)
 {
-        unsigned long flags;
-
         CDEBUG (D_NET, "peer %s %p deleted\n", 
                 libcfs_nid2str(peer->ibp_nid), peer);
 
@@ -760,7 +756,7 @@ kibnal_del_peer (lnet_nid_t nid)
                         rc = 0;         /* matched something */
                 }
         }
- out:
+
         write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
 
         return (rc);
@@ -1213,7 +1209,7 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int access)
         for (i = 0; i < npages; i++) {
                 phys_pages[i].size = PAGE_SIZE;
                 phys_pages[i].address =
-                        kibnal_page2phys(p->ibp_pages[i]);
+                        lnet_page2phys(p->ibp_pages[i]);
         }
 
         p->ibp_vaddr = 0;
index 52a55f3..b4e6bcb 100644 (file)
@@ -455,12 +455,6 @@ kibnal_show_rdma_attr (kib_conn_t *conn)
 }
 #endif
 
-static inline __u64
-kibnal_page2phys (struct page *p)
-{
-        return page_to_phys(p);
-}
-
 /* CAVEAT EMPTOR:
  * We rely on tx/rx descriptor alignment to allow us to use the lowest bit
  * of the work request id as a flag to determine if the completion is for a
index bee44c5..afa5ace 100644 (file)
@@ -50,6 +50,12 @@ kibnal_tx_done (kib_tx_t *tx)
         LASSERT (tx->tx_sending == 0);          /* mustn't be awaiting callback */
         LASSERT (!tx->tx_passive_rdma_wait);    /* mustn't be awaiting RDMA */
 
+        if (in_interrupt()) {
+                /* can't deregister memory/flush FMRs/finalize in IRQ context... */
+                kibnal_schedule_tx_done(tx);
+                return;
+        }
+
         switch (tx->tx_mapped) {
         default:
                 LBUG();
@@ -58,11 +64,6 @@ kibnal_tx_done (kib_tx_t *tx)
                 break;
                 
         case KIB_TX_MAPPED:
-                if (in_interrupt()) {
-                        /* can't deregister memory in IRQ context... */
-                        kibnal_schedule_tx_done(tx);
-                        return;
-                }
                 rc = ib_memory_deregister(tx->tx_md.md_handle.mr);
                 LASSERT (rc == 0);
                 tx->tx_mapped = KIB_TX_UNMAPPED;
@@ -70,12 +71,6 @@ kibnal_tx_done (kib_tx_t *tx)
 
 #if IBNAL_FMR
         case KIB_TX_MAPPED_FMR:
-                if (in_interrupt() && tx->tx_status != 0) {
-                        /* can't flush FMRs in IRQ context... */
-                        kibnal_schedule_tx_done(tx);
-                        return;
-                }              
-
                 rc = ib_fmr_deregister(tx->tx_md.md_handle.fmr);
                 LASSERT (rc == 0);
 
@@ -422,7 +417,7 @@ kibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp)
             !VALID_PAGE (page))
                 return (-EFAULT);
 
-        *physp = kibnal_page2phys(page) + (vaddr & (PAGE_SIZE - 1));
+        *physp = lnet_page2phys(page) + (vaddr & (PAGE_SIZE - 1));
         return (0);
 }
 #endif
@@ -510,9 +505,9 @@ kibnal_map_kiov (kib_tx_t *tx, enum ib_memory_access access,
 
         page_offset = kiov->kiov_offset + offset;
 #if IBNAL_FMR
-        phys[0] = kibnal_page2phys(kiov->kiov_page);
+        phys[0] = lnet_page2phys(kiov->kiov_page);
 #else
-        phys[0].address = kibnal_page2phys(kiov->kiov_page);
+        phys[0].address = lnet_page2phys(kiov->kiov_page);
         phys[0].size = PAGE_SIZE;
 #endif
         nphys = 1;
@@ -550,9 +545,9 @@ kibnal_map_kiov (kib_tx_t *tx, enum ib_memory_access access,
 
                 LASSERT (nphys * sizeof (*phys) < phys_size);
 #if IBNAL_FMR
-                phys[nphys] = kibnal_page2phys(kiov->kiov_page);
+                phys[nphys] = lnet_page2phys(kiov->kiov_page);
 #else
-                phys[nphys].address = kibnal_page2phys(kiov->kiov_page);
+                phys[nphys].address = lnet_page2phys(kiov->kiov_page);
                 phys[nphys].size = PAGE_SIZE;
 #endif
                 nphys++;
@@ -978,10 +973,11 @@ kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid)
 }
 
 int
-kibnal_start_passive_rdma (int type, lnet_msg_t *lntmsg)
+kibnal_start_passive_rdma (int type, lnet_msg_t *lntmsg,
+                           int niov, struct iovec *iov, lnet_kiov_t *kiov,
+                           int nob)
 {
         lnet_nid_t  nid = lntmsg->msg_target.nid;
-        int         nob = lntmsg->msg_md->md_length;
         kib_tx_t   *tx;
         kib_msg_t  *ibmsg;
         int         rc;
@@ -1007,16 +1003,11 @@ kibnal_start_passive_rdma (int type, lnet_msg_t *lntmsg)
                 return -ENOMEM;
         }
 
-        if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) 
-                rc = kibnal_map_iov (tx, access,
-                                     lntmsg->msg_md->md_niov,
-                                     lntmsg->msg_md->md_iov.iov,
-                                     0, nob);
+        
+        if (iov != NULL) 
+                rc = kibnal_map_iov (tx, access, niov, iov, 0, nob);
         else
-                rc = kibnal_map_kiov (tx, access,
-                                      lntmsg->msg_md->md_niov, 
-                                      lntmsg->msg_md->md_iov.kiov,
-                                      0, nob);
+                rc = kibnal_map_kiov (tx, access, niov, kiov, 0, nob);
 
         if (rc != 0) {
                 CERROR ("Can't map RDMA for %s: %d\n", 
@@ -1244,7 +1235,16 @@ kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
                 if (nob <= IBNAL_MSG_SIZE)
                         break;                  /* send IMMEDIATE */
 
-                return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg);
+                if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
+                        return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg, 
+                                                         lntmsg->msg_md->md_niov, 
+                                                         lntmsg->msg_md->md_iov.iov, NULL,
+                                                         lntmsg->msg_md->md_length);
+
+                return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg, 
+                                                 lntmsg->msg_md->md_niov, 
+                                                 NULL, lntmsg->msg_md->md_iov.kiov,
+                                                 lntmsg->msg_md->md_length);
 
         case LNET_MSG_REPLY: {
                 /* reply's 'private' is the incoming receive */
@@ -1277,7 +1277,10 @@ kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
                 if (nob <= IBNAL_MSG_SIZE)
                         break;                  /* send IMMEDIATE */
                 
-                return kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA, lntmsg);
+                return kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA, lntmsg,
+                                                 payload_niov,
+                                                 payload_iov, payload_kiov,
+                                                 payload_nob);
         }
 
         /* Send IMMEDIATE */
@@ -1781,7 +1784,7 @@ kibnal_conn_callback (tTS_IB_CM_EVENT event,
                 break;
                 
         case TS_IB_CM_DISCONNECTED:
-                CDEBUG(D_WARNING, "Connection %p -> %s DISCONNECTED.\n",
+                CWARN("Connection %p -> %s DISCONNECTED.\n",
                        conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
                 kibnal_close_conn (conn, 0);
                 break;
@@ -1917,7 +1920,7 @@ kibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
 
         case TS_IB_CM_ESTABLISHED:
                 LASSERT (conn != NULL);
-                CDEBUG(D_WARNING, "Connection %p -> %s ESTABLISHED.\n",
+                CWARN("Connection %p -> %s ESTABLISHED.\n",
                        conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
 
                 kibnal_connreq_done(conn, 0);
@@ -1989,7 +1992,7 @@ kibnal_active_conn_callback (tTS_IB_CM_EVENT event,
         }
 
         case TS_IB_CM_ESTABLISHED:
-                CDEBUG(D_WARNING, "Connection %p -> %s ESTABLISHED\n",
+                CWARN("Connection %p -> %s ESTABLISHED\n",
                        conn, libcfs_nid2str(conn->ibc_peer->ibp_nid));
 
                 kibnal_connreq_done(conn, 0);
index 07a36ec..bd8cc9c 100755 (executable)
@@ -4,5 +4,5 @@ modulenet_DATA = kptllnd$(KMODEXT)
 endif
 endif
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
 DIST_SOURCES = $(kptllnd-objs:%.o=%.c) ptllnd.h
index 6203c2e..2559f8b 100755 (executable)
@@ -138,7 +138,10 @@ typedef struct kptl_rx                          /* receive message */
         kptl_rx_buffer_t       *rx_rxb;         /* the rx buffer pointer */
         kptl_msg_t             *rx_msg;
         int                     rx_nob;         /* the number of bytes rcvd */
-        ptl_process_id_t        rx_initiator;   /* who send the packet */
+        ptl_process_id_t        rx_initiator;   /* sender's address */
+#if CRAY_XT3
+        ptl_uid_t               rx_uid;         /* sender's uid */
+#endif
         kptl_peer_t            *rx_peer;        /* pointer to peer */
         size_t                  rx_payload[0];  /* payload */
 } kptl_rx_t;
index 325568a..4762d62 100644 (file)
@@ -155,23 +155,27 @@ kptllnd_setup_md(
                         LASSERT (payload_niov > 0);
                 }
 
-                while(payload_nob){
-
-                        LASSERT( payload_offset < payload_kiov->kiov_len);
+                while (payload_nob > 0) {
+                        __u64 phys_page = lnet_page2phys(payload_kiov->kiov_page);
+                        __u64 phys      = phys_page + 
+                                          payload_kiov->kiov_offset + 
+                                          payload_offset;
+                        int   nob = min((int)(payload_kiov->kiov_len - payload_offset),
+                                        (int)payload_nob);
+                        
+                        LASSERT (payload_offset < payload_kiov->kiov_len);
                         LASSERT (payload_niov > 0);
                         LASSERT (niov < sizeof(tempiov->iov)/sizeof(tempiov->iov[0]));
+                        LASSERT (sizeof(void *) > 4 || 
+                                 (phys <= 0xffffffffULL &&
+                                  phys + (nob - 1) <= 0xffffffffULL));
 
-                        PJK_UT_MSG("kiov_page  [%d]=%p (phys)\n",niov,(void*)page_to_phys(payload_kiov->kiov_page));
+                        PJK_UT_MSG("kiov_page  [%d]="LPX64" (phys)\n",niov,phys_page);
                         PJK_UT_MSG("kiov_offset[%d]=%d (phys)\n",niov,payload_kiov->kiov_offset);
                         PJK_UT_MSG("kiov_len   [%d]=%d (phys)\n",niov,payload_kiov->kiov_len);
 
-                        tempiov->iov[niov].iov_base = (void *)(
-                                page_to_phys(payload_kiov->kiov_page) +
-                                payload_kiov->kiov_offset +
-                                payload_offset);
-                        tempiov->iov[niov].iov_len = min(
-                                (int)(payload_kiov->kiov_len - payload_offset),
-                                (int)payload_nob);
+                        tempiov->iov[niov].iov_base = (void *)((unsigned long)phys);
+                        tempiov->iov[niov].iov_len = nob;
 
                         PJK_UT_MSG("iov_base[%d]=%p\n",niov,tempiov->iov[niov].iov_base);
                         PJK_UT_MSG("iov_len [%d]=%d\n",niov,tempiov->iov[niov].iov_len);
@@ -728,6 +732,20 @@ int kptllnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
         if(delayed)
                 STAT_UPDATE(kps_recv_delayed);
 
+#if CRAY_XT3
+        if (lntmsg != NULL) {
+                LASSERT (lntmsg->msg_ev.uid == LNET_UID_ANY);
+
+                /* Set the UID if the sender's uid isn't 0; i.e. non-root
+                 * running in userspace (e.g. a catamount node; linux kernel
+                 * senders, including routers have uid 0).  If this is a lustre
+                 * RPC request, this tells lustre not to trust the creds in the
+                 * RPC message body. */
+
+                if (rx->rx_uid != 0)
+                        lntmsg->msg_ev.uid = rx->rx_uid;
+        }
+#endif
         switch(rxmsg->ptlm_type)
         {
         default:
index cca8459..55ecc16 100644 (file)
@@ -589,7 +589,9 @@ kptllnd_rx_buffer_callback(ptl_event_t *ev)
         rx->rx_rxb = rxb;
         rx->rx_nob = nob;
         rx->rx_initiator = ev->initiator;
-
+#if CRAY_XT3
+        rx->rx_uid = ev->uid;
+#endif
         kptllnd_rx_schedule(rx);
 
         if(!rxbp->rxbp_shutdown){
index c9560c7..721e86f 100644 (file)
@@ -9,5 +9,5 @@ modulenet_DATA = kqswlnd$(KMODEXT)
 endif
 endif
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
 DIST_SOURCES = $(kqswlnd-objs:%.o=%.c) qswlnd.h
index 9627cac..7f3df4c 100644 (file)
@@ -9,5 +9,5 @@ modulenet_DATA = kralnd$(KMODEXT)
 endif
 endif
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
 DIST_SOURCES = $(kralnd-objs:%.o=%.c) ralnd.h
index b9324f9..005c283 100644 (file)
@@ -697,7 +697,7 @@ kranal_conn_handshake (struct socket *sock, kra_peer_t *peer)
                 CWARN("Closed %d stale conns to %s\n", nstale, 
                       libcfs_nid2str(peer_nid));
 
-        CDEBUG(D_WARNING, "New connection to %s on devid[%d] = %d\n",
+        CWARN("New connection to %s on devid[%d] = %d\n",
                libcfs_nid2str(peer_nid), 
                conn->rac_device->rad_idx, conn->rac_device->rad_id);
 
@@ -861,8 +861,6 @@ kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid)
 void
 kranal_destroy_peer (kra_peer_t *peer)
 {
-        unsigned long flags;
-
         CDEBUG(D_NET, "peer %s %p deleted\n", 
                libcfs_nid2str(peer->rap_nid), peer);
 
@@ -1060,7 +1058,7 @@ kranal_del_peer (lnet_nid_t nid)
                         rc = 0;         /* matched something */
                 }
         }
- out:
+
         write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
 
         return rc;
index fecb6de..5b43340 100644 (file)
@@ -443,12 +443,6 @@ kranal_tx_mapped (kra_tx_t *tx)
                 tx->tx_buftype == RANAL_BUF_PHYS_MAPPED);
 }
 
-static inline __u64
-kranal_page2phys (struct page *p)
-{
-        return page_to_phys(p);
-}
-
 int kranal_startup (lnet_ni_t *ni);
 void kranal_shutdown (lnet_ni_t *ni);
 int kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
index 21f5df1..a3ebbf9 100644 (file)
@@ -212,7 +212,7 @@ kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, lnet_kiov_t *kiov,
         tx->tx_nob = nob;
         tx->tx_buffer = (void *)((unsigned long)(kiov->kiov_offset + offset));
 
-        phys->Address = kranal_page2phys(kiov->kiov_page);
+        phys->Address = lnet_page2phys(kiov->kiov_page);
         phys++;
 
         resid = nob - (kiov->kiov_len - offset);
@@ -237,7 +237,7 @@ kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, lnet_kiov_t *kiov,
                         return -EMSGSIZE;
                 }
 
-                phys->Address = kranal_page2phys(kiov->kiov_page);
+                phys->Address = lnet_page2phys(kiov->kiov_page);
                 phys++;
 
                 resid -= PAGE_SIZE;
@@ -430,7 +430,6 @@ kranal_launch_tx (kra_tx_t *tx, lnet_nid_t nid)
         unsigned long    flags;
         kra_peer_t      *peer;
         kra_conn_t      *conn;
-        unsigned long    now;
         int              rc;
         int              retry;
         rwlock_t        *g_lock = &kranal_data.kra_global_lock;
@@ -1396,7 +1395,7 @@ kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg,
         case RAP_NOT_DONE:
                 if (time_after_eq(jiffies,
                                   conn->rac_last_tx + conn->rac_keepalive*HZ))
-                        CDEBUG(D_WARNING, "EAGAIN sending %02x (idle %lu secs)\n",
+                        CWARN("EAGAIN sending %02x (idle %lu secs)\n",
                                msg->ram_type, (jiffies - conn->rac_last_tx)/HZ);
                 return -EAGAIN;
         }
@@ -1864,7 +1863,7 @@ kranal_complete_closed_conn (kra_conn_t *conn)
                 kranal_tx_done(tx, -ECONNABORTED);
         }
 
-        CDEBUG(D_WARNING, "Closed conn %p -> %s: nmsg %d nreplies %d\n",
+        CWARN("Closed conn %p -> %s: nmsg %d nreplies %d\n",
                conn, libcfs_nid2str(conn->rac_peer->rap_nid), nfma, nreplies);
 }
 
index 30bfb8d..0dbe697 100644 (file)
@@ -27,4 +27,4 @@ endif # DARWIN
 EXTRA_DIST := $(plist_DATA)
 install-data-hook: $(install_data_hook)
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c socklnd_lib.c
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ socklnd_lib.c
index 2f001c6..71cc574 100644 (file)
@@ -972,7 +972,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         int                rc;
         char              *warn = NULL;
 
-        LASSERT (route == NULL == (type == SOCKLND_CONN_NONE));
+        LASSERT ((route == NULL) == (type == SOCKLND_CONN_NONE));
 
         rc = ksocknal_lib_setup_sock (sock);
         if (rc != 0)
@@ -1204,7 +1204,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         write_unlock_irqrestore (global_lock, flags);
 
         if (rc != 0)
-                CERROR ("Closed %d stale conns to %s ip %d.%d.%d.%d\n",
+                CDEBUG(D_HA, "Closed %d stale conns to %s ip %d.%d.%d.%d\n",
                         rc, libcfs_id2str(conn->ksnc_peer->ksnp_id),
                         HIPQUAD(conn->ksnc_ipaddr));
 
@@ -1329,12 +1329,13 @@ ksocknal_peer_failed (ksock_peer_t *peer)
         int       notify = 0;
 
         /* There has been a connection failure or comms error; but I'll only
-         * tell LNET I think the peer is dead if there are no connections or
-         * connection attempts in existance. */
+         * tell LNET I think the peer is dead if it's to another kernel and
+         * there are no connections or connection attempts in existence. */
         
         read_lock (&ksocknal_data.ksnd_global_lock);
 
-        if (list_empty(&peer->ksnp_conns) &&
+        if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
+            list_empty(&peer->ksnp_conns) &&
             peer->ksnp_accepting == 0 &&
             ksocknal_find_connecting_route_locked(peer) == NULL) {
                 notify = 1;
index dc1547a..7e46090 100644 (file)
@@ -55,9 +55,9 @@
 #define SOCKNAL_BUFFER_SIZE     (8<<20)         /* default socket buffer size */
 #define SOCKNAL_NAGLE            0              /* enable/disable NAGLE? */
 #define SOCKNAL_IRQ_AFFINITY     1              /* enable/disable IRQ affinity? */
-#define SOCKNAL_KEEPALIVE_IDLE   30             /* # seconds idle before 1st probe */
-#define SOCKNAL_KEEPALIVE_COUNT  10             /* # unanswered probes to determine peer death */
-#define SOCKNAL_KEEPALIVE_INTVL  2              /* seconds between probes */
+#define SOCKNAL_KEEPALIVE_IDLE   35             /* # seconds idle before 1st probe */
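+#define SOCKNAL_KEEPALIVE_COUNT  10             /* # unanswered probes to determine peer death; NOTE(review): the new value is missing from this copy of the diff, 10 is the pre-change value - confirm against the commit */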
+#define SOCKNAL_KEEPALIVE_INTVL  5              /* seconds between probes */
 #define SOCKNAL_CREDITS          256            /* # concurrent sends */
 #define SOCKNAL_PEERCREDITS      8              /* # concurrent sends to 1 peer */
 
index fab6729..169a8e7 100644 (file)
@@ -481,7 +481,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
                                       HIPQUAD(conn->ksnc_ipaddr), rc);
                         break;
                 }
-                CERROR("[%p] Error %d on write to %s"
+                CDEBUG(D_HA, "[%p] Error %d on write to %s"
                        " ip %d.%d.%d.%d:%d\n", conn, rc,
                        libcfs_id2str(conn->ksnc_peer->ksnp_id),
                        HIPQUAD(conn->ksnc_ipaddr),
@@ -1564,12 +1564,6 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
                                libcfs_id2str(*peerid),
                                HIPQUAD(conn->ksnc_ipaddr),
                                libcfs_id2str(recv_id));
-                               
-                CERROR ("Connected to %s ip %u.%u.%u.%u "
-                        "but expecting %s\n",
-                        libcfs_id2str(recv_id),
-                        HIPQUAD(conn->ksnc_ipaddr),
-                        libcfs_id2str(*peerid));
                 return (-EPROTO);
         }
 
@@ -1718,7 +1712,7 @@ ksocknal_connect (ksock_route_t *route)
                                            route->ksnr_retry_interval);
 
         if (!list_empty(&peer->ksnp_tx_queue) &&
-            peer->ksnp_accepting != 0 &&
+            peer->ksnp_accepting == 0 &&
             ksocknal_find_connecting_route_locked(peer) == NULL) {
                 /* ksnp_tx_queue is queued on a conn on successful
                  * connection */
@@ -1850,14 +1844,14 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer)
                                 break;
                         default:
                                 LCONSOLE_WARN("An unexpected network error "
-                                              "occurred with %u.%u.%u.%u: %d.\n",
+                                              "occurred with %u.%u.%u.%u: %d\n",
                                               HIPQUAD(conn->ksnc_ipaddr),
                                               SOCK_ERROR(conn->ksnc_sock));
                                 break;
                         }
 
                         /* Something (e.g. failed keepalive) set the socket error */
-                        CERROR ("Socket error %d: %s %p %d.%d.%d.%d\n",
+                        CDEBUG(D_HA, "Socket error %d: %s %p %d.%d.%d.%d\n",
                                 SOCK_ERROR(conn->ksnc_sock), 
                                 libcfs_id2str(peer->ksnp_id),
                                 conn, HIPQUAD(conn->ksnc_ipaddr));
index ebac4c1..e9f0011 100644 (file)
@@ -77,8 +77,8 @@ CFS_MODULE_PARM(keepalive_intvl, "i", int, 0644,
                 "seconds between probes");
 
 #if CPU_AFFINITY
-static int irq_affinity = SOCKNAL_IRQ_AFFINITY;
-CFS_MODULE_PARM(irq_affinity, "i", int, 0644,
+static int enable_irq_affinity = SOCKNAL_IRQ_AFFINITY;
+CFS_MODULE_PARM(enable_irq_affinity, "i", int, 0644,
                 "enable IRQ affinity");
 #endif
 
@@ -107,7 +107,7 @@ ksock_tunables_t ksocknal_tunables = {
         .ksnd_zc_min_frag     = &zc_min_frag,
 #endif
 #if CPU_AFFINITY
-        .ksnd_irq_affinity    = &irq_affinity,
+        .ksnd_irq_affinity    = &enable_irq_affinity,
 #endif
 };
 
index c900144..19861a9 100644 (file)
@@ -9,5 +9,5 @@ modulenet_DATA = kviblnd$(KMODEXT)
 endif
 endif
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
 DIST_SOURCES = $(kviblnd-objs:%.o=%.c) viblnd.h viblnd_wire.h
index 9fcac45..f379b46 100644 (file)
@@ -521,23 +521,6 @@ kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
         list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
 }
 
-static inline __u64
-kibnal_page2phys (struct page *p)
-{
-#if IBNAL_32BIT_PAGE2PHYS
-        CLASSERT (sizeof(typeof(page_to_phys(p))) == 4);
-        CLASSERT (sizeof(unsigned long) == 4);
-        /* page_to_phys returns a 32 bit physical address.  This must be a 32
-         * bit machine with <= 4G memory and we must ensure we don't sign
-         * extend when converting to 64 bits. */
-        return (unsigned long)page_to_phys(p);
-#else
-        CLASSERT (sizeof(typeof(page_to_phys(p))) == 8);
-        /* page_to_phys returns a 64 bit physical address :) */
-        return page_to_phys(p);
-#endif
-}
-
 #if IBNAL_VOIDSTAR_SGADDR
 # if CONFIG_HIGHMEM
 #  if CONFIG_X86 && CONFIG_HIGHMEM4G
index 64dbad4..9fa2a8b 100644 (file)
@@ -521,7 +521,7 @@ kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page,
 
         /* Try to create an address that adaptor-tavor will munge into a valid
          * network address, given how it maps all phys mem into 1 region */
-        addr = kibnal_page2phys(page) + page_offset + PAGE_OFFSET;
+        addr = lnet_page2phys(page) + page_offset + PAGE_OFFSET;
 
         /* NB this relies entirely on there being a single region for the whole
          * of memory, since "high" memory will wrap in the (void *) cast! */
@@ -749,7 +749,7 @@ kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd,
                         return -EFAULT;
                 }
 
-                tx->tx_pages[npages++] = kibnal_page2phys(page);
+                tx->tx_pages[npages++] = lnet_page2phys(page);
 
                 fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
                 vaddr += fragnob;
@@ -806,7 +806,7 @@ kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd,
                         return -EINVAL;
                 }
 
-                tx->tx_pages[npages++] = kibnal_page2phys(kiov->kiov_page);
+                tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page);
                 resid -= kiov->kiov_len;
                 kiov++;
                 nkiov--;
index d00f1fd..21f5548 100644 (file)
@@ -48,5 +48,5 @@ install-data-hook: $(install_data_hook)
 
 EXTRA_DIST := Info.plist
 
-MOSTLYCLEANFILES := *.o *.ko *.mod.c linux-*.c linux/*.o darwin/*.o libcfs
+MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs
 DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h
index e54cd51..ba382c6 100644 (file)
@@ -45,7 +45,8 @@ unsigned int libcfs_subsystem_debug = ~0 - (S_LNET | S_LND);
 EXPORT_SYMBOL(libcfs_subsystem_debug);
 
 unsigned int libcfs_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
-                             D_RPCTRACE | D_VFSTRACE);
+                             D_RPCTRACE | D_VFSTRACE | D_CONFIG | D_IOCTL |
+                             D_CONSOLE);
 EXPORT_SYMBOL(libcfs_debug);
 
 unsigned int libcfs_printk;
index f16aa25..bcbbf5a 100644 (file)
@@ -87,7 +87,7 @@ static struct ctl_table lnet_table[] = {
          sizeof(lnet_upcall), 0644, NULL, &proc_dostring,
          &sysctl_string},
         {PSDEV_LNET_MEMUSED, "memused", (int *)&libcfs_kmemory.counter,
-         sizeof(int), 0644, NULL, &proc_dointvec},
+         sizeof(int), 0444, NULL, &proc_dointvec},
         {PSDEV_LNET_CATASTROPHE, "catastrophe", &libcfs_catastrophe,
          sizeof(int), 0444, NULL, &proc_dointvec},
         {0}
@@ -282,7 +282,7 @@ int insert_proc(void)
         ent->write_proc = trace_write_debug_mb;
         ent->read_proc = trace_read_debug_mb;
 
-        proc_symlink("sys/portals", NULL, "/proc/sys/lnet");
+        proc_symlink("sys/portals", NULL, "lnet");
 
         return 0;
 }
index ef40672..9fd8cff 100644 (file)
@@ -215,7 +215,7 @@ static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *a
 
         if (libcfs_ioctl_getdata(buf, buf + 800, (void *)arg)) {
                 CERROR("PORTALS ioctl: data error\n");
-                return (-EINVAL);
+                RETURN(-EINVAL);
         }
         data = (struct libcfs_ioctl_data *)buf;
 
index 1aaab9e..77adaae 100644 (file)
@@ -82,12 +82,14 @@ static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
         list_move_tail(&tage->linkage, queue);
 }
 
-static int tage_invariant(struct trace_page *tage)
+static void LASSERT_TAGE_INVARIANT(struct trace_page *tage)
 {
-        return (tage != NULL &&
-                tage->page != NULL &&
-                tage->used <= CFS_PAGE_SIZE &&
-                cfs_page_count(tage->page) > 0);
+        LASSERT(tage != NULL);
+        LASSERT(tage->page != NULL);
+        LASSERTF(tage->used <= CFS_PAGE_SIZE, "used = %u, PAGE_SIZE %lu\n",
+                 tage->used, CFS_PAGE_SIZE);
+        LASSERTF(cfs_page_count(tage->page) > 0, "count = %d\n",
+                 cfs_page_count(tage->page));
 }
 
 /* return a page that has 'len' bytes left at the end */
@@ -201,7 +203,13 @@ void libcfs_debug_msg(int subsys, int mask, char *file, const char *fn,
         debug_buf = cfs_page_address(tage->page) + tage->used + known_size;
 
         max_nob = CFS_PAGE_SIZE - tage->used - known_size;
-        LASSERT(max_nob > 0);
+        if (max_nob <= 0) {
+                printk(KERN_EMERG "negative max_nob: %i\n", max_nob);
+                debug_buf = format;
+                needed = strlen(format);
+                mask |= D_ERROR;
+                goto out;
+        }
         va_start(ap, format);
         needed = vsnprintf(debug_buf, max_nob, format, ap);
         va_end(ap);
@@ -261,7 +269,7 @@ static void collect_pages_on_cpu(void *info)
         tcd->tcd_cur_pages = 0;
         if (pc->pc_want_daemon_pages) {
                 list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
-                CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
+                CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                 tcd->tcd_cur_daemon_pages = 0;
         }
         spin_unlock(&pc->pc_lock);
@@ -293,7 +301,7 @@ static void put_pages_back_on_cpu(void *info)
         spin_lock(&pc->pc_lock);
         list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
 
-                LASSERT(tage_invariant(tage));
+                LASSERT_TAGE_INVARIANT(tage);
 
                 if (tage->cpu != smp_processor_id())
                         continue;
@@ -330,7 +338,7 @@ static void put_pages_on_daemon_list_on_cpu(void *info)
         spin_lock(&pc->pc_lock);
         list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
 
-                LASSERT(tage_invariant(tage));
+                LASSERT_TAGE_INVARIANT(tage);
 
                 if (tage->cpu != smp_processor_id())
                         continue;
@@ -344,7 +352,7 @@ static void put_pages_on_daemon_list_on_cpu(void *info)
                         LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                         victim = tage_from_list(tcd->tcd_daemon_pages.next);
 
-                        LASSERT(tage_invariant(victim));
+                        LASSERT_TAGE_INVARIANT(victim);
 
                         list_del(&victim->linkage);
                         tage_free(victim);
@@ -375,7 +383,7 @@ void trace_debug_print(void)
                 char *p, *file, *fn;
                 cfs_page_t *page;
 
-                LASSERT(tage_invariant(tage));
+                LASSERT_TAGE_INVARIANT(tage);
 
                 page = tage->page;
                 p = cfs_page_address(page);
@@ -430,7 +438,7 @@ int tracefile_dump_all_pages(char *filename)
         CFS_MMSPACE_OPEN;
         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
 
-                LASSERT(tage_invariant(tage));
+                LASSERT_TAGE_INVARIANT(tage);
 
                 rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                     tage->used, cfs_filp_poff(filp));
@@ -465,7 +473,7 @@ void trace_flush_pages(void)
         collect_pages(&pc);
         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
 
-                LASSERT(tage_invariant(tage));
+                LASSERT_TAGE_INVARIANT(tage);
 
                 list_del(&tage->linkage);
                 tage_free(tage);
@@ -560,7 +568,7 @@ static int tracefiled(void *arg)
 
                 /* mark the first header, so we can sort in chunks */
                 tage = tage_from_list(pc.pc_pages.next);
-                LASSERT(tage_invariant(tage));
+                LASSERT_TAGE_INVARIANT(tage);
 
                 hdr = cfs_page_address(tage->page);
                 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
@@ -568,7 +576,7 @@ static int tracefiled(void *arg)
                 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                         static loff_t f_pos;
 
-                        LASSERT(tage_invariant(tage));
+                        LASSERT_TAGE_INVARIANT(tage);
 
                         if (f_pos >= tracefile_size)
                                 f_pos = 0;
@@ -661,7 +669,7 @@ static void trace_cleanup_on_cpu(void *info)
         tcd->tcd_shutting_down = 1;
 
         list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
-                LASSERT(tage_invariant(tage));
+                LASSERT_TAGE_INVARIANT(tage);
 
                 list_del(&tage->linkage);
                 tage_free(tage);
index bb26d58..91e7f14 100644 (file)
@@ -1217,6 +1217,9 @@ LNetNIFini()
 
         the_lnet.ln_refcount--;
         if (the_lnet.ln_refcount == 0) {
+
+                LASSERT (!the_lnet.ln_niinit_self);
+
                 lnet_proc_fini();
                 lnet_acceptor_stop();
                 lnet_destroy_routes();
index c0606b2..f2f31bb 100644 (file)
@@ -42,5 +42,5 @@ install-data-hook: $(install_data_hook)
 
 EXTRA_DIST := Info.plist
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c lnet
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ lnet
 DIST_SOURCES = $(lnet-objs:%.o=%.c)
index c58aa0b..03dc839 100644 (file)
@@ -170,7 +170,7 @@ lnet_notify (lnet_ni_t *ni, lnet_nid_t gateway_nid, int alive, time_t when)
                 /* userland notified me: notify LND? */
                 ni = lp->lp_ni;
                 if (ni->ni_lnd->lnd_notify != NULL) {
-                        ni->ni_lnd->lnd_notify(ni, gateway_nid, alive);
+                        (ni->ni_lnd->lnd_notify)(ni, gateway_nid, alive);
                 }
         } else {
                 /* LND notified me: */
@@ -224,6 +224,7 @@ lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
        lnet_remotenet_t    *rnet2;
        lnet_route_t        *route;
        lnet_route_t        *route2;
+        lnet_ni_t           *ni;
         int                  add_route;
         int                  rc;
 
@@ -312,10 +313,19 @@ lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
         }
         
         if (add_route) {
+                ni = route->lr_gateway->lp_ni;
+                lnet_ni_addref_locked(ni);
+                
                 LASSERT (rc == 0);
                 list_add_tail(&route->lr_list, &rnet2->lrn_routes);
                 the_lnet.ln_remote_nets_version++;
                 LNET_UNLOCK();
+
+                /* XXX Assume alive */
+                if (ni->ni_lnd->lnd_notify != NULL)
+                        (ni->ni_lnd->lnd_notify)(ni, gateway, 1);
+
+                lnet_ni_decref(ni);
         } else {
                 lnet_peer_decref_locked(route->lr_gateway);
                 LNET_UNLOCK();
index 2300c2d..0f34884 100644 (file)
@@ -15,6 +15,6 @@ endif
 endif
 endif
 
-MOSTLYCLEANFILES = *.o *.ko *.mod.c pingsrv.c pingcli.c spingsrv.c spingcli.c
+MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ pingsrv.c pingcli.c spingsrv.c spingcli.c
 DIST_SOURCES = ping_srv.c ping_cli.c sping_srv.c sping_cli.c ping.h 
 #ut_cli.c ut_srv.c ut.h
index dd5ad6f..dc60e7f 100644 (file)
@@ -25,6 +25,7 @@ lnd_t               the_ptllnd = {
         .lnd_send       = ptllnd_send,
         .lnd_recv       = ptllnd_recv,
         .lnd_eager_recv = ptllnd_eager_recv,
+        .lnd_notify     = ptllnd_notify,
         .lnd_wait       = ptllnd_wait,
 };
 
index e0a46c5..0a2287e 100644 (file)
@@ -164,7 +164,9 @@ int ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
                 unsigned int offset, unsigned int mlen, unsigned int rlen);
 int ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
                       void **new_privatep);
+
 ptllnd_tx_t *ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob);
+void ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive);
 void ptllnd_wait(lnet_ni_t *ni, int milliseconds);
 void ptllnd_check_sends(ptllnd_peer_t *peer);
 void ptllnd_destroy_peer(ptllnd_peer_t *peer);
index e7e369e..dfa2ce5 100644 (file)
@@ -147,6 +147,28 @@ ptllnd_find_peer(lnet_ni_t *ni, lnet_nid_t nid, int create)
         return plp;
 }
 
+void
+ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive)
+{
+        ptllnd_peer_t *peer;
+        /* Installed as the_ptllnd.lnd_notify.  This is only actually used to
+         * connect to routers at startup, so !alive is treated as a bug. */
+        if (!alive) {
+                LBUG();
+                return;
+        }
+        /* look up the peer for 'nid'; the 1 is the 'create' argument */
+        peer = ptllnd_find_peer(ni, nid, 1);
+        if (peer == NULL)
+                return;
+
+        /* wait for the peer to reply: loop until plp_recvd_hello is set */
+        while (!peer->plp_recvd_hello)
+                ptllnd_wait(ni, -1);
+        /* presumably drops the ref from ptllnd_find_peer() - TODO confirm */
+        ptllnd_peer_decref(peer);
+}
+
 ptllnd_tx_t *
 ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob)
 {
index 93d63e6..f1112a9 100644 (file)
@@ -75,10 +75,16 @@ tcpnal_env_param (char *name, int *val)
 int
 tcpnal_set_global_params (void)
 {
-        return  tcpnal_env_param("TCPLND_PORT", 
+        return  tcpnal_env_param("TCPNAL_PORT",
                                 &tcpnal_acceptor_port) &&
-                tcpnal_env_param("TCPLND_BUFFER_SIZE",   
+                tcpnal_env_param("TCPLND_PORT",
+                                &tcpnal_acceptor_port) &&
+                tcpnal_env_param("TCPNAL_BUFFER_SIZE",
+                                 &tcpnal_buffer_size) &&
+                tcpnal_env_param("TCPLND_BUFFER_SIZE",
                                  &tcpnal_buffer_size) &&
+                tcpnal_env_param("TCPNAL_NAGLE",
+                                 &tcpnal_nagle) &&
                 tcpnal_env_param("TCPLND_NAGLE",
                                  &tcpnal_nagle);
 }
@@ -367,6 +373,7 @@ connection force_tcp_connection(manager    m,
     int                fd;
     int                option;
     int                rc;
+    int                sz;
 
     pthread_mutex_lock(&m->conn_lock);
 
@@ -382,6 +389,7 @@ connection force_tcp_connection(manager    m,
     memset(&locaddr, 0, sizeof(locaddr)); 
     locaddr.sin_family = AF_INET; 
     locaddr.sin_addr.s_addr = INADDR_ANY;
+    locaddr.sin_port = htons(m->port);
 
 #if 1 /* tcpnal connects from a non-privileged port */
     fd = socket(AF_INET, SOCK_STREAM, 0);
@@ -399,6 +407,16 @@ connection force_tcp_connection(manager    m,
             goto out;
     } 
 
+    if (m->port != 0) {
+            /* Bind all subsequent connections to the same port */
+            rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr));
+            if (rc != 0) {
+                    perror("Error binding port");
+                    close(fd);
+                    goto out;
+            }
+    }
+    
     rc = connect(fd, (struct sockaddr *)&addr,
                  sizeof(struct sockaddr_in));
     if (rc != 0) {
@@ -406,6 +424,18 @@ connection force_tcp_connection(manager    m,
             close(fd);
             goto out;
     }
+
+    sz = sizeof(locaddr);
+    rc = getsockname(fd, (struct sockaddr *)&locaddr, &sz);
+    if (rc != 0) {
+            perror ("Error on getsockname");
+            close(fd);
+            goto out;
+    }
+
+    if (m->port == 0)
+            m->port = ntohs(locaddr.sin_port);
+    
 #else
     for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
             fd = socket(AF_INET, SOCK_STREAM, 0);
@@ -565,6 +595,7 @@ manager init_connections(int (*input)(void *, void *), void *a)
     m->connections = hash_create_table(compare_connection,connection_key);
     m->handler = input;
     m->handler_arg = a;
+    m->port = 0;                                /* set on first connection */
     pthread_mutex_init(&m->conn_lock, 0);
 
     return m;
index ba43cd0..0c4718e 100644 (file)
 typedef struct manager {
     table           connections;
     pthread_mutex_t conn_lock; /* protect connections table */
-#if 0                                           /* we don't accept connections */
+#if 0                          /* we don't accept connections */
     int             bound;
     io_handler      bound_handler;
 #endif
     int           (*handler)(void *, void *);
     void           *handler_arg;
-    unsigned short  port;
+    int             port;
 } *manager;
 
 
index bf9e33a..5fd5f46 100644 (file)
@@ -74,6 +74,7 @@ lnd_t the_tcplnd = {
         .lnd_shutdown  = procbridge_shutdown,
         .lnd_send      = tcpnal_send,
         .lnd_recv      = tcpnal_recv,
+        .lnd_notify    = tcpnal_notify,
 };
 int       tcpnal_running;
 
index 3e12de2..204beb1 100644 (file)
@@ -43,6 +43,8 @@ extern void procbridge_wakeup_nal(procbridge p);
 extern int procbridge_startup (lnet_ni_t *);
 extern void procbridge_shutdown (lnet_ni_t *);
 
+extern void tcpnal_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive);
+
 extern int tcpnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
 int tcpnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *cookie,
                 int delayed, unsigned int niov,
index c237ba3..5aac6ef 100644 (file)
 #include <syscall.h>
 #endif
 
+void
+tcpnal_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive)
+{
+        bridge     b = (bridge)ni->ni_data;
+        connection c;
+        /* installed as the_tcplnd.lnd_notify; !alive is treated as a bug */
+        if (!alive) {
+                LBUG();
+        }
+        /* try to connect to 'nid'; a failure is only logged, not returned */
+        c = force_tcp_connection((manager)b->lower, nid, b->local);
+        if (c == NULL)
+                CERROR("Can't create connection to %s\n",
+                       libcfs_nid2str(nid));
+}
+
 /*
  * sends a packet to the peer, after insuring that a connection exists
  */
index be3d48d..9cd3f25 100644 (file)
@@ -13,7 +13,7 @@ endif
 
 libuptlctl_a_SOURCES = portals.c nidstrings.c debug.c l_ioctl.c
 libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS)
-libuptlctl_a_CFLAGS = $(LLCFLAGS)
+libuptlctl_a_CFLAGS = $(LLCFLAGS) -DLUSTRE_UTILS=1
 
 sbin_PROGRAMS = debugctl
 
index 448784e..ef5deda 100644 (file)
@@ -275,7 +275,7 @@ static void print_saved_records(struct list_head *list, FILE *out)
                 list_del(&line->chain);
 
                 hdr = line->hdr;
-                fprintf(out, "%06x:%06x:%u:%u.%06Lu:%u:%u:%u:(%s:%u:%s()) %s",
+                fprintf(out, "%07x:%06x:%u:%u.%06Lu:%u:%u:%u:(%s:%u:%s()) %s",
                         hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id,
                         hdr->ph_sec, (unsigned long long)hdr->ph_usec,
                         hdr->ph_stack, hdr->ph_pid, hdr->ph_extern_pid,
@@ -684,6 +684,8 @@ static struct mod_paths {
         {"ptlrpcs", "lustre/sec"},
         {"ptlrpcs_gss", "lustre/sec/gss"},
         {"ptlrpc", "lustre/ptlrpc"},
+        {"gks", "lustre/sec/gks"},
+        {"gkc", "lustre/sec/gks"},
         {"obdext2", "lustre/obdext2"},
         {"ost", "lustre/ost"},
         {"osc", "lustre/osc"},
index 728e81a..335c05b 100644 (file)
@@ -36,7 +36,7 @@
 #include <lnet/api-support.h>
 #include <lnet/lnetctl.h>
 #include <libcfs/portals_utils.h>
-#include <linux/kdev_t.h>  /* for MKDEV */
+
 
 static ioc_handler_t  do_ioctl;                 /* forward ref */
 static ioc_handler_t *current_ioc_handler = &do_ioctl;
@@ -67,6 +67,11 @@ set_ioc_handler (ioc_handler_t *handler)
                 current_ioc_handler = handler;
 }
 
+/* Catamount has no <linux/kdev_t.h>, so just define it here */
+#ifndef MKDEV
+# define MKDEV(a,b) (((a) << 8) | (b))
+#endif
+
 static int
 open_ioc_dev(int dev_id) 
 {
index 39f9d1c..7a24c3d 100644 (file)
@@ -352,6 +352,50 @@ int jt_ptl_network(int argc, char **argv)
         return -1;
 }
 
+/* Return the number of non-loopback local NIDs, or a negative errno on
+ * failure.  If nid_list != NULL, *nid_list is set to a malloc'ed array of
+ * the NIDs that the caller must free(); it is NULL when none were found. */
+int jt_ptl_get_nids(__u64 **nid_list)
+{
+        struct libcfs_ioctl_data data;
+        int i, count = 0, rc, arraysize = 0;
+        __u64 *nids = NULL, *tmp;
+
+        for (i = 0;; i++) {
+                LIBCFS_IOC_INIT (data);
+                data.ioc_count = i;
+                rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_NI, &data);
+                if (rc < 0) {
+                        /* ENOENT marks the end of the interface list */
+                        if (errno == ENOENT)
+                                break;
+                        rc = -errno;    /* save it: free() may change errno */
+                        free(nids);
+                        return rc;
+                }
+                if (LNET_NETTYP(LNET_NIDNET(data.ioc_nid)) == LOLND)
+                        continue;       /* skip the loopback interface */
+                if (count >= arraysize) {
+                        /* use a temporary: don't leak 'nids' if this fails */
+                        tmp = realloc(nids, (arraysize + 10) * sizeof(__u64));
+                        if (tmp == NULL) {
+                                free(nids);
+                                return -ENOMEM;
+                        }
+                        nids = tmp;
+                        arraysize += 10;
+                }
+                nids[count++] = data.ioc_nid;
+        }
+
+        /* no trimming realloc(): it could fail and lose the NIDs */
+        if (nid_list)
+                *nid_list = nids;
+        else
+                free(nids);
+        return count;
+}
+
 int 
 jt_ptl_list_nids(int argc, char **argv)
 {