Whamcloud - gitweb
Update b1_2 from HEAD (20040309_1747)
author adilger <adilger>
Wed, 10 Mar 2004 01:19:30 +0000 (01:19 +0000)
committer adilger <adilger>
Wed, 10 Mar 2004 01:19:30 +0000 (01:19 +0000)
b=2818, b=2901, b=2663, b=2908, b=2530, b=2464, b=2306 (socknal zc part)

65 files changed:
lnet/archdep.m4
lnet/include/linux/kp30.h
lnet/include/lnet/types.h
lnet/klnds/gmlnd/gmlnd.h
lnet/klnds/gmlnd/gmlnd_api.c
lnet/klnds/gmlnd/gmlnd_cb.c
lnet/klnds/gmlnd/gmlnd_comm.c
lnet/klnds/gmlnd/gmlnd_module.c
lnet/klnds/gmlnd/gmlnd_utils.c
lnet/klnds/qswlnd/qswlnd.c
lnet/klnds/qswlnd/qswlnd.h
lnet/klnds/qswlnd/qswlnd_cb.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/router/router.c
lnet/utils/Makefile.am
lnet/utils/gmlndnid.c
lustre/ChangeLog
lustre/include/linux/lustre_cfg.h
lustre/include/linux/lustre_dlm.h
lustre/include/linux/obd.h
lustre/kernel_patches/series/vanilla-2.4.20
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/llite/file.c
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/lov/lov_obd.c
lustre/mdc/mdc_request.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/obdclass/genops.c
lustre/obdclass/obd_config.c
lustre/obdecho/echo.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_io_24.c
lustre/obdfilter/filter_lvb.c
lustre/osc/osc_request.c
lustre/portals/archdep.m4
lustre/portals/include/linux/kp30.h
lustre/portals/include/portals/types.h
lustre/portals/knals/gmnal/gmnal.h
lustre/portals/knals/gmnal/gmnal_api.c
lustre/portals/knals/gmnal/gmnal_cb.c
lustre/portals/knals/gmnal/gmnal_comm.c
lustre/portals/knals/gmnal/gmnal_module.c
lustre/portals/knals/gmnal/gmnal_utils.c
lustre/portals/knals/qswnal/qswnal.c
lustre/portals/knals/qswnal/qswnal.h
lustre/portals/knals/qswnal/qswnal_cb.c
lustre/portals/knals/socknal/socknal.c
lustre/portals/knals/socknal/socknal.h
lustre/portals/knals/socknal/socknal_cb.c
lustre/portals/router/router.c
lustre/portals/utils/Makefile.am
lustre/portals/utils/gmnalnid.c
lustre/scripts/lustre.spec.in
lustre/tests/.RC_CURRENT.tag
lustre/tests/acceptance-small.sh
lustre/tests/munlink.c
lustre/tests/replay-single.sh
lustre/tests/sanity.sh
lustre/utils/llmount.c

index 3bdaf32..7801957 100644 (file)
@@ -133,8 +133,8 @@ case ${host_cpu} in
 
        powerpc )
        AC_MSG_RESULT($host_cpu)
-        KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
-        KCPPFLAGS='-D__KERNEL__'
+        KCFLAGS='-O2 -g -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
+        KCPPFLAGS='-D__KERNEL__ -DMODULE'
         MOD_LINK=elf32ppclinux
 ;;
 
@@ -338,6 +338,18 @@ AC_SUBST(MOD_LINK)
 AC_SUBST(LINUX25)
 AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
 
+# ---------- Red Hat 2.4.18 has iobuf->dovary --------------
+# But other kernels don't, so add -DHAVE_KIOBUF_DOVARY to CPPFLAGS only if the probe below compiles
+
+AC_MSG_CHECKING([if struct kiobuf has a dovary field])
+AC_TRY_COMPILE([#define __KERNEL__
+               #include <linux/iobuf.h>],
+              [struct kiobuf iobuf;
+               iobuf.dovary = 1;],
+              [AC_MSG_RESULT([yes])
+                CPPFLAGS="$CPPFLAGS -DHAVE_KIOBUF_DOVARY"],
+              [AC_MSG_RESULT([no])])
+
 # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
 # This needs to run after we've defined the KCPPFLAGS
 
index d56a120..c080a57 100644 (file)
@@ -368,13 +368,14 @@ typedef struct {
         struct list_head     kprfd_list;        /* stash in queues (routing target can use) */
         ptl_nid_t            kprfd_target_nid;  /* final destination NID */
         ptl_nid_t            kprfd_gateway_nid; /* gateway NID */
-        int                  kprfd_nob;         /* # message bytes (including header) */
-        int                  kprfd_niov;        /* # message frags (including header) */
-        struct iovec        *kprfd_iov;         /* message fragments */
-        void                *kprfd_router_arg;  // originating NAL's router arg
+        ptl_hdr_t           *kprfd_hdr;         /* header in wire byte order */
+        int                  kprfd_nob;         /* # payload bytes */
+        int                  kprfd_niov;        /* # payload frags */
+        ptl_kiov_t          *kprfd_kiov;        /* payload fragments */
+        void                *kprfd_router_arg;  /* originating NAL's router arg */
         kpr_fwd_callback_t   kprfd_callback;    /* completion callback */
         void                *kprfd_callback_arg; /* completion callback arg */
-        kprfd_scratch_t      kprfd_scratch;    // scratchpad for routing targets
+        kprfd_scratch_t      kprfd_scratch;     /* scratchpad for routing targets */
 } kpr_fwd_desc_t;
 
 typedef void  (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
@@ -477,15 +478,16 @@ kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid
 }
 
 static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid,
-              int nob, int niov, struct iovec *iov,
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
+              int nob, int niov, ptl_kiov_t *kiov, /* payload bytes, frag count, frags */
               kpr_fwd_callback_t callback, void *callback_arg)
 {
         fwd->kprfd_target_nid   = nid;
         fwd->kprfd_gateway_nid  = nid;
+        fwd->kprfd_hdr          = hdr;  /* header in wire byte order */
         fwd->kprfd_nob          = nob;
         fwd->kprfd_niov         = niov;
-        fwd->kprfd_iov          = iov;
+        fwd->kprfd_kiov         = kiov; /* payload fragments */
         fwd->kprfd_callback     = callback;
         fwd->kprfd_callback_arg = callback_arg;
 }
index 7ffe797..d4ca453 100644 (file)
@@ -3,7 +3,13 @@
 
 #ifdef __linux__
 # include <asm/types.h>
-# include <asm/timex.h>
+# if defined(__powerpc__) && !defined(__KERNEL__)
+#  define __KERNEL__
+#  include <asm/timex.h>
+#  undef __KERNEL__
+# else
+#  include <asm/timex.h>
+# endif
 #else
 # include <sys/types.h>
 typedef u_int32_t __u32;
@@ -14,7 +20,7 @@ typedef u_int64_t __u64;
 # include <linux/time.h>
 #else
 # include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday(tv, NULL)
+# define do_gettimeofday(tv) gettimeofday(tv, NULL) /* no ';' here: the caller supplies it */
 #endif
 
 #include <portals/errno.h>
@@ -129,7 +135,7 @@ typedef struct {
         struct timeval     arrival_time;
 
         volatile ptl_seq_t sequence;
-} ptl_event_t;
+} __attribute__((packed)) ptl_event_t;
 #ifdef __CYGWIN__
 #pragma pop
 #endif
index cdde5b7..ad46b90 100644 (file)
@@ -45,6 +45,7 @@
 #include "linux/init.h"
 #include "linux/sem.h"
 #include "linux/vmalloc.h"
+#include "linux/sysctl.h"
 
 #define DEBUG_SUBSYSTEM S_GMNAL
 
 extern  int gmnal_small_msg_size;
 extern  int num_rx_threads;
 extern  int num_stxds;
+extern  int gm_port;
 #define GMNAL_SMALL_MSG_SIZE(a)                a->small_msg_size
 #define GMNAL_IS_SMALL_MESSAGE(n,a,b,c)        gmnal_is_small_msg(n, a, b, c)
 #define GMNAL_MAGIC                            0x1234abcd
+/*
+ *     The gm_port to use for gmnal
+ */
+#define GMNAL_GM_PORT  gm_port
 
 
 /*
@@ -218,6 +224,7 @@ typedef struct _gmnal_data_t {
        gmnal_rxtwe_t   *rxtwe_tail;
        spinlock_t      rxtwe_lock;
        struct  semaphore rxtwe_wait;
+        struct ctl_table_header *sysctl;
 } gmnal_data_t;
 
 /*
@@ -234,11 +241,6 @@ typedef struct _gmnal_data_t {
 extern gmnal_data_t    *global_nal_data;
 
 /*
- *     The gm_port to use for gmnal
- */
-#define GMNAL_GM_PORT  4
-
-/*
  * for ioctl get pid
  */
 #define GMNAL_IOC_GET_GNID 1   
@@ -353,6 +355,8 @@ int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
 
 int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
 
+int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
+
 void *gmnal_cb_malloc(nal_cb_t *, size_t);
 
 void gmnal_cb_free(nal_cb_t *, void *, size_t);
@@ -382,7 +386,7 @@ void  gmnal_fini(void);
                                a->cb_recv_pages = gmnal_cb_recv_pages; \
                                a->cb_read = gmnal_cb_read; \
                                a->cb_write = gmnal_cb_write; \
-                               a->cb_callback = NULL; \
+                               a->cb_callback = gmnal_cb_callback; \
                                a->cb_malloc = gmnal_cb_malloc; \
                                a->cb_free = gmnal_cb_free; \
                                a->cb_map = NULL; \
@@ -418,6 +422,7 @@ void                gmnal_stop_rxthread(gmnal_data_t *);
 void           gmnal_stop_ctthread(gmnal_data_t *);
 void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
 void           gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
+void           gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
 char           *gmnal_gm_error(gm_status_t);
 char           *gmnal_rxevent(gm_recv_event_t*);
 int            gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
index 1cb1317..1442aa7 100644 (file)
 
 #include "gmnal.h"
 
+
+
 gmnal_data_t   *global_nal_data = NULL;
+#define         GLOBAL_NID_STR_LEN      16
+char            global_nid_str[GLOBAL_NID_STR_LEN] = {0};
+
+/*
+ *      Expose the global nid read-only as /proc/sys/gmnal/globalnid
+ */
+#define GMNAL_SYSCTL    201
+#define GMNAL_SYSCTL_GLOBALNID  1
+
+static ctl_table gmnal_sysctl_table[] = {
+        {GMNAL_SYSCTL_GLOBALNID, "globalnid",
+         global_nid_str, GLOBAL_NID_STR_LEN,
+         0444, NULL, &proc_dostring},   /* mode 0444 string entry backed by global_nid_str */
+        { 0 }
+};
+
+
+static ctl_table gmnalnal_top_sysctl_table[] = {
+        {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},    /* "gmnal" directory holding the table above */
+        { 0 }
+};
+
+
+
+
+
+
 /*
  *     gmnal_api_forward
  *     This function takes a pack block of arguments from the NAL API
@@ -193,8 +222,8 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        ptl_pid_t       portals_pid = 0;
 
 
-       CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], 
-              ac_size[%d]\n", interface, ptl_size, ac_size);
+       CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], "
+              "ac_size[%d]\n", interface, ptl_size, ac_size);
 
 
        PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
@@ -255,8 +284,8 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        }
 
 
-       CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], 
-                      name [%s], version [%d]\n", interface, GMNAL_GM_PORT, 
+       CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], "
+                      "name [%s], version [%d]\n", interface, GMNAL_GM_PORT, 
               "gmnal", GM_API_VERSION);
 
        GMNAL_GM_LOCK(nal_data);
@@ -280,15 +309,15 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
                        CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
                        break;
                case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib 
-                              and driver\n");
+                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib "
+                              "and driver\n");
                        break;
                case(GM_OUT_OF_MEMORY):
                        CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
                        break;
                default:
-                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error 
-                              code [%d]\n", gm_status);
+                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error "
+                              "code [%d]\n", gm_status);
                        break;
                }       
                GMNAL_GM_LOCK(nal_data);
@@ -403,6 +432,7 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        }
        CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
        nal_data->gm_global_nid = global_nid;
+        snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
 
 /*
        pid = gm_getpid();
@@ -429,6 +459,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
                return(NULL);
                
        }
+        nal_data->sysctl = NULL;
+        nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
+
        
        CDEBUG(D_INFO, "gmnal_init finished\n");
        global_nal_data = nal->nal_data;
@@ -459,6 +492,8 @@ void gmnal_fini()
        gm_close(nal_data->gm_port);
        gm_finalize();
        GMNAL_GM_UNLOCK(nal_data);
+        if (nal_data->sysctl)
+                unregister_sysctl_table (nal_data->sysctl);
        PORTAL_FREE(nal, sizeof(nal_t));        
        PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
        PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
index e055242..1f28746 100644 (file)
@@ -35,8 +35,8 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], 
-              niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
+       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+              "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
               nal_cb, private, cookie, niov, iov, mlen, rlen);
 
        switch(srxd->type) {
@@ -64,10 +64,11 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
        struct iovec    *iovec = NULL, *iovec_dup = NULL;
        int             i = 0;
+       ptl_kiov_t      *kiov_dup = kiov;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], 
-              cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+              "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
               nal_cb, private, cookie, kniov, kiov, mlen, rlen);
 
        if (srxd->type == GMNAL_SMALL_MESSAGE) {
@@ -99,6 +100,10 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                CDEBUG(D_INFO, "calling gmnal_small_rx\n");
                status = gmnal_small_rx(nal_cb, private, cookie, kniov, 
                                         iovec_dup, mlen, rlen);
+               for (i=0; i<kniov; i++) {
+                       kunmap(kiov_dup->kiov_page);
+                       kiov_dup++;
+               }
                PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
        }
                
@@ -126,6 +131,7 @@ int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                niov, iov, len);
        } else {
                CDEBUG(D_ERROR, "Large message send it is not supported\n");
+               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
                gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, 
                                niov, iov, len);
@@ -140,6 +146,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int     i = 0;
        gmnal_data_t    *nal_data;
        struct  iovec   *iovec = NULL, *iovec_dup = NULL;
+       ptl_kiov_t      *kiov_dup = kiov;
 
        CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
        nal_data = nal_cb->nal_data;
@@ -181,6 +188,10 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec, len);
        }
+       for (i=0; i<kniov; i++) {
+               kunmap(kiov_dup->kiov_page);
+               kiov_dup++;
+       }
        PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
        return(PTL_OK);
 }
@@ -199,6 +210,18 @@ int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst,
        return(PTL_OK);
 }
 
+/* Run the event queue's callback on ev when one is set; always PTL_OK. */
+int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, 
+                      ptl_event_t *ev)
+{
+       if (eq->event_callback == NULL)
+               return(PTL_OK);
+
+       CDEBUG(D_INFO, "found callback\n");
+       eq->event_callback(ev);
+       return(PTL_OK);
+}
+
 void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
 {
        void *ptr = NULL;
index a0d3530..1bcd9bd 100644 (file)
@@ -203,14 +203,14 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        gmnal_msghdr = (gmnal_msghdr_t*)buffer;
        portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE);
 
-       CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], 
-              type [%d], length [%d], buffer [%p]\n",
+       CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], "
+              "type [%d], length [%d], buffer [%p]\n",
               snode, sport, type, length, buffer);
-       CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], 
-              gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
+       CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
+              "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
               gmnal_msghdr->magic, gmnal_msghdr->type);
-       CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], 
-              dest_node ["LPD64"]\n", portals_hdr->src_nid, 
+       CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
+              "dest_node ["LPD64"]\n", portals_hdr->src_nid, 
               portals_hdr->dest_nid);
 
        
@@ -321,6 +321,7 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
 
        if (!private) {
                CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
+               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
 
@@ -343,7 +344,6 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
         */
        CDEBUG(D_PORTALS, "calling lib_finalize\n");
        lib_finalize(nal_cb, private, cookie, PTL_OK);
-
        /*
         *      return buffer so it can be used again
         */
@@ -377,9 +377,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        unsigned int    local_nid;
        gm_status_t     gm_status = GM_SUCCESS;
 
-       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] 
-              hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] 
-              iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
+       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+              "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
+              "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
@@ -440,9 +440,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        stxd->msg_size = tot_size;
 
 
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] 
-              gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] 
-              stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+              "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
               stxd->msg_size, global_nid, local_nid, stxd);
 
        GMNAL_GM_LOCK(nal_data);
@@ -493,8 +493,8 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                /*
                 *      do a resend on the dropped ones
                 */
-                       CDEBUG(D_ERROR, "send stxd [%p] was dropped 
-                              resending\n", context);
+                       CDEBUG(D_ERROR, "send stxd [%p] was dropped "
+                              "resending\n", context);
                        GMNAL_GM_LOCK(nal_data);
                        gm_send_to_peer_with_callback(nal_data->gm_port, 
                                                      stxd->buffer, 
@@ -569,6 +569,11 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                case(GM_YP_NO_MATCH):
                default:
                        CDEBUG(D_ERROR, "Unknown send error\n");
+                gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
+                                      stxd->gm_target_node, GMNAL_GM_PORT,
+                                      gmnal_resume_sending_callback, context);
+                return;
+
        }
 
        /*
@@ -588,10 +593,22 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
        }
        gmnal_return_stxd(nal_data, stxd);
        lib_finalize(nal_cb, stxd, cookie, PTL_OK);
-
        return;
 }
 
+/*
+ *     Completion callback passed to gm_resume_sending() after a send
+ *     error on the port, so that future sends can complete.  context is
+ *     the failed send's gmnal_stxd_t, handed to gmnal_return_stxd().
+ *     NOTE(review): the send's cookie is not finalized here - confirm.
+ */
+void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context,
+                                 gm_status_t status)
+{
+        gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
+        CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
+        gmnal_return_stxd(stxd->nal_data, stxd);
+}
 
 
 void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, 
@@ -611,8 +628,8 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
                                              context);
                GMNAL_GM_LOCK(nal_data);
        } else {
-               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is 
-                      [%d][%s]\n", stxd, status, gmnal_gm_error(status));
+               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
+                      "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
        }
 
 
@@ -644,9 +661,9 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             niov_dup;
 
 
-       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] 
-              hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], 
-              iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
+       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+              "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
+              "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        if (nal_cb)
@@ -729,8 +746,8 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                               iov->iov_base, iov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] 
-                              for memory [%p] len ["LPSZ"]\n", 
+                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               iov->iov_base, iov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
@@ -806,12 +823,13 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        gmnal_msghdr_t  *msghdr = NULL;
        gm_status_t     gm_status;
 
-       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], 
-              cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+              "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
                nal_cb, private, cookie, nriov, riov, mlen, rlen);
 
        if (!srxd) {
                CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
+               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
 
@@ -846,8 +864,8 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                               riov->iov_base, riov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] 
-                              for memory [%p] len ["LPSZ"]\n", 
+                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               riov->iov_base, riov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
@@ -902,8 +920,8 @@ gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov,
 
        int     ncalls = 0;
 
-       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], 
-              nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
+       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], "
+              "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
 
 
        ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
@@ -958,8 +976,8 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
                                            srxd->gm_source_node, 
                                            &source_node) != GM_SUCCESS) {
 
-                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] 
-                              to local node_id\n", srxd->gm_source_node);
+                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
+                              "to local node_id\n", srxd->gm_source_node);
                        GMNAL_GM_UNLOCK(nal_data);
                        return(GMNAL_STATUS_FAIL);
                }
@@ -1201,9 +1219,9 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
        stxd->msg_size= sizeof(gmnal_msghdr_t);
 
 
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] 
-              gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] 
-              stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+              "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
               stxd->msg_size, srxd->gm_source_node, local_nid, stxd);
        GMNAL_GM_LOCK(nal_data);
        stxd->gm_priority = GM_LOW_PRIORITY;
index 1260629..31f6819 100644 (file)
@@ -30,6 +30,7 @@ int gmnal_small_msg_size = 525312;
  */
 int num_rx_threads = -1;
 int num_stxds = 5;
+int gm_port = 4;
 
 ptl_handle_ni_t        kgmnal_ni;
 
@@ -139,6 +140,7 @@ EXPORT_SYMBOL(kgmnal_ni);
 MODULE_PARM(gmnal_small_msg_size, "i");
 MODULE_PARM(num_rx_threads, "i");
 MODULE_PARM(num_stxds, "i");
+MODULE_PARM(gm_port, "i");
 
 MODULE_AUTHOR("Morgan Doyle");
 
index 55606f3..6a52319 100644 (file)
@@ -117,8 +117,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -131,8 +131,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
 
                txd->next = nal_data->stxd;
                nal_data->stxd = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
        }
 
        for (i=0; i<=nrxt_stx; i++) {
@@ -146,8 +146,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -160,8 +160,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
 
                txd->next = nal_data->rxt_stxd;
                nal_data->rxt_stxd = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
        }
 
        /*
@@ -187,8 +187,8 @@ gmnal_free_txd(gmnal_data_t *nal_data)
        CDEBUG(D_TRACE, "gmnal_free_small tx\n");
 
        while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
                _txd = txd;
                txd = txd->next;
                GMNAL_GM_LOCK(nal_data);
@@ -198,8 +198,8 @@ gmnal_free_txd(gmnal_data_t *nal_data)
        }
         txd = nal_data->rxt_stxd;
        while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
                _txd = txd;
                txd = txd->next;
                GMNAL_GM_LOCK(nal_data);
@@ -392,22 +392,22 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
 #if 0
                PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], 
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
                }
-               CDEBUG(D_NET, "Calling gm_register_memory with port [%p] 
-                      rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
+               CDEBUG(D_NET, "Calling gm_register_memory with port [%p] "
+                      "rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
                       rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_LOCK(nal_data);
                gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, 
                                               GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (gm_status != GM_SUCCESS) {
-                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],
-                              index [%d]\n", rxbuffer, i);
+                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p], "
+                              "index [%d]\n", rxbuffer, i);
                        switch(gm_status) {
                                case(GM_FAILURE):
                                        CDEBUG(D_ERROR, "GM_FAILURE\n");
@@ -432,8 +432,8 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],"
+                              " size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -447,15 +447,15 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                if (gm_hash_insert(nal_data->srxd_hash, 
                                   (void*)rxbuffer, (void*)rxd)) {
 
-                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] 
-                              for rxbuffer[%p]\n", rxd, rxbuffer);
+                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
+                              "for rxbuffer[%p]\n", rxd, rxbuffer);
                        return(GMNAL_STATUS_FAIL);
                }
 
                rxd->next = nal_data->srxd;
                nal_data->srxd = rxd;
-               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], 
-                      size [%d]\n", rxd, rxd->buffer, rxd->size);
+               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], "
+                      "size [%d]\n", rxd, rxd->buffer, rxd->size);
        }
 
        return(GMNAL_STATUS_OK);
@@ -623,6 +623,8 @@ gmnal_stop_ctthread(gmnal_data_t *nal_data)
 char * 
 gmnal_gm_error(gm_status_t status)
 {
+       return(gm_strerror(status));
+
        switch(status) {
                case(GM_SUCCESS):
                        return("SUCCESS");
@@ -972,7 +974,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                }
                spin_lock(&nal_data->rxtwe_lock);
                if (nal_data->rxtwe_head) {
-                       CDEBUG(D_WARNING, "Got a work entry\n");
+                       CDEBUG(D_INFO, "Got a work entry\n");
                        we = nal_data->rxtwe_head;
                        nal_data->rxtwe_head = we->next;
                        if (!nal_data->rxtwe_head)
@@ -983,7 +985,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                spin_unlock(&nal_data->rxtwe_lock);
        } while (!we);
 
-       CDEBUG(D_WARNING, "Returning we[%p]\n", we);
+       CDEBUG(D_INFO, "Returning we[%p]\n", we);
        return(we);
 }
 
index 90c9a95..3b3b5d4 100644 (file)
@@ -348,10 +348,10 @@ kqswnal_finalise (void)
                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
 
-                       /* If krx_pages[0] got allocated, it got mapped.
+                       /* If krx_kiov[0].kiov_page got allocated, it got mapped.  
                         * NB subsequent pages get merged */
 
-                       if (krx->krx_pages[0] != NULL)
+                       if (krx->krx_kiov[0].kiov_page != NULL)
                                ep_dvma_unload(kqswnal_data.kqn_ep,
                                               kqswnal_data.kqn_ep_rx_nmh,
                                               &krx->krx_elanbuffer);
@@ -416,8 +416,8 @@ kqswnal_finalise (void)
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
 
                        for (j = 0; j < krx->krx_npages; j++)
-                               if (krx->krx_pages[j] != NULL)
-                                       __free_page (krx->krx_pages[j]);
+                               if (krx->krx_kiov[j].kiov_page != NULL)
+                                       __free_page (krx->krx_kiov[j].kiov_page);
                }
 
                PORTAL_FREE(kqswnal_data.kqn_rxds,
@@ -709,18 +709,19 @@ kqswnal_initialise (void)
                LASSERT (krx->krx_npages > 0);
                for (j = 0; j < krx->krx_npages; j++)
                {
-                       krx->krx_pages[j] = alloc_page(GFP_KERNEL);
-                       if (krx->krx_pages[j] == NULL)
-                       {
+                       struct page *page = alloc_page(GFP_KERNEL);
+                       
+                       if (page == NULL) {
                                kqswnal_finalise ();
                                return (-ENOMEM);
                        }
 
-                       LASSERT(page_address(krx->krx_pages[j]) != NULL);
+                       krx->krx_kiov[j].kiov_page = page;
+                       LASSERT(page_address(page) != NULL);
 
 #if MULTIRAIL_EKC
                        ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-                                    page_address(krx->krx_pages[j]),
+                                    page_address(page),
                                     PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
                                     elan_page_idx, &all_rails, &elanbuffer);
                        
@@ -736,7 +737,7 @@ kqswnal_initialise (void)
 #else
                        elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
                                              kqswnal_data.kqn_eprxdmahandle,
-                                             page_address(krx->krx_pages[j]),
+                                             page_address(page),
                                              PAGE_SIZE, elan_page_idx,
                                              &elanbuffer);
                        if (j == 0)
index b1b9a45..5ebf30a 100644 (file)
@@ -153,8 +153,7 @@ typedef struct
         int              krx_rpc_reply_sent;    /* rpc reply sent */
         atomic_t         krx_refcount;          /* how to tell when rpc is done */
         kpr_fwd_desc_t   krx_fwd;               /* embedded forwarding descriptor */
-        struct page     *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */
-        struct iovec     krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */
+        ptl_kiov_t       krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
 }  kqswnal_rx_t;
 
 typedef struct
index 478c25f..157dc70 100644 (file)
@@ -775,7 +775,7 @@ kqswnal_dma_reply (kqswnal_tx_t *ktx, int nfrag,
                    int offset, int nob)
 {
         kqswnal_rx_t       *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-        char               *buffer = (char *)page_address(krx->krx_pages[0]);
+        char               *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
         kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
         int                 rc;
 #if MULTIRAIL_EKC
@@ -1008,7 +1008,7 @@ kqswnal_sendmsg (nal_cb_t     *nal,
         }
         memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
 #endif
-        
+
         if (kqswnal_data.kqn_optimized_gets &&
             type == PTL_MSG_GET &&              /* doing a GET */
             nid == targetnid) {                 /* not forwarding */
@@ -1167,7 +1167,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
 {
         int             rc;
         kqswnal_tx_t   *ktx;
-        struct iovec   *iov = fwd->kprfd_iov;
+        ptl_kiov_t     *kiov = fwd->kprfd_kiov;
         int             niov = fwd->kprfd_niov;
         int             nob = fwd->kprfd_nob;
         ptl_nid_t       nid = fwd->kprfd_gateway_nid;
@@ -1177,11 +1177,9 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         LBUG ();
 #endif
         /* The router wants this NAL to forward a packet */
-        CDEBUG (D_NET, "forwarding [%p] to "LPX64", %d frags %d bytes\n",
+        CDEBUG (D_NET, "forwarding [%p] to "LPX64", payload: %d frags %d bytes\n",
                 fwd, nid, niov, nob);
 
-        LASSERT (niov > 0);
-        
         ktx = kqswnal_get_idle_tx (fwd, 0);
         if (ktx == NULL)        /* can't get txd right now */
                 return;         /* fwd will be scheduled when tx desc freed */
@@ -1195,44 +1193,44 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
                 goto failed;
         }
 
-        if (nob > KQSW_NRXMSGBYTES_LARGE) {
-                CERROR ("Can't forward [%p] to "LPX64
-                        ": size %d bigger than max packet size %ld\n",
-                        fwd, nid, nob, (long)KQSW_NRXMSGBYTES_LARGE);
-                rc = -EMSGSIZE;
-                goto failed;
-        }
+        /* copy hdr into pre-mapped buffer */
+        memcpy(ktx->ktx_buffer, fwd->kprfd_hdr, sizeof(ptl_hdr_t));
+        ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
 
-        ktx->ktx_port    = (nob <= (KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) ?
+        ktx->ktx_port    = (nob <= KQSW_SMALLPAYLOAD) ?
                            EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
         ktx->ktx_nid     = nid;
         ktx->ktx_state   = KTX_FORWARDING;
         ktx->ktx_args[0] = fwd;
+        ktx->ktx_nfrag   = ktx->ktx_firsttmpfrag = 1;
 
-        if ((kqswnal_data.kqn_copy_small_fwd || niov > 1) &&
-            nob <= KQSW_TX_BUFFER_SIZE) 
+        if (nob <= KQSW_TX_MAXCONTIG) 
         {
-                /* send from ktx's pre-mapped contiguous buffer? */
-                lib_copy_iov2buf (ktx->ktx_buffer, niov, iov, 0, nob);
+                /* send payload from ktx's pre-mapped contiguous buffer */
 #if MULTIRAIL_EKC
                 ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-                              0, nob);
+                              0, KQSW_HDR_SIZE + nob);
 #else
                 ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-                ktx->ktx_frags[0].Len = nob;
+                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + nob;
 #endif
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-                ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
+                if (nob > 0)
+                        lib_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE,
+                                          niov, kiov, 0, nob);
         }
         else
         {
-                /* zero copy */
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
-                rc = kqswnal_map_tx_iov (ktx, 0, nob, niov, iov);
+                /* zero copy payload */
+#if MULTIRAIL_EKC
+                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
+                              0, KQSW_HDR_SIZE);
+#else
+                ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
+                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE;
+#endif
+                rc = kqswnal_map_tx_kiov (ktx, 0, nob, niov, kiov);
                 if (rc != 0)
                         goto failed;
-
-                ktx->ktx_wire_hdr = (ptl_hdr_t *)iov[0].iov_base;
         }
 
         rc = kqswnal_launch (ktx);
@@ -1257,7 +1255,7 @@ kqswnal_fwd_callback (void *arg, int error)
 
         if (error != 0)
         {
-                ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+                ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
 
                 CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
                        NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
@@ -1371,8 +1369,9 @@ kqswnal_requeue_rx (kqswnal_rx_t *krx)
 void
 kqswnal_rx (kqswnal_rx_t *krx)
 {
-        ptl_hdr_t      *hdr = (ptl_hdr_t *) page_address (krx->krx_pages[0]);
+        ptl_hdr_t      *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
         ptl_nid_t       dest_nid = NTOH__u64 (hdr->dest_nid);
+        int             payload_nob;
         int             nob;
         int             niov;
 
@@ -1398,16 +1397,26 @@ kqswnal_rx (kqswnal_rx_t *krx)
                 return;
         }
 
-        /* NB forwarding may destroy iov; rebuild every time */
-        for (nob = krx->krx_nob, niov = 0; nob > 0; nob -= PAGE_SIZE, niov++)
-        {
-                LASSERT (niov < krx->krx_npages);
-                krx->krx_iov[niov].iov_base= page_address(krx->krx_pages[niov]);
-                krx->krx_iov[niov].iov_len = MIN(PAGE_SIZE, nob);
+        nob = payload_nob = krx->krx_nob - KQSW_HDR_SIZE;
+        niov = 0;
+        if (nob > 0) {
+                krx->krx_kiov[0].kiov_offset = KQSW_HDR_SIZE;
+                krx->krx_kiov[0].kiov_len = MIN(PAGE_SIZE - KQSW_HDR_SIZE, nob);
+                niov = 1;
+                nob -= PAGE_SIZE - KQSW_HDR_SIZE;
+                
+                while (nob > 0) {
+                        LASSERT (niov < krx->krx_npages);
+                        
+                        krx->krx_kiov[niov].kiov_offset = 0;
+                        krx->krx_kiov[niov].kiov_len = MIN(PAGE_SIZE, nob);
+                        niov++;
+                        nob -= PAGE_SIZE;
+                }
         }
 
-        kpr_fwd_init (&krx->krx_fwd, dest_nid,
-                      krx->krx_nob, niov, krx->krx_iov,
+        kpr_fwd_init (&krx->krx_fwd, dest_nid, 
+                      hdr, payload_nob, niov, krx->krx_kiov,
                       kqswnal_fwd_callback, krx);
 
         kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd);
@@ -1471,7 +1480,7 @@ kqswnal_rxhandler(EP_RXD *rxd)
 void
 kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
 {
-        ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+        ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
 
         CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64
                 ", dpid %d, spid %d, type %d\n",
@@ -1526,6 +1535,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
                  size_t        rlen)
 {
         kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
+        char         *buffer = page_address(krx->krx_kiov[0].kiov_page);
         int           page;
         char         *page_ptr;
         int           page_nob;
@@ -1535,8 +1545,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 #if KQSW_CHECKSUM
         kqsw_csum_t   senders_csum;
         kqsw_csum_t   payload_csum = 0;
-        kqsw_csum_t   hdr_csum = kqsw_csum(0, page_address(krx->krx_pages[0]),
-                                           sizeof(ptl_hdr_t));
+        kqsw_csum_t   hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
         size_t        csum_len = mlen;
         int           csum_frags = 0;
         int           csum_nob = 0;
@@ -1545,8 +1554,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 
         atomic_inc (&csum_counter);
 
-        memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
-                                sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
+        memcpy (&senders_csum, buffer + sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
         if (senders_csum != hdr_csum)
                 kqswnal_csum_error (krx, 1);
 #endif
@@ -1567,8 +1575,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 
         if (mlen != 0) {
                 page     = 0;
-                page_ptr = ((char *) page_address(krx->krx_pages[0])) +
-                        KQSW_HDR_SIZE;
+                page_ptr = buffer + KQSW_HDR_SIZE;
                 page_nob = PAGE_SIZE - KQSW_HDR_SIZE;
 
                 LASSERT (niov > 0);
@@ -1621,7 +1628,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
                         {
                                 page++;
                                 LASSERT (page < krx->krx_npages);
-                                page_ptr = page_address(krx->krx_pages[page]);
+                                page_ptr = page_address(krx->krx_kiov[page].kiov_page);
                                 page_nob = PAGE_SIZE;
                         }
 
@@ -1649,8 +1656,8 @@ kqswnal_recvmsg (nal_cb_t     *nal,
         }
 
 #if KQSW_CHECKSUM
-        memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
-                sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), sizeof(kqsw_csum_t));
+        memcpy (&senders_csum, buffer + sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), 
+                sizeof(kqsw_csum_t));
 
         if (csum_len != rlen)
                 CERROR("Unable to checksum data in user's buffer\n");
index c47dcb4..2c44b43 100644 (file)
@@ -1388,6 +1388,7 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private)
 void
 ksocknal_free_fmbs (ksock_fmb_pool_t *p)
 {
+        int          npages = p->fmp_buff_pages;
         ksock_fmb_t *fmb;
         int          i;
 
@@ -1399,12 +1400,12 @@ ksocknal_free_fmbs (ksock_fmb_pool_t *p)
                 fmb = list_entry(p->fmp_idle_fmbs.next,
                                  ksock_fmb_t, fmb_list);
                 
-                for (i = 0; i < fmb->fmb_npages; i++)
-                        if (fmb->fmb_pages[i] != NULL)
-                                __free_page(fmb->fmb_pages[i]);
-                
+                for (i = 0; i < npages; i++)
+                        if (fmb->fmb_kiov[i].kiov_page != NULL)
+                                __free_page(fmb->fmb_kiov[i].kiov_page);
+
                 list_del(&fmb->fmb_list);
-                PORTAL_FREE(fmb, sizeof(*fmb));
+                PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages]));
         }
 }
 
@@ -1603,10 +1604,12 @@ ksocknal_module_init (void)
         spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
+        ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES;
 
         spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
+        ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES;
 
         spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
         INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
@@ -1690,34 +1693,36 @@ ksocknal_module_init (void)
 
                 for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
                                  SOCKNAL_LARGE_FWD_NMSGS); i++) {
-                        ksock_fmb_t *fmb;
+                        ksock_fmb_t      *fmb;
+                        ksock_fmb_pool_t *pool;
+                        
+
+                        if (i < SOCKNAL_SMALL_FWD_NMSGS)
+                                pool = &ksocknal_data.ksnd_small_fmp;
+                        else
+                                pool = &ksocknal_data.ksnd_large_fmp;
                         
-                        PORTAL_ALLOC(fmb, sizeof(*fmb));
+                        PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, 
+                                                   fmb_kiov[pool->fmp_buff_pages]));
                         if (fmb == NULL) {
                                 ksocknal_module_fini();
                                 return (-ENOMEM);
                         }
 
-                        if (i < SOCKNAL_SMALL_FWD_NMSGS) {
-                                fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
-                                fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp;
-                        } else {
-                                fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
-                                fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
-                        }
-
-                        for (j = 0; j < fmb->fmb_npages; j++) {
-                                fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
+                        fmb->fmb_pool = pool;
+                        
+                        for (j = 0; j < pool->fmp_buff_pages; j++) {
+                                fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
 
-                                if (fmb->fmb_pages[j] == NULL) {
+                                if (fmb->fmb_kiov[j].kiov_page == NULL) {
                                         ksocknal_module_fini ();
                                         return (-ENOMEM);
                                 }
 
-                                LASSERT(page_address(fmb->fmb_pages[j]) != NULL);
+                                LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL);
                         }
 
-                        list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+                        list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs);
                 }
         }
 
index 0f0b9bd..db8c842 100644 (file)
@@ -44,6 +44,7 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/irq.h>
 
 #include <linux/init.h>
 #include <linux/fs.h>
@@ -88,7 +89,7 @@
 
 #define SOCKNAL_SMALL_FWD_PAGES        1               /* # pages in a small message fwd buffer */
 
-#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + PTL_MTU) >> PAGE_SHIFT)
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN(PTL_MTU) >> PAGE_SHIFT)
                                                /* # pages in a large message fwd buffer */
 
 #define SOCKNAL_RESCHED         100             /* # scheduler loops before reschedule */
@@ -115,6 +116,7 @@ typedef struct                                  /* pool of forwarding buffers */
         struct list_head  fmp_idle_fmbs;        /* free buffers */
         struct list_head  fmp_blocked_conns;    /* connections waiting for a buffer */
         int               fmp_nactive_fmbs;     /* # buffers in use */
+        int               fmp_buff_pages;       /* # pages per buffer */
 } ksock_fmb_pool_t;
 
 
@@ -193,18 +195,13 @@ typedef struct {
 #define SOCKNAL_INIT_ALL        3
 
 /* A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments and 0 or more ptl_kiov_t fragments.  Forwarded
- * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0
- * ptl_kiov_t fragments.  Messages from an MD with PTL_MD_KIOV set, have 1
- * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t
- * fragments.
+ * struct iovec fragments (the first frag contains the portals header),
+ * followed by 0 or more ptl_kiov_t fragments.
  *
  * On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header).  Once the header has been received, if the message
- * requires forwarding or will be received into mapped memory, up to
- * PTL_MD_MAX_IOV struct iovec fragments describe the target memory.
- * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used.
- */
+ * receive (the header).  Once the header has been received, the payload is
+ * received into either struct iovec or ptl_kiov_t fragments, depending on
+ * what the header matched or whether the message needs forwarding. */
 
 struct ksock_conn;                              /* forward ref */
 struct ksock_peer;                              /* forward ref */
@@ -227,6 +224,12 @@ typedef struct                                  /* transmit packet */
 #endif
 } ksock_tx_t;
 
+typedef struct                                  /* forwarded packet */
+{
+        ksock_tx_t             ftx_tx;          /* send info */
+        struct iovec           ftx_iov;         /* hdr iovec */
+} ksock_ftx_t;
+
 #define KSOCK_ZCCD_2_TX(ptr)   list_entry (ptr, ksock_tx_t, tx_zccd)
 /* network zero copy callback descriptor embedded in ksock_tx_t */
 
@@ -254,15 +257,14 @@ typedef struct                                  /* Kernel portals Socket Forward
 {                                               /* (socknal->router) */
         struct list_head        fmb_list;       /* queue idle */
         kpr_fwd_desc_t          fmb_fwd;        /* router's descriptor */
-        int                     fmb_npages;     /* # pages allocated */
         ksock_fmb_pool_t       *fmb_pool;       /* owning pool */
         struct ksock_peer      *fmb_peer;       /* peer received from */
-        struct page            *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
-        struct iovec            fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
+        ptl_hdr_t               fmb_hdr;        /* message header */
+        ptl_kiov_t              fmb_kiov[0];    /* payload frags */
 } ksock_fmb_t;
 
 /* space for the rx frag descriptors; we either read a single contiguous
- * header, or PTL_MD_MAX_IOV frags of payload of either type. */
+ * header, or up to PTL_MD_MAX_IOV frags of payload of either type. */
 typedef union {
         struct iovec    iov[PTL_MD_MAX_IOV];
         ptl_kiov_t      kiov[PTL_MD_MAX_IOV];
index c6cdaba..c89e20e 100644 (file)
@@ -123,7 +123,7 @@ ksocknal_free_ltx (ksock_ltx_t *ltx)
         PORTAL_FREE(ltx, ltx->ltx_desc_size);
 }
 
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
 struct page *
 ksocknal_kvaddr_to_page (unsigned long vaddr)
 {
@@ -159,7 +159,7 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
         int            more = (tx->tx_niov > 1) || 
                               (tx->tx_nkiov > 0) ||
                               (!list_empty (&conn->ksnc_tx_queue));
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
         int            offset = vaddr & (PAGE_SIZE - 1);
         int            zcsize = MIN (fragsize, PAGE_SIZE - offset);
         struct page   *page;
@@ -171,7 +171,7 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
         LASSERT (fragsize <= tx->tx_resid);
         LASSERT (tx->tx_niov > 0);
         
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
         if (zcsize >= ksocknal_data.ksnd_zc_min_frag &&
             (sock->sk->route_caps & NETIF_F_SG) &&
             (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
@@ -1133,7 +1133,7 @@ void
 ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
 {
         ptl_nid_t     nid = fwd->kprfd_gateway_nid;
-        ksock_tx_t   *tx  = (ksock_tx_t *)&fwd->kprfd_scratch;
+        ksock_ftx_t  *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
         int           rc;
         
         CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
@@ -1143,14 +1143,18 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         if (nid == ksocknal_lib.ni.nid)
                 nid = fwd->kprfd_target_nid;
 
-        tx->tx_isfwd = 1;                   /* This is a forwarding packet */
-        tx->tx_nob   = fwd->kprfd_nob;
-        tx->tx_niov  = fwd->kprfd_niov;
-        tx->tx_iov   = fwd->kprfd_iov;
-        tx->tx_nkiov = 0;
-        tx->tx_kiov  = NULL;
+        /* setup iov for hdr */
+        ftx->ftx_iov.iov_base = fwd->kprfd_hdr;
+        ftx->ftx_iov.iov_len = sizeof(ptl_hdr_t);
+
+        ftx->ftx_tx.tx_isfwd = 1;                  /* This is a forwarding packet */
+        ftx->ftx_tx.tx_nob   = sizeof(ptl_hdr_t) + fwd->kprfd_nob;
+        ftx->ftx_tx.tx_niov  = 1;
+        ftx->ftx_tx.tx_iov   = &ftx->ftx_iov;
+        ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov;
+        ftx->ftx_tx.tx_kiov  = fwd->kprfd_kiov;
 
-        rc = ksocknal_launch_packet (tx, nid);
+        rc = ksocknal_launch_packet (&ftx->ftx_tx, nid);
         if (rc != 0)
                 kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc);
 }
@@ -1178,7 +1182,7 @@ ksocknal_fmb_callback (void *arg, int error)
 {
         ksock_fmb_t       *fmb = (ksock_fmb_t *)arg;
         ksock_fmb_pool_t  *fmp = fmb->fmb_pool;
-        ptl_hdr_t         *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
+        ptl_hdr_t         *hdr = (ptl_hdr_t *)page_address(fmb->fmb_kiov[0].kiov_page);
         ksock_conn_t      *conn = NULL;
         ksock_sched_t     *sched;
         unsigned long      flags;
@@ -1236,7 +1240,6 @@ ksock_fmb_t *
 ksocknal_get_idle_fmb (ksock_conn_t *conn)
 {
         int               payload_nob = conn->ksnc_rx_nob_left;
-        int               packet_nob = sizeof (ptl_hdr_t) + payload_nob;
         unsigned long     flags;
         ksock_fmb_pool_t *pool;
         ksock_fmb_t      *fmb;
@@ -1244,7 +1247,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn)
         LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
         LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
 
-        if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
+        if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
                 pool = &ksocknal_data.ksnd_small_fmp;
         else
                 pool = &ksocknal_data.ksnd_large_fmp;
@@ -1275,98 +1278,64 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn)
 int
 ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
 {
-        int payload_nob = conn->ksnc_rx_nob_left;
-        int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+        int       payload_nob = conn->ksnc_rx_nob_left;
         ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
-        int niov;                               /* at least the header */
-        int nob;
+        int       niov = 0;
+        int       nob = payload_nob;
 
         LASSERT (conn->ksnc_rx_scheduled);
         LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
         LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
         LASSERT (payload_nob >= 0);
-        LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
+        LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE);
         LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
-
-        /* Got a forwarding buffer; copy the header we just read into the
-         * forwarding buffer.  If there's payload, start reading reading it
-         * into the buffer, otherwise the forwarding buffer can be kicked
-         * off immediately.
-         *
-         * NB fmb->fmb_iov spans the WHOLE packet.
-         *    conn->ksnc_rx_iov spans just the payload.
-         */
-        fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
-
-        /* copy header */
-        memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t));
+        LASSERT (fmb->fmb_kiov[0].kiov_offset == 0);
 
         /* Take a ref on the conn's peer to prevent module unload before
-         * forwarding completes.  NB we ref peer and not conn since because
-         * all refs on conn after it has been closed must remove themselves
-         * in finite time */
+         * forwarding completes. */
         fmb->fmb_peer = conn->ksnc_peer;
         atomic_inc (&conn->ksnc_peer->ksnp_refcount);
 
-        if (payload_nob == 0) {         /* got complete packet already */
-                CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n",
-                        conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
-                        dest_nid, packet_nob);
+        /* Copy the header we just read into the forwarding buffer.  If
+         * there's payload, start reading it into the buffer,
+         * otherwise the forwarding buffer can be kicked off
+         * immediately. */
+        fmb->fmb_hdr = conn->ksnc_hdr;
 
-                fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
+        while (nob > 0) {
+                LASSERT (niov < fmb->fmb_pool->fmp_buff_pages);
+                LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0);
+                fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob);
+                nob -= PAGE_SIZE;
+                niov++;
+        }
+
+        kpr_fwd_init(&fmb->fmb_fwd, dest_nid, &fmb->fmb_hdr,
+                     payload_nob, niov, fmb->fmb_kiov,
+                     ksocknal_fmb_callback, fmb);
 
-                kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
-                              packet_nob, 1, fmb->fmb_iov,
-                              ksocknal_fmb_callback, fmb);
+        if (payload_nob == 0) {         /* got complete packet already */
+                CDEBUG (D_NET, "%p "LPX64"->"LPX64" fwd_start (immediate)\n",
+                        conn, NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid);
 
-                /* forward it now */
                 kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
 
                 ksocknal_new_packet (conn, 0);  /* on to next packet */
                 return (1);
         }
 
-        niov = 1;
-        if (packet_nob <= PAGE_SIZE) {  /* whole packet fits in first page */
-                fmb->fmb_iov[0].iov_len = packet_nob;
-        } else {
-                fmb->fmb_iov[0].iov_len = PAGE_SIZE;
-                nob = packet_nob - PAGE_SIZE;
-
-                do {
-                        LASSERT (niov < fmb->fmb_npages);
-                        fmb->fmb_iov[niov].iov_base =
-                                page_address (fmb->fmb_pages[niov]);
-                        fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
-                        nob -= PAGE_SIZE;
-                        niov++;
-                } while (nob > 0);
-        }
-
-        kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
-                      packet_nob, niov, fmb->fmb_iov,
-                      ksocknal_fmb_callback, fmb);
-
         conn->ksnc_cookie = fmb;                /* stash fmb for later */
         conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
         
-        /* payload is desc's iov-ed buffer, but skipping the hdr */
-        LASSERT (niov <= sizeof (conn->ksnc_rx_iov_space) /
-                 sizeof (struct iovec));
-
-        conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
-        conn->ksnc_rx_iov[0].iov_base =
-                (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) +
-                         sizeof (ptl_hdr_t));
-        conn->ksnc_rx_iov[0].iov_len =
-                fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
-
-        if (niov > 1)
-                memcpy(&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1],
-                       (niov - 1) * sizeof (struct iovec));
-
-        conn->ksnc_rx_niov = niov;
+        /* Set up conn->ksnc_rx_kiov to read the payload into fmb's kiov-ed
+         * buffer */
+        LASSERT (niov <= sizeof(conn->ksnc_rx_iov_space)/sizeof(ptl_kiov_t));
 
+        conn->ksnc_rx_niov = 0;
+        conn->ksnc_rx_nkiov = niov;
+        conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
+        memcpy(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov * sizeof(ptl_kiov_t));
+        
         CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
                 NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
         return (0);
index e29f628..d0dbf0a 100644 (file)
@@ -456,14 +456,13 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
         CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd,
                 target_nid, src_ne->kpne_interface.kprni_nalid);
 
-        LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
-        LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
+        LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
         
         atomic_inc (&kpr_queue_depth);
        atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
 
         kpr_fwd_packets++;                   /* (loose) stats accounting */
-        kpr_fwd_bytes += nob;
+        kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
 
        if (src_ne->kpne_shutdown)           /* caller is shutting down */
                goto out;
index 6c31b3d..925406f 100644 (file)
@@ -26,11 +26,11 @@ libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
 gmnalnid_SOURCES = gmnalnid.c
 
 ptlctl_SOURCES = ptlctl.c
-ptlctl_LDADD =  -L. -lptlctl -lncurses # -lefence
+ptlctl_LDADD =  -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
 ptlctl_DEPENDENCIES = libptlctl.a
 
 debugctl_SOURCES = debugctl.c
-debugctl_LDADD = -L. -lptlctl -lncurses # -lefence
+debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
 debugctl_DEPENDENCIES = libptlctl.a
 
 routerstat_SOURCES = routerstat.c
index 84ac97f..ff6631c 100644 (file)
@@ -115,5 +115,5 @@ int main(int argc, char **argv)
        free(pcfg.pcfg_pbuf1);
        close(pfd);
        printf("%u\n", nid);
-        exit(nid);
+        exit(0);
 }
index 274a565..a3d29e6 100644 (file)
@@ -1,3 +1,13 @@
+tbd  Cluster File Systems, Inc. <info@clusterfs.com>
+       * version 1.2.1
+       * bug fixes
+       - fixes for glimpse AST timeouts / incorrectly 0-sized files (2818)
+       - don't overwrite extent policy data in reply if lock was blocked (2901)
+       - drop filter export grants atomically with removal from device (2663)
+       - del obd_self_export from work_list in class_disconnect_exports (2908)
+       - don't LBUG if MDS recovery times out during orphan cleanup (2530)
+       - swab reply message in mdc_close, other PPC fixes (2464)
+
 2004-03-04  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.0
        * bug fixes
index d8c84be..4f230d2 100644 (file)
@@ -119,8 +119,6 @@ static inline int lustre_cfg_pack(struct lustre_cfg *data, char **pbuf,
                 LOGL(data->lcfg_inlbuf3, data->lcfg_inllen3, ptr);
         if (data->lcfg_inlbuf4)
                 LOGL(data->lcfg_inlbuf4, data->lcfg_inllen4, ptr);
-//        if (lustre_cfg_is_invalid(overlay))
-//                return 1;
 
        *plen = len;
 
@@ -200,14 +198,11 @@ static inline int lustre_cfg_getdata(char **buf, int len, void *arg, int kernel)
         lcfg = (struct lustre_cfg *)*buf;
 
         if (lcfg->lcfg_version != LUSTRE_CFG_VERSION) {
-                CERROR("Version mismatch kernel vs application\n");
+                CERROR("Version mismatch kernel: %#x application: %#x\n",
+                       LUSTRE_CFG_VERSION, lcfg->lcfg_version);
                 return -EINVAL;
         }
 
-//        if (lustre_cfg_is_invalid(data)) {
-//                CERROR("ioctl not correctly formatted\n");
-//                return -EINVAL;
-//        }
 
         if (lcfg->lcfg_dev_name) {
                 lcfg->lcfg_dev_name = &lcfg->lcfg_bulk[0];
index b8515a3..d85d7a1 100644 (file)
@@ -29,6 +29,7 @@ typedef enum {
         ELDLM_LOCK_CHANGED = 300,
         ELDLM_LOCK_ABORTED = 301,
         ELDLM_LOCK_REPLACED = 302,
+        ELDLM_NO_LOCK_DATA = 303,
 
         ELDLM_NAMESPACE_EXISTS = 400,
         ELDLM_BAD_NAMESPACE    = 401
@@ -144,7 +145,7 @@ typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
 struct ldlm_valblock_ops {
         int (*lvbo_init)(struct ldlm_resource *res);
         int (*lvbo_update)(struct ldlm_resource *res, struct lustre_msg *m,
-                           int buf_idx);
+                           int buf_idx, int increase);
 };
 
 struct ldlm_namespace {
index 24ee1c2..242498e 100644 (file)
@@ -55,6 +55,7 @@ struct lov_oinfo {                 /* per-stripe data structure */
         struct list_head loi_cli_item;
         struct list_head loi_write_item;
 
+        int loi_kms_valid:1;
         __u64 loi_kms; /* known minimum size */
         __u64 loi_rss; /* recently seen size */
         __u64 loi_mtime; /* recently seen mtime */
index abb6bbe..d11bec0 100644 (file)
@@ -50,5 +50,5 @@ kernel_text_address-2.4.20-vanilla.patch
 ext3-xattr-ptr-arith-fix.patch
 gfp_memalloc-2.4.22.patch
 procfs-ndynamic-2.4.patch
-linux-2.4.20-tmpfs-xattr.patch
+linux-2.4.20-filemap.patch
 ext3-truncate-buffer-head.patch
index 0e7f0b0..b5b0e33 100644 (file)
@@ -752,11 +752,13 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
         lock->l_completion_ast = completion;
         lock->l_glimpse_ast = glimpse;
 
-        lock->l_lvb_len = lvb_len;
-        OBD_ALLOC(lock->l_lvb_data, lvb_len);
-        if (lock->l_lvb_data == NULL) {
-                OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
-                RETURN(NULL);
+        if (lvb_len) {
+                lock->l_lvb_len = lvb_len;
+                OBD_ALLOC(lock->l_lvb_data, lvb_len);
+                if (lock->l_lvb_data == NULL) {
+                        OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
+                        RETURN(NULL);
+                }
         }
 
         RETURN(lock);
index 5765d8c..6602713 100644 (file)
@@ -37,6 +37,7 @@
 
 #include <linux/lustre_dlm.h>
 #include <linux/obd_class.h>
+#include <portals/list.h>
 #include "ldlm_internal.h"
 
 extern kmem_cache_t *ldlm_resource_slab;
@@ -547,12 +548,15 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         } else if (rc == -EINVAL) {
                 LDLM_DEBUG(lock, "lost the race -- client no longer has this "
                            "lock");
+        } else if (rc == -ELDLM_NO_LOCK_DATA) {
+                LDLM_DEBUG(lock, "lost a race -- client has a lock, but no "
+                           "inode");
         } else if (rc) {
                 LDLM_ERROR(lock, "client sent rc %d rq_status %d from "
                            "glimpse AST", rc, req->rq_status);
         } else {
-                rc = res->lr_namespace->ns_lvbo->lvbo_update(res,
-                                                             req->rq_repmsg, 0);
+                rc = res->lr_namespace->ns_lvbo->lvbo_update
+                        (res, req->rq_repmsg, 0, 1);
         }
         ptlrpc_req_finished(req);
         RETURN(rc);
@@ -767,7 +771,7 @@ int ldlm_handle_cancel(struct ptlrpc_request *req)
                 if (res && res->lr_namespace->ns_lvbo &&
                     res->lr_namespace->ns_lvbo->lvbo_update) {
                         (void)res->lr_namespace->ns_lvbo->lvbo_update
-                                (res, NULL, 0);
+                                (res, NULL, 0, 0);
                                 //(res, req->rq_reqmsg, 1);
                 }
 
@@ -840,9 +844,12 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
                 lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
                 LDLM_DEBUG(lock, "completion AST, new lock mode");
         }
-        if (lock->l_resource->lr_type != LDLM_PLAIN)
+
+        if (lock->l_resource->lr_type != LDLM_PLAIN) {
                 memcpy(&lock->l_policy_data, &dlm_req->lock_desc.l_policy_data,
                        sizeof(lock->l_policy_data));
+                LDLM_DEBUG(lock, "completion AST, new policy data");
+        }
 
         ldlm_resource_unlink_lock(lock);
         if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
@@ -889,6 +896,7 @@ static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
                                     struct ldlm_request *dlm_req,
                                     struct ldlm_lock *lock)
 {
+        int rc = -ENOSYS;
         ENTRY;
 
         l_lock(&ns->ns_lock);
@@ -897,10 +905,17 @@ static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
         if (lock->l_glimpse_ast != NULL) {
                 l_unlock(&ns->ns_lock);
                 l_check_no_ns_lock(ns);
-                lock->l_glimpse_ast(lock, req);
+                rc = lock->l_glimpse_ast(lock, req);
                 l_lock(&ns->ns_lock);
         }
 
+        if (req->rq_repmsg != NULL) {
+                ptlrpc_reply(req);
+        } else {
+                req->rq_status = rc;
+                ptlrpc_error(req);
+        }
+
         if (lock->l_granted_mode == LCK_PW &&
             !lock->l_readers && !lock->l_writers &&
             time_after(jiffies, lock->l_last_used + 10 * HZ)) {
index 01e4562..a996da6 100644 (file)
@@ -333,17 +333,6 @@ int ldlm_cli_enqueue(struct obd_export *exp,
 
         CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: 0x%x\n",
                lock, reply->lock_handle.cookie, *flags);
-        if (type == LDLM_EXTENT) {
-                CDEBUG(D_INFO, "requested extent: "LPU64" -> "LPU64", got "
-                       "extent "LPU64" -> "LPU64"\n",
-                       body->lock_desc.l_policy_data.l_extent.start,
-                       body->lock_desc.l_policy_data.l_extent.end,
-                       reply->lock_desc.l_policy_data.l_extent.start,
-                       reply->lock_desc.l_policy_data.l_extent.end);
-        }
-        if (policy != NULL)
-                memcpy(&lock->l_policy_data, &reply->lock_desc.l_policy_data,
-                       sizeof(reply->lock_desc.l_policy_data));
 
         /* If enqueue returned a blocked lock but the completion handler has
          * already run, then it fixed up the resource and we don't need to do it
@@ -372,7 +361,14 @@ int ldlm_cli_enqueue(struct obd_export *exp,
                         }
                         LDLM_DEBUG(lock, "client-side enqueue, new resource");
                 }
+                if (policy != NULL)
+                        memcpy(&lock->l_policy_data,
+                               &reply->lock_desc.l_policy_data,
+                               sizeof(reply->lock_desc.l_policy_data));
+                if (type != LDLM_PLAIN)
+                        LDLM_DEBUG(lock,"client-side enqueue, new policy data");
         }
+
         if ((*flags) & LDLM_FL_AST_SENT) {
                 l_lock(&ns->ns_lock);
                 lock->l_flags |= LDLM_FL_CBPENDING;
index 2cbc22e..9e487d5 100644 (file)
@@ -549,64 +549,50 @@ int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 }
 #endif
 
-/* This function is a disaster.  I hate the LOV. */
 static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp)
 {
         struct ptlrpc_request *req = reqp;
         struct inode *inode = ll_inode_from_lock(lock);
-        struct obd_export *exp;
         struct ll_inode_info *lli;
         struct ost_lvb *lvb;
-        struct {
-                int stripe_number;
-                __u64 size;
-                struct lov_stripe_md *lsm;
-        } data;
-        __u32 vallen = sizeof(data);
-        int rc, size = sizeof(*lvb);
+        int rc, size = sizeof(*lvb), stripe = 0;
         ENTRY;
 
         if (inode == NULL)
-                RETURN(0);
+                GOTO(out, rc = -ELDLM_NO_LOCK_DATA);
         lli = ll_i2info(inode);
         if (lli == NULL)
-                goto iput;
+                GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
         if (lli->lli_smd == NULL)
-                goto iput;
-        exp = ll_i2obdexp(inode);
+                GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
 
         /* First, find out which stripe index this lock corresponds to. */
         if (lli->lli_smd->lsm_stripe_count > 1)
-                data.stripe_number = ll_lock_to_stripe_offset(inode, lock);
-        else
-                data.stripe_number = 0;
-
-        data.size = inode->i_size;
-        data.lsm = lli->lli_smd;
-
-        rc = obd_get_info(exp, strlen("size_to_stripe"), "size_to_stripe",
-                          &vallen, &data);
-        if (rc != 0) {
-                CERROR("obd_get_info: rc = %d\n", rc);
-                LBUG();
-        }
-
-        LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> size "LPU64,
-                   inode->i_size, data.stripe_number, data.size);
+                stripe = ll_lock_to_stripe_offset(inode, lock);
 
         rc = lustre_pack_reply(req, 1, &size, NULL);
         if (rc) {
                 CERROR("lustre_pack_reply: %d\n", rc);
-                goto iput;
+                GOTO(iput, rc);
         }
 
         lvb = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*lvb));
-        lvb->lvb_size = data.size;
-        ptlrpc_reply(req);
+        lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe].loi_kms;
 
+        LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64,
+                   inode->i_size, stripe, lvb->lvb_size);
+        GOTO(iput, 0);
  iput:
         iput(inode);
-        RETURN(0);
+
+ out:
+        /* These errors are normal races, so we don't want to fill the console
+         * with messages by calling ptlrpc_error() */
+        if (rc == -ELDLM_NO_LOCK_DATA)
+                lustre_pack_reply(req, 0, NULL, NULL);
+
+        req->rq_status = rc;
+        return rc;
 }
 
 __u64 lov_merge_size(struct lov_stripe_md *lsm, int kms);
index c17ad63..c9cf119 100644 (file)
@@ -462,7 +462,14 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
         exp = class_conn2export(&mdc_conn);
 
         ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
+#if 1
         rc = class_config_parse_llog(ctxt, profile, cfg);
+#else
+        /*
+         * For debugging, it's useful to just dump the log
+         */
+        rc = class_config_dump_llog(ctxt, profile, cfg);
+#endif
         if (rc) {
                 CERROR("class_config_parse_llog failed: rc = %d\n", rc);
         }
index 4c59d71..5784eb8 100644 (file)
@@ -104,6 +104,7 @@ int ll_set_inode(struct inode *inode, void *opaque)
         ll_read_inode2(inode, opaque);
         return 0;
 }
+
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
                       struct lustre_md *md)
 {
index 92d862f..b0be68f 100644 (file)
@@ -566,17 +566,16 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa,
 
                 if (ost_uuid && !obd_uuid_equals(ost_uuid, &lov->tgts[i].uuid))
                         continue;
-                
-                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
 
+                memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+                
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
                 err = obd_create(lov->tgts[i].ltd_exp, tmp_oa, &obj_mdp, oti);
-                if (err) {
+                if (err)
+                        /* This export will be disabled until it is recovered,
+                           and then orphan recovery will be completed. */
                         CERROR("error in orphan recovery on OST idx %d/%d: "
                                "rc = %d\n", i, lov->desc.ld_tgt_count, err);
-                        if (!rc)
-                                rc = err;
-                }
 
                 if (ost_uuid)
                         break;
@@ -603,7 +602,7 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
 
         LASSERT(ea != NULL);
 
-        if ((src_oa->o_valid & OBD_MD_FLFLAGS) && 
+        if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
             src_oa->o_flags == OBD_FL_DELORPHAN) {
                 rc = lov_clear_orphans(exp, src_oa, ea, oti);
                 RETURN(rc);
@@ -617,7 +616,7 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
         if (!lov->desc.ld_active_tgt_count)
                 RETURN(-EIO);
 
-        /* Recreate a specific object id at the given OST index */ 
+        /* Recreate a specific object id at the given OST index */
         if (src_oa->o_valid & OBD_MD_FLFLAGS && src_oa->o_flags &
                                                 OBD_FL_RECREATE_OBJS) {
                  struct lov_stripe_md obj_md;
@@ -639,7 +638,8 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
                  if (i == lsm->lsm_stripe_count)
                          RETURN(-EINVAL);
 
-                 rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa, &obj_mdp, oti);
+                 rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa,
+                                 &obj_mdp, oti);
                  RETURN(rc);
         }
 
@@ -690,14 +690,14 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
         }
 
         if (*ea == NULL || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) {
-                if (ost_start_count <= 0) {
+                if (--ost_start_count <= 0) {
                         ost_start_idx = ll_insecure_random_int();
                         ost_start_count = LOV_CREATE_RESEED_INTERVAL;
-                } else {
-                        --ost_start_count;
-                        ost_start_idx += lsm->lsm_stripe_count;
-                        if (lsm->lsm_stripe_count == ost_count)
-                                ++ost_start_idx;
+                } else if (lsm->lsm_stripe_count >=
+                           lov->desc.ld_active_tgt_count) {
+                        /* If we allocate from all of the stripes, make the
+                         * next file start on the next OST. */
+                        ++ost_start_idx;
                 }
                 ost_idx = ost_start_idx % ost_count;
         } else {
@@ -721,6 +721,7 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
                 struct lov_stripe_md *obj_mdp = &obj_md;
                 int err;
 
+                ++ost_start_idx;
                 if (lov->tgts[ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
                         continue;
@@ -2055,6 +2056,7 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
                 /* XXX LOV STACKING: submd should be from the subobj */
                 submd->lsm_object_id = loi->loi_id;
                 submd->lsm_stripe_count = 0;
+                submd->lsm_oinfo->loi_kms_valid = loi->loi_kms_valid;
                 submd->lsm_oinfo->loi_rss = loi->loi_rss;
                 submd->lsm_oinfo->loi_kms = loi->loi_kms;
                 loi->loi_mtime = submd->lsm_oinfo->loi_mtime;
@@ -2077,14 +2079,16 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
 
                         LASSERT(lock != NULL);
                         loi->loi_rss = tmp;
-                        // Extend KMS up to the end of this lock, and no further
+                        /* Extend KMS up to the end of this lock, no further.
+                         * A lock on [x,y] means a KMS of up to y + 1 bytes! */
                         if (tmp > lock->l_policy_data.l_extent.end)
                                 tmp = lock->l_policy_data.l_extent.end + 1;
-                        if (tmp > loi->loi_kms) {
+                        if (tmp >= loi->loi_kms) {
                                 CDEBUG(D_INODE, "lock acquired, setting rss="
                                        LPU64", kms="LPU64"\n", loi->loi_rss,
                                        tmp);
                                 loi->loi_kms = tmp;
+                                loi->loi_kms_valid = 1;
                         } else {
                                 CDEBUG(D_INODE, "lock acquired, setting rss="
                                        LPU64"; leaving kms="LPU64", end="LPU64
@@ -2619,8 +2623,7 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
                         int er;
 
-                        if (!lov->tgts[i].active)
-                                continue;
+                        /* initialize all OSCs, even inactive ones */
 
                         er = obd_set_info(lov->tgts[i].ltd_exp, keylen, key,
                                           sizeof(obd_id), ((obd_id*)val) + i);
index c692def..e1c3fed 100644 (file)
@@ -507,6 +507,11 @@ int mdc_close(struct obd_export *exp, struct obdo *obdo,
                         CERROR("Unexpected: can't find mdc_open_data, but the "
                                "close succeeded.  Please tell CFS.\n");
                 }
+                if (!lustre_swab_repbuf(req, 0, sizeof(struct mds_body),
+                                        lustre_swab_mds_body)) {
+                        CERROR("Error unpacking mds_body\n");
+                        rc = -EPROTO;
+                }
         }
         if (req->rq_async_args.pointer_arg[0] != NULL) {
                 CERROR("returned without dropping rpc_lock: rc %d\n", rc);
index 0e9d2f0..3520849 100644 (file)
@@ -159,20 +159,8 @@ int mds_lov_set_nextid(struct obd_device *obd)
                 GOTO(out, rc);
 
         rc = mds_lov_clearorphans(mds, NULL /* all OSTs */);
-        if (rc < 0)
-                GOTO(out, rc);
 
 out:
-        if (rc && mds->mds_lov_objids) {
-                /* Might as well crash here, until we figure out what to do.
-                 * If we OBD_FREE, we'll just LASSERT the next time through this
-                 * function. */
-                LBUG();
-                OBD_FREE(mds->mds_lov_objids,
-                         mds->mds_lov_desc.ld_tgt_count * sizeof(obd_id));
-                mds->mds_lov_objids = NULL;
-        }
-
         RETURN(rc);
 }
 
index 8d49420..e959402 100644 (file)
@@ -28,6 +28,7 @@
 #endif
 #define DEBUG_SUBSYSTEM S_MDS
 
+#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/version.h>
index 5088abb..98ae3b5 100644 (file)
@@ -630,6 +630,9 @@ void class_disconnect_exports(struct obd_device *obd, int flags)
                         CDEBUG(D_HA,
                                "exp %p export uuid == obd uuid, don't discon\n",
                                exp);
+                        /* Need to delete this now so we don't end up pointing
+                         * to work_list later when this export is cleaned up. */
+                        list_del_init(&exp->exp_obd_chain);
                         class_export_put(exp);
                         continue;
                 }
index 9156dc8..41f2258 100644 (file)
@@ -545,7 +545,7 @@ static int class_config_llog_handler(struct llog_handle * handle,
         int cfg_len = rec->lrh_len;
         char *cfg_buf = (char*) (rec + 1);
         int rc = 0;
-
+        ENTRY;
         if (rec->lrh_type == OBD_CFG_REC) {
                 char *buf;
                 struct lustre_cfg *lcfg;
@@ -641,7 +641,7 @@ static int class_config_dump_handler(struct llog_handle * handle,
         int cfg_len = rec->lrh_len;
         char *cfg_buf = (char*) (rec + 1);
         int rc = 0;
-
+        ENTRY;
         if (rec->lrh_type == OBD_CFG_REC) {
                 char *buf;
                 struct lustre_cfg *lcfg;
@@ -702,6 +702,9 @@ static int class_config_dump_handler(struct llog_handle * handle,
                 if (pcfg->pcfg_flags)
                         CDEBUG(D_INFO, "       flags: %x\n",
                                pcfg->pcfg_flags);
+        } else {
+                CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
+                rc = -EINVAL;
         }
 out:
         RETURN(rc);
index 093f3ac..9248472 100644 (file)
@@ -392,7 +392,8 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
         LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV);
 
         for (i = 0; i < objcount; i++, obj++) {
-                int verify = obj->ioo_id != ECHO_PERSISTENT_OBJID;
+                int verify = (rc == 0 &&
+                              obj->ioo_id != ECHO_PERSISTENT_OBJID);
                 int j;
 
                 for (j = 0 ; j < obj->ioo_bufcnt ; j++, r++) {
index 0e8e458..d2f6369 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/lprocfs_status.h>
 #include <linux/lustre_log.h>
 #include <linux/lustre_commit_confd.h>
+#include <portals/list.h>
 
 #include "filter_internal.h"
 
@@ -1136,12 +1137,12 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
 
         LASSERT(l->l_glimpse_ast != NULL);
         rc = l->l_glimpse_ast(l, NULL); /* this will update the LVB */
+        if (rc != 0 && res->lr_namespace->ns_lvbo &&
+            res->lr_namespace->ns_lvbo->lvbo_update) {
+                res->lr_namespace->ns_lvbo->lvbo_update(res, NULL, 0, 1);
+        }
 
         down(&res->lr_lvb_sem);
-#if 0
-        if (res_lvb->lvb_size == reply_lvb->lvb_size)
-                LDLM_ERROR(l, "we lost the glimpse race!");
-#endif
         reply_lvb->lvb_size = res_lvb->lvb_size;
         up(&res->lr_lvb_sem);
 
@@ -1449,23 +1450,29 @@ static void filter_grant_sanity_check(struct obd_device *obd, char *func)
         spin_unlock(&obd->obd_osfs_lock);
 
         /* Do these assertions outside the spinlocks so we don't kill system */
-        LASSERTF(tot_granted == fo_tot_granted, "%s "LPU64" != "LPU64"\n",
-                 func, tot_granted, fo_tot_granted);
-        LASSERTF(tot_pending == fo_tot_pending, "%s "LPU64" != "LPU64"\n",
-                 func, tot_pending, fo_tot_pending);
-        LASSERTF(tot_dirty == fo_tot_dirty, "%s "LPU64" != "LPU64"\n",
-                 func, tot_dirty, fo_tot_dirty);
-        LASSERTF(tot_pending <= tot_granted, "%s "LPU64" > "LPU64"\n",
-                 func, tot_pending, tot_granted);
-        LASSERTF(tot_granted <= maxsize, "%s "LPU64" > "LPU64"\n",
-                 func, tot_granted, maxsize);
-        LASSERTF(tot_dirty <= maxsize, "%s "LPU64" > "LPU64"\n",
-                 func, tot_dirty, maxsize);
+        if (tot_granted != fo_tot_granted)
+                CERROR("%s: tot_granted "LPU64" != fo_tot_granted "LPU64"\n",
+                       func, tot_granted, fo_tot_granted);
+        if (tot_pending != fo_tot_pending)
+                CERROR("%s: tot_pending "LPU64" != fo_tot_pending "LPU64"\n",
+                       func, tot_pending, fo_tot_pending);
+        if (tot_dirty != fo_tot_dirty)
+                CERROR("%s: tot_dirty "LPU64" != fo_tot_dirty "LPU64"\n",
+                       func, tot_dirty, fo_tot_dirty);
+        if (tot_pending > tot_granted)
+                CERROR("%s: tot_pending "LPU64" > tot_granted "LPU64"\n",
+                       func, tot_pending, tot_granted);
+        if (tot_granted > maxsize)
+                CERROR("%s: tot_granted "LPU64" > maxsize "LPU64"\n",
+                       func, tot_granted, maxsize);
+        if (tot_dirty > maxsize)
+                CERROR("%s: tot_dirty "LPU64" > maxsize "LPU64"\n",
+                       func, tot_dirty, maxsize);
 }
 
-/* Remove this client from the grant accounting totals.  This is done at
- * disconnect time and also at export destroy time in case there was a race
- * between removing the export and an incoming BRW updating the client grant.
+/* Remove this client from the grant accounting totals.  We also remove
+ * the export from the obd device under the osfs and dev locks to ensure
+ * that the filter_grant_sanity_check() calculations are always valid.
  * The client should do something similar when it invalidates its import. */
 static void filter_grant_discard(struct obd_export *exp)
 {
@@ -1474,6 +1481,10 @@ static void filter_grant_discard(struct obd_export *exp)
         struct filter_export_data *fed = &exp->exp_filter_data;
 
         spin_lock(&obd->obd_osfs_lock);
+        spin_lock(&exp->exp_obd->obd_dev_lock);
+        list_del_init(&exp->exp_obd_chain);
+        spin_unlock(&exp->exp_obd->obd_dev_lock);
+
         CDEBUG(D_CACHE, "%s: cli %s/%p dirty %lu pend %lu grant %lu\n",
                obd->obd_name, exp->exp_client_uuid.uuid, exp,
                fed->fed_dirty, fed->fed_pending, fed->fed_grant);
@@ -1512,7 +1523,9 @@ static int filter_destroy_export(struct obd_export *exp)
         if (exp->exp_obd->obd_replayable)
                 filter_client_free(exp, exp->exp_flags);
 
-        filter_grant_sanity_check(exp->exp_obd, __FUNCTION__);
+        filter_grant_discard(exp);
+        if (!(exp->exp_flags & OBD_OPT_FORCE))
+                filter_grant_sanity_check(exp->exp_obd, __FUNCTION__);
 
         RETURN(0);
 }
@@ -1533,15 +1546,13 @@ static int filter_disconnect(struct obd_export *exp, int flags)
         exp->exp_flags = flags;
         spin_unlock_irqrestore(&exp->exp_lock, irqflags);
 
+        if (!(flags & OBD_OPT_FORCE))
+                filter_grant_sanity_check(obd, __FUNCTION__);
         filter_grant_discard(exp);
 
         /* Disconnect early so that clients can't keep using export */
         rc = class_disconnect(exp, flags);
 
-        /* Do this twice in case a BRW arrived between the first call and
-         * the class_export_unlink() call (bug 2663) */
-        filter_grant_discard(exp);
-
         ldlm_cancel_locks_for_export(exp);
 
         fsfilt_sync(obd, obd->u.filter.fo_sb);
@@ -1665,7 +1676,7 @@ static int filter_setattr(struct obd_export *exp, struct obdo *oa,
                         if (res->lr_namespace->ns_lvbo &&
                             res->lr_namespace->ns_lvbo->lvbo_update) {
                                 rc = res->lr_namespace->ns_lvbo->lvbo_update
-                                        (res, NULL, 0);
+                                        (res, NULL, 0, 0);
                         }
                         ldlm_resource_putref(res);
                 }
index 6ab28db..15ac8e9 100644 (file)
@@ -251,7 +251,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
                 GOTO(cleanup, rc);
         cleanup_phase = 1;
 
-#if (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,18))
+#ifdef HAVE_KIOBUF_DOVARY
         iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */
 #endif
         rc = expand_kiobuf(iobuf, obj->ioo_bufcnt);
index 852aeaf..acfba4c 100644 (file)
@@ -102,9 +102,11 @@ static int filter_lvbo_init(struct ldlm_resource *res)
  *
  *   m != NULL : called by the DLM itself after a glimpse callback
  *   m == NULL : called by the filter after a disk write
+ *
+ *   If 'increase' is true, don't allow values to move backwards.
  */
 static int filter_lvbo_update(struct ldlm_resource *res, struct lustre_msg *m,
-                              int buf_idx)
+                              int buf_idx, int increase)
 {
         int rc = 0;
         struct ost_lvb *lvb = res->lr_lvb_data;
@@ -137,13 +139,13 @@ static int filter_lvbo_update(struct ldlm_resource *res, struct lustre_msg *m,
                         //GOTO(out, rc = -EPROTO);
                         GOTO(out, rc = 0);
                 }
-                if (new->lvb_size > lvb->lvb_size) {
+                if (new->lvb_size > lvb->lvb_size || !increase) {
                         CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb size: "
                                LPU64" -> "LPU64"\n", res->lr_name.name[0],
                                lvb->lvb_size, new->lvb_size);
                         lvb->lvb_size = new->lvb_size;
                 }
-                if (new->lvb_mtime > lvb->lvb_mtime) {
+                if (new->lvb_mtime > lvb->lvb_mtime || !increase) {
                         CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb mtime: "
                                LPU64" -> "LPU64"\n", res->lr_name.name[0],
                                lvb->lvb_mtime, new->lvb_mtime);
@@ -170,10 +172,20 @@ static int filter_lvbo_update(struct ldlm_resource *res, struct lustre_msg *m,
         oa->o_valid = OBD_MD_FLID;
         obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
 
-        lvb->lvb_size = dentry->d_inode->i_size;
-        lvb->lvb_mtime = LTIME_S(dentry->d_inode->i_mtime);
-        CDEBUG(D_DLMTRACE, "res: "LPU64" disk lvb size: "LPU64", mtime: "
-               LPU64"\n", res->lr_name.name[0], lvb->lvb_size, lvb->lvb_mtime);
+        /* Refresh the LVB from the on-disk inode.  With 'increase' set, size
+         * and mtime are only updated when the disk value is larger. */
+        if (dentry->d_inode->i_size > lvb->lvb_size || !increase) {
+                CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb size from disk: "
+                       LPU64" -> "LPU64"\n", res->lr_name.name[0],
+                       lvb->lvb_size, dentry->d_inode->i_size);
+                lvb->lvb_size = dentry->d_inode->i_size;
+        }
+        if (LTIME_S(dentry->d_inode->i_mtime) > lvb->lvb_mtime || !increase) {
+                CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb mtime from disk: "
+                       LPU64" -> "LPU64"\n", res->lr_name.name[0],
+                       lvb->lvb_mtime,(__u64)LTIME_S(dentry->d_inode->i_mtime));
+                lvb->lvb_mtime = LTIME_S(dentry->d_inode->i_mtime);
+        }
         f_dput(dentry);
 
  out:
index 6858fe1..a6a3992 100644 (file)
@@ -2384,6 +2384,9 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
         policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
         policy->l_extent.end |= ~PAGE_MASK;
 
+        if (lsm->lsm_oinfo->loi_kms_valid == 0)
+                goto no_match;
+
         /* Next, search for already existing extent locks that will cover us */
         rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type, policy, mode,
                              lockh);
@@ -2424,6 +2427,7 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
                 }
         }
 
+ no_match:
         rc = ldlm_cli_enqueue(exp, NULL, obd->obd_namespace, res_id, type,
                               policy, mode, flags, bl_cb, cp_cb, gl_cb, data,
                               &lvb, sizeof(lvb), lustre_swab_ost_lvb, lockh);
index 3bdaf32..7801957 100644 (file)
@@ -133,8 +133,8 @@ case ${host_cpu} in
 
        powerpc )
        AC_MSG_RESULT($host_cpu)
-        KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
-        KCPPFLAGS='-D__KERNEL__'
+        KCFLAGS='-O2 -g -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
+        KCPPFLAGS='-D__KERNEL__ -DMODULE'
         MOD_LINK=elf32ppclinux
 ;;
 
@@ -338,6 +338,18 @@ AC_SUBST(MOD_LINK)
 AC_SUBST(LINUX25)
 AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
 
+# ---------- Red Hat 2.4.18 has iobuf->dovary --------------
+# But other kernels don't
+
+AC_MSG_CHECKING([if struct kiobuf has a dovary field])
+AC_TRY_COMPILE([#define __KERNEL__
+               #include <linux/iobuf.h>],
+              [struct kiobuf iobuf;
+               iobuf.dovary = 1;],
+              [AC_MSG_RESULT([yes])
+                CPPFLAGS="$CPPFLAGS -DHAVE_KIOBUF_DOVARY"],
+              [AC_MSG_RESULT([no])])
+
 # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
 # This needs to run after we've defined the KCPPFLAGS
 
index d56a120..c080a57 100644 (file)
@@ -368,13 +368,14 @@ typedef struct {
         struct list_head     kprfd_list;        /* stash in queues (routing target can use) */
         ptl_nid_t            kprfd_target_nid;  /* final destination NID */
         ptl_nid_t            kprfd_gateway_nid; /* gateway NID */
-        int                  kprfd_nob;         /* # message bytes (including header) */
-        int                  kprfd_niov;        /* # message frags (including header) */
-        struct iovec        *kprfd_iov;         /* message fragments */
-        void                *kprfd_router_arg;  // originating NAL's router arg
+        ptl_hdr_t           *kprfd_hdr;         /* header in wire byte order */
+        int                  kprfd_nob;         /* # payload bytes */
+        int                  kprfd_niov;        /* # payload frags */
+        ptl_kiov_t          *kprfd_kiov;        /* payload fragments */
+        void                *kprfd_router_arg;  /* originating NAL's router arg */
         kpr_fwd_callback_t   kprfd_callback;    /* completion callback */
         void                *kprfd_callback_arg; /* completion callback arg */
-        kprfd_scratch_t      kprfd_scratch;    // scratchpad for routing targets
+        kprfd_scratch_t      kprfd_scratch;     /* scratchpad for routing targets */
 } kpr_fwd_desc_t;
 
 typedef void  (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
@@ -477,15 +478,16 @@ kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid
 }
 
 static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid,
-              int nob, int niov, struct iovec *iov,
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
+              int nob, int niov, ptl_kiov_t *kiov,
               kpr_fwd_callback_t callback, void *callback_arg)
 {
         fwd->kprfd_target_nid   = nid;
         fwd->kprfd_gateway_nid  = nid;
+        fwd->kprfd_hdr          = hdr;
         fwd->kprfd_nob          = nob;
         fwd->kprfd_niov         = niov;
-        fwd->kprfd_iov          = iov;
+        fwd->kprfd_kiov         = kiov;
         fwd->kprfd_callback     = callback;
         fwd->kprfd_callback_arg = callback_arg;
 }
index 7ffe797..d4ca453 100644 (file)
@@ -3,7 +3,13 @@
 
 #ifdef __linux__
 # include <asm/types.h>
-# include <asm/timex.h>
+# if defined(__powerpc__) && !defined(__KERNEL__)
+#  define __KERNEL__
+#  include <asm/timex.h>
+#  undef __KERNEL__
+# else
+#  include <asm/timex.h>
+# endif
 #else
 # include <sys/types.h>
 typedef u_int32_t __u32;
@@ -14,7 +20,7 @@ typedef u_int64_t __u64;
 # include <linux/time.h>
 #else
 # include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday(tv, NULL)
+# define do_gettimeofday(tv) gettimeofday(tv, NULL) /* expression-like: no ';' */
 #endif
 
 #include <portals/errno.h>
@@ -129,7 +135,7 @@ typedef struct {
         struct timeval     arrival_time;
 
         volatile ptl_seq_t sequence;
-} ptl_event_t;
+} __attribute__((packed)) ptl_event_t;
 #ifdef __CYGWIN__
 #pragma pop
 #endif
index cdde5b7..ad46b90 100644 (file)
@@ -45,6 +45,7 @@
 #include "linux/init.h"
 #include "linux/sem.h"
 #include "linux/vmalloc.h"
+#include "linux/sysctl.h"
 
 #define DEBUG_SUBSYSTEM S_GMNAL
 
 extern  int gmnal_small_msg_size;
 extern  int num_rx_threads;
 extern  int num_stxds;
+extern  int gm_port;
 #define GMNAL_SMALL_MSG_SIZE(a)                a->small_msg_size
 #define GMNAL_IS_SMALL_MESSAGE(n,a,b,c)        gmnal_is_small_msg(n, a, b, c)
 #define GMNAL_MAGIC                            0x1234abcd
+/*
+ *     The gm_port to use for gmnal
+ */
+#define GMNAL_GM_PORT  gm_port
 
 
 /*
@@ -218,6 +224,7 @@ typedef struct _gmnal_data_t {
        gmnal_rxtwe_t   *rxtwe_tail;
        spinlock_t      rxtwe_lock;
        struct  semaphore rxtwe_wait;
+        struct ctl_table_header *sysctl;
 } gmnal_data_t;
 
 /*
@@ -234,11 +241,6 @@ typedef struct _gmnal_data_t {
 extern gmnal_data_t    *global_nal_data;
 
 /*
- *     The gm_port to use for gmnal
- */
-#define GMNAL_GM_PORT  4
-
-/*
  * for ioctl get pid
  */
 #define GMNAL_IOC_GET_GNID 1   
@@ -353,6 +355,8 @@ int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
 
 int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
 
+int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
+
 void *gmnal_cb_malloc(nal_cb_t *, size_t);
 
 void gmnal_cb_free(nal_cb_t *, void *, size_t);
@@ -382,7 +386,7 @@ void  gmnal_fini(void);
                                a->cb_recv_pages = gmnal_cb_recv_pages; \
                                a->cb_read = gmnal_cb_read; \
                                a->cb_write = gmnal_cb_write; \
-                               a->cb_callback = NULL; \
+                               a->cb_callback = gmnal_cb_callback; \
                                a->cb_malloc = gmnal_cb_malloc; \
                                a->cb_free = gmnal_cb_free; \
                                a->cb_map = NULL; \
@@ -418,6 +422,7 @@ void                gmnal_stop_rxthread(gmnal_data_t *);
 void           gmnal_stop_ctthread(gmnal_data_t *);
 void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
 void           gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
+void           gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
 char           *gmnal_gm_error(gm_status_t);
 char           *gmnal_rxevent(gm_recv_event_t*);
 int            gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
index 1cb1317..1442aa7 100644 (file)
 
 #include "gmnal.h"
 
+
+
 gmnal_data_t   *global_nal_data = NULL;
+#define         GLOBAL_NID_STR_LEN      16
+char            global_nid_str[GLOBAL_NID_STR_LEN] = {0};
+
+/*
+ *      Export the global nid read-only as /proc/sys/gmnal/globalnid
+ */
+#define GMNAL_SYSCTL    201
+#define GMNAL_SYSCTL_GLOBALNID  1
+
+static ctl_table gmnal_sysctl_table[] = {
+        {GMNAL_SYSCTL_GLOBALNID, "globalnid",
+         global_nid_str, GLOBAL_NID_STR_LEN,
+         0444, NULL, &proc_dostring},
+        { 0 }
+};
+
+
+static ctl_table gmnalnal_top_sysctl_table[] = {
+        {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},
+        { 0 }
+};
+
+
+
+
+
+
 /*
  *     gmnal_api_forward
  *     This function takes a pack block of arguments from the NAL API
@@ -193,8 +222,8 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        ptl_pid_t       portals_pid = 0;
 
 
-       CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], 
-              ac_size[%d]\n", interface, ptl_size, ac_size);
+       CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], "
+              "ac_size[%d]\n", interface, ptl_size, ac_size);
 
 
        PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
@@ -255,8 +284,8 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        }
 
 
-       CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], 
-                      name [%s], version [%d]\n", interface, GMNAL_GM_PORT, 
+       CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], "
+                      "name [%s], version [%d]\n", interface, GMNAL_GM_PORT, 
               "gmnal", GM_API_VERSION);
 
        GMNAL_GM_LOCK(nal_data);
@@ -280,15 +309,15 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
                        CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
                        break;
                case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib 
-                              and driver\n");
+                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib "
+                              "and driver\n");
                        break;
                case(GM_OUT_OF_MEMORY):
                        CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
                        break;
                default:
-                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error 
-                              code [%d]\n", gm_status);
+                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error "
+                              "code [%d]\n", gm_status);
                        break;
                }       
                GMNAL_GM_LOCK(nal_data);
@@ -403,6 +432,7 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        }
        CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
        nal_data->gm_global_nid = global_nid;
+        snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
 
 /*
        pid = gm_getpid();
@@ -429,6 +459,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
                return(NULL);
                
        }
+        nal_data->sysctl = NULL;
+        nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
+
        
        CDEBUG(D_INFO, "gmnal_init finished\n");
        global_nal_data = nal->nal_data;
@@ -459,6 +492,8 @@ void gmnal_fini()
        gm_close(nal_data->gm_port);
        gm_finalize();
        GMNAL_GM_UNLOCK(nal_data);
+        if (nal_data->sysctl)
+                unregister_sysctl_table (nal_data->sysctl);
        PORTAL_FREE(nal, sizeof(nal_t));        
        PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
        PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
index e055242..1f28746 100644 (file)
@@ -35,8 +35,8 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], 
-              niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
+       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+              "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
               nal_cb, private, cookie, niov, iov, mlen, rlen);
 
        switch(srxd->type) {
@@ -64,10 +64,11 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
        struct iovec    *iovec = NULL, *iovec_dup = NULL;
        int             i = 0;
+       ptl_kiov_t      *kiov_dup = kiov; /* cursor for the kunmap pass */
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], 
-              cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+              "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
               nal_cb, private, cookie, kniov, kiov, mlen, rlen);
 
        if (srxd->type == GMNAL_SMALL_MESSAGE) {
@@ -99,6 +100,10 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                CDEBUG(D_INFO, "calling gmnal_small_rx\n");
                status = gmnal_small_rx(nal_cb, private, cookie, kniov, 
                                         iovec_dup, mlen, rlen);
+               for (i=0; i<kniov; i++) {
+                       kunmap(kiov_dup->kiov_page);
+                       kiov_dup++;
+               }
                PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
        }
                
@@ -126,6 +131,7 @@ int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                niov, iov, len);
        } else {
                CDEBUG(D_ERROR, "Large message send it is not supported\n");
+               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
                gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, 
                                niov, iov, len);
@@ -140,6 +146,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int     i = 0;
        gmnal_data_t    *nal_data;
        struct  iovec   *iovec = NULL, *iovec_dup = NULL;
+       ptl_kiov_t      *kiov_dup = kiov;
 
        CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
        nal_data = nal_cb->nal_data;
@@ -181,6 +188,10 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec, len);
        }
+       for (i=0; i<kniov; i++) {
+               kunmap(kiov_dup->kiov_page);
+               kiov_dup++;
+       }
        PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
        return(PTL_OK);
 }
@@ -199,6 +210,18 @@ int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst,
        return(PTL_OK);
 }
 
+int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, 
+                      ptl_event_t *ev)
+{
+       /* Invoke the event queue's callback, if one is set, with 'ev' */
+       if (eq->event_callback != NULL) {
+               CDEBUG(D_INFO, "found callback\n");
+               eq->event_callback(ev);
+       }
+       /* nal_cb and private are not used here; always returns PTL_OK */
+       return(PTL_OK);
+}
+
 void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
 {
        void *ptr = NULL;
index a0d3530..1bcd9bd 100644 (file)
@@ -203,14 +203,14 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        gmnal_msghdr = (gmnal_msghdr_t*)buffer;
        portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE);
 
-       CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], 
-              type [%d], length [%d], buffer [%p]\n",
+       CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], "
+              "type [%d], length [%d], buffer [%p]\n",
               snode, sport, type, length, buffer);
-       CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], 
-              gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
+       CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
+              "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
               gmnal_msghdr->magic, gmnal_msghdr->type);
-       CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], 
-              dest_node ["LPD64"]\n", portals_hdr->src_nid, 
+       CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
+              "dest_node ["LPD64"]\n", portals_hdr->src_nid, 
               portals_hdr->dest_nid);
 
        
@@ -321,6 +321,7 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
 
        if (!private) {
                CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
+               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
 
@@ -343,7 +344,6 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
         */
        CDEBUG(D_PORTALS, "calling lib_finalize\n");
        lib_finalize(nal_cb, private, cookie, PTL_OK);
-
        /*
         *      return buffer so it can be used again
         */
@@ -377,9 +377,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        unsigned int    local_nid;
        gm_status_t     gm_status = GM_SUCCESS;
 
-       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] 
-              hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] 
-              iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
+       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+              "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
+              "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
@@ -440,9 +440,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        stxd->msg_size = tot_size;
 
 
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] 
-              gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] 
-              stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+              "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
               stxd->msg_size, global_nid, local_nid, stxd);
 
        GMNAL_GM_LOCK(nal_data);
@@ -493,8 +493,8 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                /*
                 *      do a resend on the dropped ones
                 */
-                       CDEBUG(D_ERROR, "send stxd [%p] was dropped 
-                              resending\n", context);
+                       CDEBUG(D_ERROR, "send stxd [%p] was dropped "
+                              "resending\n", context);
                        GMNAL_GM_LOCK(nal_data);
                        gm_send_to_peer_with_callback(nal_data->gm_port, 
                                                      stxd->buffer, 
@@ -569,6 +569,11 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                case(GM_YP_NO_MATCH):
                default:
                        CDEBUG(D_ERROR, "Unknown send error\n");
+                gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
+                                      stxd->gm_target_node, GMNAL_GM_PORT,
+                                      gmnal_resume_sending_callback, context);
+                return;
+
        }
 
        /*
@@ -588,10 +593,22 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
        }
        gmnal_return_stxd(nal_data, stxd);
        lib_finalize(nal_cb, stxd, cookie, PTL_OK);
-
        return;
 }
 
+/*
+ *     After an error on the port, call this to allow future sends to
+ *     complete: logs status, passes 'context' (stxd) to gmnal_return_stxd().
+ */
+void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context,
+                                 gm_status_t status)
+{
+        gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
+        gmnal_data_t    *nal_data = stxd->nal_data;
+
+        CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
+        gmnal_return_stxd(nal_data, stxd);
+}
 
 
 void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, 
@@ -611,8 +628,8 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
                                              context);
                GMNAL_GM_LOCK(nal_data);
        } else {
-               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is 
-                      [%d][%s]\n", stxd, status, gmnal_gm_error(status));
+               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
+                      "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
        }
 
 
@@ -644,9 +661,9 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             niov_dup;
 
 
-       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] 
-              hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], 
-              iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
+       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+              "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
+              "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        if (nal_cb)
@@ -729,8 +746,8 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                               iov->iov_base, iov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] 
-                              for memory [%p] len ["LPSZ"]\n", 
+                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               iov->iov_base, iov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
@@ -806,12 +823,13 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        gmnal_msghdr_t  *msghdr = NULL;
        gm_status_t     gm_status;
 
-       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], 
-              cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+              "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
                nal_cb, private, cookie, nriov, riov, mlen, rlen);
 
        if (!srxd) {
                CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
+               lib_finalize(nal_cb, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
 
@@ -846,8 +864,8 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                               riov->iov_base, riov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] 
-                              for memory [%p] len ["LPSZ"]\n", 
+                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               riov->iov_base, riov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
@@ -902,8 +920,8 @@ gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov,
 
        int     ncalls = 0;
 
-       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], 
-              nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
+       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], "
+              "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
 
 
        ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
@@ -958,8 +976,8 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
                                            srxd->gm_source_node, 
                                            &source_node) != GM_SUCCESS) {
 
-                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] 
-                              to local node_id\n", srxd->gm_source_node);
+                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
+                              "to local node_id\n", srxd->gm_source_node);
                        GMNAL_GM_UNLOCK(nal_data);
                        return(GMNAL_STATUS_FAIL);
                }
@@ -1201,9 +1219,9 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
        stxd->msg_size= sizeof(gmnal_msghdr_t);
 
 
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] 
-              gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] 
-              stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+              "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
               stxd->msg_size, srxd->gm_source_node, local_nid, stxd);
        GMNAL_GM_LOCK(nal_data);
        stxd->gm_priority = GM_LOW_PRIORITY;
index 1260629..31f6819 100644 (file)
@@ -30,6 +30,7 @@ int gmnal_small_msg_size = 525312;
  */
 int num_rx_threads = -1;
 int num_stxds = 5;
+int gm_port = 4;
 
 ptl_handle_ni_t        kgmnal_ni;
 
@@ -139,6 +140,7 @@ EXPORT_SYMBOL(kgmnal_ni);
 MODULE_PARM(gmnal_small_msg_size, "i");
 MODULE_PARM(num_rx_threads, "i");
 MODULE_PARM(num_stxds, "i");
+MODULE_PARM(gm_port, "i");
 
 MODULE_AUTHOR("Morgan Doyle");
 
index 55606f3..6a52319 100644 (file)
@@ -117,8 +117,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
+                              " size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -131,8 +131,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
 
                txd->next = nal_data->stxd;
                nal_data->stxd = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
        }
 
        for (i=0; i<=nrxt_stx; i++) {
@@ -146,8 +146,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
+                              " size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -160,8 +160,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
 
                txd->next = nal_data->rxt_stxd;
                nal_data->rxt_stxd = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
        }
 
        /*
@@ -187,8 +187,8 @@ gmnal_free_txd(gmnal_data_t *nal_data)
        CDEBUG(D_TRACE, "gmnal_free_small tx\n");
 
        while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
                _txd = txd;
                txd = txd->next;
                GMNAL_GM_LOCK(nal_data);
@@ -198,8 +198,8 @@ gmnal_free_txd(gmnal_data_t *nal_data)
        }
         txd = nal_data->rxt_stxd;
        while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
                _txd = txd;
                txd = txd->next;
                GMNAL_GM_LOCK(nal_data);
@@ -392,22 +392,22 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
 #if 0
                PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], 
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
                }
-               CDEBUG(D_NET, "Calling gm_register_memory with port [%p] 
-                      rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
+               CDEBUG(D_NET, "Calling gm_register_memory with port [%p] "
+                      "rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
                       rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_LOCK(nal_data);
                gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, 
                                               GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (gm_status != GM_SUCCESS) {
-                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],
-                              index [%d]\n", rxbuffer, i);
+                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],"
+                              " index [%d]\n", rxbuffer, i);
                        switch(gm_status) {
                                case(GM_FAILURE):
                                        CDEBUG(D_ERROR, "GM_FAILURE\n");
@@ -432,8 +432,8 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],"
+                              " size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -447,15 +447,15 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                if (gm_hash_insert(nal_data->srxd_hash, 
                                   (void*)rxbuffer, (void*)rxd)) {
 
-                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] 
-                              for rxbuffer[%p]\n", rxd, rxbuffer);
+                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
+                              "for rxbuffer[%p]\n", rxd, rxbuffer);
                        return(GMNAL_STATUS_FAIL);
                }
 
                rxd->next = nal_data->srxd;
                nal_data->srxd = rxd;
-               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], 
-                      size [%d]\n", rxd, rxd->buffer, rxd->size);
+               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], "
+                      "size [%d]\n", rxd, rxd->buffer, rxd->size);
        }
 
        return(GMNAL_STATUS_OK);
@@ -623,6 +623,8 @@ gmnal_stop_ctthread(gmnal_data_t *nal_data)
 char * 
 gmnal_gm_error(gm_status_t status)
 {
+       return(gm_strerror(status));
+
        switch(status) {
                case(GM_SUCCESS):
                        return("SUCCESS");
@@ -972,7 +974,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                }
                spin_lock(&nal_data->rxtwe_lock);
                if (nal_data->rxtwe_head) {
-                       CDEBUG(D_WARNING, "Got a work entry\n");
+                       CDEBUG(D_INFO, "Got a work entry\n");
                        we = nal_data->rxtwe_head;
                        nal_data->rxtwe_head = we->next;
                        if (!nal_data->rxtwe_head)
@@ -983,7 +985,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                spin_unlock(&nal_data->rxtwe_lock);
        } while (!we);
 
-       CDEBUG(D_WARNING, "Returning we[%p]\n", we);
+       CDEBUG(D_INFO, "Returning we[%p]\n", we);
        return(we);
 }
 
index 90c9a95..3b3b5d4 100644 (file)
@@ -348,10 +348,10 @@ kqswnal_finalise (void)
                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
 
-                       /* If krx_pages[0] got allocated, it got mapped.
+                       /* If krx_kiov[0].kiov_page got allocated, it got mapped.  
                         * NB subsequent pages get merged */
 
-                       if (krx->krx_pages[0] != NULL)
+                       if (krx->krx_kiov[0].kiov_page != NULL)
                                ep_dvma_unload(kqswnal_data.kqn_ep,
                                               kqswnal_data.kqn_ep_rx_nmh,
                                               &krx->krx_elanbuffer);
@@ -416,8 +416,8 @@ kqswnal_finalise (void)
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
 
                        for (j = 0; j < krx->krx_npages; j++)
-                               if (krx->krx_pages[j] != NULL)
-                                       __free_page (krx->krx_pages[j]);
+                               if (krx->krx_kiov[j].kiov_page != NULL)
+                                       __free_page (krx->krx_kiov[j].kiov_page);
                }
 
                PORTAL_FREE(kqswnal_data.kqn_rxds,
@@ -709,18 +709,19 @@ kqswnal_initialise (void)
                LASSERT (krx->krx_npages > 0);
                for (j = 0; j < krx->krx_npages; j++)
                {
-                       krx->krx_pages[j] = alloc_page(GFP_KERNEL);
-                       if (krx->krx_pages[j] == NULL)
-                       {
+                       struct page *page = alloc_page(GFP_KERNEL);
+                       
+                       if (page == NULL) {
                                kqswnal_finalise ();
                                return (-ENOMEM);
                        }
 
-                       LASSERT(page_address(krx->krx_pages[j]) != NULL);
+                       krx->krx_kiov[j].kiov_page = page;
+                       LASSERT(page_address(page) != NULL);
 
 #if MULTIRAIL_EKC
                        ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-                                    page_address(krx->krx_pages[j]),
+                                    page_address(page),
                                     PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
                                     elan_page_idx, &all_rails, &elanbuffer);
                        
@@ -736,7 +737,7 @@ kqswnal_initialise (void)
 #else
                        elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
                                              kqswnal_data.kqn_eprxdmahandle,
-                                             page_address(krx->krx_pages[j]),
+                                             page_address(page),
                                              PAGE_SIZE, elan_page_idx,
                                              &elanbuffer);
                        if (j == 0)
index b1b9a45..5ebf30a 100644 (file)
@@ -153,8 +153,7 @@ typedef struct
         int              krx_rpc_reply_sent;    /* rpc reply sent */
         atomic_t         krx_refcount;          /* how to tell when rpc is done */
         kpr_fwd_desc_t   krx_fwd;               /* embedded forwarding descriptor */
-        struct page     *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */
-        struct iovec     krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */
+        ptl_kiov_t       krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
 }  kqswnal_rx_t;
 
 typedef struct
index 478c25f..157dc70 100644 (file)
@@ -775,7 +775,7 @@ kqswnal_dma_reply (kqswnal_tx_t *ktx, int nfrag,
                    int offset, int nob)
 {
         kqswnal_rx_t       *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-        char               *buffer = (char *)page_address(krx->krx_pages[0]);
+        char               *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
         kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
         int                 rc;
 #if MULTIRAIL_EKC
@@ -1008,7 +1008,7 @@ kqswnal_sendmsg (nal_cb_t     *nal,
         }
         memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
 #endif
-        
+
         if (kqswnal_data.kqn_optimized_gets &&
             type == PTL_MSG_GET &&              /* doing a GET */
             nid == targetnid) {                 /* not forwarding */
@@ -1167,7 +1167,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
 {
         int             rc;
         kqswnal_tx_t   *ktx;
-        struct iovec   *iov = fwd->kprfd_iov;
+        ptl_kiov_t     *kiov = fwd->kprfd_kiov;
         int             niov = fwd->kprfd_niov;
         int             nob = fwd->kprfd_nob;
         ptl_nid_t       nid = fwd->kprfd_gateway_nid;
@@ -1177,11 +1177,9 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         LBUG ();
 #endif
         /* The router wants this NAL to forward a packet */
-        CDEBUG (D_NET, "forwarding [%p] to "LPX64", %d frags %d bytes\n",
+        CDEBUG (D_NET, "forwarding [%p] to "LPX64", payload: %d frags %d bytes\n",
                 fwd, nid, niov, nob);
 
-        LASSERT (niov > 0);
-        
         ktx = kqswnal_get_idle_tx (fwd, 0);
         if (ktx == NULL)        /* can't get txd right now */
                 return;         /* fwd will be scheduled when tx desc freed */
@@ -1195,44 +1193,44 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
                 goto failed;
         }
 
-        if (nob > KQSW_NRXMSGBYTES_LARGE) {
-                CERROR ("Can't forward [%p] to "LPX64
-                        ": size %d bigger than max packet size %ld\n",
-                        fwd, nid, nob, (long)KQSW_NRXMSGBYTES_LARGE);
-                rc = -EMSGSIZE;
-                goto failed;
-        }
+        /* copy hdr into pre-mapped buffer */
+        memcpy(ktx->ktx_buffer, fwd->kprfd_hdr, sizeof(ptl_hdr_t));
+        ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
 
-        ktx->ktx_port    = (nob <= (KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) ?
+        ktx->ktx_port    = (nob <= KQSW_SMALLPAYLOAD) ?
                            EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
         ktx->ktx_nid     = nid;
         ktx->ktx_state   = KTX_FORWARDING;
         ktx->ktx_args[0] = fwd;
+        ktx->ktx_nfrag   = ktx->ktx_firsttmpfrag = 1;
 
-        if ((kqswnal_data.kqn_copy_small_fwd || niov > 1) &&
-            nob <= KQSW_TX_BUFFER_SIZE) 
+        if (nob <= KQSW_TX_MAXCONTIG) 
         {
-                /* send from ktx's pre-mapped contiguous buffer? */
-                lib_copy_iov2buf (ktx->ktx_buffer, niov, iov, 0, nob);
+                /* send payload from ktx's pre-mapped contiguous buffer */
 #if MULTIRAIL_EKC
                 ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-                              0, nob);
+                              0, KQSW_HDR_SIZE + nob);
 #else
                 ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-                ktx->ktx_frags[0].Len = nob;
+                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + nob;
 #endif
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-                ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
+                if (nob > 0)
+                        lib_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE,
+                                          niov, kiov, 0, nob);
         }
         else
         {
-                /* zero copy */
-                ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
-                rc = kqswnal_map_tx_iov (ktx, 0, nob, niov, iov);
+                /* zero copy payload */
+#if MULTIRAIL_EKC
+                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
+                              0, KQSW_HDR_SIZE);
+#else
+                ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
+                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE;
+#endif
+                rc = kqswnal_map_tx_kiov (ktx, 0, nob, niov, kiov);
                 if (rc != 0)
                         goto failed;
-
-                ktx->ktx_wire_hdr = (ptl_hdr_t *)iov[0].iov_base;
         }
 
         rc = kqswnal_launch (ktx);
@@ -1257,7 +1255,7 @@ kqswnal_fwd_callback (void *arg, int error)
 
         if (error != 0)
         {
-                ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+                ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
 
                 CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
                        NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
@@ -1371,8 +1369,9 @@ kqswnal_requeue_rx (kqswnal_rx_t *krx)
 void
 kqswnal_rx (kqswnal_rx_t *krx)
 {
-        ptl_hdr_t      *hdr = (ptl_hdr_t *) page_address (krx->krx_pages[0]);
+        ptl_hdr_t      *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
         ptl_nid_t       dest_nid = NTOH__u64 (hdr->dest_nid);
+        int             payload_nob;
         int             nob;
         int             niov;
 
@@ -1398,16 +1397,26 @@ kqswnal_rx (kqswnal_rx_t *krx)
                 return;
         }
 
-        /* NB forwarding may destroy iov; rebuild every time */
-        for (nob = krx->krx_nob, niov = 0; nob > 0; nob -= PAGE_SIZE, niov++)
-        {
-                LASSERT (niov < krx->krx_npages);
-                krx->krx_iov[niov].iov_base= page_address(krx->krx_pages[niov]);
-                krx->krx_iov[niov].iov_len = MIN(PAGE_SIZE, nob);
+        nob = payload_nob = krx->krx_nob - KQSW_HDR_SIZE;
+        niov = 0;
+        if (nob > 0) {
+                krx->krx_kiov[0].kiov_offset = KQSW_HDR_SIZE;
+                krx->krx_kiov[0].kiov_len = MIN(PAGE_SIZE - KQSW_HDR_SIZE, nob);
+                niov = 1;
+                nob -= PAGE_SIZE - KQSW_HDR_SIZE;
+                
+                while (nob > 0) {
+                        LASSERT (niov < krx->krx_npages);
+                        
+                        krx->krx_kiov[niov].kiov_offset = 0;
+                        krx->krx_kiov[niov].kiov_len = MIN(PAGE_SIZE, nob);
+                        niov++;
+                        nob -= PAGE_SIZE;
+                }
         }
 
-        kpr_fwd_init (&krx->krx_fwd, dest_nid,
-                      krx->krx_nob, niov, krx->krx_iov,
+        kpr_fwd_init (&krx->krx_fwd, dest_nid, 
+                      hdr, payload_nob, niov, krx->krx_kiov,
                       kqswnal_fwd_callback, krx);
 
         kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd);
@@ -1471,7 +1480,7 @@ kqswnal_rxhandler(EP_RXD *rxd)
 void
 kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
 {
-        ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+        ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
 
         CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64
                 ", dpid %d, spid %d, type %d\n",
@@ -1526,6 +1535,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
                  size_t        rlen)
 {
         kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
+        char         *buffer = page_address(krx->krx_kiov[0].kiov_page);
         int           page;
         char         *page_ptr;
         int           page_nob;
@@ -1535,8 +1545,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 #if KQSW_CHECKSUM
         kqsw_csum_t   senders_csum;
         kqsw_csum_t   payload_csum = 0;
-        kqsw_csum_t   hdr_csum = kqsw_csum(0, page_address(krx->krx_pages[0]),
-                                           sizeof(ptl_hdr_t));
+        kqsw_csum_t   hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
         size_t        csum_len = mlen;
         int           csum_frags = 0;
         int           csum_nob = 0;
@@ -1545,8 +1554,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 
         atomic_inc (&csum_counter);
 
-        memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
-                                sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
+        memcpy (&senders_csum, buffer + sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
         if (senders_csum != hdr_csum)
                 kqswnal_csum_error (krx, 1);
 #endif
@@ -1567,8 +1575,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
 
         if (mlen != 0) {
                 page     = 0;
-                page_ptr = ((char *) page_address(krx->krx_pages[0])) +
-                        KQSW_HDR_SIZE;
+                page_ptr = buffer + KQSW_HDR_SIZE;
                 page_nob = PAGE_SIZE - KQSW_HDR_SIZE;
 
                 LASSERT (niov > 0);
@@ -1621,7 +1628,7 @@ kqswnal_recvmsg (nal_cb_t     *nal,
                         {
                                 page++;
                                 LASSERT (page < krx->krx_npages);
-                                page_ptr = page_address(krx->krx_pages[page]);
+                                page_ptr = page_address(krx->krx_kiov[page].kiov_page);
                                 page_nob = PAGE_SIZE;
                         }
 
@@ -1649,8 +1656,8 @@ kqswnal_recvmsg (nal_cb_t     *nal,
         }
 
 #if KQSW_CHECKSUM
-        memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
-                sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), sizeof(kqsw_csum_t));
+        memcpy (&senders_csum, buffer + sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), 
+                sizeof(kqsw_csum_t));
 
         if (csum_len != rlen)
                 CERROR("Unable to checksum data in user's buffer\n");
index c47dcb4..2c44b43 100644 (file)
@@ -1388,6 +1388,7 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private)
 void
 ksocknal_free_fmbs (ksock_fmb_pool_t *p)
 {
+        int          npages = p->fmp_buff_pages;
         ksock_fmb_t *fmb;
         int          i;
 
@@ -1399,12 +1400,12 @@ ksocknal_free_fmbs (ksock_fmb_pool_t *p)
                 fmb = list_entry(p->fmp_idle_fmbs.next,
                                  ksock_fmb_t, fmb_list);
                 
-                for (i = 0; i < fmb->fmb_npages; i++)
-                        if (fmb->fmb_pages[i] != NULL)
-                                __free_page(fmb->fmb_pages[i]);
-                
+                for (i = 0; i < npages; i++)
+                        if (fmb->fmb_kiov[i].kiov_page != NULL)
+                                __free_page(fmb->fmb_kiov[i].kiov_page);
+
                 list_del(&fmb->fmb_list);
-                PORTAL_FREE(fmb, sizeof(*fmb));
+                PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages]));
         }
 }
 
@@ -1603,10 +1604,12 @@ ksocknal_module_init (void)
         spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
+        ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES;
 
         spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
+        ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES;
 
         spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
         INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
@@ -1690,34 +1693,36 @@ ksocknal_module_init (void)
 
                 for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
                                  SOCKNAL_LARGE_FWD_NMSGS); i++) {
-                        ksock_fmb_t *fmb;
+                        ksock_fmb_t      *fmb;
+                        ksock_fmb_pool_t *pool;
+                        
+
+                        if (i < SOCKNAL_SMALL_FWD_NMSGS)
+                                pool = &ksocknal_data.ksnd_small_fmp;
+                        else
+                                pool = &ksocknal_data.ksnd_large_fmp;
                         
-                        PORTAL_ALLOC(fmb, sizeof(*fmb));
+                        PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, 
+                                                   fmb_kiov[pool->fmp_buff_pages]));
                         if (fmb == NULL) {
                                 ksocknal_module_fini();
                                 return (-ENOMEM);
                         }
 
-                        if (i < SOCKNAL_SMALL_FWD_NMSGS) {
-                                fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
-                                fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp;
-                        } else {
-                                fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
-                                fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
-                        }
-
-                        for (j = 0; j < fmb->fmb_npages; j++) {
-                                fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
+                        fmb->fmb_pool = pool;
+                        
+                        for (j = 0; j < pool->fmp_buff_pages; j++) {
+                                fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
 
-                                if (fmb->fmb_pages[j] == NULL) {
+                                if (fmb->fmb_kiov[j].kiov_page == NULL) {
                                         ksocknal_module_fini ();
                                         return (-ENOMEM);
                                 }
 
-                                LASSERT(page_address(fmb->fmb_pages[j]) != NULL);
+                                LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL);
                         }
 
-                        list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+                        list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs);
                 }
         }
 
index 0f0b9bd..db8c842 100644 (file)
@@ -44,6 +44,7 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/irq.h>
 
 #include <linux/init.h>
 #include <linux/fs.h>
@@ -88,7 +89,7 @@
 
 #define SOCKNAL_SMALL_FWD_PAGES        1               /* # pages in a small message fwd buffer */
 
-#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + PTL_MTU) >> PAGE_SHIFT)
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN(PTL_MTU) >> PAGE_SHIFT)
                                                /* # pages in a large message fwd buffer */
 
 #define SOCKNAL_RESCHED         100             /* # scheduler loops before reschedule */
@@ -115,6 +116,7 @@ typedef struct                                  /* pool of forwarding buffers */
         struct list_head  fmp_idle_fmbs;        /* free buffers */
         struct list_head  fmp_blocked_conns;    /* connections waiting for a buffer */
         int               fmp_nactive_fmbs;     /* # buffers in use */
+        int               fmp_buff_pages;       /* # pages per buffer */
 } ksock_fmb_pool_t;
 
 
@@ -193,18 +195,13 @@ typedef struct {
 #define SOCKNAL_INIT_ALL        3
 
 /* A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments and 0 or more ptl_kiov_t fragments.  Forwarded
- * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0
- * ptl_kiov_t fragments.  Messages from an MD with PTL_MD_KIOV set, have 1
- * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t
- * fragments.
+ * struct iovec fragments (the first frag contains the portals header),
+ * followed by 0 or more ptl_kiov_t fragments.
  *
  * On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header).  Once the header has been received, if the message
- * requires forwarding or will be received into mapped memory, up to
- * PTL_MD_MAX_IOV struct iovec fragments describe the target memory.
- * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used.
- */
+ * receive (the header).  Once the header has been received, the payload is
+ * received into either struct iovec or ptl_kiov_t fragments, depending on
+ * what the header matched or whether the message needs forwarding. */
 
 struct ksock_conn;                              /* forward ref */
 struct ksock_peer;                              /* forward ref */
@@ -227,6 +224,12 @@ typedef struct                                  /* transmit packet */
 #endif
 } ksock_tx_t;
 
+typedef struct                                  /* forwarded packet */
+{
+        ksock_tx_t             ftx_tx;          /* send info */
+        struct iovec           ftx_iov;         /* hdr iovec */
+} ksock_ftx_t;
+
 #define KSOCK_ZCCD_2_TX(ptr)   list_entry (ptr, ksock_tx_t, tx_zccd)
 /* network zero copy callback descriptor embedded in ksock_tx_t */
 
@@ -254,15 +257,14 @@ typedef struct                                  /* Kernel portals Socket Forward
 {                                               /* (socknal->router) */
         struct list_head        fmb_list;       /* queue idle */
         kpr_fwd_desc_t          fmb_fwd;        /* router's descriptor */
-        int                     fmb_npages;     /* # pages allocated */
         ksock_fmb_pool_t       *fmb_pool;       /* owning pool */
         struct ksock_peer      *fmb_peer;       /* peer received from */
-        struct page            *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
-        struct iovec            fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
+        ptl_hdr_t               fmb_hdr;        /* message header */
+        ptl_kiov_t              fmb_kiov[0];    /* payload frags */
 } ksock_fmb_t;
 
 /* space for the rx frag descriptors; we either read a single contiguous
- * header, or PTL_MD_MAX_IOV frags of payload of either type. */
+ * header, or up to PTL_MD_MAX_IOV frags of payload of either type. */
 typedef union {
         struct iovec    iov[PTL_MD_MAX_IOV];
         ptl_kiov_t      kiov[PTL_MD_MAX_IOV];
index c6cdaba..c89e20e 100644 (file)
@@ -123,7 +123,7 @@ ksocknal_free_ltx (ksock_ltx_t *ltx)
         PORTAL_FREE(ltx, ltx->ltx_desc_size);
 }
 
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
 struct page *
 ksocknal_kvaddr_to_page (unsigned long vaddr)
 {
@@ -159,7 +159,7 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
         int            more = (tx->tx_niov > 1) || 
                               (tx->tx_nkiov > 0) ||
                               (!list_empty (&conn->ksnc_tx_queue));
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
         int            offset = vaddr & (PAGE_SIZE - 1);
         int            zcsize = MIN (fragsize, PAGE_SIZE - offset);
         struct page   *page;
@@ -171,7 +171,7 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
         LASSERT (fragsize <= tx->tx_resid);
         LASSERT (tx->tx_niov > 0);
         
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
         if (zcsize >= ksocknal_data.ksnd_zc_min_frag &&
             (sock->sk->route_caps & NETIF_F_SG) &&
             (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
@@ -1133,7 +1133,7 @@ void
 ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
 {
         ptl_nid_t     nid = fwd->kprfd_gateway_nid;
-        ksock_tx_t   *tx  = (ksock_tx_t *)&fwd->kprfd_scratch;
+        ksock_ftx_t  *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
         int           rc;
         
         CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
@@ -1143,14 +1143,18 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         if (nid == ksocknal_lib.ni.nid)
                 nid = fwd->kprfd_target_nid;
 
-        tx->tx_isfwd = 1;                   /* This is a forwarding packet */
-        tx->tx_nob   = fwd->kprfd_nob;
-        tx->tx_niov  = fwd->kprfd_niov;
-        tx->tx_iov   = fwd->kprfd_iov;
-        tx->tx_nkiov = 0;
-        tx->tx_kiov  = NULL;
+        /* setup iov for hdr */
+        ftx->ftx_iov.iov_base = fwd->kprfd_hdr;
+        ftx->ftx_iov.iov_len = sizeof(ptl_hdr_t);
+
+        ftx->ftx_tx.tx_isfwd = 1;                  /* This is a forwarding packet */
+        ftx->ftx_tx.tx_nob   = sizeof(ptl_hdr_t) + fwd->kprfd_nob;
+        ftx->ftx_tx.tx_niov  = 1;
+        ftx->ftx_tx.tx_iov   = &ftx->ftx_iov;
+        ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov;
+        ftx->ftx_tx.tx_kiov  = fwd->kprfd_kiov;
 
-        rc = ksocknal_launch_packet (tx, nid);
+        rc = ksocknal_launch_packet (&ftx->ftx_tx, nid);
         if (rc != 0)
                 kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc);
 }
@@ -1178,7 +1182,7 @@ ksocknal_fmb_callback (void *arg, int error)
 {
         ksock_fmb_t       *fmb = (ksock_fmb_t *)arg;
         ksock_fmb_pool_t  *fmp = fmb->fmb_pool;
-        ptl_hdr_t         *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
+        ptl_hdr_t         *hdr = (ptl_hdr_t *)page_address(fmb->fmb_kiov[0].kiov_page);
         ksock_conn_t      *conn = NULL;
         ksock_sched_t     *sched;
         unsigned long      flags;
@@ -1236,7 +1240,6 @@ ksock_fmb_t *
 ksocknal_get_idle_fmb (ksock_conn_t *conn)
 {
         int               payload_nob = conn->ksnc_rx_nob_left;
-        int               packet_nob = sizeof (ptl_hdr_t) + payload_nob;
         unsigned long     flags;
         ksock_fmb_pool_t *pool;
         ksock_fmb_t      *fmb;
@@ -1244,7 +1247,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn)
         LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
         LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
 
-        if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
+        if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
                 pool = &ksocknal_data.ksnd_small_fmp;
         else
                 pool = &ksocknal_data.ksnd_large_fmp;
@@ -1275,98 +1278,64 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn)
 int
 ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
 {
-        int payload_nob = conn->ksnc_rx_nob_left;
-        int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+        int       payload_nob = conn->ksnc_rx_nob_left;
         ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
-        int niov;                               /* at least the header */
-        int nob;
+        int       niov = 0;
+        int       nob = payload_nob;
 
         LASSERT (conn->ksnc_rx_scheduled);
         LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
         LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
         LASSERT (payload_nob >= 0);
-        LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
+        LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE);
         LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
-
-        /* Got a forwarding buffer; copy the header we just read into the
-         * forwarding buffer.  If there's payload, start reading reading it
-         * into the buffer, otherwise the forwarding buffer can be kicked
-         * off immediately.
-         *
-         * NB fmb->fmb_iov spans the WHOLE packet.
-         *    conn->ksnc_rx_iov spans just the payload.
-         */
-        fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
-
-        /* copy header */
-        memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t));
+        LASSERT (fmb->fmb_kiov[0].kiov_offset == 0);
 
         /* Take a ref on the conn's peer to prevent module unload before
-         * forwarding completes.  NB we ref peer and not conn since because
-         * all refs on conn after it has been closed must remove themselves
-         * in finite time */
+         * forwarding completes. */
         fmb->fmb_peer = conn->ksnc_peer;
         atomic_inc (&conn->ksnc_peer->ksnp_refcount);
 
-        if (payload_nob == 0) {         /* got complete packet already */
-                CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n",
-                        conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
-                        dest_nid, packet_nob);
+        /* Copy the header we just read into the forwarding buffer.  If
+         * there's payload, start reading it into the buffer,
+         * otherwise the forwarding buffer can be kicked off
+         * immediately. */
+        fmb->fmb_hdr = conn->ksnc_hdr;
 
-                fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
+        while (nob > 0) {
+                LASSERT (niov < fmb->fmb_pool->fmp_buff_pages);
+                LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0);
+                fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob);
+                nob -= PAGE_SIZE;
+                niov++;
+        }
+
+        kpr_fwd_init(&fmb->fmb_fwd, dest_nid, &fmb->fmb_hdr,
+                     payload_nob, niov, fmb->fmb_kiov,
+                     ksocknal_fmb_callback, fmb);
 
-                kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
-                              packet_nob, 1, fmb->fmb_iov,
-                              ksocknal_fmb_callback, fmb);
+        if (payload_nob == 0) {         /* got complete packet already */
+                CDEBUG (D_NET, "%p "LPX64"->"LPX64" fwd_start (immediate)\n",
+                        conn, NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid);
 
-                /* forward it now */
                 kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
 
                 ksocknal_new_packet (conn, 0);  /* on to next packet */
                 return (1);
         }
 
-        niov = 1;
-        if (packet_nob <= PAGE_SIZE) {  /* whole packet fits in first page */
-                fmb->fmb_iov[0].iov_len = packet_nob;
-        } else {
-                fmb->fmb_iov[0].iov_len = PAGE_SIZE;
-                nob = packet_nob - PAGE_SIZE;
-
-                do {
-                        LASSERT (niov < fmb->fmb_npages);
-                        fmb->fmb_iov[niov].iov_base =
-                                page_address (fmb->fmb_pages[niov]);
-                        fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
-                        nob -= PAGE_SIZE;
-                        niov++;
-                } while (nob > 0);
-        }
-
-        kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
-                      packet_nob, niov, fmb->fmb_iov,
-                      ksocknal_fmb_callback, fmb);
-
         conn->ksnc_cookie = fmb;                /* stash fmb for later */
         conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
         
-        /* payload is desc's iov-ed buffer, but skipping the hdr */
-        LASSERT (niov <= sizeof (conn->ksnc_rx_iov_space) /
-                 sizeof (struct iovec));
-
-        conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
-        conn->ksnc_rx_iov[0].iov_base =
-                (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) +
-                         sizeof (ptl_hdr_t));
-        conn->ksnc_rx_iov[0].iov_len =
-                fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
-
-        if (niov > 1)
-                memcpy(&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1],
-                       (niov - 1) * sizeof (struct iovec));
-
-        conn->ksnc_rx_niov = niov;
+        /* Set up conn->ksnc_rx_kiov to read the payload into fmb's kiov-ed
+         * buffer */
+        LASSERT (niov <= sizeof(conn->ksnc_rx_iov_space)/sizeof(ptl_kiov_t));
 
+        conn->ksnc_rx_niov = 0;
+        conn->ksnc_rx_nkiov = niov;
+        conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
+        memcpy(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov * sizeof(ptl_kiov_t));
+        
         CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
                 NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
         return (0);
index e29f628..d0dbf0a 100644 (file)
@@ -456,14 +456,13 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
         CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd,
                 target_nid, src_ne->kpne_interface.kprni_nalid);
 
-        LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
-        LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
+        LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
         
         atomic_inc (&kpr_queue_depth);
        atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
 
         kpr_fwd_packets++;                   /* (loose) stats accounting */
-        kpr_fwd_bytes += nob;
+        kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
 
        if (src_ne->kpne_shutdown)           /* caller is shutting down */
                goto out;
index 6c31b3d..925406f 100644 (file)
@@ -26,11 +26,11 @@ libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
 gmnalnid_SOURCES = gmnalnid.c
 
 ptlctl_SOURCES = ptlctl.c
-ptlctl_LDADD =  -L. -lptlctl -lncurses # -lefence
+ptlctl_LDADD =  -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
 ptlctl_DEPENDENCIES = libptlctl.a
 
 debugctl_SOURCES = debugctl.c
-debugctl_LDADD = -L. -lptlctl -lncurses # -lefence
+debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
 debugctl_DEPENDENCIES = libptlctl.a
 
 routerstat_SOURCES = routerstat.c
index 84ac97f..ff6631c 100644 (file)
@@ -115,5 +115,5 @@ int main(int argc, char **argv)
        free(pcfg.pcfg_pbuf1);
        close(pfd);
        printf("%u\n", nid);
-        exit(nid);
+        exit(0);
 }
index 156479d..a49417e 100644 (file)
@@ -1,5 +1,5 @@
 # lustre.spec
-%define version v1_2_0
+%define version 1.2.0.3
 %define kversion @LINUXRELEASE@
 %define linuxdir @LINUX@
 %define enable_doc @ENABLE_DOC@
index 6a32076..abe38c8 100755 (executable)
@@ -157,10 +157,10 @@ if [ "$CONF_SANITY" != "no" ]; then
         sh conf-sanity.sh
 fi
 
-if [ "$REPLAY_OST_SINGLE" != "no" ]; then
-        sh replay-ost-single.sh
-fi
-
 if [ "$RECOVERY_SMALL" != "no" ]; then
         sh recovery-small.sh
 fi
+
+if [ "$REPLAY_OST_SINGLE" != "no" ]; then
+        sh replay-ost-single.sh
+fi
index 0f42434..62c2765 100755 (executable)
@@ -13,16 +13,23 @@ int main(int argc, char ** argv)
 {
         int rc, i;
 
-        if (argc < 2) { 
-                printf("Usage %s filename\n", argv[0]);
+        if (argc < 2) {
+                printf("Usage %s filename {filename ...}\n", argv[0]);
                 return 1;
         }
 
         for (i = 1; i < argc; i++) {
-               rc = unlink(argv[i]);
-               if (rc)
-                       printf("unlink(%s) error: %s\n", argv[i],
-                               strerror(errno));
+                rc = unlink(argv[i]);
+                if (rc) {
+                        printf("unlink(%s): %s ", argv[i], strerror(errno));
+                        rc = access(argv[i], F_OK);
+                        if (rc && errno == ENOENT)
+                                printf("(unlinked anyways)\n");
+                        else if (rc == 0)
+                                printf("(still exists)\n");
+                        else
+                                printf("(%s looking up)\n", strerror(errno));
+                }
         }
         return rc;
-} 
+}
index 67595fc..ef241b2 100755 (executable)
@@ -838,6 +838,22 @@ test_42() {
 }
 run_test 42 "recoery after ost failure"
 
+# b=2530
+# directory orphans can't be unlinked from PENDING directory
+test_43() {
+    replay_barrier mds
+
+    # OBD_FAIL_OST_CREATE_NET 0x204
+    do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
+    facet_failover mds
+    df $MOUNT || return 1
+    sleep 10
+    do_facet ost "sysctl -w lustre.fail_loc=0"
+
+    return 0
+}
+run_test 43 "mds osc import failure during recovery; don't LBUG"
+
 equals_msg test complete, cleaning up
 $CLEANUP
 
index 84b645a..538d819 100644 (file)
@@ -648,9 +648,7 @@ test_25a() {
 run_test 25a "create file in symlinked directory ==============="
 
 test_25b() {
-       if [ ! -d $DIR/d25 ]; then
-               run_one 25a
-       fi
+       [ ! -d $DIR/d25 ] && test_25a
        $CHECKSTAT -t file $DIR/s25/foo || error
 }
 run_test 25b "lookup file in symlinked directory ==============="
@@ -687,9 +685,7 @@ test_26d() {
 run_test 26d "create multiple component recursive symlink ======"
 
 test_26e() {
-       if [ ! -h $DIR/d26-3 ]; then
-               run_one 26d
-       fi
+       [ ! -h $DIR/d26-3 ] && test_26d
        rm $DIR/d26-3
 }
 run_test 26e "unlink multiple component recursive symlink ======"
@@ -1098,92 +1094,91 @@ run_test 33a "test open file(mode=0444) with O_RDWR (should return error)"
 
 TEST_34_SIZE=${TEST_34_SIZE:-2000000000000}
 test_34a() {
-       rm -f $DIR/test_34_file
-       $MCREATE $DIR/test_34_file || error
-       $LFIND $DIR/test_34_file 2>&1 | grep -q "no stripe info" || error
-       $TRUNCATE $DIR/test_34_file $TEST_34_SIZE || error
-       $LFIND $DIR/test_34_file 2>&1 | grep -q "no stripe info" || error
-       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       rm -f $DIR/f34
+       $MCREATE $DIR/f34 || error
+       $LFIND $DIR/f34 2>&1 | grep -q "no stripe info" || error
+       $TRUNCATE $DIR/f34 $TEST_34_SIZE || error
+       $LFIND $DIR/f34 2>&1 | grep -q "no stripe info" || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
 }
 run_test 34a "truncate file that has not been opened ==========="
 
 test_34b() {
-       [ ! -f $DIR/test_34_file ] && run_one 34a
-       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
-       $OPENFILE -f O_RDONLY $DIR/test_34_file
-       $LFIND $DIR/test_34_file 2>&1 | grep -q "no stripe info" || error
-       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       [ ! -f $DIR/f34 ] && test_34a
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
+       $OPENFILE -f O_RDONLY $DIR/f34
+       $LFIND $DIR/f34 2>&1 | grep -q "no stripe info" || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
 }
 run_test 34b "O_RDONLY opening file doesn't create objects ====="
 
 test_34c() {
-       [ ! -f $DIR/test_34_file ] && run_one 34a 
-       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
-       $OPENFILE -f O_RDWR $DIR/test_34_file
-       $LFIND $DIR/test_34_file 2>&1 | grep -q "no stripe info" && error
-       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+       [ ! -f $DIR/f34 ] && test_34a 
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
+       $OPENFILE -f O_RDWR $DIR/f34
+       $LFIND $DIR/f34 2>&1 | grep -q "no stripe info" && error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
 }
 run_test 34c "O_RDWR opening file-with-size works =============="
 
 test_34d() {
-       dd if=/dev/zero of=$DIR/test_34_file conv=notrunc bs=4k count=1 || error
-       $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
-       rm $DIR/test_34_file
+       dd if=/dev/zero of=$DIR/f34 conv=notrunc bs=4k count=1 || error
+       $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
+       rm $DIR/f34
 }
 run_test 34d "write to sparse file ============================="
 
 test_34e() {
-       rm -f $DIR/test_34_file
-       $MCREATE $DIR/test_34_file || error
-       $TRUNCATE $DIR/test_34_file 1000 || error
-       $CHECKSTAT -s 1000 $DIR/test_34_file || error
-       $OPENFILE -f O_RDWR $DIR/test_34_file
-       $CHECKSTAT -s 1000 $DIR/test_34_file || error
+       rm -f $DIR/f34e
+       $MCREATE $DIR/f34e || error
+       $TRUNCATE $DIR/f34e 1000 || error
+       $CHECKSTAT -s 1000 $DIR/f34e || error
+       $OPENFILE -f O_RDWR $DIR/f34e
+       $CHECKSTAT -s 1000 $DIR/f34e || error
 }
 run_test 34e "create objects, some with size and some without =="
 
 test_35a() {
-       cp /bin/sh $DIR/test_35a_file
-       chmod 444 $DIR/test_35a_file
-       chown $RUNAS_ID $DIR/test_35a_file
-       $RUNAS $DIR/test_35a_file && error || true
-       rm $DIR/test_35a_file
+       cp /bin/sh $DIR/f35a
+       chmod 444 $DIR/f35a
+       chown $RUNAS_ID $DIR/f35a
+       $RUNAS $DIR/f35a && error || true
+       rm $DIR/f35a
 }
 run_test 35a "exec file with mode 444 (should return and not leak) ====="
 
-
 test_36a() {
-       rm -f $DIR/test_36_file
-       utime $DIR/test_36_file || error
+       rm -f $DIR/f36
+       utime $DIR/f36 || error
 }
 run_test 36a "MDS utime check (mknod, utime) ==================="
 
 test_36b() {
-       echo "" > $DIR/test_36_file
-       utime $DIR/test_36_file || error
+       echo "" > $DIR/f36
+       utime $DIR/f36 || error
 }
 run_test 36b "OST utime check (open, utime) ===================="
 
 test_36c() {
-       rm -f $DIR/d36/test_36_file
+       rm -f $DIR/d36/f36
        mkdir $DIR/d36
        chown $RUNAS_ID $DIR/d36
-       $RUNAS utime $DIR/d36/test_36_file || error
+       $RUNAS utime $DIR/d36/f36 || error
 }
 run_test 36c "non-root MDS utime check (mknod, utime) =========="
 
 test_36d() {
-       [ ! -d $DIR/d36 ] && run_one 36c
-       echo "" > $DIR/d36/test_36_file
-       $RUNAS utime $DIR/d36/test_36_file || error
+       [ ! -d $DIR/d36 ] && test_36c
+       echo "" > $DIR/d36/f36
+       $RUNAS utime $DIR/d36/f36 || error
 }
 run_test 36d "non-root OST utime check (open, utime) ==========="
 
 test_36e() {
        [ $RUNAS_ID -eq $UID ] && return
        [ ! -d $DIR/d36 ] && mkdir $DIR/d36
-       touch $DIR/d36/test_36_file2
-       $RUNAS utime $DIR/d36/test_36_file2 && error || true
+       touch $DIR/d36/f36e
+       $RUNAS utime $DIR/d36/f36e && error "utime worked, want failure" || true
 }
 run_test 36e "utime on non-owned file (should return error) ===="
 
@@ -1244,16 +1239,23 @@ stop_kupdated() {
        trap start_kupdated EXIT
 }
 
+# ensure that all stripes have some grant before we test client-side cache
+for i in `seq -f $DIR/f42-%g 1 $STRIPECOUNT`; do
+       dd if=/dev/zero of=$i bs=4k count=1
+       rm $i
+done
+
 # Tests 42* verify that our behaviour is correct WRT caching, file closure,
 # file truncation, and file removal.
 test_42a() {
        cancel_lru_locks OSC
        stop_kupdated
-        sync # just to be safe
-        BEFOREWRITES=`count_ost_writes`
-        dd if=/dev/zero of=$DIR/f42a bs=1024 count=100
-        AFTERWRITES=`count_ost_writes`
-        [ $BEFOREWRITES -eq $AFTERWRITES ] || \
+       sync; sleep 1; sync # just to be safe
+       BEFOREWRITES=`count_ost_writes`
+       grep [0-9] /proc/fs/lustre/osc/OSC*MNT*/cur_grant_bytes
+       dd if=/dev/zero of=$DIR/f42a bs=1024 count=100
+       AFTERWRITES=`count_ost_writes`
+       [ $BEFOREWRITES -eq $AFTERWRITES ] || \
                error "$BEFOREWRITES < $AFTERWRITES"
        start_kupdated
 }
@@ -1776,14 +1778,14 @@ test_99a() {
 run_test 99a "cvs init ========================================="
 
 test_99b() {
-       [ ! -d $DIR/d99cvsroot ] && run_one 99a
+       [ ! -d $DIR/d99cvsroot ] && test_99a
        cd /etc/init.d
        $RUNAS cvs -d $DIR/d99cvsroot import -m "nomesg" d99reposname vtag rtag
 }
 run_test 99b "cvs import ======================================="
 
 test_99c() {
-       [ ! -d $DIR/d99cvsroot ] && run_one 99b
+       [ ! -d $DIR/d99cvsroot ] && test_99b
        cd $DIR
        mkdir -p $DIR/d99reposname
        chown $RUNAS_ID $DIR/d99reposname
@@ -1792,7 +1794,7 @@ test_99c() {
 run_test 99c "cvs checkout ====================================="
 
 test_99d() {
-       [ ! -d $DIR/d99cvsroot ] && run_one 99c
+       [ ! -d $DIR/d99cvsroot ] && test_99c
        cd $DIR/d99reposname
        $RUNAS touch foo99
        $RUNAS cvs add -m 'addmsg' foo99
@@ -1800,14 +1802,14 @@ test_99d() {
 run_test 99d "cvs add =========================================="
 
 test_99e() {
-       [ ! -d $DIR/d99cvsroot ] && run_one 99c
+       [ ! -d $DIR/d99cvsroot ] && test_99c
        cd $DIR/d99reposname
        $RUNAS cvs update
 }
 run_test 99e "cvs update ======================================="
 
 test_99f() {
-       [ ! -d $DIR/d99cvsroot ] && run_one 99d
+       [ ! -d $DIR/d99cvsroot ] && test_99d
        cd $DIR/d99reposname
        $RUNAS cvs commit -m 'nomsg' foo99
 }
index ce28e09..1270f91 100644 (file)
@@ -36,6 +36,7 @@
 int debug = 0;
 int verbose = 0;
 int nomtab = 0;
+static char *progname = NULL;
 
 static void
 update_mtab_entry(char *spec, char *node, char *type, char *opts,
@@ -54,12 +55,12 @@ update_mtab_entry(char *spec, char *node, char *type, char *opts,
         if (!nomtab) {
                 fp = setmntent(MOUNTED, "a+");
                 if (fp == NULL) {
-                        fprintf(stderr, "setmntent(%s): %s:", MOUNTED,
-                                strerror (errno));
+                        fprintf(stderr, "%s: setmntent(%s): %s:", 
+                                progname, MOUNTED, strerror (errno));
                 } else {
                         if ((addmntent (fp, &mnt)) == 1) {
-                                fprintf(stderr, "addmntent: %s:",
-                                        strerror (errno));
+                                fprintf(stderr, "%s: addmntent: %s:",
+                                        progname, strerror (errno));
                         }
                         endmntent(fp);
                 }
@@ -109,17 +110,18 @@ parse_options(char * options, struct lustre_mount_data *lmd)
                                 lmd->lmd_nal = ptl_name2nal(opteq+1);
                         } else if(!strcmp(opt, "local_nid")) {
                                 if (ptl_parse_nid(&nid, opteq+1) != 0) {
-                                        fprintf (stderr, "mount: "
+                                        fprintf (stderr, "%s: "
                                                  "can't parse NID %s\n",
+                                                 progname,
                                                  opteq+1);
                                         return (-1);
                                 }
                                 lmd->lmd_local_nid = nid;
                         } else if(!strcmp(opt, "server_nid")) {
                                 if (ptl_parse_nid(&nid, opteq+1) != 0) {
-                                        fprintf (stderr, "mount: "
+                                        fprintf (stderr, "%s: "
                                                  "can't parse NID %s\n",
-                                                 opteq+1);
+                                                 progname, opteq+1);
                                         return (-1);
                                 }
                                 lmd->lmd_server_nid = nid;
@@ -174,8 +176,8 @@ set_local(struct lustre_mount_data *lmd)
         if (lmd->lmd_nal == SOCKNAL || lmd->lmd_nal == TCPNAL) {
                 rc = gethostname(buf, sizeof(buf) - 1);
                 if (rc) {
-                        fprintf (stderr, "mount: can't get local buf:"
-                                 "%d\n", rc);
+                        fprintf (stderr, "%s: can't get local buf: %d\n",
+                                 progname, rc);
                         return rc;
                 }
         } else if (lmd->lmd_nal == QSWNAL) {
@@ -190,14 +192,15 @@ set_local(struct lustre_mount_data *lmd)
                 } while (rc != 0 && pfiles[++i] != NULL);
 
                 if (rc != 0) {
-                        fprintf(stderr,
-                                "mount: can't read Elan ID from /proc\n");
+                        fprintf(stderr, "%s: can't read Elan ID from /proc\n",
+                                progname);
+                                
                         return -1;
                 }
         }
 
         if (ptl_parse_nid (&nid, buf) != 0) {
-                fprintf (stderr, "mount: can't parse NID %s\n", buf);
+                fprintf (stderr, "%s: can't parse NID %s\n", progname, buf);
                 return (-1);
         }
 
@@ -214,29 +217,29 @@ set_peer(char *hostname, struct lustre_mount_data *lmd)
         if (lmd->lmd_nal == SOCKNAL || lmd->lmd_nal == TCPNAL) {
                 if (lmd->lmd_server_nid == PTL_NID_ANY) {
                         if (ptl_parse_nid (&nid, hostname) != 0) {
-                                fprintf (stderr, "mount: can't parse NID %s\n",
-                                         hostname);
+                                fprintf (stderr, "%s: can't parse NID %s\n",
+                                         progname, hostname);
                                 return (-1);
                         }
                         lmd->lmd_server_nid = nid;
                 }
 
                 if (ptl_parse_ipaddr(&lmd->lmd_server_ipaddr, hostname) != 0) {
-                        fprintf (stderr, "mount: can't parse host %s\n",
-                                 hostname);
+                        fprintf (stderr, "%s: can't parse host %s\n",
+                                 progname, hostname);
                         return (-1);
                 }
         } else if (lmd->lmd_nal == QSWNAL) {
                 char buf[64];
                 rc = sscanf(hostname, "%*[^0-9]%63[0-9]", buf);
                 if (rc != 1) {
-                        fprintf (stderr, "mount: can't get elan id from host %s\n",
-                                 hostname);
+                        fprintf (stderr, "%s: can't get elan id from host %s\n",
+                                 progname, hostname);
                         return -1;
                 }
                 if (ptl_parse_nid (&nid, buf) != 0) {
-                        fprintf (stderr, "mount: can't parse NID %s\n",
-                                 hostname);
+                        fprintf (stderr, "%s: can't parse NID %s\n",
+                                 progname, hostname);
                         return (-1);
                 }
                 lmd->lmd_server_nid = nid;
@@ -260,8 +263,9 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd)
                 return -EINVAL;
 
         if (strlen(source) > sizeof(target) + 1) {
-                fprintf(stderr, "mount: "
-                        "exessively long host:/mds/profile argument\n");
+                fprintf(stderr, "%s: "
+                        "exessively long host:/mds/profile argument\n",
+                        progname);
                 return -EINVAL;
         }
         strcpy(target, source);
@@ -276,14 +280,16 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd)
                         *s = '\0';
                         profile = s + 1;
                 } else {
-                        fprintf(stderr, "mount: "
+                        fprintf(stderr, "%s: "
                                 "directory to mount not in "
-                                "host:/mds/profile format\n");
+                                "host:/mds/profile format\n",
+                                progname);
                         return(-1);
                 }
         } else {
-                fprintf(stderr, "mount: "
-                        "directory to mount not in host:/mds/profile format\n");
+                fprintf(stderr, "%s: "
+                        "directory to mount not in host:/mds/profile format\n",
+                        progname);
                 return(-1);
         }
         if (verbose)
@@ -302,13 +308,13 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd)
         if (rc)
                 return rc;
         if (strlen(mds) > sizeof(lmd->lmd_mds) + 1) {
-                fprintf(stderr, "mount: mds name too long\n");
+                fprintf(stderr, "%s: mds name too long\n", progname);
                 return(-1);
         }
         strcpy(lmd->lmd_mds, mds);
 
         if (strlen(profile) > sizeof(lmd->lmd_profile) + 1) {
-                fprintf(stderr, "mount: profile name too long\n");
+                fprintf(stderr, "%s: profile name too long\n", progname);
                 return(-1);
         }
         strcpy(lmd->lmd_profile, profile);
@@ -325,29 +331,44 @@ main(int argc, char * const argv[])
         char * target = argv[2];
         char * options = "";
         int opt;
-        int i;
+        int i = 3;
         struct lustre_mount_data lmd;
 
         int rc;
 
+        progname = strrchr(argv[0], '/');
+        progname = progname ? progname + 1 : argv[0];
+
         while ((opt = getopt(argc, argv, "vno:")) != EOF) {
                 switch (opt) {
                 case 'v':
                         verbose = 1;
                         printf("verbose: %d\n", verbose);
+                        i++;
                         break;
                 case 'n':
                         nomtab = 1;
                         printf("nomtab: %d\n", nomtab);
+                        i++;
                         break;
                 case 'o':
                         options = optarg;
+                        i++;
                         break;
                 default:
+                        i++;
                         break;
                 }
         }
 
+        if (argc < i) {
+                fprintf(stderr, 
+                        "%s: too few arguments\n"
+                        "Usage: %s <source> <target> [-v] [-n] [-o ...]\n",
+                        progname, progname);
+                exit(1);
+        }
+
         if (verbose)
                 for (i = 0; i < argc; i++) {
                         printf("arg[%d] = %s\n", i, argv[i]);
@@ -360,7 +381,7 @@ main(int argc, char * const argv[])
         }
 
         if (debug) {
-                printf("mount: debug mode, not mounting\n");
+                printf("%s: debug mode, not mounting\n", progname);
                 exit(0);
         }