powerpc )
AC_MSG_RESULT($host_cpu)
- KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
- KCPPFLAGS='-D__KERNEL__'
+ KCFLAGS='-O2 -g -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE'
MOD_LINK=elf32ppclinux
;;
AC_SUBST(LINUX25)
AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
+# ---------- Red Hat 2.4.18 has iobuf->dovary --------------
+# But other kernels don't
+
+AC_MSG_CHECKING([if struct kiobuf has a dovary field])
+AC_TRY_COMPILE([#define __KERNEL__
+ #include <linux/iobuf.h>],
+ [struct kiobuf iobuf;
+ iobuf.dovary = 1;],
+ [AC_MSG_RESULT([yes])
+ CPPFLAGS="$CPPFLAGS -DHAVE_KIOBUF_DOVARY"],
+ [AC_MSG_RESULT([no])])
+
# ---------- Red Hat 2.4.20 backports some 2.5 bits --------
# This needs to run after we've defined the KCPPFLAGS
struct list_head kprfd_list; /* stash in queues (routing target can use) */
ptl_nid_t kprfd_target_nid; /* final destination NID */
ptl_nid_t kprfd_gateway_nid; /* gateway NID */
- int kprfd_nob; /* # message bytes (including header) */
- int kprfd_niov; /* # message frags (including header) */
- struct iovec *kprfd_iov; /* message fragments */
- void *kprfd_router_arg; // originating NAL's router arg
+ ptl_hdr_t *kprfd_hdr; /* header in wire byte order */
+ int kprfd_nob; /* # payload bytes */
+ int kprfd_niov; /* # payload frags */
+ ptl_kiov_t *kprfd_kiov; /* payload fragments */
+ void *kprfd_router_arg; /* originating NAL's router arg */
kpr_fwd_callback_t kprfd_callback; /* completion callback */
void *kprfd_callback_arg; /* completion callback arg */
- kprfd_scratch_t kprfd_scratch; // scratchpad for routing targets
+ kprfd_scratch_t kprfd_scratch; /* scratchpad for routing targets */
} kpr_fwd_desc_t;
typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
}
static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid,
- int nob, int niov, struct iovec *iov,
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
+ int nob, int niov, ptl_kiov_t *kiov,
kpr_fwd_callback_t callback, void *callback_arg)
{
fwd->kprfd_target_nid = nid;
fwd->kprfd_gateway_nid = nid;
+ fwd->kprfd_hdr = hdr;
fwd->kprfd_nob = nob;
fwd->kprfd_niov = niov;
- fwd->kprfd_iov = iov;
+ fwd->kprfd_kiov = kiov;
fwd->kprfd_callback = callback;
fwd->kprfd_callback_arg = callback_arg;
}
#ifdef __linux__
# include <asm/types.h>
-# include <asm/timex.h>
+# if defined(__powerpc__) && !defined(__KERNEL__)
+# define __KERNEL__
+# include <asm/timex.h>
+# undef __KERNEL__
+# else
+# include <asm/timex.h>
+# endif
#else
# include <sys/types.h>
typedef u_int32_t __u32;
# include <linux/time.h>
#else
# include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday(tv, NULL)
+# define do_gettimeofday(tv) gettimeofday(tv, NULL);
#endif
#include <portals/errno.h>
struct timeval arrival_time;
volatile ptl_seq_t sequence;
-} ptl_event_t;
+} __attribute__((packed)) ptl_event_t;
#ifdef __CYGWIN__
#pragma pop
#endif
#include "linux/init.h"
#include "linux/sem.h"
#include "linux/vmalloc.h"
+#include "linux/sysctl.h"
#define DEBUG_SUBSYSTEM S_GMNAL
extern int gmnal_small_msg_size;
extern int num_rx_threads;
extern int num_stxds;
+extern int gm_port;
#define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size
#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c)
#define GMNAL_MAGIC 0x1234abcd
+/*
+ * The gm_port to use for gmnal
+ */
+#define GMNAL_GM_PORT gm_port
/*
gmnal_rxtwe_t *rxtwe_tail;
spinlock_t rxtwe_lock;
struct semaphore rxtwe_wait;
+ struct ctl_table_header *sysctl;
} gmnal_data_t;
/*
extern gmnal_data_t *global_nal_data;
/*
- * The gm_port to use for gmnal
- */
-#define GMNAL_GM_PORT 4
-
-/*
* for ioctl get pid
*/
#define GMNAL_IOC_GET_GNID 1
int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
+int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
+
void *gmnal_cb_malloc(nal_cb_t *, size_t);
void gmnal_cb_free(nal_cb_t *, void *, size_t);
a->cb_recv_pages = gmnal_cb_recv_pages; \
a->cb_read = gmnal_cb_read; \
a->cb_write = gmnal_cb_write; \
- a->cb_callback = NULL; \
+ a->cb_callback = gmnal_cb_callback; \
a->cb_malloc = gmnal_cb_malloc; \
a->cb_free = gmnal_cb_free; \
a->cb_map = NULL; \
void gmnal_stop_ctthread(gmnal_data_t *);
void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
+void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
char *gmnal_gm_error(gm_status_t);
char *gmnal_rxevent(gm_recv_event_t*);
int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
#include "gmnal.h"
+
+
gmnal_data_t *global_nal_data = NULL;
+/* Buffer holding the decimal global node id, exported read-only via sysctl */
+#define GLOBAL_NID_STR_LEN 16
+char global_nid_str[GLOBAL_NID_STR_LEN] = {0};
+
+/*
+ * Write the global nid /proc/sys/gmnal/globalnid
+ */
+#define GMNAL_SYSCTL 201
+#define GMNAL_SYSCTL_GLOBALNID 1
+
+/* NOTE(review): field order assumes the 2.4-era ctl_table layout
+ * {ctl_name, procname, data, maxlen, mode, child, proc_handler} — confirm
+ * against the target kernel's <linux/sysctl.h>. Entry is 0444 (read-only). */
+static ctl_table gmnal_sysctl_table[] = {
+	{GMNAL_SYSCTL_GLOBALNID, "globalnid",
+	 global_nid_str, GLOBAL_NID_STR_LEN,
+	 0444, NULL, &proc_dostring},
+	{ 0 }
+};
+
+
+/* Top-level directory node: /proc/sys/gmnal (mode 0555).
+ * NOTE(review): "gmnalnal" in the identifier looks like a typo for "gmnal";
+ * renaming it requires also updating the register_sysctl_table() caller. */
+static ctl_table gmnalnal_top_sysctl_table[] = {
+	{GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},
+	{ 0 }
+};
+
+
+
+
+
+
/*
* gmnal_api_forward
* This function takes a pack block of arguments from the NAL API
ptl_pid_t portals_pid = 0;
- CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d],
- ac_size[%d]\n", interface, ptl_size, ac_size);
+ CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], "
+ "ac_size[%d]\n", interface, ptl_size, ac_size);
PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
}
- CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d],
- name [%s], version [%d]\n", interface, GMNAL_GM_PORT,
+ CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], "
+ "name [%s], version [%d]\n", interface, GMNAL_GM_PORT,
"gmnal", GM_API_VERSION);
GMNAL_GM_LOCK(nal_data);
CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
break;
case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
- CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib
- and driver\n");
+		CDEBUG(D_ERROR, "gm_open Failure. Incompatible lib "
+ "and driver\n");
break;
case(GM_OUT_OF_MEMORY):
CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
break;
default:
- CDEBUG(D_ERROR, "gm_open Failure. Unknow error
- code [%d]\n", gm_status);
+		CDEBUG(D_ERROR, "gm_open Failure. Unknown error "
+ "code [%d]\n", gm_status);
break;
}
GMNAL_GM_LOCK(nal_data);
}
CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
nal_data->gm_global_nid = global_nid;
+ snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
/*
pid = gm_getpid();
return(NULL);
}
+ nal_data->sysctl = NULL;
+ nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
+
CDEBUG(D_INFO, "gmnal_init finished\n");
global_nal_data = nal->nal_data;
gm_close(nal_data->gm_port);
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
+ if (nal_data->sysctl)
+ unregister_sysctl_table (nal_data->sysctl);
PORTAL_FREE(nal, sizeof(nal_t));
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
int status = PTL_OK;
- CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p],
- niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+ CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+ "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
nal_cb, private, cookie, niov, iov, mlen, rlen);
switch(srxd->type) {
int status = PTL_OK;
struct iovec *iovec = NULL, *iovec_dup = NULL;
int i = 0;
+	ptl_kiov_t	*kiov_dup = kiov;
- CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p],
- cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+ CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+ "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
nal_cb, private, cookie, kniov, kiov, mlen, rlen);
if (srxd->type == GMNAL_SMALL_MESSAGE) {
CDEBUG(D_INFO, "calling gmnal_small_rx\n");
status = gmnal_small_rx(nal_cb, private, cookie, kniov,
iovec_dup, mlen, rlen);
+ for (i=0; i<kniov; i++) {
+ kunmap(kiov_dup->kiov_page);
+ kiov_dup++;
+ }
PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
}
niov, iov, len);
} else {
CDEBUG(D_ERROR, "Large message send it is not supported\n");
+ lib_finalize(nal_cb, private, cookie, PTL_FAIL);
return(PTL_FAIL);
gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid,
niov, iov, len);
int i = 0;
gmnal_data_t *nal_data;
struct iovec *iovec = NULL, *iovec_dup = NULL;
+ ptl_kiov_t *kiov_dup = kiov;
CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
nal_data = nal_cb->nal_data;
gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid,
pid, kniov, iovec, len);
}
+ for (i=0; i<kniov; i++) {
+ kunmap(kiov_dup->kiov_page);
+ kiov_dup++;
+ }
PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
return(PTL_OK);
}
return(PTL_OK);
}
+/*
+ * gmnal_cb_callback
+ * Event-queue callback hook for the NAL: if the event queue has a user
+ * callback registered, invoke it with this event.  nal_cb and private
+ * are accepted to match the cb_callback signature but are not used here.
+ * Always returns PTL_OK.
+ */
+int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq,
+		      ptl_event_t *ev)
+{
+
+	if (eq->event_callback != NULL) {
+		CDEBUG(D_INFO, "found callback\n");
+		eq->event_callback(ev);
+	}
+
+	return(PTL_OK);
+}
+
void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
{
void *ptr = NULL;
gmnal_msghdr = (gmnal_msghdr_t*)buffer;
portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE);
- CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d],
- type [%d], length [%d], buffer [%p]\n",
+ CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], "
+ "type [%d], length [%d], buffer [%p]\n",
snode, sport, type, length, buffer);
- CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d],
- gmnal_type [%d]\n", gmnal_msghdr->sender_node_id,
+ CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
+ "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id,
gmnal_msghdr->magic, gmnal_msghdr->type);
- CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"],
- dest_node ["LPD64"]\n", portals_hdr->src_nid,
+ CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
+ "dest_node ["LPD64"]\n", portals_hdr->src_nid,
portals_hdr->dest_nid);
if (!private) {
CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
+ lib_finalize(nal_cb, private, cookie, PTL_FAIL);
return(PTL_FAIL);
}
*/
CDEBUG(D_PORTALS, "calling lib_finalize\n");
lib_finalize(nal_cb, private, cookie, PTL_OK);
-
/*
* return buffer so it can be used again
*/
unsigned int local_nid;
gm_status_t gm_status = GM_SUCCESS;
- CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p]
- hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d]
- iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type,
+ CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+ "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
+ "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type,
global_nid, pid, niov, iov, size);
CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
stxd->msg_size = tot_size;
- CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p]
- gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d]
- stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
+ CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+ "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
+ "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
stxd->msg_size, global_nid, local_nid, stxd);
GMNAL_GM_LOCK(nal_data);
/*
* do a resend on the dropped ones
*/
- CDEBUG(D_ERROR, "send stxd [%p] was dropped
- resending\n", context);
+ CDEBUG(D_ERROR, "send stxd [%p] was dropped "
+ "resending\n", context);
GMNAL_GM_LOCK(nal_data);
gm_send_to_peer_with_callback(nal_data->gm_port,
stxd->buffer,
case(GM_YP_NO_MATCH):
default:
CDEBUG(D_ERROR, "Unknown send error\n");
+ gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
+ stxd->gm_target_node, GMNAL_GM_PORT,
+ gmnal_resume_sending_callback, context);
+ return;
+
}
/*
}
gmnal_return_stxd(nal_data, stxd);
lib_finalize(nal_cb, stxd, cookie, PTL_OK);
-
return;
}
+/*
+ * gmnal_resume_sending_callback
+ * Completion callback passed to gm_resume_sending() after a send error on
+ * the port: returns the tx descriptor to its pool so future sends can
+ * complete.  gm_port is unused; the descriptor is recovered from context.
+ */
+void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context,
+				   gm_status_t status)
+{
+	gmnal_stxd_t	*stxd = (gmnal_stxd_t*)context;
+
+	CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
+	gmnal_return_stxd(stxd->nal_data, stxd);
+	return;
+}
void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
context);
GMNAL_GM_LOCK(nal_data);
} else {
- CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is
- [%d][%s]\n", stxd, status, gmnal_gm_error(status));
+ CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
+ "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
}
int niov_dup;
- CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p]
- hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d],
- iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type,
+ CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+ "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
+ "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type,
global_nid, pid, niov, iov, size);
if (nal_cb)
iov->iov_base, iov->iov_len);
if (gm_status != GM_SUCCESS) {
GMNAL_GM_UNLOCK(nal_data);
- CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s]
- for memory [%p] len ["LPSZ"]\n",
+ CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+ "for memory [%p] len ["LPSZ"]\n",
gm_status, gmnal_gm_error(gm_status),
iov->iov_base, iov->iov_len);
GMNAL_GM_LOCK(nal_data);
gmnal_msghdr_t *msghdr = NULL;
gm_status_t gm_status;
- CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p],
- cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+ CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+ "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
nal_cb, private, cookie, nriov, riov, mlen, rlen);
if (!srxd) {
CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
+ lib_finalize(nal_cb, private, cookie, PTL_FAIL);
return(PTL_FAIL);
}
riov->iov_base, riov->iov_len);
if (gm_status != GM_SUCCESS) {
GMNAL_GM_UNLOCK(nal_data);
- CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s]
- for memory [%p] len ["LPSZ"]\n",
+ CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+ "for memory [%p] len ["LPSZ"]\n",
gm_status, gmnal_gm_error(gm_status),
riov->iov_base, riov->iov_len);
GMNAL_GM_LOCK(nal_data);
int ncalls = 0;
- CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p],
- nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
+ CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], "
+ "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
srxd->gm_source_node,
&source_node) != GM_SUCCESS) {
- CDEBUG(D_ERROR, "cannot resolve global_id [%u]
- to local node_id\n", srxd->gm_source_node);
+ CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
+ "to local node_id\n", srxd->gm_source_node);
GMNAL_GM_UNLOCK(nal_data);
return(GMNAL_STATUS_FAIL);
}
stxd->msg_size= sizeof(gmnal_msghdr_t);
- CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p]
- gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d]
- stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
+ CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+ "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
+ "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
stxd->msg_size, srxd->gm_source_node, local_nid, stxd);
GMNAL_GM_LOCK(nal_data);
stxd->gm_priority = GM_LOW_PRIORITY;
*/
int num_rx_threads = -1;
int num_stxds = 5;
+int gm_port = 4;
ptl_handle_ni_t kgmnal_ni;
MODULE_PARM(gmnal_small_msg_size, "i");
MODULE_PARM(num_rx_threads, "i");
MODULE_PARM(num_stxds, "i");
+MODULE_PARM(gm_port, "i");
MODULE_AUTHOR("Morgan Doyle");
GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_UNLOCK(nal_data);
if (!txbuffer) {
- CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
- size [%d]\n", i,
+ CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
+ " size [%d]\n", i,
GMNAL_SMALL_MSG_SIZE(nal_data));
PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
return(GMNAL_STATUS_FAIL);
txd->next = nal_data->stxd;
nal_data->stxd = txd;
- CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p],
- size [%d]\n", txd, txd->buffer, txd->buffer_size);
+ CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+ "size [%d]\n", txd, txd->buffer, txd->buffer_size);
}
for (i=0; i<=nrxt_stx; i++) {
GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_UNLOCK(nal_data);
if (!txbuffer) {
- CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
- size [%d]\n", i,
+ CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
+ " size [%d]\n", i,
GMNAL_SMALL_MSG_SIZE(nal_data));
PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
return(GMNAL_STATUS_FAIL);
txd->next = nal_data->rxt_stxd;
nal_data->rxt_stxd = txd;
- CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p],
- size [%d]\n", txd, txd->buffer, txd->buffer_size);
+ CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+ "size [%d]\n", txd, txd->buffer, txd->buffer_size);
}
/*
CDEBUG(D_TRACE, "gmnal_free_small tx\n");
while(txd) {
- CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p],
- size [%d]\n", txd, txd->buffer, txd->buffer_size);
+ CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+ "size [%d]\n", txd, txd->buffer, txd->buffer_size);
_txd = txd;
txd = txd->next;
GMNAL_GM_LOCK(nal_data);
}
txd = nal_data->rxt_stxd;
while(txd) {
- CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p],
- size [%d]\n", txd, txd->buffer, txd->buffer_size);
+ CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+ "size [%d]\n", txd, txd->buffer, txd->buffer_size);
_txd = txd;
txd = txd->next;
GMNAL_GM_LOCK(nal_data);
#if 0
PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
if (!rxbuffer) {
- CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d],
- size [%d]\n", i,
+ CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], "
+ "size [%d]\n", i,
GMNAL_SMALL_MSG_SIZE(nal_data));
PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
return(GMNAL_STATUS_FAIL);
}
- CDEBUG(D_NET, "Calling gm_register_memory with port [%p]
- rxbuffer [%p], size [%d]\n", nal_data->gm_port,
+ CDEBUG(D_NET, "Calling gm_register_memory with port [%p] "
+ "rxbuffer [%p], size [%d]\n", nal_data->gm_port,
rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_LOCK(nal_data);
gm_status = gm_register_memory(nal_data->gm_port, rxbuffer,
GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_UNLOCK(nal_data);
if (gm_status != GM_SUCCESS) {
- CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],
- index [%d]\n", rxbuffer, i);
+ CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],"
+ " index [%d]\n", rxbuffer, i);
switch(gm_status) {
case(GM_FAILURE):
CDEBUG(D_ERROR, "GM_FAILURE\n");
GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_UNLOCK(nal_data);
if (!rxbuffer) {
- CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],
- size [%d]\n", i,
+ CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],"
+ " size [%d]\n", i,
GMNAL_SMALL_MSG_SIZE(nal_data));
PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
return(GMNAL_STATUS_FAIL);
if (gm_hash_insert(nal_data->srxd_hash,
(void*)rxbuffer, (void*)rxd)) {
- CDEBUG(D_ERROR, "failed to create hash entry rxd[%p]
- for rxbuffer[%p]\n", rxd, rxbuffer);
+ CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
+ "for rxbuffer[%p]\n", rxd, rxbuffer);
return(GMNAL_STATUS_FAIL);
}
rxd->next = nal_data->srxd;
nal_data->srxd = rxd;
- CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p],
- size [%d]\n", rxd, rxd->buffer, rxd->size);
+ CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], "
+ "size [%d]\n", rxd, rxd->buffer, rxd->size);
}
return(GMNAL_STATUS_OK);
char *
gmnal_gm_error(gm_status_t status)
{
+ return(gm_strerror(status));
+
switch(status) {
case(GM_SUCCESS):
return("SUCCESS");
}
spin_lock(&nal_data->rxtwe_lock);
if (nal_data->rxtwe_head) {
- CDEBUG(D_WARNING, "Got a work entry\n");
+ CDEBUG(D_INFO, "Got a work entry\n");
we = nal_data->rxtwe_head;
nal_data->rxtwe_head = we->next;
if (!nal_data->rxtwe_head)
spin_unlock(&nal_data->rxtwe_lock);
} while (!we);
- CDEBUG(D_WARNING, "Returning we[%p]\n", we);
+ CDEBUG(D_INFO, "Returning we[%p]\n", we);
return(we);
}
for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
- /* If krx_pages[0] got allocated, it got mapped.
+ /* If krx_kiov[0].kiov_page got allocated, it got mapped.
* NB subsequent pages get merged */
- if (krx->krx_pages[0] != NULL)
+ if (krx->krx_kiov[0].kiov_page != NULL)
ep_dvma_unload(kqswnal_data.kqn_ep,
kqswnal_data.kqn_ep_rx_nmh,
&krx->krx_elanbuffer);
kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
for (j = 0; j < krx->krx_npages; j++)
- if (krx->krx_pages[j] != NULL)
- __free_page (krx->krx_pages[j]);
+ if (krx->krx_kiov[j].kiov_page != NULL)
+ __free_page (krx->krx_kiov[j].kiov_page);
}
PORTAL_FREE(kqswnal_data.kqn_rxds,
LASSERT (krx->krx_npages > 0);
for (j = 0; j < krx->krx_npages; j++)
{
- krx->krx_pages[j] = alloc_page(GFP_KERNEL);
- if (krx->krx_pages[j] == NULL)
- {
+ struct page *page = alloc_page(GFP_KERNEL);
+
+ if (page == NULL) {
kqswnal_finalise ();
return (-ENOMEM);
}
- LASSERT(page_address(krx->krx_pages[j]) != NULL);
+ krx->krx_kiov[j].kiov_page = page;
+ LASSERT(page_address(page) != NULL);
#if MULTIRAIL_EKC
ep_dvma_load(kqswnal_data.kqn_ep, NULL,
- page_address(krx->krx_pages[j]),
+ page_address(page),
PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
elan_page_idx, &all_rails, &elanbuffer);
#else
elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
kqswnal_data.kqn_eprxdmahandle,
- page_address(krx->krx_pages[j]),
+ page_address(page),
PAGE_SIZE, elan_page_idx,
&elanbuffer);
if (j == 0)
int krx_rpc_reply_sent; /* rpc reply sent */
atomic_t krx_refcount; /* how to tell when rpc is done */
kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */
- struct page *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */
- struct iovec krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */
+ ptl_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
} kqswnal_rx_t;
typedef struct
int offset, int nob)
{
kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
- char *buffer = (char *)page_address(krx->krx_pages[0]);
+ char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
int rc;
#if MULTIRAIL_EKC
}
memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
#endif
-
+
if (kqswnal_data.kqn_optimized_gets &&
type == PTL_MSG_GET && /* doing a GET */
nid == targetnid) { /* not forwarding */
{
int rc;
kqswnal_tx_t *ktx;
- struct iovec *iov = fwd->kprfd_iov;
+ ptl_kiov_t *kiov = fwd->kprfd_kiov;
int niov = fwd->kprfd_niov;
int nob = fwd->kprfd_nob;
ptl_nid_t nid = fwd->kprfd_gateway_nid;
LBUG ();
#endif
/* The router wants this NAL to forward a packet */
- CDEBUG (D_NET, "forwarding [%p] to "LPX64", %d frags %d bytes\n",
+ CDEBUG (D_NET, "forwarding [%p] to "LPX64", payload: %d frags %d bytes\n",
fwd, nid, niov, nob);
- LASSERT (niov > 0);
-
ktx = kqswnal_get_idle_tx (fwd, 0);
if (ktx == NULL) /* can't get txd right now */
return; /* fwd will be scheduled when tx desc freed */
goto failed;
}
- if (nob > KQSW_NRXMSGBYTES_LARGE) {
- CERROR ("Can't forward [%p] to "LPX64
- ": size %d bigger than max packet size %ld\n",
- fwd, nid, nob, (long)KQSW_NRXMSGBYTES_LARGE);
- rc = -EMSGSIZE;
- goto failed;
- }
+ /* copy hdr into pre-mapped buffer */
+ memcpy(ktx->ktx_buffer, fwd->kprfd_hdr, sizeof(ptl_hdr_t));
+ ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
- ktx->ktx_port = (nob <= (KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) ?
+ ktx->ktx_port = (nob <= KQSW_SMALLPAYLOAD) ?
EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
ktx->ktx_nid = nid;
ktx->ktx_state = KTX_FORWARDING;
ktx->ktx_args[0] = fwd;
+ ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- if ((kqswnal_data.kqn_copy_small_fwd || niov > 1) &&
- nob <= KQSW_TX_BUFFER_SIZE)
+ if (nob <= KQSW_TX_MAXCONTIG)
{
- /* send from ktx's pre-mapped contiguous buffer? */
- lib_copy_iov2buf (ktx->ktx_buffer, niov, iov, 0, nob);
+ /* send payload from ktx's pre-mapped contiguous buffer */
#if MULTIRAIL_EKC
ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
- 0, nob);
+ 0, KQSW_HDR_SIZE + nob);
#else
ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
- ktx->ktx_frags[0].Len = nob;
+ ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + nob;
#endif
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
+ if (nob > 0)
+ lib_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE,
+ niov, kiov, 0, nob);
}
else
{
- /* zero copy */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
- rc = kqswnal_map_tx_iov (ktx, 0, nob, niov, iov);
+ /* zero copy payload */
+#if MULTIRAIL_EKC
+ ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
+ 0, KQSW_HDR_SIZE);
+#else
+ ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
+ ktx->ktx_frags[0].Len = KQSW_HDR_SIZE;
+#endif
+ rc = kqswnal_map_tx_kiov (ktx, 0, nob, niov, kiov);
if (rc != 0)
goto failed;
-
- ktx->ktx_wire_hdr = (ptl_hdr_t *)iov[0].iov_base;
}
rc = kqswnal_launch (ktx);
if (error != 0)
{
- ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
void
kqswnal_rx (kqswnal_rx_t *krx)
{
- ptl_hdr_t *hdr = (ptl_hdr_t *) page_address (krx->krx_pages[0]);
+ ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
ptl_nid_t dest_nid = NTOH__u64 (hdr->dest_nid);
+ int payload_nob;
int nob;
int niov;
return;
}
- /* NB forwarding may destroy iov; rebuild every time */
- for (nob = krx->krx_nob, niov = 0; nob > 0; nob -= PAGE_SIZE, niov++)
- {
- LASSERT (niov < krx->krx_npages);
- krx->krx_iov[niov].iov_base= page_address(krx->krx_pages[niov]);
- krx->krx_iov[niov].iov_len = MIN(PAGE_SIZE, nob);
+ nob = payload_nob = krx->krx_nob - KQSW_HDR_SIZE;
+ niov = 0;
+ if (nob > 0) {
+ krx->krx_kiov[0].kiov_offset = KQSW_HDR_SIZE;
+ krx->krx_kiov[0].kiov_len = MIN(PAGE_SIZE - KQSW_HDR_SIZE, nob);
+ niov = 1;
+ nob -= PAGE_SIZE - KQSW_HDR_SIZE;
+
+ while (nob > 0) {
+ LASSERT (niov < krx->krx_npages);
+
+ krx->krx_kiov[niov].kiov_offset = 0;
+ krx->krx_kiov[niov].kiov_len = MIN(PAGE_SIZE, nob);
+ niov++;
+ nob -= PAGE_SIZE;
+ }
}
- kpr_fwd_init (&krx->krx_fwd, dest_nid,
- krx->krx_nob, niov, krx->krx_iov,
+ kpr_fwd_init (&krx->krx_fwd, dest_nid,
+ hdr, payload_nob, niov, krx->krx_kiov,
kqswnal_fwd_callback, krx);
kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd);
void
kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
{
- ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64
", dpid %d, spid %d, type %d\n",
size_t rlen)
{
kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
+ char *buffer = page_address(krx->krx_kiov[0].kiov_page);
int page;
char *page_ptr;
int page_nob;
#if KQSW_CHECKSUM
kqsw_csum_t senders_csum;
kqsw_csum_t payload_csum = 0;
- kqsw_csum_t hdr_csum = kqsw_csum(0, page_address(krx->krx_pages[0]),
- sizeof(ptl_hdr_t));
+ kqsw_csum_t hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
size_t csum_len = mlen;
int csum_frags = 0;
int csum_nob = 0;
atomic_inc (&csum_counter);
- memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
- sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
+ memcpy (&senders_csum, buffer + sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
if (senders_csum != hdr_csum)
kqswnal_csum_error (krx, 1);
#endif
if (mlen != 0) {
page = 0;
- page_ptr = ((char *) page_address(krx->krx_pages[0])) +
- KQSW_HDR_SIZE;
+ page_ptr = buffer + KQSW_HDR_SIZE;
page_nob = PAGE_SIZE - KQSW_HDR_SIZE;
LASSERT (niov > 0);
{
page++;
LASSERT (page < krx->krx_npages);
- page_ptr = page_address(krx->krx_pages[page]);
+ page_ptr = page_address(krx->krx_kiov[page].kiov_page);
page_nob = PAGE_SIZE;
}
}
#if KQSW_CHECKSUM
- memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
- sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), sizeof(kqsw_csum_t));
+ memcpy (&senders_csum, buffer + sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t),
+ sizeof(kqsw_csum_t));
if (csum_len != rlen)
CERROR("Unable to checksum data in user's buffer\n");
void
ksocknal_free_fmbs (ksock_fmb_pool_t *p)
{
+ int npages = p->fmp_buff_pages;
ksock_fmb_t *fmb;
int i;
fmb = list_entry(p->fmp_idle_fmbs.next,
ksock_fmb_t, fmb_list);
- for (i = 0; i < fmb->fmb_npages; i++)
- if (fmb->fmb_pages[i] != NULL)
- __free_page(fmb->fmb_pages[i]);
-
+ for (i = 0; i < npages; i++)
+ if (fmb->fmb_kiov[i].kiov_page != NULL)
+ __free_page(fmb->fmb_kiov[i].kiov_page);
+
list_del(&fmb->fmb_list);
- PORTAL_FREE(fmb, sizeof(*fmb));
+ PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages]));
}
}
spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
+ ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES;
spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
+ ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES;
spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
SOCKNAL_LARGE_FWD_NMSGS); i++) {
- ksock_fmb_t *fmb;
+ ksock_fmb_t *fmb;
+ ksock_fmb_pool_t *pool;
+
+
+ if (i < SOCKNAL_SMALL_FWD_NMSGS)
+ pool = &ksocknal_data.ksnd_small_fmp;
+ else
+ pool = &ksocknal_data.ksnd_large_fmp;
- PORTAL_ALLOC(fmb, sizeof(*fmb));
+ PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t,
+ fmb_kiov[pool->fmp_buff_pages]));
if (fmb == NULL) {
ksocknal_module_fini();
return (-ENOMEM);
}
- if (i < SOCKNAL_SMALL_FWD_NMSGS) {
- fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
- fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp;
- } else {
- fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
- fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
- }
-
- for (j = 0; j < fmb->fmb_npages; j++) {
- fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
+ fmb->fmb_pool = pool;
+
+ for (j = 0; j < pool->fmp_buff_pages; j++) {
+ fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
- if (fmb->fmb_pages[j] == NULL) {
+ if (fmb->fmb_kiov[j].kiov_page == NULL) {
ksocknal_module_fini ();
return (-ENOMEM);
}
- LASSERT(page_address(fmb->fmb_pages[j]) != NULL);
+ LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL);
}
- list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+ list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs);
}
}
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <asm/irq.h>
#include <linux/init.h>
#include <linux/fs.h>
#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
-#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + PTL_MTU) >> PAGE_SHIFT)
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN(PTL_MTU) >> PAGE_SHIFT)
/* # pages in a large message fwd buffer */
#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
struct list_head fmp_idle_fmbs; /* free buffers */
struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
int fmp_nactive_fmbs; /* # buffers in use */
+ int fmp_buff_pages; /* # pages per buffer */
} ksock_fmb_pool_t;
#define SOCKNAL_INIT_ALL 3
/* A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments and 0 or more ptl_kiov_t fragments. Forwarded
- * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0
- * ptl_kiov_t fragments. Messages from an MD with PTL_MD_KIOV set, have 1
- * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t
- * fragments.
+ * struct iovec fragments (the first frag contains the portals header),
+ * followed by 0 or more ptl_kiov_t fragments.
*
* On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header). Once the header has been received, if the message
- * requires forwarding or will be received into mapped memory, up to
- * PTL_MD_MAX_IOV struct iovec fragments describe the target memory.
- * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used.
- */
+ * receive (the header). Once the header has been received, the payload is
+ * received into either struct iovec or ptl_kiov_t fragments, depending on
+ * what the header matched or whether the message needs forwarding. */
struct ksock_conn; /* forward ref */
struct ksock_peer; /* forward ref */
#endif
} ksock_tx_t;
+typedef struct /* forwarded packet */
+{
+ ksock_tx_t ftx_tx; /* send info */
+ struct iovec ftx_iov; /* hdr iovec */
+} ksock_ftx_t;
+
#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
/* network zero copy callback descriptor embedded in ksock_tx_t */
{ /* (socknal->router) */
struct list_head fmb_list; /* queue idle */
kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
- int fmb_npages; /* # pages allocated */
ksock_fmb_pool_t *fmb_pool; /* owning pool */
struct ksock_peer *fmb_peer; /* peer received from */
- struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
- struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
+ ptl_hdr_t fmb_hdr; /* message header */
+ ptl_kiov_t fmb_kiov[0]; /* payload frags */
} ksock_fmb_t;
/* space for the rx frag descriptors; we either read a single contiguous
- * header, or PTL_MD_MAX_IOV frags of payload of either type. */
+ * header, or up to PTL_MD_MAX_IOV frags of payload of either type. */
typedef union {
struct iovec iov[PTL_MD_MAX_IOV];
ptl_kiov_t kiov[PTL_MD_MAX_IOV];
PORTAL_FREE(ltx, ltx->ltx_desc_size);
}
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
struct page *
ksocknal_kvaddr_to_page (unsigned long vaddr)
{
int more = (tx->tx_niov > 1) ||
(tx->tx_nkiov > 0) ||
(!list_empty (&conn->ksnc_tx_queue));
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
int offset = vaddr & (PAGE_SIZE - 1);
int zcsize = MIN (fragsize, PAGE_SIZE - offset);
struct page *page;
LASSERT (fragsize <= tx->tx_resid);
LASSERT (tx->tx_niov > 0);
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
if (zcsize >= ksocknal_data.ksnd_zc_min_frag &&
(sock->sk->route_caps & NETIF_F_SG) &&
(sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
{
ptl_nid_t nid = fwd->kprfd_gateway_nid;
- ksock_tx_t *tx = (ksock_tx_t *)&fwd->kprfd_scratch;
+ ksock_ftx_t *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
int rc;
CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
if (nid == ksocknal_lib.ni.nid)
nid = fwd->kprfd_target_nid;
- tx->tx_isfwd = 1; /* This is a forwarding packet */
- tx->tx_nob = fwd->kprfd_nob;
- tx->tx_niov = fwd->kprfd_niov;
- tx->tx_iov = fwd->kprfd_iov;
- tx->tx_nkiov = 0;
- tx->tx_kiov = NULL;
+ /* setup iov for hdr */
+ ftx->ftx_iov.iov_base = fwd->kprfd_hdr;
+ ftx->ftx_iov.iov_len = sizeof(ptl_hdr_t);
+
+ ftx->ftx_tx.tx_isfwd = 1; /* This is a forwarding packet */
+ ftx->ftx_tx.tx_nob = sizeof(ptl_hdr_t) + fwd->kprfd_nob;
+ ftx->ftx_tx.tx_niov = 1;
+ ftx->ftx_tx.tx_iov = &ftx->ftx_iov;
+ ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov;
+ ftx->ftx_tx.tx_kiov = fwd->kprfd_kiov;
- rc = ksocknal_launch_packet (tx, nid);
+ rc = ksocknal_launch_packet (&ftx->ftx_tx, nid);
if (rc != 0)
kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc);
}
{
ksock_fmb_t *fmb = (ksock_fmb_t *)arg;
ksock_fmb_pool_t *fmp = fmb->fmb_pool;
- ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)page_address(fmb->fmb_kiov[0].kiov_page);
ksock_conn_t *conn = NULL;
ksock_sched_t *sched;
unsigned long flags;
ksocknal_get_idle_fmb (ksock_conn_t *conn)
{
int payload_nob = conn->ksnc_rx_nob_left;
- int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
unsigned long flags;
ksock_fmb_pool_t *pool;
ksock_fmb_t *fmb;
LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
- if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
+ if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
pool = &ksocknal_data.ksnd_small_fmp;
else
pool = &ksocknal_data.ksnd_large_fmp;
int
ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
{
- int payload_nob = conn->ksnc_rx_nob_left;
- int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+ int payload_nob = conn->ksnc_rx_nob_left;
ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
- int niov; /* at least the header */
- int nob;
+ int niov = 0;
+ int nob = payload_nob;
LASSERT (conn->ksnc_rx_scheduled);
LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
LASSERT (payload_nob >= 0);
- LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
+ LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE);
LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
-
- /* Got a forwarding buffer; copy the header we just read into the
- * forwarding buffer. If there's payload, start reading reading it
- * into the buffer, otherwise the forwarding buffer can be kicked
- * off immediately.
- *
- * NB fmb->fmb_iov spans the WHOLE packet.
- * conn->ksnc_rx_iov spans just the payload.
- */
- fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
-
- /* copy header */
- memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t));
+ LASSERT (fmb->fmb_kiov[0].kiov_offset == 0);
/* Take a ref on the conn's peer to prevent module unload before
- * forwarding completes. NB we ref peer and not conn since because
- * all refs on conn after it has been closed must remove themselves
- * in finite time */
+ * forwarding completes. */
fmb->fmb_peer = conn->ksnc_peer;
atomic_inc (&conn->ksnc_peer->ksnp_refcount);
- if (payload_nob == 0) { /* got complete packet already */
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n",
- conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
- dest_nid, packet_nob);
+ /* Copy the header we just read into the forwarding buffer. If
+ * there's payload, start reading it into the buffer,
+ * otherwise the forwarding buffer can be kicked off
+ * immediately. */
+ fmb->fmb_hdr = conn->ksnc_hdr;
- fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
+ while (nob > 0) {
+ LASSERT (niov < fmb->fmb_pool->fmp_buff_pages);
+ LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0);
+ fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob);
+ nob -= PAGE_SIZE;
+ niov++;
+ }
+
+ kpr_fwd_init(&fmb->fmb_fwd, dest_nid, &fmb->fmb_hdr,
+ payload_nob, niov, fmb->fmb_kiov,
+ ksocknal_fmb_callback, fmb);
- kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
- packet_nob, 1, fmb->fmb_iov,
- ksocknal_fmb_callback, fmb);
+ if (payload_nob == 0) { /* got complete packet already */
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" fwd_start (immediate)\n",
+ conn, NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid);
- /* forward it now */
kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
ksocknal_new_packet (conn, 0); /* on to next packet */
return (1);
}
- niov = 1;
- if (packet_nob <= PAGE_SIZE) { /* whole packet fits in first page */
- fmb->fmb_iov[0].iov_len = packet_nob;
- } else {
- fmb->fmb_iov[0].iov_len = PAGE_SIZE;
- nob = packet_nob - PAGE_SIZE;
-
- do {
- LASSERT (niov < fmb->fmb_npages);
- fmb->fmb_iov[niov].iov_base =
- page_address (fmb->fmb_pages[niov]);
- fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
- nob -= PAGE_SIZE;
- niov++;
- } while (nob > 0);
- }
-
- kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
- packet_nob, niov, fmb->fmb_iov,
- ksocknal_fmb_callback, fmb);
-
conn->ksnc_cookie = fmb; /* stash fmb for later */
conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
- /* payload is desc's iov-ed buffer, but skipping the hdr */
- LASSERT (niov <= sizeof (conn->ksnc_rx_iov_space) /
- sizeof (struct iovec));
-
- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
- conn->ksnc_rx_iov[0].iov_base =
- (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) +
- sizeof (ptl_hdr_t));
- conn->ksnc_rx_iov[0].iov_len =
- fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
-
- if (niov > 1)
- memcpy(&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1],
- (niov - 1) * sizeof (struct iovec));
-
- conn->ksnc_rx_niov = niov;
+ /* Set up conn->ksnc_rx_kiov to read the payload into fmb's kiov-ed
+ * buffer */
+ LASSERT (niov <= sizeof(conn->ksnc_rx_iov_space)/sizeof(ptl_kiov_t));
+ conn->ksnc_rx_niov = 0;
+ conn->ksnc_rx_nkiov = niov;
+ conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
+ memcpy(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov * sizeof(ptl_kiov_t));
+
CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
return (0);
CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd,
target_nid, src_ne->kpne_interface.kprni_nalid);
- LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
- LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
+ LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
atomic_inc (&kpr_queue_depth);
atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
kpr_fwd_packets++; /* (loose) stats accounting */
- kpr_fwd_bytes += nob;
+ kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
if (src_ne->kpne_shutdown) /* caller is shutting down */
goto out;
gmnalnid_SOURCES = gmnalnid.c
ptlctl_SOURCES = ptlctl.c
-ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence
+ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
ptlctl_DEPENDENCIES = libptlctl.a
debugctl_SOURCES = debugctl.c
-debugctl_LDADD = -L. -lptlctl -lncurses # -lefence
+debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
debugctl_DEPENDENCIES = libptlctl.a
routerstat_SOURCES = routerstat.c
free(pcfg.pcfg_pbuf1);
close(pfd);
printf("%u\n", nid);
- exit(nid);
+ exit(0);
}
+tbd Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.1
+ * bug fixes
+ - fixes for glimpse AST timeouts / incorrectly 0-sized files (2818)
+ - don't overwrite extent policy data in reply if lock was blocked (2901)
+ - drop filter export grants atomically with removal from device (2663)
+ - del obd_self_export from work_list in class_disconnect_exports (2908)
+ - don't LBUG if MDS recovery times out during orphan cleanup (2530)
+ - swab reply message in mdc_close, other PPC fixes (2464)
+
2004-03-04 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.0
* bug fixes
LOGL(data->lcfg_inlbuf3, data->lcfg_inllen3, ptr);
if (data->lcfg_inlbuf4)
LOGL(data->lcfg_inlbuf4, data->lcfg_inllen4, ptr);
-// if (lustre_cfg_is_invalid(overlay))
-// return 1;
*plen = len;
lcfg = (struct lustre_cfg *)*buf;
if (lcfg->lcfg_version != LUSTRE_CFG_VERSION) {
- CERROR("Version mismatch kernel vs application\n");
+ CERROR("Version mismatch kernel: %#x application: %#x\n",
+ LUSTRE_CFG_VERSION, lcfg->lcfg_version);
return -EINVAL;
}
-// if (lustre_cfg_is_invalid(data)) {
-// CERROR("ioctl not correctly formatted\n");
-// return -EINVAL;
-// }
if (lcfg->lcfg_dev_name) {
lcfg->lcfg_dev_name = &lcfg->lcfg_bulk[0];
ELDLM_LOCK_CHANGED = 300,
ELDLM_LOCK_ABORTED = 301,
ELDLM_LOCK_REPLACED = 302,
+ ELDLM_NO_LOCK_DATA = 303,
ELDLM_NAMESPACE_EXISTS = 400,
ELDLM_BAD_NAMESPACE = 401
struct ldlm_valblock_ops {
int (*lvbo_init)(struct ldlm_resource *res);
int (*lvbo_update)(struct ldlm_resource *res, struct lustre_msg *m,
- int buf_idx);
+ int buf_idx, int increase);
};
struct ldlm_namespace {
struct list_head loi_cli_item;
struct list_head loi_write_item;
+ int loi_kms_valid:1;
__u64 loi_kms; /* known minimum size */
__u64 loi_rss; /* recently seen size */
__u64 loi_mtime; /* recently seen mtime */
ext3-xattr-ptr-arith-fix.patch
gfp_memalloc-2.4.22.patch
procfs-ndynamic-2.4.patch
-linux-2.4.20-tmpfs-xattr.patch
+linux-2.4.20-filemap.patch
ext3-truncate-buffer-head.patch
lock->l_completion_ast = completion;
lock->l_glimpse_ast = glimpse;
- lock->l_lvb_len = lvb_len;
- OBD_ALLOC(lock->l_lvb_data, lvb_len);
- if (lock->l_lvb_data == NULL) {
- OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
- RETURN(NULL);
+ if (lvb_len) {
+ lock->l_lvb_len = lvb_len;
+ OBD_ALLOC(lock->l_lvb_data, lvb_len);
+ if (lock->l_lvb_data == NULL) {
+ OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
+ RETURN(NULL);
+ }
}
RETURN(lock);
#include <linux/lustre_dlm.h>
#include <linux/obd_class.h>
+#include <portals/list.h>
#include "ldlm_internal.h"
extern kmem_cache_t *ldlm_resource_slab;
} else if (rc == -EINVAL) {
LDLM_DEBUG(lock, "lost the race -- client no longer has this "
"lock");
+ } else if (rc == -ELDLM_NO_LOCK_DATA) {
+ LDLM_DEBUG(lock, "lost a race -- client has a lock, but no "
+ "inode");
} else if (rc) {
LDLM_ERROR(lock, "client sent rc %d rq_status %d from "
"glimpse AST", rc, req->rq_status);
} else {
- rc = res->lr_namespace->ns_lvbo->lvbo_update(res,
- req->rq_repmsg, 0);
+ rc = res->lr_namespace->ns_lvbo->lvbo_update
+ (res, req->rq_repmsg, 0, 1);
}
ptlrpc_req_finished(req);
RETURN(rc);
if (res && res->lr_namespace->ns_lvbo &&
res->lr_namespace->ns_lvbo->lvbo_update) {
(void)res->lr_namespace->ns_lvbo->lvbo_update
- (res, NULL, 0);
+ (res, NULL, 0, 0);
//(res, req->rq_reqmsg, 1);
}
lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
LDLM_DEBUG(lock, "completion AST, new lock mode");
}
- if (lock->l_resource->lr_type != LDLM_PLAIN)
+
+ if (lock->l_resource->lr_type != LDLM_PLAIN) {
memcpy(&lock->l_policy_data, &dlm_req->lock_desc.l_policy_data,
sizeof(lock->l_policy_data));
+ LDLM_DEBUG(lock, "completion AST, new policy data");
+ }
ldlm_resource_unlink_lock(lock);
if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
struct ldlm_request *dlm_req,
struct ldlm_lock *lock)
{
+ int rc = -ENOSYS;
ENTRY;
l_lock(&ns->ns_lock);
if (lock->l_glimpse_ast != NULL) {
l_unlock(&ns->ns_lock);
l_check_no_ns_lock(ns);
- lock->l_glimpse_ast(lock, req);
+ rc = lock->l_glimpse_ast(lock, req);
l_lock(&ns->ns_lock);
}
+ if (req->rq_repmsg != NULL) {
+ ptlrpc_reply(req);
+ } else {
+ req->rq_status = rc;
+ ptlrpc_error(req);
+ }
+
if (lock->l_granted_mode == LCK_PW &&
!lock->l_readers && !lock->l_writers &&
time_after(jiffies, lock->l_last_used + 10 * HZ)) {
CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: 0x%x\n",
lock, reply->lock_handle.cookie, *flags);
- if (type == LDLM_EXTENT) {
- CDEBUG(D_INFO, "requested extent: "LPU64" -> "LPU64", got "
- "extent "LPU64" -> "LPU64"\n",
- body->lock_desc.l_policy_data.l_extent.start,
- body->lock_desc.l_policy_data.l_extent.end,
- reply->lock_desc.l_policy_data.l_extent.start,
- reply->lock_desc.l_policy_data.l_extent.end);
- }
- if (policy != NULL)
- memcpy(&lock->l_policy_data, &reply->lock_desc.l_policy_data,
- sizeof(reply->lock_desc.l_policy_data));
/* If enqueue returned a blocked lock but the completion handler has
* already run, then it fixed up the resource and we don't need to do it
}
LDLM_DEBUG(lock, "client-side enqueue, new resource");
}
+ if (policy != NULL)
+ memcpy(&lock->l_policy_data,
+ &reply->lock_desc.l_policy_data,
+ sizeof(reply->lock_desc.l_policy_data));
+ if (type != LDLM_PLAIN)
+ LDLM_DEBUG(lock,"client-side enqueue, new policy data");
}
+
if ((*flags) & LDLM_FL_AST_SENT) {
l_lock(&ns->ns_lock);
lock->l_flags |= LDLM_FL_CBPENDING;
}
#endif
-/* This function is a disaster. I hate the LOV. */
static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp)
{
struct ptlrpc_request *req = reqp;
struct inode *inode = ll_inode_from_lock(lock);
- struct obd_export *exp;
struct ll_inode_info *lli;
struct ost_lvb *lvb;
- struct {
- int stripe_number;
- __u64 size;
- struct lov_stripe_md *lsm;
- } data;
- __u32 vallen = sizeof(data);
- int rc, size = sizeof(*lvb);
+ int rc, size = sizeof(*lvb), stripe = 0;
ENTRY;
if (inode == NULL)
- RETURN(0);
+ GOTO(out, rc = -ELDLM_NO_LOCK_DATA);
lli = ll_i2info(inode);
if (lli == NULL)
- goto iput;
+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
if (lli->lli_smd == NULL)
- goto iput;
- exp = ll_i2obdexp(inode);
+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
/* First, find out which stripe index this lock corresponds to. */
if (lli->lli_smd->lsm_stripe_count > 1)
- data.stripe_number = ll_lock_to_stripe_offset(inode, lock);
- else
- data.stripe_number = 0;
-
- data.size = inode->i_size;
- data.lsm = lli->lli_smd;
-
- rc = obd_get_info(exp, strlen("size_to_stripe"), "size_to_stripe",
- &vallen, &data);
- if (rc != 0) {
- CERROR("obd_get_info: rc = %d\n", rc);
- LBUG();
- }
-
- LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> size "LPU64,
- inode->i_size, data.stripe_number, data.size);
+ stripe = ll_lock_to_stripe_offset(inode, lock);
rc = lustre_pack_reply(req, 1, &size, NULL);
if (rc) {
CERROR("lustre_pack_reply: %d\n", rc);
- goto iput;
+ GOTO(iput, rc);
}
lvb = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*lvb));
- lvb->lvb_size = data.size;
- ptlrpc_reply(req);
+ lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe].loi_kms;
+ LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64,
+ inode->i_size, stripe, lvb->lvb_size);
+ GOTO(iput, 0);
iput:
iput(inode);
- RETURN(0);
+
+ out:
+ /* These errors are normal races, so we don't want to fill the console
+ * with messages by calling ptlrpc_error() */
+ if (rc == -ELDLM_NO_LOCK_DATA)
+ lustre_pack_reply(req, 0, NULL, NULL);
+
+ req->rq_status = rc;
+ return rc;
}
__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms);
exp = class_conn2export(&mdc_conn);
ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
+#if 1
rc = class_config_parse_llog(ctxt, profile, cfg);
+#else
+ /*
+ * For debugging, it's useful to just dump the log
+ */
+ rc = class_config_dump_llog(ctxt, profile, cfg);
+#endif
if (rc) {
CERROR("class_config_parse_llog failed: rc = %d\n", rc);
}
ll_read_inode2(inode, opaque);
return 0;
}
+
struct inode *ll_iget(struct super_block *sb, ino_t hash,
struct lustre_md *md)
{
if (ost_uuid && !obd_uuid_equals(ost_uuid, &lov->tgts[i].uuid))
continue;
-
- memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+ memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+
/* XXX: LOV STACKING: use real "obj_mdp" sub-data */
err = obd_create(lov->tgts[i].ltd_exp, tmp_oa, &obj_mdp, oti);
- if (err) {
+ if (err)
+ /* This export will be disabled until it is recovered,
+ and then orphan recovery will be completed. */
CERROR("error in orphan recovery on OST idx %d/%d: "
"rc = %d\n", i, lov->desc.ld_tgt_count, err);
- if (!rc)
- rc = err;
- }
if (ost_uuid)
break;
LASSERT(ea != NULL);
- if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
+ if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
src_oa->o_flags == OBD_FL_DELORPHAN) {
rc = lov_clear_orphans(exp, src_oa, ea, oti);
RETURN(rc);
if (!lov->desc.ld_active_tgt_count)
RETURN(-EIO);
- /* Recreate a specific object id at the given OST index */
+ /* Recreate a specific object id at the given OST index */
if (src_oa->o_valid & OBD_MD_FLFLAGS && src_oa->o_flags &
OBD_FL_RECREATE_OBJS) {
struct lov_stripe_md obj_md;
if (i == lsm->lsm_stripe_count)
RETURN(-EINVAL);
- rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa, &obj_mdp, oti);
+ rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa,
+ &obj_mdp, oti);
RETURN(rc);
}
}
if (*ea == NULL || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) {
- if (ost_start_count <= 0) {
+ if (--ost_start_count <= 0) {
ost_start_idx = ll_insecure_random_int();
ost_start_count = LOV_CREATE_RESEED_INTERVAL;
- } else {
- --ost_start_count;
- ost_start_idx += lsm->lsm_stripe_count;
- if (lsm->lsm_stripe_count == ost_count)
- ++ost_start_idx;
+ } else if (lsm->lsm_stripe_count >=
+ lov->desc.ld_active_tgt_count) {
+ /* If we allocate from all of the stripes, make the
+ * next file start on the next OST. */
+ ++ost_start_idx;
}
ost_idx = ost_start_idx % ost_count;
} else {
struct lov_stripe_md *obj_mdp = &obj_md;
int err;
+ ++ost_start_idx;
if (lov->tgts[ost_idx].active == 0) {
CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
continue;
/* XXX LOV STACKING: submd should be from the subobj */
submd->lsm_object_id = loi->loi_id;
submd->lsm_stripe_count = 0;
+ submd->lsm_oinfo->loi_kms_valid = loi->loi_kms_valid;
submd->lsm_oinfo->loi_rss = loi->loi_rss;
submd->lsm_oinfo->loi_kms = loi->loi_kms;
loi->loi_mtime = submd->lsm_oinfo->loi_mtime;
LASSERT(lock != NULL);
loi->loi_rss = tmp;
- // Extend KMS up to the end of this lock, and no further
+ /* Extend KMS up to the end of this lock and no further
+ * A lock on [x,y] means a KMS of up to y + 1 bytes! */
if (tmp > lock->l_policy_data.l_extent.end)
tmp = lock->l_policy_data.l_extent.end + 1;
- if (tmp > loi->loi_kms) {
+ if (tmp >= loi->loi_kms) {
CDEBUG(D_INODE, "lock acquired, setting rss="
LPU64", kms="LPU64"\n", loi->loi_rss,
tmp);
loi->loi_kms = tmp;
+ loi->loi_kms_valid = 1;
} else {
CDEBUG(D_INODE, "lock acquired, setting rss="
LPU64"; leaving kms="LPU64", end="LPU64
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
int er;
- if (!lov->tgts[i].active)
- continue;
+ /* initialize all OSCs, even inactive ones */
er = obd_set_info(lov->tgts[i].ltd_exp, keylen, key,
sizeof(obd_id), ((obd_id*)val) + i);
CERROR("Unexpected: can't find mdc_open_data, but the "
"close succeeded. Please tell CFS.\n");
}
+ if (!lustre_swab_repbuf(req, 0, sizeof(struct mds_body),
+ lustre_swab_mds_body)) {
+ CERROR("Error unpacking mds_body\n");
+ rc = -EPROTO;
+ }
}
if (req->rq_async_args.pointer_arg[0] != NULL) {
CERROR("returned without dropping rpc_lock: rc %d\n", rc);
GOTO(out, rc);
rc = mds_lov_clearorphans(mds, NULL /* all OSTs */);
- if (rc < 0)
- GOTO(out, rc);
out:
- if (rc && mds->mds_lov_objids) {
- /* Might as well crash here, until we figure out what to do.
- * If we OBD_FREE, we'll just LASSERT the next time through this
- * function. */
- LBUG();
- OBD_FREE(mds->mds_lov_objids,
- mds->mds_lov_desc.ld_tgt_count * sizeof(obd_id));
- mds->mds_lov_objids = NULL;
- }
-
RETURN(rc);
}
#endif
#define DEBUG_SUBSYSTEM S_MDS
+#include <linux/version.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/version.h>
CDEBUG(D_HA,
"exp %p export uuid == obd uuid, don't discon\n",
exp);
+ /* Need to delete this now so we don't end up pointing
+ * to work_list later when this export is cleaned up. */
+ list_del_init(&exp->exp_obd_chain);
class_export_put(exp);
continue;
}
int cfg_len = rec->lrh_len;
char *cfg_buf = (char*) (rec + 1);
int rc = 0;
-
+ ENTRY;
if (rec->lrh_type == OBD_CFG_REC) {
char *buf;
struct lustre_cfg *lcfg;
int cfg_len = rec->lrh_len;
char *cfg_buf = (char*) (rec + 1);
int rc = 0;
-
+ ENTRY;
if (rec->lrh_type == OBD_CFG_REC) {
char *buf;
struct lustre_cfg *lcfg;
if (pcfg->pcfg_flags)
CDEBUG(D_INFO, " flags: %x\n",
pcfg->pcfg_flags);
+ } else {
+ CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
+ rc = -EINVAL;
}
out:
RETURN(rc);
LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV);
for (i = 0; i < objcount; i++, obj++) {
- int verify = obj->ioo_id != ECHO_PERSISTENT_OBJID;
+ int verify = (rc == 0 &&
+ obj->ioo_id != ECHO_PERSISTENT_OBJID);
int j;
for (j = 0 ; j < obj->ioo_bufcnt ; j++, r++) {
#include <linux/lprocfs_status.h>
#include <linux/lustre_log.h>
#include <linux/lustre_commit_confd.h>
+#include <portals/list.h>
#include "filter_internal.h"
LASSERT(l->l_glimpse_ast != NULL);
rc = l->l_glimpse_ast(l, NULL); /* this will update the LVB */
+ if (rc != 0 && res->lr_namespace->ns_lvbo &&
+ res->lr_namespace->ns_lvbo->lvbo_update) {
+ res->lr_namespace->ns_lvbo->lvbo_update(res, NULL, 0, 1);
+ }
down(&res->lr_lvb_sem);
-#if 0
- if (res_lvb->lvb_size == reply_lvb->lvb_size)
- LDLM_ERROR(l, "we lost the glimpse race!");
-#endif
reply_lvb->lvb_size = res_lvb->lvb_size;
up(&res->lr_lvb_sem);
spin_unlock(&obd->obd_osfs_lock);
/* Do these assertions outside the spinlocks so we don't kill system */
- LASSERTF(tot_granted == fo_tot_granted, "%s "LPU64" != "LPU64"\n",
- func, tot_granted, fo_tot_granted);
- LASSERTF(tot_pending == fo_tot_pending, "%s "LPU64" != "LPU64"\n",
- func, tot_pending, fo_tot_pending);
- LASSERTF(tot_dirty == fo_tot_dirty, "%s "LPU64" != "LPU64"\n",
- func, tot_dirty, fo_tot_dirty);
- LASSERTF(tot_pending <= tot_granted, "%s "LPU64" > "LPU64"\n",
- func, tot_pending, tot_granted);
- LASSERTF(tot_granted <= maxsize, "%s "LPU64" > "LPU64"\n",
- func, tot_granted, maxsize);
- LASSERTF(tot_dirty <= maxsize, "%s "LPU64" > "LPU64"\n",
- func, tot_dirty, maxsize);
+ if (tot_granted != fo_tot_granted)
+ CERROR("%s: tot_granted "LPU64" != fo_tot_granted "LPU64"\n",
+ func, tot_granted, fo_tot_granted);
+ if (tot_pending != fo_tot_pending)
+ CERROR("%s: tot_pending "LPU64" != fo_tot_pending "LPU64"\n",
+ func, tot_pending, fo_tot_pending);
+ if (tot_dirty != fo_tot_dirty)
+ CERROR("%s: tot_dirty "LPU64" != fo_tot_dirty "LPU64"\n",
+ func, tot_dirty, fo_tot_dirty);
+ if (tot_pending > tot_granted)
+ CERROR("%s: tot_pending "LPU64" > tot_granted "LPU64"\n",
+ func, tot_pending, tot_granted);
+ if (tot_granted > maxsize)
+ CERROR("%s: tot_granted "LPU64" > maxsize "LPU64"\n",
+ func, tot_granted, maxsize);
+ if (tot_dirty > maxsize)
+ CERROR("%s: tot_dirty "LPU64" > maxsize "LPU64"\n",
+ func, tot_dirty, maxsize);
}
-/* Remove this client from the grant accounting totals. This is done at
- * disconnect time and also at export destroy time in case there was a race
- * between removing the export and an incoming BRW updating the client grant.
+/* Remove this client from the grant accounting totals. We also remove
+ * the export from the obd device under the osfs and dev locks to ensure
+ * that the filter_grant_sanity_check() calculations are always valid.
* The client should do something similar when it invalidates its import. */
static void filter_grant_discard(struct obd_export *exp)
{
struct filter_export_data *fed = &exp->exp_filter_data;
spin_lock(&obd->obd_osfs_lock);
+ spin_lock(&exp->exp_obd->obd_dev_lock);
+ list_del_init(&exp->exp_obd_chain);
+ spin_unlock(&exp->exp_obd->obd_dev_lock);
+
CDEBUG(D_CACHE, "%s: cli %s/%p dirty %lu pend %lu grant %lu\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, fed->fed_pending, fed->fed_grant);
if (exp->exp_obd->obd_replayable)
filter_client_free(exp, exp->exp_flags);
- filter_grant_sanity_check(exp->exp_obd, __FUNCTION__);
+ filter_grant_discard(exp);
+ if (!(exp->exp_flags & OBD_OPT_FORCE))
+ filter_grant_sanity_check(exp->exp_obd, __FUNCTION__);
RETURN(0);
}
exp->exp_flags = flags;
spin_unlock_irqrestore(&exp->exp_lock, irqflags);
+ if (!(flags & OBD_OPT_FORCE))
+ filter_grant_sanity_check(obd, __FUNCTION__);
filter_grant_discard(exp);
/* Disconnect early so that clients can't keep using export */
rc = class_disconnect(exp, flags);
- /* Do this twice in case a BRW arrived between the first call and
- * the class_export_unlink() call (bug 2663) */
- filter_grant_discard(exp);
-
ldlm_cancel_locks_for_export(exp);
fsfilt_sync(obd, obd->u.filter.fo_sb);
if (res->lr_namespace->ns_lvbo &&
res->lr_namespace->ns_lvbo->lvbo_update) {
rc = res->lr_namespace->ns_lvbo->lvbo_update
- (res, NULL, 0);
+ (res, NULL, 0, 0);
}
ldlm_resource_putref(res);
}
GOTO(cleanup, rc);
cleanup_phase = 1;
-#if (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,18))
+#ifdef HAVE_KIOBUF_DOVARY
iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */
#endif
rc = expand_kiobuf(iobuf, obj->ioo_bufcnt);
*
* m != NULL : called by the DLM itself after a glimpse callback
* m == NULL : called by the filter after a disk write
+ *
+ * If 'increase' is true, don't allow values to move backwards.
*/
static int filter_lvbo_update(struct ldlm_resource *res, struct lustre_msg *m,
- int buf_idx)
+ int buf_idx, int increase)
{
int rc = 0;
struct ost_lvb *lvb = res->lr_lvb_data;
//GOTO(out, rc = -EPROTO);
GOTO(out, rc = 0);
}
- if (new->lvb_size > lvb->lvb_size) {
+ if (new->lvb_size > lvb->lvb_size || !increase) {
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb size: "
LPU64" -> "LPU64"\n", res->lr_name.name[0],
lvb->lvb_size, new->lvb_size);
lvb->lvb_size = new->lvb_size;
}
- if (new->lvb_mtime > lvb->lvb_mtime) {
+ if (new->lvb_mtime > lvb->lvb_mtime || !increase) {
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb mtime: "
LPU64" -> "LPU64"\n", res->lr_name.name[0],
lvb->lvb_mtime, new->lvb_mtime);
oa->o_valid = OBD_MD_FLID;
obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
- lvb->lvb_size = dentry->d_inode->i_size;
- lvb->lvb_mtime = LTIME_S(dentry->d_inode->i_mtime);
- CDEBUG(D_DLMTRACE, "res: "LPU64" disk lvb size: "LPU64", mtime: "
- LPU64"\n", res->lr_name.name[0], lvb->lvb_size, lvb->lvb_mtime);
+ if (dentry->d_inode->i_size > lvb->lvb_size || !increase) {
+ CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb size from disk: "
+ LPU64" -> "LPU64"\n", res->lr_name.name[0],
+ lvb->lvb_size, dentry->d_inode->i_size);
+ lvb->lvb_size = dentry->d_inode->i_size;
+ }
+ if (dentry->d_inode->i_mtime > lvb->lvb_mtime || !increase) {
+ CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb mtime from disk: "
+ LPU64" -> "LPU64"\n", res->lr_name.name[0],
+ lvb->lvb_mtime,(__u64)LTIME_S(dentry->d_inode->i_mtime));
+ lvb->lvb_mtime = LTIME_S(dentry->d_inode->i_mtime);
+ }
f_dput(dentry);
out:
policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
policy->l_extent.end |= ~PAGE_MASK;
+ if (lsm->lsm_oinfo->loi_kms_valid == 0)
+ goto no_match;
+
/* Next, search for already existing extent locks that will cover us */
rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type, policy, mode,
lockh);
}
}
+ no_match:
rc = ldlm_cli_enqueue(exp, NULL, obd->obd_namespace, res_id, type,
policy, mode, flags, bl_cb, cp_cb, gl_cb, data,
&lvb, sizeof(lvb), lustre_swab_ost_lvb, lockh);
powerpc )
AC_MSG_RESULT($host_cpu)
- KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
- KCPPFLAGS='-D__KERNEL__'
+ KCFLAGS='-O2 -g -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE'
MOD_LINK=elf32ppclinux
;;
AC_SUBST(LINUX25)
AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib)
+# ---------- Red Hat 2.4.18 has iobuf->dovary --------------
+# But other kernels don't
+
+AC_MSG_CHECKING([if struct kiobuf has a dovary field])
+AC_TRY_COMPILE([#define __KERNEL__
+ #include <linux/iobuf.h>],
+ [struct kiobuf iobuf;
+ iobuf.dovary = 1;],
+ [AC_MSG_RESULT([yes])
+ CPPFLAGS="$CPPFLAGS -DHAVE_KIOBUF_DOVARY"],
+ [AC_MSG_RESULT([no])])
+
# ---------- Red Hat 2.4.20 backports some 2.5 bits --------
# This needs to run after we've defined the KCPPFLAGS
struct list_head kprfd_list; /* stash in queues (routing target can use) */
ptl_nid_t kprfd_target_nid; /* final destination NID */
ptl_nid_t kprfd_gateway_nid; /* gateway NID */
- int kprfd_nob; /* # message bytes (including header) */
- int kprfd_niov; /* # message frags (including header) */
- struct iovec *kprfd_iov; /* message fragments */
- void *kprfd_router_arg; // originating NAL's router arg
+ ptl_hdr_t *kprfd_hdr; /* header in wire byte order */
+ int kprfd_nob; /* # payload bytes */
+ int kprfd_niov; /* # payload frags */
+ ptl_kiov_t *kprfd_kiov; /* payload fragments */
+ void *kprfd_router_arg; /* originating NAL's router arg */
kpr_fwd_callback_t kprfd_callback; /* completion callback */
void *kprfd_callback_arg; /* completion callback arg */
- kprfd_scratch_t kprfd_scratch; // scratchpad for routing targets
+ kprfd_scratch_t kprfd_scratch; /* scratchpad for routing targets */
} kpr_fwd_desc_t;
typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
}
static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid,
- int nob, int niov, struct iovec *iov,
+kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
+ int nob, int niov, ptl_kiov_t *kiov,
kpr_fwd_callback_t callback, void *callback_arg)
{
fwd->kprfd_target_nid = nid;
fwd->kprfd_gateway_nid = nid;
+ fwd->kprfd_hdr = hdr;
fwd->kprfd_nob = nob;
fwd->kprfd_niov = niov;
- fwd->kprfd_iov = iov;
+ fwd->kprfd_kiov = kiov;
fwd->kprfd_callback = callback;
fwd->kprfd_callback_arg = callback_arg;
}
#ifdef __linux__
# include <asm/types.h>
-# include <asm/timex.h>
+# if defined(__powerpc__) && !defined(__KERNEL__)
+# define __KERNEL__
+# include <asm/timex.h>
+# undef __KERNEL__
+# else
+# include <asm/timex.h>
+# endif
#else
# include <sys/types.h>
typedef u_int32_t __u32;
# include <linux/time.h>
#else
# include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday(tv, NULL)
+# define do_gettimeofday(tv) gettimeofday(tv, NULL);
#endif
#include <portals/errno.h>
struct timeval arrival_time;
volatile ptl_seq_t sequence;
-} ptl_event_t;
+} __attribute__((packed)) ptl_event_t;
#ifdef __CYGWIN__
#pragma pop
#endif
#include "linux/init.h"
#include "linux/sem.h"
#include "linux/vmalloc.h"
+#include "linux/sysctl.h"
#define DEBUG_SUBSYSTEM S_GMNAL
extern int gmnal_small_msg_size;
extern int num_rx_threads;
extern int num_stxds;
+extern int gm_port;
#define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size
#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c)
#define GMNAL_MAGIC 0x1234abcd
+/*
+ * The gm_port to use for gmnal
+ */
+#define GMNAL_GM_PORT gm_port
/*
gmnal_rxtwe_t *rxtwe_tail;
spinlock_t rxtwe_lock;
struct semaphore rxtwe_wait;
+ struct ctl_table_header *sysctl;
} gmnal_data_t;
/*
extern gmnal_data_t *global_nal_data;
/*
- * The gm_port to use for gmnal
- */
-#define GMNAL_GM_PORT 4
-
-/*
* for ioctl get pid
*/
#define GMNAL_IOC_GET_GNID 1
int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
+int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
+
void *gmnal_cb_malloc(nal_cb_t *, size_t);
void gmnal_cb_free(nal_cb_t *, void *, size_t);
a->cb_recv_pages = gmnal_cb_recv_pages; \
a->cb_read = gmnal_cb_read; \
a->cb_write = gmnal_cb_write; \
- a->cb_callback = NULL; \
+ a->cb_callback = gmnal_cb_callback; \
a->cb_malloc = gmnal_cb_malloc; \
a->cb_free = gmnal_cb_free; \
a->cb_map = NULL; \
void gmnal_stop_ctthread(gmnal_data_t *);
void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
+void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
char *gmnal_gm_error(gm_status_t);
char *gmnal_rxevent(gm_recv_event_t*);
int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
#include "gmnal.h"
+
+
gmnal_data_t *global_nal_data = NULL;
+#define GLOBAL_NID_STR_LEN 16
+/* ASCII decimal form of the GM global node id, filled in by gmnal_init()
+ * and exported read-only through /proc/sys/gmnal/globalnid */
+char global_nid_str[GLOBAL_NID_STR_LEN] = {0};
+
+/*
+ * Write the global nid /proc/sys/gmnal/globalnid
+ */
+#define GMNAL_SYSCTL 201
+#define GMNAL_SYSCTL_GLOBALNID 1
+
+/* leaf entry: /proc/sys/gmnal/globalnid, mode 0444 (read-only string) */
+static ctl_table gmnal_sysctl_table[] = {
+ {GMNAL_SYSCTL_GLOBALNID, "globalnid",
+ global_nid_str, GLOBAL_NID_STR_LEN,
+ 0444, NULL, &proc_dostring},
+ { 0 }
+};
+
+
+/* top-level /proc/sys/gmnal directory.
+ * NOTE(review): "gmnalnal" looks like a doubled-"nal" typo for "gmnal",
+ * but the name is used consistently at registration time, so it is
+ * harmless; consider renaming in a follow-up. */
+static ctl_table gmnalnal_top_sysctl_table[] = {
+ {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},
+ { 0 }
+};
+
+
+
+
+
+
/*
* gmnal_api_forward
* This function takes a pack block of arguments from the NAL API
ptl_pid_t portals_pid = 0;
- CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d],
- ac_size[%d]\n", interface, ptl_size, ac_size);
+ CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], "
+ "ac_size[%d]\n", interface, ptl_size, ac_size);
PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
}
- CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d],
- name [%s], version [%d]\n", interface, GMNAL_GM_PORT,
+ CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], "
+ "name [%s], version [%d]\n", interface, GMNAL_GM_PORT,
"gmnal", GM_API_VERSION);
GMNAL_GM_LOCK(nal_data);
CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
break;
case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
- CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib
- and driver\n");
+ CDEBUG(D_ERROR, "gm_open Failure. Incompatible lib "
+ "and driver\n");
break;
case(GM_OUT_OF_MEMORY):
CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
break;
default:
- CDEBUG(D_ERROR, "gm_open Failure. Unknow error
- code [%d]\n", gm_status);
+ CDEBUG(D_ERROR, "gm_open Failure. Unknown error "
+ "code [%d]\n", gm_status);
break;
}
GMNAL_GM_LOCK(nal_data);
}
CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
nal_data->gm_global_nid = global_nid;
+ snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
/*
pid = gm_getpid();
return(NULL);
}
+ nal_data->sysctl = NULL;
+ nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
+
CDEBUG(D_INFO, "gmnal_init finished\n");
global_nal_data = nal->nal_data;
gm_close(nal_data->gm_port);
gm_finalize();
GMNAL_GM_UNLOCK(nal_data);
+ if (nal_data->sysctl)
+ unregister_sysctl_table (nal_data->sysctl);
PORTAL_FREE(nal, sizeof(nal_t));
PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
int status = PTL_OK;
- CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p],
- niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+ CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+ "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
nal_cb, private, cookie, niov, iov, mlen, rlen);
switch(srxd->type) {
int status = PTL_OK;
struct iovec *iovec = NULL, *iovec_dup = NULL;
int i = 0;
+ ptl_kiov_t *kiov_dup = kiov;
- CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p],
- cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+ CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+ "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
nal_cb, private, cookie, kniov, kiov, mlen, rlen);
if (srxd->type == GMNAL_SMALL_MESSAGE) {
CDEBUG(D_INFO, "calling gmnal_small_rx\n");
status = gmnal_small_rx(nal_cb, private, cookie, kniov,
iovec_dup, mlen, rlen);
+ for (i=0; i<kniov; i++) {
+ kunmap(kiov_dup->kiov_page);
+ kiov_dup++;
+ }
PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
}
niov, iov, len);
} else {
CDEBUG(D_ERROR, "Large message send it is not supported\n");
+ lib_finalize(nal_cb, private, cookie, PTL_FAIL);
return(PTL_FAIL);
gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid,
niov, iov, len);
int i = 0;
gmnal_data_t *nal_data;
struct iovec *iovec = NULL, *iovec_dup = NULL;
+ ptl_kiov_t *kiov_dup = kiov;
CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
nal_data = nal_cb->nal_data;
gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid,
pid, kniov, iovec, len);
}
+ for (i=0; i<kniov; i++) {
+ kunmap(kiov_dup->kiov_page);
+ kiov_dup++;
+ }
PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
return(PTL_OK);
}
return(PTL_OK);
}
+int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq,
+ ptl_event_t *ev)
+{
+
+ if (eq->event_callback != NULL) {
+ CDEBUG(D_INFO, "found callback\n");
+ eq->event_callback(ev);
+ }
+
+ return(PTL_OK);
+}
+
void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
{
void *ptr = NULL;
gmnal_msghdr = (gmnal_msghdr_t*)buffer;
portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE);
- CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d],
- type [%d], length [%d], buffer [%p]\n",
+ CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], "
+ "type [%d], length [%d], buffer [%p]\n",
snode, sport, type, length, buffer);
- CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d],
- gmnal_type [%d]\n", gmnal_msghdr->sender_node_id,
+ CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
+ "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id,
gmnal_msghdr->magic, gmnal_msghdr->type);
- CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"],
- dest_node ["LPD64"]\n", portals_hdr->src_nid,
+ CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
+ "dest_node ["LPD64"]\n", portals_hdr->src_nid,
portals_hdr->dest_nid);
if (!private) {
CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
+ lib_finalize(nal_cb, private, cookie, PTL_FAIL);
return(PTL_FAIL);
}
*/
CDEBUG(D_PORTALS, "calling lib_finalize\n");
lib_finalize(nal_cb, private, cookie, PTL_OK);
-
/*
* return buffer so it can be used again
*/
unsigned int local_nid;
gm_status_t gm_status = GM_SUCCESS;
- CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p]
- hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d]
- iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type,
+ CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+ "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
+ "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type,
global_nid, pid, niov, iov, size);
CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
stxd->msg_size = tot_size;
- CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p]
- gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d]
- stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
+ CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+ "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
+ "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
stxd->msg_size, global_nid, local_nid, stxd);
GMNAL_GM_LOCK(nal_data);
/*
* do a resend on the dropped ones
*/
- CDEBUG(D_ERROR, "send stxd [%p] was dropped
- resending\n", context);
+ CDEBUG(D_ERROR, "send stxd [%p] was dropped "
+ "resending\n", context);
GMNAL_GM_LOCK(nal_data);
gm_send_to_peer_with_callback(nal_data->gm_port,
stxd->buffer,
case(GM_YP_NO_MATCH):
default:
CDEBUG(D_ERROR, "Unknown send error\n");
+ gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
+ stxd->gm_target_node, GMNAL_GM_PORT,
+ gmnal_resume_sending_callback, context);
+ return;
+
}
/*
}
gmnal_return_stxd(nal_data, stxd);
lib_finalize(nal_cb, stxd, cookie, PTL_OK);
-
return;
}
+/*
+ * gm_resume_sending() completion callback, scheduled from the
+ * unknown-error path of the send callback so that future sends on the
+ * port can proceed after an error.  Returns the tx descriptor that was
+ * in flight ('context') to the idle stxd list.  'gm_port' and 'status'
+ * are required by the GM callback signature; status is only logged.
+ */
+void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context,
+ gm_status_t status)
+{
+ gmnal_stxd_t *stxd = (gmnal_stxd_t*)context;
+ CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
+ gmnal_return_stxd(stxd->nal_data, stxd);
+ return;
+}
void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
context);
GMNAL_GM_LOCK(nal_data);
} else {
- CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is
- [%d][%s]\n", stxd, status, gmnal_gm_error(status));
+ CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
+ "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
}
int niov_dup;
- CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p]
- hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d],
- iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type,
+ CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+ "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
+ "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type,
global_nid, pid, niov, iov, size);
if (nal_cb)
iov->iov_base, iov->iov_len);
if (gm_status != GM_SUCCESS) {
GMNAL_GM_UNLOCK(nal_data);
- CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s]
- for memory [%p] len ["LPSZ"]\n",
+ CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+ "for memory [%p] len ["LPSZ"]\n",
gm_status, gmnal_gm_error(gm_status),
iov->iov_base, iov->iov_len);
GMNAL_GM_LOCK(nal_data);
gmnal_msghdr_t *msghdr = NULL;
gm_status_t gm_status;
- CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p],
- cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+ CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+ "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
nal_cb, private, cookie, nriov, riov, mlen, rlen);
if (!srxd) {
CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
+ lib_finalize(nal_cb, private, cookie, PTL_FAIL);
return(PTL_FAIL);
}
riov->iov_base, riov->iov_len);
if (gm_status != GM_SUCCESS) {
GMNAL_GM_UNLOCK(nal_data);
- CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s]
- for memory [%p] len ["LPSZ"]\n",
+ CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+ "for memory [%p] len ["LPSZ"]\n",
gm_status, gmnal_gm_error(gm_status),
riov->iov_base, riov->iov_len);
GMNAL_GM_LOCK(nal_data);
int ncalls = 0;
- CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p],
- nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
+ CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], "
+ "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
srxd->gm_source_node,
&source_node) != GM_SUCCESS) {
- CDEBUG(D_ERROR, "cannot resolve global_id [%u]
- to local node_id\n", srxd->gm_source_node);
+ CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
+ "to local node_id\n", srxd->gm_source_node);
GMNAL_GM_UNLOCK(nal_data);
return(GMNAL_STATUS_FAIL);
}
stxd->msg_size= sizeof(gmnal_msghdr_t);
- CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p]
- gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d]
- stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
+ CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+ "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
+ "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
stxd->msg_size, srxd->gm_source_node, local_nid, stxd);
GMNAL_GM_LOCK(nal_data);
stxd->gm_priority = GM_LOW_PRIORITY;
*/
int num_rx_threads = -1;
int num_stxds = 5;
+int gm_port = 4;
ptl_handle_ni_t kgmnal_ni;
MODULE_PARM(gmnal_small_msg_size, "i");
MODULE_PARM(num_rx_threads, "i");
MODULE_PARM(num_stxds, "i");
+MODULE_PARM(gm_port, "i");
MODULE_AUTHOR("Morgan Doyle");
GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_UNLOCK(nal_data);
if (!txbuffer) {
- CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
- size [%d]\n", i,
+ CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
+ " size [%d]\n", i,
GMNAL_SMALL_MSG_SIZE(nal_data));
PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
return(GMNAL_STATUS_FAIL);
txd->next = nal_data->stxd;
nal_data->stxd = txd;
- CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p],
- size [%d]\n", txd, txd->buffer, txd->buffer_size);
+ CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+ "size [%d]\n", txd, txd->buffer, txd->buffer_size);
}
for (i=0; i<=nrxt_stx; i++) {
GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_UNLOCK(nal_data);
if (!txbuffer) {
- CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
- size [%d]\n", i,
+ CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
+ " size [%d]\n", i,
GMNAL_SMALL_MSG_SIZE(nal_data));
PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
return(GMNAL_STATUS_FAIL);
txd->next = nal_data->rxt_stxd;
nal_data->rxt_stxd = txd;
- CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p],
- size [%d]\n", txd, txd->buffer, txd->buffer_size);
+ CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+ "size [%d]\n", txd, txd->buffer, txd->buffer_size);
}
/*
CDEBUG(D_TRACE, "gmnal_free_small tx\n");
while(txd) {
- CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p],
- size [%d]\n", txd, txd->buffer, txd->buffer_size);
+ CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+ "size [%d]\n", txd, txd->buffer, txd->buffer_size);
_txd = txd;
txd = txd->next;
GMNAL_GM_LOCK(nal_data);
}
txd = nal_data->rxt_stxd;
while(txd) {
- CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p],
- size [%d]\n", txd, txd->buffer, txd->buffer_size);
+ CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+ "size [%d]\n", txd, txd->buffer, txd->buffer_size);
_txd = txd;
txd = txd->next;
GMNAL_GM_LOCK(nal_data);
#if 0
PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
if (!rxbuffer) {
- CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d],
- size [%d]\n", i,
+ CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], "
+ "size [%d]\n", i,
GMNAL_SMALL_MSG_SIZE(nal_data));
PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
return(GMNAL_STATUS_FAIL);
}
- CDEBUG(D_NET, "Calling gm_register_memory with port [%p]
- rxbuffer [%p], size [%d]\n", nal_data->gm_port,
+ CDEBUG(D_NET, "Calling gm_register_memory with port [%p] "
+ "rxbuffer [%p], size [%d]\n", nal_data->gm_port,
rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_LOCK(nal_data);
gm_status = gm_register_memory(nal_data->gm_port, rxbuffer,
GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_UNLOCK(nal_data);
if (gm_status != GM_SUCCESS) {
- CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],
- index [%d]\n", rxbuffer, i);
+ CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],"
+ " index [%d]\n", rxbuffer, i);
switch(gm_status) {
case(GM_FAILURE):
CDEBUG(D_ERROR, "GM_FAILURE\n");
GMNAL_SMALL_MSG_SIZE(nal_data));
GMNAL_GM_UNLOCK(nal_data);
if (!rxbuffer) {
- CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],
- size [%d]\n", i,
+ CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],"
+ " size [%d]\n", i,
GMNAL_SMALL_MSG_SIZE(nal_data));
PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
return(GMNAL_STATUS_FAIL);
if (gm_hash_insert(nal_data->srxd_hash,
(void*)rxbuffer, (void*)rxd)) {
- CDEBUG(D_ERROR, "failed to create hash entry rxd[%p]
- for rxbuffer[%p]\n", rxd, rxbuffer);
+ CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
+ "for rxbuffer[%p]\n", rxd, rxbuffer);
return(GMNAL_STATUS_FAIL);
}
rxd->next = nal_data->srxd;
nal_data->srxd = rxd;
- CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p],
- size [%d]\n", rxd, rxd->buffer, rxd->size);
+ CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], "
+ "size [%d]\n", rxd, rxd->buffer, rxd->size);
}
return(GMNAL_STATUS_OK);
char *
gmnal_gm_error(gm_status_t status)
{
+ return(gm_strerror(status));
+
switch(status) {
case(GM_SUCCESS):
return("SUCCESS");
}
spin_lock(&nal_data->rxtwe_lock);
if (nal_data->rxtwe_head) {
- CDEBUG(D_WARNING, "Got a work entry\n");
+ CDEBUG(D_INFO, "Got a work entry\n");
we = nal_data->rxtwe_head;
nal_data->rxtwe_head = we->next;
if (!nal_data->rxtwe_head)
spin_unlock(&nal_data->rxtwe_lock);
} while (!we);
- CDEBUG(D_WARNING, "Returning we[%p]\n", we);
+ CDEBUG(D_INFO, "Returning we[%p]\n", we);
return(we);
}
for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
- /* If krx_pages[0] got allocated, it got mapped.
+ /* If krx_kiov[0].kiov_page got allocated, it got mapped.
* NB subsequent pages get merged */
- if (krx->krx_pages[0] != NULL)
+ if (krx->krx_kiov[0].kiov_page != NULL)
ep_dvma_unload(kqswnal_data.kqn_ep,
kqswnal_data.kqn_ep_rx_nmh,
&krx->krx_elanbuffer);
kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
for (j = 0; j < krx->krx_npages; j++)
- if (krx->krx_pages[j] != NULL)
- __free_page (krx->krx_pages[j]);
+ if (krx->krx_kiov[j].kiov_page != NULL)
+ __free_page (krx->krx_kiov[j].kiov_page);
}
PORTAL_FREE(kqswnal_data.kqn_rxds,
LASSERT (krx->krx_npages > 0);
for (j = 0; j < krx->krx_npages; j++)
{
- krx->krx_pages[j] = alloc_page(GFP_KERNEL);
- if (krx->krx_pages[j] == NULL)
- {
+ struct page *page = alloc_page(GFP_KERNEL);
+
+ if (page == NULL) {
kqswnal_finalise ();
return (-ENOMEM);
}
- LASSERT(page_address(krx->krx_pages[j]) != NULL);
+ krx->krx_kiov[j].kiov_page = page;
+ LASSERT(page_address(page) != NULL);
#if MULTIRAIL_EKC
ep_dvma_load(kqswnal_data.kqn_ep, NULL,
- page_address(krx->krx_pages[j]),
+ page_address(page),
PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
elan_page_idx, &all_rails, &elanbuffer);
#else
elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
kqswnal_data.kqn_eprxdmahandle,
- page_address(krx->krx_pages[j]),
+ page_address(page),
PAGE_SIZE, elan_page_idx,
&elanbuffer);
if (j == 0)
int krx_rpc_reply_sent; /* rpc reply sent */
atomic_t krx_refcount; /* how to tell when rpc is done */
kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */
- struct page *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */
- struct iovec krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */
+ ptl_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
} kqswnal_rx_t;
typedef struct
int offset, int nob)
{
kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
- char *buffer = (char *)page_address(krx->krx_pages[0]);
+ char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
int rc;
#if MULTIRAIL_EKC
}
memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
#endif
-
+
if (kqswnal_data.kqn_optimized_gets &&
type == PTL_MSG_GET && /* doing a GET */
nid == targetnid) { /* not forwarding */
{
int rc;
kqswnal_tx_t *ktx;
- struct iovec *iov = fwd->kprfd_iov;
+ ptl_kiov_t *kiov = fwd->kprfd_kiov;
int niov = fwd->kprfd_niov;
int nob = fwd->kprfd_nob;
ptl_nid_t nid = fwd->kprfd_gateway_nid;
LBUG ();
#endif
/* The router wants this NAL to forward a packet */
- CDEBUG (D_NET, "forwarding [%p] to "LPX64", %d frags %d bytes\n",
+ CDEBUG (D_NET, "forwarding [%p] to "LPX64", payload: %d frags %d bytes\n",
fwd, nid, niov, nob);
- LASSERT (niov > 0);
-
ktx = kqswnal_get_idle_tx (fwd, 0);
if (ktx == NULL) /* can't get txd right now */
return; /* fwd will be scheduled when tx desc freed */
goto failed;
}
- if (nob > KQSW_NRXMSGBYTES_LARGE) {
- CERROR ("Can't forward [%p] to "LPX64
- ": size %d bigger than max packet size %ld\n",
- fwd, nid, nob, (long)KQSW_NRXMSGBYTES_LARGE);
- rc = -EMSGSIZE;
- goto failed;
- }
+ /* copy hdr into pre-mapped buffer */
+ memcpy(ktx->ktx_buffer, fwd->kprfd_hdr, sizeof(ptl_hdr_t));
+ ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
- ktx->ktx_port = (nob <= (KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) ?
+ ktx->ktx_port = (nob <= KQSW_SMALLPAYLOAD) ?
EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
ktx->ktx_nid = nid;
ktx->ktx_state = KTX_FORWARDING;
ktx->ktx_args[0] = fwd;
+ ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- if ((kqswnal_data.kqn_copy_small_fwd || niov > 1) &&
- nob <= KQSW_TX_BUFFER_SIZE)
+ if (nob <= KQSW_TX_MAXCONTIG)
{
- /* send from ktx's pre-mapped contiguous buffer? */
- lib_copy_iov2buf (ktx->ktx_buffer, niov, iov, 0, nob);
+ /* send payload from ktx's pre-mapped contiguous buffer */
#if MULTIRAIL_EKC
ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
- 0, nob);
+ 0, KQSW_HDR_SIZE + nob);
#else
ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
- ktx->ktx_frags[0].Len = nob;
+ ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + nob;
#endif
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
+ if (nob > 0)
+ lib_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE,
+ niov, kiov, 0, nob);
}
else
{
- /* zero copy */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
- rc = kqswnal_map_tx_iov (ktx, 0, nob, niov, iov);
+ /* zero copy payload */
+#if MULTIRAIL_EKC
+ ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
+ 0, KQSW_HDR_SIZE);
+#else
+ ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
+ ktx->ktx_frags[0].Len = KQSW_HDR_SIZE;
+#endif
+ rc = kqswnal_map_tx_kiov (ktx, 0, nob, niov, kiov);
if (rc != 0)
goto failed;
-
- ktx->ktx_wire_hdr = (ptl_hdr_t *)iov[0].iov_base;
}
rc = kqswnal_launch (ktx);
if (error != 0)
{
- ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
void
kqswnal_rx (kqswnal_rx_t *krx)
{
- ptl_hdr_t *hdr = (ptl_hdr_t *) page_address (krx->krx_pages[0]);
+ ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
ptl_nid_t dest_nid = NTOH__u64 (hdr->dest_nid);
+ int payload_nob;
int nob;
int niov;
return;
}
- /* NB forwarding may destroy iov; rebuild every time */
- for (nob = krx->krx_nob, niov = 0; nob > 0; nob -= PAGE_SIZE, niov++)
- {
- LASSERT (niov < krx->krx_npages);
- krx->krx_iov[niov].iov_base= page_address(krx->krx_pages[niov]);
- krx->krx_iov[niov].iov_len = MIN(PAGE_SIZE, nob);
+ nob = payload_nob = krx->krx_nob - KQSW_HDR_SIZE;
+ niov = 0;
+ if (nob > 0) {
+ krx->krx_kiov[0].kiov_offset = KQSW_HDR_SIZE;
+ krx->krx_kiov[0].kiov_len = MIN(PAGE_SIZE - KQSW_HDR_SIZE, nob);
+ niov = 1;
+ nob -= PAGE_SIZE - KQSW_HDR_SIZE;
+
+ while (nob > 0) {
+ LASSERT (niov < krx->krx_npages);
+
+ krx->krx_kiov[niov].kiov_offset = 0;
+ krx->krx_kiov[niov].kiov_len = MIN(PAGE_SIZE, nob);
+ niov++;
+ nob -= PAGE_SIZE;
+ }
}
- kpr_fwd_init (&krx->krx_fwd, dest_nid,
- krx->krx_nob, niov, krx->krx_iov,
+ kpr_fwd_init (&krx->krx_fwd, dest_nid,
+ hdr, payload_nob, niov, krx->krx_kiov,
kqswnal_fwd_callback, krx);
kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd);
void
kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
{
- ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64
", dpid %d, spid %d, type %d\n",
size_t rlen)
{
kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
+ char *buffer = page_address(krx->krx_kiov[0].kiov_page);
int page;
char *page_ptr;
int page_nob;
#if KQSW_CHECKSUM
kqsw_csum_t senders_csum;
kqsw_csum_t payload_csum = 0;
- kqsw_csum_t hdr_csum = kqsw_csum(0, page_address(krx->krx_pages[0]),
- sizeof(ptl_hdr_t));
+ kqsw_csum_t hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
size_t csum_len = mlen;
int csum_frags = 0;
int csum_nob = 0;
atomic_inc (&csum_counter);
- memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
- sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
+ memcpy (&senders_csum, buffer + sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
if (senders_csum != hdr_csum)
kqswnal_csum_error (krx, 1);
#endif
if (mlen != 0) {
page = 0;
- page_ptr = ((char *) page_address(krx->krx_pages[0])) +
- KQSW_HDR_SIZE;
+ page_ptr = buffer + KQSW_HDR_SIZE;
page_nob = PAGE_SIZE - KQSW_HDR_SIZE;
LASSERT (niov > 0);
{
page++;
LASSERT (page < krx->krx_npages);
- page_ptr = page_address(krx->krx_pages[page]);
+ page_ptr = page_address(krx->krx_kiov[page].kiov_page);
page_nob = PAGE_SIZE;
}
}
#if KQSW_CHECKSUM
- memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) +
- sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), sizeof(kqsw_csum_t));
+ memcpy (&senders_csum, buffer + sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t),
+ sizeof(kqsw_csum_t));
if (csum_len != rlen)
CERROR("Unable to checksum data in user's buffer\n");
void
ksocknal_free_fmbs (ksock_fmb_pool_t *p)
{
+ int npages = p->fmp_buff_pages;
ksock_fmb_t *fmb;
int i;
fmb = list_entry(p->fmp_idle_fmbs.next,
ksock_fmb_t, fmb_list);
- for (i = 0; i < fmb->fmb_npages; i++)
- if (fmb->fmb_pages[i] != NULL)
- __free_page(fmb->fmb_pages[i]);
-
+ for (i = 0; i < npages; i++)
+ if (fmb->fmb_kiov[i].kiov_page != NULL)
+ __free_page(fmb->fmb_kiov[i].kiov_page);
+
list_del(&fmb->fmb_list);
- PORTAL_FREE(fmb, sizeof(*fmb));
+ PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages]));
}
}
spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
+ ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES;
spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
+ ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES;
spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
SOCKNAL_LARGE_FWD_NMSGS); i++) {
- ksock_fmb_t *fmb;
+ ksock_fmb_t *fmb;
+ ksock_fmb_pool_t *pool;
+
+
+ if (i < SOCKNAL_SMALL_FWD_NMSGS)
+ pool = &ksocknal_data.ksnd_small_fmp;
+ else
+ pool = &ksocknal_data.ksnd_large_fmp;
- PORTAL_ALLOC(fmb, sizeof(*fmb));
+ PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t,
+ fmb_kiov[pool->fmp_buff_pages]));
if (fmb == NULL) {
ksocknal_module_fini();
return (-ENOMEM);
}
- if (i < SOCKNAL_SMALL_FWD_NMSGS) {
- fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
- fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp;
- } else {
- fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
- fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
- }
-
- for (j = 0; j < fmb->fmb_npages; j++) {
- fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
+ fmb->fmb_pool = pool;
+
+ for (j = 0; j < pool->fmp_buff_pages; j++) {
+ fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
- if (fmb->fmb_pages[j] == NULL) {
+ if (fmb->fmb_kiov[j].kiov_page == NULL) {
ksocknal_module_fini ();
return (-ENOMEM);
}
- LASSERT(page_address(fmb->fmb_pages[j]) != NULL);
+ LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL);
}
- list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
+ list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs);
}
}
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <asm/irq.h>
#include <linux/init.h>
#include <linux/fs.h>
#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
-#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + PTL_MTU) >> PAGE_SHIFT)
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN(PTL_MTU) >> PAGE_SHIFT)
/* # pages in a large message fwd buffer */
#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
struct list_head fmp_idle_fmbs; /* free buffers */
struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
int fmp_nactive_fmbs; /* # buffers in use */
+ int fmp_buff_pages; /* # pages per buffer */
} ksock_fmb_pool_t;
#define SOCKNAL_INIT_ALL 3
/* A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments and 0 or more ptl_kiov_t fragments. Forwarded
- * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0
- * ptl_kiov_t fragments. Messages from an MD with PTL_MD_KIOV set, have 1
- * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t
- * fragments.
+ * struct iovec fragments (the first frag contains the portals header),
+ * followed by 0 or more ptl_kiov_t fragments.
*
* On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header). Once the header has been received, if the message
- * requires forwarding or will be received into mapped memory, up to
- * PTL_MD_MAX_IOV struct iovec fragments describe the target memory.
- * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used.
- */
+ * receive (the header). Once the header has been received, the payload is
+ * received into either struct iovec or ptl_kiov_t fragments, depending on
+ * what the header matched or whether the message needs forwarding. */
struct ksock_conn; /* forward ref */
struct ksock_peer; /* forward ref */
#endif
} ksock_tx_t;
+typedef struct /* forwarded packet; overlaid on the router
+ * descriptor's kprfd_scratch area by
+ * ksocknal_fwd_packet() */
+{
+ ksock_tx_t ftx_tx; /* send info */
+ struct iovec ftx_iov; /* hdr iovec (points at fwd's wire header) */
+} ksock_ftx_t;
+
#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
/* network zero copy callback descriptor embedded in ksock_tx_t */
{ /* (socknal->router) */
struct list_head fmb_list; /* queue idle */
kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
- int fmb_npages; /* # pages allocated */
ksock_fmb_pool_t *fmb_pool; /* owning pool */
struct ksock_peer *fmb_peer; /* peer received from */
- struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
- struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
+ ptl_hdr_t fmb_hdr; /* message header */
+ ptl_kiov_t fmb_kiov[0]; /* payload frags */
} ksock_fmb_t;
/* space for the rx frag descriptors; we either read a single contiguous
- * header, or PTL_MD_MAX_IOV frags of payload of either type. */
+ * header, or up to PTL_MD_MAX_IOV frags of payload of either type. */
typedef union {
struct iovec iov[PTL_MD_MAX_IOV];
ptl_kiov_t kiov[PTL_MD_MAX_IOV];
PORTAL_FREE(ltx, ltx->ltx_desc_size);
}
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
struct page *
ksocknal_kvaddr_to_page (unsigned long vaddr)
{
int more = (tx->tx_niov > 1) ||
(tx->tx_nkiov > 0) ||
(!list_empty (&conn->ksnc_tx_queue));
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
int offset = vaddr & (PAGE_SIZE - 1);
int zcsize = MIN (fragsize, PAGE_SIZE - offset);
struct page *page;
LASSERT (fragsize <= tx->tx_resid);
LASSERT (tx->tx_niov > 0);
-#if SOCKNAL_ZC
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
if (zcsize >= ksocknal_data.ksnd_zc_min_frag &&
(sock->sk->route_caps & NETIF_F_SG) &&
(sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
{
ptl_nid_t nid = fwd->kprfd_gateway_nid;
- ksock_tx_t *tx = (ksock_tx_t *)&fwd->kprfd_scratch;
+ ksock_ftx_t *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
int rc;
CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
if (nid == ksocknal_lib.ni.nid)
nid = fwd->kprfd_target_nid;
- tx->tx_isfwd = 1; /* This is a forwarding packet */
- tx->tx_nob = fwd->kprfd_nob;
- tx->tx_niov = fwd->kprfd_niov;
- tx->tx_iov = fwd->kprfd_iov;
- tx->tx_nkiov = 0;
- tx->tx_kiov = NULL;
+ /* setup iov for hdr */
+ ftx->ftx_iov.iov_base = fwd->kprfd_hdr;
+ ftx->ftx_iov.iov_len = sizeof(ptl_hdr_t);
+
+ ftx->ftx_tx.tx_isfwd = 1; /* This is a forwarding packet */
+ ftx->ftx_tx.tx_nob = sizeof(ptl_hdr_t) + fwd->kprfd_nob;
+ ftx->ftx_tx.tx_niov = 1;
+ ftx->ftx_tx.tx_iov = &ftx->ftx_iov;
+ ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov;
+ ftx->ftx_tx.tx_kiov = fwd->kprfd_kiov;
- rc = ksocknal_launch_packet (tx, nid);
+ rc = ksocknal_launch_packet (&ftx->ftx_tx, nid);
if (rc != 0)
kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc);
}
{
ksock_fmb_t *fmb = (ksock_fmb_t *)arg;
ksock_fmb_pool_t *fmp = fmb->fmb_pool;
- ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
+ ptl_hdr_t *hdr = (ptl_hdr_t *)page_address(fmb->fmb_kiov[0].kiov_page);
ksock_conn_t *conn = NULL;
ksock_sched_t *sched;
unsigned long flags;
ksocknal_get_idle_fmb (ksock_conn_t *conn)
{
int payload_nob = conn->ksnc_rx_nob_left;
- int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
unsigned long flags;
ksock_fmb_pool_t *pool;
ksock_fmb_t *fmb;
LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
- if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
+ if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
pool = &ksocknal_data.ksnd_small_fmp;
else
pool = &ksocknal_data.ksnd_large_fmp;
int
ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
{
- int payload_nob = conn->ksnc_rx_nob_left;
- int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
+ int payload_nob = conn->ksnc_rx_nob_left;
ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
- int niov; /* at least the header */
- int nob;
+ int niov = 0;
+ int nob = payload_nob;
LASSERT (conn->ksnc_rx_scheduled);
LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
LASSERT (payload_nob >= 0);
- LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
+ LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE);
LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
-
- /* Got a forwarding buffer; copy the header we just read into the
- * forwarding buffer. If there's payload, start reading reading it
- * into the buffer, otherwise the forwarding buffer can be kicked
- * off immediately.
- *
- * NB fmb->fmb_iov spans the WHOLE packet.
- * conn->ksnc_rx_iov spans just the payload.
- */
- fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
-
- /* copy header */
- memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t));
+ LASSERT (fmb->fmb_kiov[0].kiov_offset == 0);
/* Take a ref on the conn's peer to prevent module unload before
- * forwarding completes. NB we ref peer and not conn since because
- * all refs on conn after it has been closed must remove themselves
- * in finite time */
+ * forwarding completes. */
fmb->fmb_peer = conn->ksnc_peer;
atomic_inc (&conn->ksnc_peer->ksnp_refcount);
- if (payload_nob == 0) { /* got complete packet already */
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n",
- conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
- dest_nid, packet_nob);
+ /* Copy the header we just read into the forwarding buffer. If
+ * there's payload, start reading it into the buffer,
+ * otherwise the forwarding buffer can be kicked off
+ * immediately. */
+ fmb->fmb_hdr = conn->ksnc_hdr;
- fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
+ while (nob > 0) {
+ LASSERT (niov < fmb->fmb_pool->fmp_buff_pages);
+ LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0);
+ fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob);
+ nob -= PAGE_SIZE;
+ niov++;
+ }
+
+ kpr_fwd_init(&fmb->fmb_fwd, dest_nid, &fmb->fmb_hdr,
+ payload_nob, niov, fmb->fmb_kiov,
+ ksocknal_fmb_callback, fmb);
- kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
- packet_nob, 1, fmb->fmb_iov,
- ksocknal_fmb_callback, fmb);
+ if (payload_nob == 0) { /* got complete packet already */
+ CDEBUG (D_NET, "%p "LPX64"->"LPX64" fwd_start (immediate)\n",
+ conn, NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid);
- /* forward it now */
kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
ksocknal_new_packet (conn, 0); /* on to next packet */
return (1);
}
- niov = 1;
- if (packet_nob <= PAGE_SIZE) { /* whole packet fits in first page */
- fmb->fmb_iov[0].iov_len = packet_nob;
- } else {
- fmb->fmb_iov[0].iov_len = PAGE_SIZE;
- nob = packet_nob - PAGE_SIZE;
-
- do {
- LASSERT (niov < fmb->fmb_npages);
- fmb->fmb_iov[niov].iov_base =
- page_address (fmb->fmb_pages[niov]);
- fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
- nob -= PAGE_SIZE;
- niov++;
- } while (nob > 0);
- }
-
- kpr_fwd_init (&fmb->fmb_fwd, dest_nid,
- packet_nob, niov, fmb->fmb_iov,
- ksocknal_fmb_callback, fmb);
-
conn->ksnc_cookie = fmb; /* stash fmb for later */
conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
- /* payload is desc's iov-ed buffer, but skipping the hdr */
- LASSERT (niov <= sizeof (conn->ksnc_rx_iov_space) /
- sizeof (struct iovec));
-
- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
- conn->ksnc_rx_iov[0].iov_base =
- (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) +
- sizeof (ptl_hdr_t));
- conn->ksnc_rx_iov[0].iov_len =
- fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
-
- if (niov > 1)
- memcpy(&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1],
- (niov - 1) * sizeof (struct iovec));
-
- conn->ksnc_rx_niov = niov;
+ /* Set up conn->ksnc_rx_kiov to read the payload into fmb's kiov-ed
+ * buffer */
+ LASSERT (niov <= sizeof(conn->ksnc_rx_iov_space)/sizeof(ptl_kiov_t));
+ conn->ksnc_rx_niov = 0;
+ conn->ksnc_rx_nkiov = niov;
+ conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
+ memcpy(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov * sizeof(ptl_kiov_t));
+
CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
return (0);
CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd,
target_nid, src_ne->kpne_interface.kprni_nalid);
- LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
- LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
+ LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
atomic_inc (&kpr_queue_depth);
atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
kpr_fwd_packets++; /* (loose) stats accounting */
- kpr_fwd_bytes += nob;
+ kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
if (src_ne->kpne_shutdown) /* caller is shutting down */
goto out;
gmnalnid_SOURCES = gmnalnid.c
ptlctl_SOURCES = ptlctl.c
-ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence
+ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
ptlctl_DEPENDENCIES = libptlctl.a
debugctl_SOURCES = debugctl.c
-debugctl_LDADD = -L. -lptlctl -lncurses # -lefence
+debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
debugctl_DEPENDENCIES = libptlctl.a
routerstat_SOURCES = routerstat.c
free(pcfg.pcfg_pbuf1);
close(pfd);
printf("%u\n", nid);
- exit(nid);
+ exit(0);
}
# lustre.spec
-%define version v1_2_0
+%define version 1.2.0.3
%define kversion @LINUXRELEASE@
%define linuxdir @LINUX@
%define enable_doc @ENABLE_DOC@
sh conf-sanity.sh
fi
-if [ "$REPLAY_OST_SINGLE" != "no" ]; then
- sh replay-ost-single.sh
-fi
-
if [ "$RECOVERY_SMALL" != "no" ]; then
sh recovery-small.sh
fi
+
+if [ "$REPLAY_OST_SINGLE" != "no" ]; then
+ sh replay-ost-single.sh
+fi
{
int rc, i;
- if (argc < 2) {
- printf("Usage %s filename\n", argv[0]);
+ if (argc < 2) {
+ printf("Usage %s filename {filename ...}\n", argv[0]);
return 1;
}
for (i = 1; i < argc; i++) {
- rc = unlink(argv[i]);
- if (rc)
- printf("unlink(%s) error: %s\n", argv[i],
- strerror(errno));
+ rc = unlink(argv[i]);
+ if (rc) {
+ printf("unlink(%s): %s ", argv[i], strerror(errno));
+ rc = access(argv[i], F_OK);
+ if (rc && errno == ENOENT)
+ printf("(unlinked anyways)\n");
+ else if (rc == 0)
+ printf("(still exists)\n");
+ else
+ printf("(%s looking up)\n", strerror(errno));
+ }
}
return rc;
-}
+}
}
run_test 42 "recoery after ost failure"
+# b=2530
+# directory orphans can't be unlinked from PENDING directory
+test_43() {
+ replay_barrier mds
+
+ # OBD_FAIL_OST_CREATE_NET 0x204
+ do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
+ facet_failover mds
+ df $MOUNT || return 1
+ sleep 10
+ do_facet ost "sysctl -w lustre.fail_loc=0"
+
+ return 0
+}
+run_test 43 "mds osc import failure during recovery; don't LBUG"
+
equals_msg test complete, cleaning up
$CLEANUP
run_test 25a "create file in symlinked directory ==============="
test_25b() {
- if [ ! -d $DIR/d25 ]; then
- run_one 25a
- fi
+ [ ! -d $DIR/d25 ] && test_25a
$CHECKSTAT -t file $DIR/s25/foo || error
}
run_test 25b "lookup file in symlinked directory ==============="
run_test 26d "create multiple component recursive symlink ======"
test_26e() {
- if [ ! -h $DIR/d26-3 ]; then
- run_one 26d
- fi
+ [ ! -h $DIR/d26-3 ] && test_26d
rm $DIR/d26-3
}
run_test 26e "unlink multiple component recursive symlink ======"
TEST_34_SIZE=${TEST_34_SIZE:-2000000000000}
test_34a() {
- rm -f $DIR/test_34_file
- $MCREATE $DIR/test_34_file || error
- $LFIND $DIR/test_34_file 2>&1 | grep -q "no stripe info" || error
- $TRUNCATE $DIR/test_34_file $TEST_34_SIZE || error
- $LFIND $DIR/test_34_file 2>&1 | grep -q "no stripe info" || error
- $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+ rm -f $DIR/f34
+ $MCREATE $DIR/f34 || error
+ $LFIND $DIR/f34 2>&1 | grep -q "no stripe info" || error
+ $TRUNCATE $DIR/f34 $TEST_34_SIZE || error
+ $LFIND $DIR/f34 2>&1 | grep -q "no stripe info" || error
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
}
run_test 34a "truncate file that has not been opened ==========="
test_34b() {
- [ ! -f $DIR/test_34_file ] && run_one 34a
- $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
- $OPENFILE -f O_RDONLY $DIR/test_34_file
- $LFIND $DIR/test_34_file 2>&1 | grep -q "no stripe info" || error
- $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+ [ ! -f $DIR/f34 ] && test_34a
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
+ $OPENFILE -f O_RDONLY $DIR/f34
+ $LFIND $DIR/f34 2>&1 | grep -q "no stripe info" || error
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
}
run_test 34b "O_RDONLY opening file doesn't create objects ====="
test_34c() {
- [ ! -f $DIR/test_34_file ] && run_one 34a
- $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
- $OPENFILE -f O_RDWR $DIR/test_34_file
- $LFIND $DIR/test_34_file 2>&1 | grep -q "no stripe info" && error
- $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+ [ ! -f $DIR/f34 ] && test_34a
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
+ $OPENFILE -f O_RDWR $DIR/f34
+ $LFIND $DIR/f34 2>&1 | grep -q "no stripe info" && error
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
}
run_test 34c "O_RDWR opening file-with-size works =============="
test_34d() {
- dd if=/dev/zero of=$DIR/test_34_file conv=notrunc bs=4k count=1 || error
- $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
- rm $DIR/test_34_file
+ dd if=/dev/zero of=$DIR/f34 conv=notrunc bs=4k count=1 || error
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error
+ rm $DIR/f34
}
run_test 34d "write to sparse file ============================="
test_34e() {
- rm -f $DIR/test_34_file
- $MCREATE $DIR/test_34_file || error
- $TRUNCATE $DIR/test_34_file 1000 || error
- $CHECKSTAT -s 1000 $DIR/test_34_file || error
- $OPENFILE -f O_RDWR $DIR/test_34_file
- $CHECKSTAT -s 1000 $DIR/test_34_file || error
+ rm -f $DIR/f34e
+ $MCREATE $DIR/f34e || error
+ $TRUNCATE $DIR/f34e 1000 || error
+ $CHECKSTAT -s 1000 $DIR/f34e || error
+ $OPENFILE -f O_RDWR $DIR/f34e
+ $CHECKSTAT -s 1000 $DIR/f34e || error
}
run_test 34e "create objects, some with size and some without =="
test_35a() {
- cp /bin/sh $DIR/test_35a_file
- chmod 444 $DIR/test_35a_file
- chown $RUNAS_ID $DIR/test_35a_file
- $RUNAS $DIR/test_35a_file && error || true
- rm $DIR/test_35a_file
+ cp /bin/sh $DIR/f35a
+ chmod 444 $DIR/f35a
+ chown $RUNAS_ID $DIR/f35a
+ $RUNAS $DIR/f35a && error || true
+ rm $DIR/f35a
}
run_test 35a "exec file with mode 444 (should return and not leak) ====="
-
test_36a() {
- rm -f $DIR/test_36_file
- utime $DIR/test_36_file || error
+ rm -f $DIR/f36
+ utime $DIR/f36 || error
}
run_test 36a "MDS utime check (mknod, utime) ==================="
test_36b() {
- echo "" > $DIR/test_36_file
- utime $DIR/test_36_file || error
+ echo "" > $DIR/f36
+ utime $DIR/f36 || error
}
run_test 36b "OST utime check (open, utime) ===================="
test_36c() {
- rm -f $DIR/d36/test_36_file
+ rm -f $DIR/d36/f36
mkdir $DIR/d36
chown $RUNAS_ID $DIR/d36
- $RUNAS utime $DIR/d36/test_36_file || error
+ $RUNAS utime $DIR/d36/f36 || error
}
run_test 36c "non-root MDS utime check (mknod, utime) =========="
test_36d() {
- [ ! -d $DIR/d36 ] && run_one 36c
- echo "" > $DIR/d36/test_36_file
- $RUNAS utime $DIR/d36/test_36_file || error
+ [ ! -d $DIR/d36 ] && test_36c
+ echo "" > $DIR/d36/f36
+ $RUNAS utime $DIR/d36/f36 || error
}
run_test 36d "non-root OST utime check (open, utime) ==========="
test_36e() {
[ $RUNAS_ID -eq $UID ] && return
[ ! -d $DIR/d36 ] && mkdir $DIR/d36
- touch $DIR/d36/test_36_file2
- $RUNAS utime $DIR/d36/test_36_file2 && error || true
+ touch $DIR/d36/f36e
+ $RUNAS utime $DIR/d36/f36e && error "utime worked, want failure" || true
}
run_test 36e "utime on non-owned file (should return error) ===="
trap start_kupdated EXIT
}
+# ensure that all stripes have some grant before we test client-side cache
+for i in `seq -f $DIR/f42-%g 1 $STRIPECOUNT`; do
+ dd if=/dev/zero of=$i bs=4k count=1
+ rm $i
+done
+
# Tests 42* verify that our behaviour is correct WRT caching, file closure,
# file truncation, and file removal.
test_42a() {
cancel_lru_locks OSC
stop_kupdated
- sync # just to be safe
- BEFOREWRITES=`count_ost_writes`
- dd if=/dev/zero of=$DIR/f42a bs=1024 count=100
- AFTERWRITES=`count_ost_writes`
- [ $BEFOREWRITES -eq $AFTERWRITES ] || \
+ sync; sleep 1; sync # just to be safe
+ BEFOREWRITES=`count_ost_writes`
+ grep [0-9] /proc/fs/lustre/osc/OSC*MNT*/cur_grant_bytes
+ dd if=/dev/zero of=$DIR/f42a bs=1024 count=100
+ AFTERWRITES=`count_ost_writes`
+ [ $BEFOREWRITES -eq $AFTERWRITES ] || \
error "$BEFOREWRITES < $AFTERWRITES"
start_kupdated
}
run_test 99a "cvs init ========================================="
test_99b() {
- [ ! -d $DIR/d99cvsroot ] && run_one 99a
+ [ ! -d $DIR/d99cvsroot ] && test_99a
cd /etc/init.d
$RUNAS cvs -d $DIR/d99cvsroot import -m "nomesg" d99reposname vtag rtag
}
run_test 99b "cvs import ======================================="
test_99c() {
- [ ! -d $DIR/d99cvsroot ] && run_one 99b
+ [ ! -d $DIR/d99cvsroot ] && test_99b
cd $DIR
mkdir -p $DIR/d99reposname
chown $RUNAS_ID $DIR/d99reposname
run_test 99c "cvs checkout ====================================="
test_99d() {
- [ ! -d $DIR/d99cvsroot ] && run_one 99c
+ [ ! -d $DIR/d99cvsroot ] && test_99c
cd $DIR/d99reposname
$RUNAS touch foo99
$RUNAS cvs add -m 'addmsg' foo99
run_test 99d "cvs add =========================================="
test_99e() {
- [ ! -d $DIR/d99cvsroot ] && run_one 99c
+ [ ! -d $DIR/d99cvsroot ] && test_99c
cd $DIR/d99reposname
$RUNAS cvs update
}
run_test 99e "cvs update ======================================="
test_99f() {
- [ ! -d $DIR/d99cvsroot ] && run_one 99d
+ [ ! -d $DIR/d99cvsroot ] && test_99d
cd $DIR/d99reposname
$RUNAS cvs commit -m 'nomsg' foo99
}
int debug = 0;
int verbose = 0;
int nomtab = 0;
+static char *progname = NULL;
static void
update_mtab_entry(char *spec, char *node, char *type, char *opts,
if (!nomtab) {
fp = setmntent(MOUNTED, "a+");
if (fp == NULL) {
- fprintf(stderr, "setmntent(%s): %s:", MOUNTED,
- strerror (errno));
+ fprintf(stderr, "%s: setmntent(%s): %s:",
+ progname, MOUNTED, strerror (errno));
} else {
if ((addmntent (fp, &mnt)) == 1) {
- fprintf(stderr, "addmntent: %s:",
- strerror (errno));
+ fprintf(stderr, "%s: addmntent: %s:",
+ progname, strerror (errno));
}
endmntent(fp);
}
lmd->lmd_nal = ptl_name2nal(opteq+1);
} else if(!strcmp(opt, "local_nid")) {
if (ptl_parse_nid(&nid, opteq+1) != 0) {
- fprintf (stderr, "mount: "
+ fprintf (stderr, "%s: "
"can't parse NID %s\n",
+ progname,
opteq+1);
return (-1);
}
lmd->lmd_local_nid = nid;
} else if(!strcmp(opt, "server_nid")) {
if (ptl_parse_nid(&nid, opteq+1) != 0) {
- fprintf (stderr, "mount: "
+ fprintf (stderr, "%s: "
"can't parse NID %s\n",
- opteq+1);
+ progname, opteq+1);
return (-1);
}
lmd->lmd_server_nid = nid;
if (lmd->lmd_nal == SOCKNAL || lmd->lmd_nal == TCPNAL) {
rc = gethostname(buf, sizeof(buf) - 1);
if (rc) {
- fprintf (stderr, "mount: can't get local buf:"
- "%d\n", rc);
+ fprintf (stderr, "%s: can't get local buf: %d\n",
+ progname, rc);
return rc;
}
} else if (lmd->lmd_nal == QSWNAL) {
} while (rc != 0 && pfiles[++i] != NULL);
if (rc != 0) {
- fprintf(stderr,
- "mount: can't read Elan ID from /proc\n");
+ fprintf(stderr, "%s: can't read Elan ID from /proc\n",
+ progname);
+
return -1;
}
}
if (ptl_parse_nid (&nid, buf) != 0) {
- fprintf (stderr, "mount: can't parse NID %s\n", buf);
+ fprintf (stderr, "%s: can't parse NID %s\n", progname, buf);
return (-1);
}
if (lmd->lmd_nal == SOCKNAL || lmd->lmd_nal == TCPNAL) {
if (lmd->lmd_server_nid == PTL_NID_ANY) {
if (ptl_parse_nid (&nid, hostname) != 0) {
- fprintf (stderr, "mount: can't parse NID %s\n",
- hostname);
+ fprintf (stderr, "%s: can't parse NID %s\n",
+ progname, hostname);
return (-1);
}
lmd->lmd_server_nid = nid;
}
if (ptl_parse_ipaddr(&lmd->lmd_server_ipaddr, hostname) != 0) {
- fprintf (stderr, "mount: can't parse host %s\n",
- hostname);
+ fprintf (stderr, "%s: can't parse host %s\n",
+ progname, hostname);
return (-1);
}
} else if (lmd->lmd_nal == QSWNAL) {
char buf[64];
rc = sscanf(hostname, "%*[^0-9]%63[0-9]", buf);
if (rc != 1) {
- fprintf (stderr, "mount: can't get elan id from host %s\n",
- hostname);
+ fprintf (stderr, "%s: can't get elan id from host %s\n",
+ progname, hostname);
return -1;
}
if (ptl_parse_nid (&nid, buf) != 0) {
- fprintf (stderr, "mount: can't parse NID %s\n",
- hostname);
+ fprintf (stderr, "%s: can't parse NID %s\n",
+ progname, hostname);
return (-1);
}
lmd->lmd_server_nid = nid;
return -EINVAL;
if (strlen(source) > sizeof(target) + 1) {
- fprintf(stderr, "mount: "
- "exessively long host:/mds/profile argument\n");
+ fprintf(stderr, "%s: "
+                "excessively long host:/mds/profile argument\n",
+ progname);
return -EINVAL;
}
strcpy(target, source);
*s = '\0';
profile = s + 1;
} else {
- fprintf(stderr, "mount: "
+ fprintf(stderr, "%s: "
"directory to mount not in "
- "host:/mds/profile format\n");
+ "host:/mds/profile format\n",
+ progname);
return(-1);
}
} else {
- fprintf(stderr, "mount: "
- "directory to mount not in host:/mds/profile format\n");
+ fprintf(stderr, "%s: "
+ "directory to mount not in host:/mds/profile format\n",
+ progname);
return(-1);
}
if (verbose)
if (rc)
return rc;
if (strlen(mds) > sizeof(lmd->lmd_mds) + 1) {
- fprintf(stderr, "mount: mds name too long\n");
+ fprintf(stderr, "%s: mds name too long\n", progname);
return(-1);
}
strcpy(lmd->lmd_mds, mds);
if (strlen(profile) > sizeof(lmd->lmd_profile) + 1) {
- fprintf(stderr, "mount: profile name too long\n");
+ fprintf(stderr, "%s: profile name too long\n", progname);
return(-1);
}
strcpy(lmd->lmd_profile, profile);
char * target = argv[2];
char * options = "";
int opt;
- int i;
+ int i = 3;
struct lustre_mount_data lmd;
int rc;
+ progname = strrchr(argv[0], '/');
+ progname = progname ? progname + 1 : argv[0];
+
while ((opt = getopt(argc, argv, "vno:")) != EOF) {
switch (opt) {
case 'v':
verbose = 1;
printf("verbose: %d\n", verbose);
+ i++;
break;
case 'n':
nomtab = 1;
printf("nomtab: %d\n", nomtab);
+ i++;
break;
case 'o':
options = optarg;
+ i++;
break;
default:
+ i++;
break;
}
}
+ if (argc < i) {
+ fprintf(stderr,
+ "%s: too few arguments\n"
+ "Usage: %s <source> <target> [-v] [-n] [-o ...]\n",
+ progname, progname);
+ exit(1);
+ }
+
if (verbose)
for (i = 0; i < argc; i++) {
printf("arg[%d] = %s\n", i, argv[i]);
}
if (debug) {
- printf("mount: debug mode, not mounting\n");
+ printf("%s: debug mode, not mounting\n", progname);
exit(0);
}