*
*/
-#define EXPORT_SYMTAB
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-
#define DEBUG_SUBSYSTEM S_RPC
+#include <linux/module.h>
#include <linux/obd_support.h>
-#include <linux/obd_class.h>
#include <linux/lustre_net.h>
-ptl_handle_eq_t sent_pkt_eq, rcvd_rep_eq, bulk_source_eq, bulk_sink_eq;
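+/* one event queue, and one callback, per message flow: request out, reply
+ * in and out, and the bulk source/sink sides; all are allocated in
+ * ptlrpc_init_portals() */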
+ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq, bulk_source_eq,
+ bulk_sink_eq;
static const ptl_handle_ni_t *socknal_nip = NULL, *qswnal_nip = NULL;
/*
 * The request has gone out; there is nothing to free here
 */
-static int sent_packet_callback(ptl_event_t *ev, void *data)
+static int request_out_callback(ptl_event_t *ev)
{
ENTRY;
+ LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* requests always contiguous */
+
+ if (ev->type != PTL_EVENT_SENT) {
+ // XXX make sure we understand all events, including ACK's
+ CERROR("Unknown event %d\n", ev->type);
+ LBUG();
+ }
+
+ RETURN(1);
+}
+
+
+/*
+ * Free the reply message once it has gone out
+ */
+static int reply_out_callback(ptl_event_t *ev)
+{
+ ENTRY;
+
+ LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* replies always contiguous */
+
if (ev->type == PTL_EVENT_SENT) {
OBD_FREE(ev->mem_desc.start, ev->mem_desc.length);
- } else {
+ } else {
// XXX make sure we understand all events, including ACK's
- CERROR("Unknown event %d\n", ev->type);
- BUG();
+ CERROR("Unknown event %d\n", ev->type);
+ LBUG();
}
- EXIT;
- return 1;
+ RETURN(1);
}
/*
* Wake up the thread waiting for the reply once it comes in.
*/
-static int rcvd_reply_callback(ptl_event_t *ev, void *data)
+static int reply_in_callback(ptl_event_t *ev)
{
- struct ptlrpc_request *rpc = ev->mem_desc.user_ptr;
+ struct ptlrpc_request *req = ev->mem_desc.user_ptr;
ENTRY;
+ LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* replies always contiguous */
+
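+ /* all-0x5a is the pattern expected in a request that has already been
+ * freed */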
+ if (req->rq_xid == 0x5a5a5a5a5a5a5a5aULL) {
+ CERROR("Reply received for freed request! Probably a missing "
+ "ptlrpc_abort()\n");
+ LBUG();
+ }
+
+ if (req->rq_xid != ev->match_bits) {
+ CERROR("Reply packet for wrong request\n");
+ LBUG();
+ }
+
if (ev->type == PTL_EVENT_PUT) {
- rpc->rq_repbuf = ev->mem_desc.start + ev->offset;
+ req->rq_repmsg = ev->mem_desc.start + ev->offset;
barrier();
- wake_up_interruptible(&rpc->rq_wait_for_rep);
- } else {
+ wake_up(&req->rq_wait_for_rep);
+ } else {
// XXX make sure we understand all events, including ACK's
- CERROR("Unknown event %d\n", ev->type);
- BUG();
+ CERROR("Unknown event %d\n", ev->type);
+ LBUG();
}
- EXIT;
- return 1;
+ RETURN(1);
}
-int server_request_callback(ptl_event_t *ev, void *data)
+int request_in_callback(ptl_event_t *ev)
{
- struct ptlrpc_service *service = data;
- int rc;
+ struct ptlrpc_request_buffer_desc *rqbd = ev->mem_desc.user_ptr;
+ struct ptlrpc_service *service = rqbd->rqbd_service;
+
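+ /* the incoming request landed in a buffer posted by this service; the
+ * buffer descriptor tells us whose waitqueue to wake below */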
+ LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* requests always contiguous */
if (ev->rlength != ev->mlength)
CERROR("Warning: Possibly truncated rpc (%d/%d)\n",
ev->mlength, ev->rlength);
- /* The ME is unlinked when there is less than 1024 bytes free
- * on its MD. This ensures we are always able to handle the rpc,
- * although the 1024 value is a guess as to the size of a
- * large rpc (the known safe margin should be determined).
- *
- * NOTE: The portals API by default unlinks all MD's associated
- * with an ME when it's unlinked. For now, this behavior
- * has been commented out of the portals library so the
- * MD can be unlinked when its ref count drops to zero.
- * A new MD and ME will then be created that use the same
- * kmalloc()'ed memory and inserted at the ring tail.
- */
-
- service->srv_ref_count[service->srv_md_active]++;
-
- if (ev->offset >= (service->srv_buf_size - 1024)) {
- CDEBUG(D_INODE, "Unlinking ME %d\n", service->srv_me_active);
-
- rc = PtlMEUnlink(service->srv_me_h[service->srv_me_active]);
- service->srv_me_h[service->srv_me_active] = 0;
-
- if (rc != PTL_OK) {
- CERROR("PtlMEUnlink failed - DROPPING soon: %d\n", rc);
- BUG();
- return rc;
- }
-
- service->srv_me_active = NEXT_INDEX(service->srv_me_active,
- service->srv_ring_length);
-
- if (service->srv_me_h[service->srv_me_active] == 0)
- CERROR("All %d ring ME's are unlinked!\n",
- service->srv_ring_length);
- }
-
- if (ev->type == PTL_EVENT_PUT) {
+ if (ev->type == PTL_EVENT_PUT)
wake_up(&service->srv_waitq);
- } else {
+ else
CERROR("Unexpected event type: %d\n", ev->type);
- }
return 0;
}
-
-static int bulk_source_callback(ptl_event_t *ev, void *data)
+static int bulk_source_callback(ptl_event_t *ev)
{
- struct ptlrpc_bulk_desc *bulk = ev->mem_desc.user_ptr;
-
+ struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
+ struct ptlrpc_bulk_page *bulk;
+ struct list_head *tmp;
+ struct list_head *next;
ENTRY;
- if (ev->type == PTL_EVENT_SENT) {
- CDEBUG(D_NET, "got SENT event\n");
- } else if (ev->type == PTL_EVENT_ACK) {
- CDEBUG(D_NET, "got ACK event\n");
- bulk->b_flags = PTL_BULK_SENT;
- wake_up_interruptible(&bulk->b_waitq);
- } else {
- CERROR("Unexpected event type!\n");
- BUG();
+ CDEBUG(D_NET, "got %s event %d\n",
+ (ev->type == PTL_EVENT_SENT) ? "SENT" :
+ (ev->type == PTL_EVENT_ACK) ? "ACK" : "UNEXPECTED", ev->type);
+
+ LASSERT (ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_ACK);
+
+ LASSERT (atomic_read (&desc->bd_source_callback_count) > 0 &&
+ atomic_read (&desc->bd_source_callback_count) <= 2);
+
+ /* 1 fragment for each page always */
+ LASSERT (ev->mem_desc.niov == desc->bd_page_count);
+
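+ /* one decrement for the SENT event and one for the ACK: the pages are
+ * only released, and the completion callbacks run, once the count
+ * reaches zero */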
+ if (atomic_dec_and_test (&desc->bd_source_callback_count)) {
+ list_for_each_safe(tmp, next, &desc->bd_page_list) {
+ bulk = list_entry(tmp, struct ptlrpc_bulk_page,
+ bp_link);
+
+ if (bulk->bp_cb != NULL)
+ bulk->bp_cb(bulk);
+ }
+ desc->bd_flags |= PTL_BULK_FL_SENT;
+ wake_up(&desc->bd_waitq);
+ if (desc->bd_cb != NULL)
+ desc->bd_cb(desc, desc->bd_cb_data);
}
- EXIT;
- return 1;
+ RETURN(0);
}
-static int bulk_sink_callback(ptl_event_t *ev, void *data)
+static int bulk_sink_callback(ptl_event_t *ev)
{
- struct ptlrpc_bulk_desc *bulk = ev->mem_desc.user_ptr;
-
+ struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
+ struct ptlrpc_bulk_page *bulk;
+ struct list_head *tmp;
+ struct list_head *next;
+ ptl_size_t total = 0;
ENTRY;
if (ev->type == PTL_EVENT_PUT) {
- if (bulk->b_buf != ev->mem_desc.start + ev->offset)
- CERROR("bulkbuf != mem_desc -- why?\n");
- bulk->b_flags = PTL_BULK_RCVD;
- if (bulk->b_cb != NULL)
- bulk->b_cb(bulk, data);
- wake_up_interruptible(&bulk->b_waitq);
+ /* put with zero offset */
+ LASSERT (ev->offset == 0);
+ /* used iovs */
+ LASSERT ((ev->mem_desc.options & PTL_MD_IOV) != 0);
+ /* 1 fragment for each page always */
+ LASSERT (ev->mem_desc.niov == desc->bd_page_count);
+
+ list_for_each_safe (tmp, next, &desc->bd_page_list) {
+ bulk = list_entry(tmp, struct ptlrpc_bulk_page,
+ bp_link);
+
+ total += bulk->bp_buflen;
+
+ if (bulk->bp_cb != NULL)
+ bulk->bp_cb(bulk);
+ }
+
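+ /* the data received must exactly fill the pages posted for this bulk */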
+ LASSERT (ev->mem_desc.length == total);
+
+ desc->bd_flags |= PTL_BULK_FL_RCVD;
+ wake_up(&desc->bd_waitq);
+ if (desc->bd_cb != NULL)
+ desc->bd_cb(desc, desc->bd_cb_data);
} else {
CERROR("Unexpected event type!\n");
- BUG();
+ LBUG();
}
- EXIT;
- return 1;
+ RETURN(1);
}
int ptlrpc_init_portals(void)
else
ni = *socknal_nip;
- rc = PtlEQAlloc(ni, 128, sent_packet_callback, NULL, &sent_pkt_eq);
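+ /* the second argument to PtlEQAlloc() sizes the event queue: the number
+ * of outstanding events it can buffer for the callback */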
+ rc = PtlEQAlloc(ni, 1024, request_out_callback, &request_out_eq);
+ if (rc != PTL_OK)
+ CERROR("PtlEQAlloc failed: %d\n", rc);
+
+ rc = PtlEQAlloc(ni, 1024, reply_out_callback, &reply_out_eq);
if (rc != PTL_OK)
CERROR("PtlEQAlloc failed: %d\n", rc);
- rc = PtlEQAlloc(ni, 128, rcvd_reply_callback, NULL, &rcvd_rep_eq);
+ rc = PtlEQAlloc(ni, 1024, reply_in_callback, &reply_in_eq);
if (rc != PTL_OK)
CERROR("PtlEQAlloc failed: %d\n", rc);
- rc = PtlEQAlloc(ni, 128, bulk_source_callback, NULL, &bulk_source_eq);
+ rc = PtlEQAlloc(ni, 1024, bulk_source_callback, &bulk_source_eq);
if (rc != PTL_OK)
CERROR("PtlEQAlloc failed: %d\n", rc);
- rc = PtlEQAlloc(ni, 128, bulk_sink_callback, NULL, &bulk_sink_eq);
+ rc = PtlEQAlloc(ni, 1024, bulk_sink_callback, &bulk_sink_eq);
if (rc != PTL_OK)
CERROR("PtlEQAlloc failed: %d\n", rc);
void ptlrpc_exit_portals(void)
{
- PtlEQFree(sent_pkt_eq);
- PtlEQFree(rcvd_rep_eq);
+ PtlEQFree(request_out_eq);
+ PtlEQFree(reply_out_eq);
+ PtlEQFree(reply_in_eq);
PtlEQFree(bulk_source_eq);
PtlEQFree(bulk_sink_eq);