Whamcloud - gitweb
ONLY UPDATE IF YOU NEED THIS (i.e. Andreas probably will)
[fs/lustre-release.git] / lustre / ptlrpc / events.c
index 3c4ad5a..55b35ea 100644 (file)
  *
  */
 
-#define EXPORT_SYMTAB
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-
 #define DEBUG_SUBSYSTEM S_RPC
 
+#include <linux/module.h>
 #include <linux/obd_support.h>
-#include <linux/obd_class.h>
 #include <linux/lustre_net.h>
 
-ptl_handle_eq_t sent_pkt_eq, rcvd_rep_eq, bulk_source_eq, bulk_sink_eq;
-static const ptl_handle_ni_t *socknal_nip = NULL, *qswnal_nip = NULL;
+/* Event queue handles.  Each one is allocated by a PtlEQAlloc() call in
+ * ptlrpc_init_portals() and released by PtlEQFree() in
+ * ptlrpc_exit_portals(). */
+ptl_handle_eq_t request_out_eq, reply_in_eq, reply_out_eq, bulk_source_eq,
+        bulk_sink_eq;
+/* Network interface handles, filled in by inter_module_get_request() in
+ * ptlrpc_init_portals(); a pointer stays NULL when that lookup returns
+ * NULL. */
+static const ptl_handle_ni_t *socknal_nip = NULL, *qswnal_nip = NULL, *gmnal_nip = NULL;
 
 /*
  *  Free the packet when it has gone out
+ *
+ *  Callback attached to request_out_eq in ptlrpc_init_portals().  The
+ *  request is recovered from ev->mem_desc.user_ptr.  The only event type
+ *  accepted is PTL_EVENT_SENT; anything else is logged and hits LBUG().
+ *  After that check the request reference is dropped with
+ *  ptlrpc_req_finished(), and the function finishes with RETURN(1).
  */
-static int sent_packet_callback(ptl_event_t *ev, void *data)
+static int request_out_callback(ptl_event_t *ev)
 {
+        struct ptlrpc_request *req = ev->mem_desc.user_ptr;
         ENTRY;
 
+        LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* requests always contiguous */
+
+        if (ev->type != PTL_EVENT_SENT) {
+                // XXX make sure we understand all events, including ACK's
+                CERROR("Unknown event %d\n", ev->type);
+                LBUG();
+        }
+
+        /* this balances the atomic_inc in ptl_send_rpc */
+        ptlrpc_req_finished(req);
+        RETURN(1);
+}
+
+
+/*
+ *  Free the packet when it has gone out
+ *
+ *  Callback attached to reply_out_eq in ptlrpc_init_portals().  On
+ *  PTL_EVENT_SENT the buffer described by the memory descriptor
+ *  (ev->mem_desc.start, ev->mem_desc.length) is released with OBD_FREE().
+ *  Any other event type is logged and hits LBUG().  Finishes with
+ *  RETURN(1).
+ */
+static int reply_out_callback(ptl_event_t *ev)
+{
+        ENTRY;
+
+        LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* replies always contiguous */
+
         if (ev->type == PTL_EVENT_SENT) {
                 OBD_FREE(ev->mem_desc.start, ev->mem_desc.length);
-        } else { 
+        } else {
                 // XXX make sure we understand all events, including ACK's
-                CERROR("Unknown event %d\n", ev->type); 
-                BUG();
+                CERROR("Unknown event %d\n", ev->type);
+                LBUG();
         }
 
-        EXIT;
-        return 1;
+        RETURN(1);
 }
 
 /*
  * Wake up the thread waiting for the reply once it comes in.
+ *
+ * Callback attached to reply_in_eq in ptlrpc_init_portals().  The request
+ * is recovered from ev->mem_desc.user_ptr.  Two sanity checks run before
+ * the event type is examined; each one logs and hits LBUG() on failure:
+ *   - rq_xid must not equal 0x5a5a5a5a5a5a5a5a, which the error message
+ *     treats as the mark of an already freed request (presumably a memory
+ *     poison pattern -- confirm against the allocator);
+ *   - rq_xid must equal ev->match_bits.
+ * On PTL_EVENT_PUT, rq_repmsg is pointed at the received data
+ * (ev->mem_desc.start + ev->offset) and, after a barrier(), waiters on
+ * rq_wait_for_rep are woken.  Any other event type is logged and hits
+ * LBUG().  Finishes with RETURN(1).
  */
-static int rcvd_reply_callback(ptl_event_t *ev, void *data)
+static int reply_in_callback(ptl_event_t *ev)
 {
-        struct ptlrpc_request *rpc = ev->mem_desc.user_ptr;
+        struct ptlrpc_request *req = ev->mem_desc.user_ptr;
         ENTRY;
 
+        LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* replies always contiguous */
+
+        if (req->rq_xid == 0x5a5a5a5a5a5a5a5a) {
+                CERROR("Reply received for freed request!  Probably a missing "
+                       "ptlrpc_abort()\n");
+                LBUG();
+        }
+
+        if (req->rq_xid != ev->match_bits) {
+                CERROR("Reply packet for wrong request\n");
+                LBUG();
+        }
+
         if (ev->type == PTL_EVENT_PUT) {
-                rpc->rq_repbuf = ev->mem_desc.start + ev->offset;
+                req->rq_repmsg = ev->mem_desc.start + ev->offset;
                 barrier();
-                wake_up_interruptible(&rpc->rq_wait_for_rep);
-        } else { 
+                wake_up(&req->rq_wait_for_rep);
+        } else {
                 // XXX make sure we understand all events, including ACK's
-                CERROR("Unknown event %d\n", ev->type); 
-                BUG();
+                CERROR("Unknown event %d\n", ev->type);
+                LBUG();
         }
 
-        EXIT;
-        return 1;
+        RETURN(1);
 }
 
-int server_request_callback(ptl_event_t *ev, void *data)
+/*
+ * Event callback for an incoming request.
+ *
+ * ev->mem_desc.user_ptr carries the request buffer descriptor (rqbd); the
+ * owning service is taken from rqbd->rqbd_service.  The function asserts
+ * that the MD is contiguous, that the event is a PTL_EVENT_PUT, and that
+ * both service->srv_nrqbds_receiving and rqbd->rqbd_refcount are positive.
+ * A length mismatch between ev->rlength and ev->mlength is only logged.
+ *
+ * Reference handling depends on ev->unlinked_me:
+ *   - valid handle: no reference is added; srv_nrqbds_receiving is
+ *     decremented and an error is logged if it reaches zero;
+ *   - otherwise: rqbd->rqbd_refcount is incremented.
+ *
+ * In both cases waiters on service->srv_waitq are woken and 0 is returned.
+ * This function is not static and is not passed to any PtlEQAlloc() call
+ * visible in this file; presumably it is registered elsewhere -- confirm.
+ */
+int request_in_callback(ptl_event_t *ev)
 {
-        struct ptlrpc_service *service = data;
-        int rc;
-
+        struct ptlrpc_request_buffer_desc *rqbd = ev->mem_desc.user_ptr;
+        struct ptlrpc_service *service = rqbd->rqbd_service;
+
+        LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* requests always contiguous */
+        LASSERT (ev->type == PTL_EVENT_PUT);    /* we only enable puts */
+        LASSERT (atomic_read (&service->srv_nrqbds_receiving) > 0);
+        LASSERT (atomic_read (&rqbd->rqbd_refcount) > 0);
+        
         if (ev->rlength != ev->mlength)
                 CERROR("Warning: Possibly truncated rpc (%d/%d)\n",
                        ev->mlength, ev->rlength);
 
-        /* The ME is unlinked when there is less than 1024 bytes free
-         * on its MD.  This ensures we are always able to handle the rpc, 
-         * although the 1024 value is a guess as to the size of a
-         * large rpc (the known safe margin should be determined).
-         *
-         * NOTE: The portals API by default unlinks all MD's associated
-         *       with an ME when it's unlinked.  For now, this behavior
-         *       has been commented out of the portals library so the
-         *       MD can be unlinked when its ref count drops to zero.
-         *       A new MD and ME will then be created that use the same
-         *       kmalloc()'ed memory and inserted at the ring tail.
-         */
-
-        service->srv_ref_count[service->srv_md_active]++;
-
-        if (ev->offset >= (service->srv_buf_size - 1024)) {
-                CDEBUG(D_INODE, "Unlinking ME %d\n", service->srv_me_active);
-
-                rc = PtlMEUnlink(service->srv_me_h[service->srv_me_active]);
-                service->srv_me_h[service->srv_me_active] = 0;
-
-                if (rc != PTL_OK) {
-                        CERROR("PtlMEUnlink failed - DROPPING soon: %d\n", rc);
-                        BUG();
-                        return rc;
+        if (ptl_is_valid_handle (&ev->unlinked_me))
+        {
+                /* This is the last request to be received into this
+                 * request buffer.  We don't bump the refcount, since the
+                 * thread servicing this event is effectively taking over
+                 * portals' reference.
+                 */
+#warning ev->unlinked_me.nal_idx is not set properly in a callback
+                LASSERT (ev->unlinked_me.handle_idx == rqbd->rqbd_me_h.handle_idx);
+
+                if (atomic_dec_and_test (&service->srv_nrqbds_receiving)) /* we're off-air */
+                {
+                        CERROR ("All request buffers busy\n");
+                        /* we'll probably start dropping packets in portals soon */
                 }
-
-                service->srv_me_active = NEXT_INDEX(service->srv_me_active,
-                        service->srv_ring_length);
-
-                if (service->srv_me_h[service->srv_me_active] == 0)
-                        CERROR("All %d ring ME's are unlinked!\n",
-                               service->srv_ring_length);
         }
+        else
+                atomic_inc (&rqbd->rqbd_refcount); /* +1 ref for service thread */
 
-        if (ev->type == PTL_EVENT_PUT) {
-                wake_up(&service->srv_waitq);
-        } else {
-                CERROR("Unexpected event type: %d\n", ev->type);
-        }
+        wake_up(&service->srv_waitq);
 
         return 0;
 }
 
-
-static int bulk_source_callback(ptl_event_t *ev, void *data)
+/*
+ * Callback attached to bulk_source_eq in ptlrpc_init_portals().
+ *
+ * The bulk descriptor is recovered from ev->mem_desc.user_ptr.  The event
+ * type is logged and must be PTL_EVENT_SENT or PTL_EVENT_ACK.  The
+ * descriptor's bd_source_callback_count must be 1 or 2 on entry
+ * (presumably one count each for SENT and ACK -- confirm where the counter
+ * is initialised), and the MD must have one fragment per bulk page.
+ *
+ * Only the call that drops bd_source_callback_count to zero does the
+ * completion work: it invokes bp_cb on every page in bd_page_list that has
+ * one, sets PTL_BULK_FL_SENT in bd_flags, wakes waiters on bd_waitq and
+ * finally calls bd_cb(desc, bd_cb_data) if it is set.
+ *
+ * Finishes with RETURN(0).
+ */
+static int bulk_source_callback(ptl_event_t *ev)
 {
-        struct ptlrpc_bulk_desc *bulk = ev->mem_desc.user_ptr;
-
+        struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
+        struct ptlrpc_bulk_page *bulk;
+        struct list_head        *tmp;
+        struct list_head        *next;
         ENTRY;
 
-        if (ev->type == PTL_EVENT_SENT) {
-                CDEBUG(D_NET, "got SENT event\n");
-        } else if (ev->type == PTL_EVENT_ACK) {
-                CDEBUG(D_NET, "got ACK event\n");
-                bulk->b_flags = PTL_BULK_SENT;
-                wake_up_interruptible(&bulk->b_waitq);
-        } else {
-                CERROR("Unexpected event type!\n");
-                BUG();
+        CDEBUG(D_NET, "got %s event %d\n",
+               (ev->type == PTL_EVENT_SENT) ? "SENT" :
+               (ev->type == PTL_EVENT_ACK)  ? "ACK"  : "UNEXPECTED", ev->type);
+
+        LASSERT (ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_ACK);
+
+        LASSERT (atomic_read (&desc->bd_source_callback_count) > 0 &&
+                 atomic_read (&desc->bd_source_callback_count) <= 2);
+
+        /* 1 fragment for each page always */
+        LASSERT (ev->mem_desc.niov == desc->bd_page_count);
+
+        if (atomic_dec_and_test (&desc->bd_source_callback_count)) {
+                list_for_each_safe(tmp, next, &desc->bd_page_list) {
+                        bulk = list_entry(tmp, struct ptlrpc_bulk_page,
+                                          bp_link);
+
+                        if (bulk->bp_cb != NULL)
+                                bulk->bp_cb(bulk);
+                }
+                desc->bd_flags |= PTL_BULK_FL_SENT;
+                wake_up(&desc->bd_waitq);
+                if (desc->bd_cb != NULL)
+                        desc->bd_cb(desc, desc->bd_cb_data);
         }
 
-        EXIT;
-        return 1;
+        RETURN(0);
 }
 
-static int bulk_sink_callback(ptl_event_t *ev, void *data)
+/*
+ * Callback attached to bulk_sink_eq in ptlrpc_init_portals().
+ *
+ * The bulk descriptor is recovered from ev->mem_desc.user_ptr.  On
+ * PTL_EVENT_PUT the function asserts that the put landed at offset 0, that
+ * the MD uses an iovec (PTL_MD_IOV set) and that it has one fragment per
+ * bulk page.  It then walks bd_page_list, summing bp_buflen and invoking
+ * bp_cb on every page that has one, and asserts that the sum equals
+ * ev->mem_desc.length.  Finally it sets PTL_BULK_FL_RCVD in bd_flags, wakes
+ * waiters on bd_waitq and calls bd_cb(desc, bd_cb_data) if it is set.
+ *
+ * Any other event type is logged and hits LBUG().  Finishes with
+ * RETURN(1).
+ */
+static int bulk_sink_callback(ptl_event_t *ev)
 {
-        struct ptlrpc_bulk_desc *bulk = ev->mem_desc.user_ptr;
-
+        struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
+        struct ptlrpc_bulk_page *bulk;
+        struct list_head        *tmp;
+        struct list_head        *next;
+        ptl_size_t               total = 0;
         ENTRY;
 
         if (ev->type == PTL_EVENT_PUT) {
-                if (bulk->b_buf != ev->mem_desc.start + ev->offset)
-                        CERROR("bulkbuf != mem_desc -- why?\n");
-                bulk->b_flags = PTL_BULK_RCVD;
-                if (bulk->b_cb != NULL)
-                        bulk->b_cb(bulk, data);
-                wake_up_interruptible(&bulk->b_waitq);
+                /* put with zero offset */
+                LASSERT (ev->offset == 0);
+                /* used iovs */
+                LASSERT ((ev->mem_desc.options & PTL_MD_IOV) != 0);
+                /* 1 fragment for each page always */
+                LASSERT (ev->mem_desc.niov == desc->bd_page_count);
+
+                list_for_each_safe (tmp, next, &desc->bd_page_list) {
+                        bulk = list_entry(tmp, struct ptlrpc_bulk_page,
+                                          bp_link);
+
+                        total += bulk->bp_buflen;
+
+                        if (bulk->bp_cb != NULL)
+                                bulk->bp_cb(bulk);
+                }
+
+                LASSERT (ev->mem_desc.length == total);
+
+                desc->bd_flags |= PTL_BULK_FL_RCVD;
+                wake_up(&desc->bd_waitq);
+                if (desc->bd_cb != NULL)
+                        desc->bd_cb(desc, desc->bd_cb_data);
         } else {
                 CERROR("Unexpected event type!\n");
-                BUG();
+                LBUG();
         }
 
-        EXIT;
-        return 1;
+        RETURN(1);
 }
 
 int ptlrpc_init_portals(void)
@@ -180,30 +229,37 @@ int ptlrpc_init_portals(void)
 
         socknal_nip = inter_module_get_request("ksocknal_ni", "ksocknal");
         qswnal_nip = inter_module_get_request("kqswnal_ni", "kqswnal");
-        if (socknal_nip == NULL && qswnal_nip == NULL) {
-                CERROR("get_ni failed: is a NAL module loaded?\n");
-                return -EIO;
-        }
+        gmnal_nip = inter_module_get_request("kgmnal_ni", "kgmnal");
 
         /* Use the qswnal if it's there */
         if (qswnal_nip != NULL)
                 ni = *qswnal_nip;
-        else
+        else if (gmnal_nip != NULL)
+                ni = *gmnal_nip;
+        else if (socknal_nip != NULL)
                 ni = *socknal_nip;
+        else {
+                CERROR("get_ni failed: is a NAL module loaded?\n");
+                return -EIO;
+        }
+
+        rc = PtlEQAlloc(ni, 1024, request_out_callback, &request_out_eq);
+        if (rc != PTL_OK)
+                CERROR("PtlEQAlloc failed: %d\n", rc);
 
-        rc = PtlEQAlloc(ni, 128, sent_packet_callback, NULL, &sent_pkt_eq);
+        rc = PtlEQAlloc(ni, 1024, reply_out_callback, &reply_out_eq);
         if (rc != PTL_OK)
                 CERROR("PtlEQAlloc failed: %d\n", rc);
 
-        rc = PtlEQAlloc(ni, 128, rcvd_reply_callback, NULL, &rcvd_rep_eq);
+        rc = PtlEQAlloc(ni, 1024, reply_in_callback, &reply_in_eq);
         if (rc != PTL_OK)
                 CERROR("PtlEQAlloc failed: %d\n", rc);
 
-        rc = PtlEQAlloc(ni, 128, bulk_source_callback, NULL, &bulk_source_eq);
+        rc = PtlEQAlloc(ni, 1024, bulk_source_callback, &bulk_source_eq);
         if (rc != PTL_OK)
                 CERROR("PtlEQAlloc failed: %d\n", rc);
 
-        rc = PtlEQAlloc(ni, 128, bulk_sink_callback, NULL, &bulk_sink_eq);
+        rc = PtlEQAlloc(ni, 1024, bulk_sink_callback, &bulk_sink_eq);
         if (rc != PTL_OK)
                 CERROR("PtlEQAlloc failed: %d\n", rc);
 
@@ -212,8 +268,9 @@ int ptlrpc_init_portals(void)
 
 void ptlrpc_exit_portals(void)
 {
-        PtlEQFree(sent_pkt_eq);
-        PtlEQFree(rcvd_rep_eq);
+        PtlEQFree(request_out_eq);
+        PtlEQFree(reply_out_eq);
+        PtlEQFree(reply_in_eq);
         PtlEQFree(bulk_source_eq);
         PtlEQFree(bulk_sink_eq);
 
@@ -221,4 +278,6 @@ void ptlrpc_exit_portals(void)
                 inter_module_put("kqswnal_ni");
         if (socknal_nip != NULL)
                 inter_module_put("ksocknal_ni");
+        if (gmnal_nip != NULL)
+                inter_module_put("kgmnal_ni");
 }