diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c
index a9834b5..c3cb58b 100644
--- a/lnet/lnet/lib-msg.c
+++ b/lnet/lnet/lib-msg.c
@@ -1,25 +1,39 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
  *
- * lib/lib-msg.c
- * Message decoding, parsing and finalizing routines
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- * This file is part of Lustre, http://www.lustre.org
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
  *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * lnet/lnet/lib-msg.c
  *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Message decoding, parsing and finalizing routines
  */

 #define DEBUG_SUBSYSTEM S_LNET

@@ -27,40 +41,291 @@
 #include <lnet/lib-lnet.h>

 void
-lnet_enq_event_locked (lnet_eq_t *eq, lnet_event_t *ev)
+lnet_build_unlink_event (lnet_libmd_t *md, lnet_event_t *ev)
 {
-        lnet_event_t *eq_slot;
+        ENTRY;

-        /* Allocate the next queue slot */
-        ev->sequence = eq->eq_enq_seq++;
+        memset(ev, 0, sizeof(*ev));

-        /* size must be a power of 2 to handle sequence # overflow */
-        LASSERT (eq->eq_size != 0 &&
-                 eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
-        eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1));
+        ev->status = 0;
+        ev->unlinked = 1;
+        ev->type = LNET_EVENT_UNLINK;
+        lnet_md_deconstruct(md, &ev->md);
+        lnet_md2handle(&ev->md_handle, md);
+        EXIT;
+}

-        /* There is no race since both event consumers and event producers
-         * take the LNET_LOCK, so we don't screw around with memory
-         * barriers, setting the sequence number last or wierd structure
-         * layout assertions. */
-        *eq_slot = *ev;
+/*
+ * Don't need any lock, must be called after lnet_commit_md
+ */
+void
+lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type)
+{
+        lnet_hdr_t *hdr = &msg->msg_hdr;
+        lnet_event_t *ev = &msg->msg_ev;
+
+        LASSERT(!msg->msg_routing);
+
+        ev->type = ev_type;
+
+        if (ev_type == LNET_EVENT_SEND) {
+                /* event for active message */
+                ev->target.nid = le64_to_cpu(hdr->dest_nid);
+                ev->target.pid = le32_to_cpu(hdr->dest_pid);
+                ev->initiator.nid = LNET_NID_ANY;
+                ev->initiator.pid = the_lnet.ln_pid;
+                ev->sender = LNET_NID_ANY;
+
+        } else {
+                /* event for passive message */
+                ev->target.pid = hdr->dest_pid;
+                ev->target.nid = hdr->dest_nid;
+                ev->initiator.pid = hdr->src_pid;
+                ev->initiator.nid = hdr->src_nid;
+                ev->rlength = hdr->payload_length;
+                ev->sender = msg->msg_from;
+                ev->mlength = msg->msg_wanted;
+                ev->offset = msg->msg_offset;
+        }
+
+        switch (ev_type) {
+        default:
+                LBUG();
+
+        case LNET_EVENT_PUT: /* passive PUT */
+                ev->pt_index = hdr->msg.put.ptl_index;
+                ev->match_bits = hdr->msg.put.match_bits;
+                ev->hdr_data = hdr->msg.put.hdr_data;
+                return;
+
+        case LNET_EVENT_GET: /* passive GET */
+                ev->pt_index = hdr->msg.get.ptl_index;
+                ev->match_bits = hdr->msg.get.match_bits;
+                ev->hdr_data = 0;
+                return;
+
+        case LNET_EVENT_ACK: /* ACK */
+                ev->match_bits = hdr->msg.ack.match_bits;
+                ev->mlength = hdr->msg.ack.mlength;
+                return;
+
+        case LNET_EVENT_REPLY: /* REPLY */
+                return;
+
+        case LNET_EVENT_SEND: /* active message */
+                if (msg->msg_type == LNET_MSG_PUT) {
+                        ev->pt_index = le32_to_cpu(hdr->msg.put.ptl_index);
+                        ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
+                        ev->offset = le32_to_cpu(hdr->msg.put.offset);
+                        ev->mlength =
+                        ev->rlength = le32_to_cpu(hdr->payload_length);
+                        ev->hdr_data = le64_to_cpu(hdr->msg.put.hdr_data);
+
+                } else {
+                        LASSERT(msg->msg_type == LNET_MSG_GET);
+                        ev->pt_index = le32_to_cpu(hdr->msg.get.ptl_index);
+                        ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
+                        ev->mlength =
+                        ev->rlength = le32_to_cpu(hdr->msg.get.sink_length);
+                        ev->offset = le32_to_cpu(hdr->msg.get.src_offset);
+                        ev->hdr_data = 0;
+                }
+                return;
+        }
+}

-        /* Call the callback handler (if any) */
-        if (eq->eq_callback != NULL)
-                eq->eq_callback (eq_slot);
+void
+lnet_msg_commit(lnet_msg_t *msg, int sending)
+{
+        struct lnet_msg_container *container = &the_lnet.ln_msg_container;
+        lnet_counters_t *counters = &the_lnet.ln_counters;
+
+        /* routed message can be committed for both receiving and sending */
+        LASSERT(!msg->msg_tx_committed);
+
+        if (msg->msg_rx_committed) { /* routed message, or reply for GET */
+                LASSERT(sending);
+                LASSERT(msg->msg_onactivelist);
+                msg->msg_tx_committed = 1;
+                return;
+        }
+
+        LASSERT(!msg->msg_onactivelist);
+        msg->msg_onactivelist = 1;
+        cfs_list_add(&msg->msg_activelist, &container->msc_active);
+
+        counters->msgs_alloc++;
+        if (counters->msgs_alloc > counters->msgs_max)
+                counters->msgs_max = counters->msgs_alloc;
+
+        if (sending)
+                msg->msg_tx_committed = 1;
+        else
+                msg->msg_rx_committed = 1;
+}

-#ifdef __KERNEL__
-        /* Wake anyone waiting in LNetEQPoll() */
-        if (cfs_waitq_active(&the_lnet.ln_waitq))
-                cfs_waitq_broadcast(&the_lnet.ln_waitq);
-#else
-# if !HAVE_LIBPTHREAD
-        /* LNetEQPoll() calls into _the_ LND to wait for action */
-# else
-        /* Wake anyone waiting in LNetEQPoll() */
-        pthread_cond_broadcast(&the_lnet.ln_cond);
-# endif
-#endif
+static void
+lnet_msg_tx_decommit(lnet_msg_t *msg, int status)
+{
+        lnet_counters_t *counters = &the_lnet.ln_counters;
+        lnet_event_t *ev = &msg->msg_ev;
+
+        LASSERT(msg->msg_tx_committed);
+        if (status != 0)
+                goto out;
+
+        switch (ev->type) {
+        default: /* routed message */
+                LASSERT(msg->msg_routing);
+                LASSERT(msg->msg_rx_committed);
+                LASSERT(ev->type == 0);
+
+                counters->route_length += msg->msg_len;
+                counters->route_count++;
+                goto out;
+
+        case LNET_EVENT_PUT:
+                /* should have been decommitted */
+                LASSERT(!msg->msg_rx_committed);
+                /* overwritten while sending ACK */
+                LASSERT(msg->msg_type == LNET_MSG_ACK);
+                msg->msg_type = LNET_MSG_PUT; /* fix type */
+                break;
+
+        case LNET_EVENT_SEND:
+                LASSERT(!msg->msg_rx_committed);
+                if (msg->msg_type == LNET_MSG_PUT)
+                        counters->send_length += msg->msg_len;
+                break;
+
+        case LNET_EVENT_GET:
+                LASSERT(msg->msg_rx_committed);
+                /* overwritten while sending reply */
+                LASSERT(msg->msg_type == LNET_MSG_REPLY);
+
+                msg->msg_type = LNET_MSG_GET; /* fix type */
+                counters->send_length += msg->msg_len;
+                break;
+        }
+
+        counters->send_count++;
+ out:
+        lnet_return_tx_credits_locked(msg);
+        msg->msg_tx_committed = 0;
+}
+
+static void
+lnet_msg_rx_decommit(lnet_msg_t *msg, int status)
+{
+        lnet_counters_t *counters = &the_lnet.ln_counters;
+        lnet_event_t *ev = &msg->msg_ev;
+
+        LASSERT(!msg->msg_tx_committed); /* decommitted or uncommitted */
+        LASSERT(msg->msg_rx_committed);
+
+        if (status != 0)
+                goto out;
+
+        switch (ev->type) {
+        default:
+                LASSERT(ev->type == 0);
+                LASSERT(msg->msg_routing);
+                goto out;
+
+        case LNET_EVENT_ACK:
+                LASSERT(msg->msg_type == LNET_MSG_ACK);
+                break;
+
+        case LNET_EVENT_GET:
+                LASSERT(msg->msg_type == LNET_MSG_GET);
+                break;
+
+        case LNET_EVENT_PUT:
+                LASSERT(msg->msg_type == LNET_MSG_PUT);
+                break;
+
+        case LNET_EVENT_REPLY:
+                LASSERT(msg->msg_type == LNET_MSG_REPLY ||
+                        msg->msg_type == LNET_MSG_GET); /* optimized GET */
+                break;
+        }
+
+        counters->recv_count++;
+        if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
+                counters->recv_length += msg->msg_wanted;
+
+ out:
+        lnet_return_rx_credits_locked(msg);
+        msg->msg_rx_committed = 0;
+}
+
+void
+lnet_msg_decommit(lnet_msg_t *msg, int status)
+{
+        lnet_counters_t *counters = &the_lnet.ln_counters;
+
+        LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
+        LASSERT(msg->msg_onactivelist);
+
+        if (msg->msg_tx_committed) /* always decommit for sending first */
+                lnet_msg_tx_decommit(msg, status);
+
+        if (msg->msg_rx_committed)
+                lnet_msg_rx_decommit(msg, status);
+
+        cfs_list_del(&msg->msg_activelist);
+        msg->msg_onactivelist = 0;
+        counters->msgs_alloc--;
+}
+
+void
+lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md,
+                   unsigned int offset, unsigned int mlen)
+{
+        /* Here, we attach the MD to the lnet_msg, mark it busy, and
+         * decrement its threshold. Come what may, the lnet_msg "owns"
+         * the MD until a call to lnet_msg_detach_md or lnet_finalize()
+         * signals completion. */
+        LASSERT(!msg->msg_routing);
+
+        msg->msg_md = md;
+        if (msg->msg_receiving) { /* committed for receiving */
+                msg->msg_offset = offset;
+                msg->msg_wanted = mlen;
+        }
+
+        md->md_refcount++;
+        if (md->md_threshold != LNET_MD_THRESH_INF) {
+                LASSERT(md->md_threshold > 0);
+                md->md_threshold--;
+        }
+
+        /* build umd in event */
+        lnet_md2handle(&msg->msg_ev.md_handle, md);
+        lnet_md_deconstruct(md, &msg->msg_ev.md);
+}
+
+void
+lnet_msg_detach_md(lnet_msg_t *msg, int status)
+{
+        lnet_libmd_t *md = msg->msg_md;
+        int unlink;
+
+        /* Now it's safe to drop my caller's ref */
+        md->md_refcount--;
+        LASSERT(md->md_refcount >= 0);
+
+        unlink = lnet_md_unlinkable(md);
+        if (md->md_eq != NULL) {
+                msg->msg_ev.status = status;
+                msg->msg_ev.unlinked = unlink;
+                lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
+        }
+
+        if (unlink)
+                lnet_md_unlink(md);
+
+        msg->msg_md = NULL;
 }

 void
@@ -75,22 +340,22 @@ lnet_complete_msg_locked(lnet_msg_t *msg)

         if (status == 0 && msg->msg_ack) {
                 /* Only send an ACK if the PUT completed successfully */
-                lnet_return_credits_locked(msg);
+                lnet_msg_decommit(msg, 0);

                 msg->msg_ack = 0;
                 LNET_UNLOCK();
-
+
                 LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
                 LASSERT(!msg->msg_routing);

                 ack_wmd = msg->msg_hdr.msg.put.ack_wmd;
-
+
                 lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);

                 msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
                 msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
                 msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);
-
+
                 rc = lnet_send(msg->msg_ev.target.nid, msg);

                 LNET_LOCK();
@@ -112,26 +377,19 @@ lnet_complete_msg_locked(lnet_msg_t *msg)
                 return;
         }

-        lnet_return_credits_locked(msg);
-
-        LASSERT (msg->msg_onactivelist);
-        msg->msg_onactivelist = 0;
-        list_del (&msg->msg_activelist);
-        the_lnet.ln_counters.msgs_alloc--;
-        lnet_msg_free(msg);
+        lnet_msg_decommit(msg, status);
+        lnet_msg_free_locked(msg);
 }

 void
 lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status)
 {
-#ifdef __KERNEL__
-        int i;
-        int my_slot;
-#endif
-        lnet_libmd_t *md;
+        struct lnet_msg_container *container;
+        int my_slot;
+        int i;

-        LASSERT (!in_interrupt ());
+        LASSERT (!cfs_in_interrupt ());

         if (msg == NULL)
                 return;
@@ -152,73 +410,139 @@ lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status)
                msg->msg_txpeer == NULL ? "" : libcfs_nid2str(msg->msg_txpeer->lp_nid),
                msg->msg_rxpeer == NULL ? "" : libcfs_nid2str(msg->msg_rxpeer->lp_nid));
 #endif
-        LNET_LOCK();

         LASSERT (msg->msg_onactivelist);

         msg->msg_ev.status = status;

-        md = msg->msg_md;
-        if (md != NULL) {
-                int unlink;
+        if (msg->msg_md != NULL) {
+                lnet_res_lock();
+                lnet_msg_detach_md(msg, status);
+                lnet_res_unlock();
+        }

-                /* Now it's safe to drop my caller's ref */
-                md->md_refcount--;
-                LASSERT (md->md_refcount >= 0);
+        if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
+                /* not committed to network yet */
+                LASSERT(!msg->msg_onactivelist);
+                lnet_msg_free(msg);
+                return;
+        }

-                unlink = lnet_md_unlinkable(md);
-
-                msg->msg_ev.unlinked = unlink;
-
-                if (md->md_eq != NULL)
-                        lnet_enq_event_locked(md->md_eq, &msg->msg_ev);
-
-                if (unlink)
-                        lnet_md_unlink(md);
+        LNET_LOCK();
+        container = &the_lnet.ln_msg_container;
+        cfs_list_add_tail(&msg->msg_list, &container->msc_finalizing);

-                msg->msg_md = NULL;
-        }
+        /* Recursion breaker. Don't complete the message here if I am (or
+         * enough other threads are) already completing messages */

-        list_add_tail (&msg->msg_list, &the_lnet.ln_finalizeq);
-
-        /* Recursion breaker. Don't complete the message here if I am (or
-         * enough other threads are) already completing messages */

 #ifdef __KERNEL__
-        my_slot = -1;
-        for (i = 0; i < the_lnet.ln_nfinalizers; i++) {
-                if (the_lnet.ln_finalizers[i] == cfs_current())
-                        goto out;
-                if (my_slot < 0 && the_lnet.ln_finalizers[i] == NULL)
-                        my_slot = i;
-        }
-        if (my_slot < 0)
-                goto out;
+        my_slot = -1;
+        for (i = 0; i < container->msc_nfinalizers; i++) {
+                if (container->msc_finalizers[i] == cfs_current())
+                        goto out;
+
+                if (my_slot < 0 && container->msc_finalizers[i] == NULL)
+                        my_slot = i;
+        }
+
+        if (my_slot < 0)
+                goto out;

-        the_lnet.ln_finalizers[my_slot] = cfs_current();
+        container->msc_finalizers[my_slot] = cfs_current();
 #else
-        if (the_lnet.ln_finalizing)
-                goto out;
+        LASSERT(container->msc_nfinalizers == 1);
+        if (container->msc_finalizers[0] != NULL)
+                goto out;
+
+        my_slot = i = 0;
+        container->msc_finalizers[0] = (struct lnet_msg_container *)1;
 #endif

-        while (!list_empty(&the_lnet.ln_finalizeq)) {
-                msg = list_entry(the_lnet.ln_finalizeq.next,
-                                 lnet_msg_t, msg_list);
-
-                list_del(&msg->msg_list);
+        while (!cfs_list_empty(&container->msc_finalizing)) {
+                msg = cfs_list_entry(container->msc_finalizing.next,
+                                     lnet_msg_t, msg_list);
+
+                cfs_list_del(&msg->msg_list);

                 /* NB drops and regains the lnet lock if it actually does
                  * anything, so my finalizing friends can chomp along too */
                 lnet_complete_msg_locked(msg);
         }

-#ifdef __KERNEL__
-        the_lnet.ln_finalizers[my_slot] = NULL;
-#else
-        the_lnet.ln_finalizing = 0;
-#endif
-
+        container->msc_finalizers[my_slot] = NULL;
 out:
-        LNET_UNLOCK();
+        LNET_UNLOCK();
+}
+
+void
+lnet_msg_container_cleanup(struct lnet_msg_container *container)
+{
+        int count = 0;
+
+        if (container->msc_init == 0)
+                return;
+
+        while (!cfs_list_empty(&container->msc_active)) {
+                lnet_msg_t *msg = cfs_list_entry(container->msc_active.next,
+                                                 lnet_msg_t, msg_activelist);
+
+                LASSERT(msg->msg_onactivelist);
+                msg->msg_onactivelist = 0;
+                cfs_list_del(&msg->msg_activelist);
+                lnet_msg_free(msg);
+                count++;
+        }
+
+        if (count > 0)
+                CERROR("%d active msg on exit\n", count);
+
+        if (container->msc_finalizers != NULL) {
+                LIBCFS_FREE(container->msc_finalizers,
+                            container->msc_nfinalizers *
+                            sizeof(*container->msc_finalizers));
+                container->msc_finalizers = NULL;
+        }
+#ifdef LNET_USE_LIB_FREELIST
+        lnet_freelist_fini(&container->msc_freelist);
+#endif
+        container->msc_init = 0;
 }

+int
+lnet_msg_container_setup(struct lnet_msg_container *container)
+{
+        int rc;
+
+        container->msc_init = 1;
+
+        CFS_INIT_LIST_HEAD(&container->msc_active);
+        CFS_INIT_LIST_HEAD(&container->msc_finalizing);
+
+#ifdef LNET_USE_LIB_FREELIST
+        memset(&container->msc_freelist, 0, sizeof(lnet_freelist_t));
+
+        rc = lnet_freelist_init(&container->msc_freelist,
+                                LNET_FL_MAX_MSGS, sizeof(lnet_msg_t));
+        if (rc != 0) {
+                CERROR("Failed to init freelist for message container\n");
+                lnet_msg_container_cleanup(container);
+                return rc;
+        }
+#else
+        rc = 0;
+#endif
+        /* number of CPUs */
+        container->msc_nfinalizers = cfs_cpt_weight(cfs_cpt_table,
+                                                    CFS_CPT_ANY);
+        LIBCFS_ALLOC(container->msc_finalizers,
+                     container->msc_nfinalizers *
+                     sizeof(*container->msc_finalizers));
+
+        if (container->msc_finalizers == NULL) {
+                CERROR("Failed to allocate message finalizers\n");
+                lnet_msg_container_cleanup(container);
+                return -ENOMEM;
+        }
+
+        return rc;
+}
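
The "recursion breaker" in the new lnet_finalize() is the subtle part of this patch: completing one message can send another (e.g. completing a PUT triggers sending its ACK via lnet_send()), which re-enters lnet_finalize() on the same thread. The sketch below illustrates the pattern outside of LNet. It is not Lustre code: all names (finalize, complete_one, NFINALIZERS, busy) are hypothetical, a pthread mutex stands in for LNET_LOCK, and a plain singly-linked queue stands in for msc_finalizing.

#include <pthread.h>
#include <stddef.h>

#define NFINALIZERS 4

struct completion {
        struct completion *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t finalizer[NFINALIZERS];  /* threads currently draining */
static int busy[NFINALIZERS];             /* is this slot occupied? */
static struct completion *head;
static struct completion **tail = &head;

/* Stand-in for lnet_complete_msg_locked(); in LNet this may re-enter
 * finalize(), e.g. when completing a PUT queues its ACK. */
static void
complete_one(struct completion *c)
{
        (void)c;        /* deliver the event, return credits, etc. */
}

void
finalize(struct completion *c)
{
        int my_slot = -1;
        int i;

        pthread_mutex_lock(&lock);

        /* Always queue first; someone will drain it, maybe not us. */
        c->next = NULL;
        *tail = c;
        tail = &c->next;

        /* Recursion breaker: if this thread already holds a finalizer
         * slot further up the call stack, or every slot is taken, leave
         * the entry queued for whoever is currently draining. */
        for (i = 0; i < NFINALIZERS; i++) {
                if (busy[i] && pthread_equal(finalizer[i], pthread_self()))
                        goto out;
                if (my_slot < 0 && !busy[i])
                        my_slot = i;
        }
        if (my_slot < 0)
                goto out;

        busy[my_slot] = 1;
        finalizer[my_slot] = pthread_self();

        /* Drain the queue; complete_one() may append more entries or
         * re-enter finalize(), which now bails out at the check above. */
        while (head != NULL) {
                struct completion *next = head;

                head = next->next;
                if (head == NULL)
                        tail = &head;
                complete_one(next);
        }

        busy[my_slot] = 0;
out:
        pthread_mutex_unlock(&lock);
}

The queue-first, drain-if-possible ordering is what bounds the stack depth: a re-entrant call always finds its own thread registered in a slot and returns right after queuing, so the outermost invocation picks up the extra work in its drain loop instead of recursing deeper.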