X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Flnet%2Flib-move.c;h=abdceaa2d9cb604d652ba1f42bf7992119ee9af3;hp=3ae5fc14553d7d41b91c30b748f3628f7391efaf;hb=bb107521e14572579a78f991fe4d62b038d979ce;hpb=6e3ec5812ebd1b5ecf7cae584f429b013ffe7431 diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 3ae5fc1..abdceaa 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -26,7 +26,7 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* @@ -155,6 +155,7 @@ lnet_match_md(int index, int op_mask, lnet_process_id_t src, lnet_libmd_t **md_out) { lnet_portal_t *ptl = &the_lnet.ln_portals[index]; + cfs_list_t *head; lnet_me_t *me; lnet_me_t *tmp; lnet_libmd_t *md; @@ -169,7 +170,11 @@ lnet_match_md(int index, int op_mask, lnet_process_id_t src, return LNET_MATCHMD_DROP; } - cfs_list_for_each_entry_safe_typed (me, tmp, &ptl->ptl_ml, + head = lnet_portal_me_head(index, src, match_bits); + if (head == NULL) /* nobody posted anything on this portal */ + goto out; + + cfs_list_for_each_entry_safe_typed (me, tmp, head, lnet_me_t, me_list) { md = me->me_md; @@ -199,8 +204,9 @@ lnet_match_md(int index, int op_mask, lnet_process_id_t src, /* not reached */ } + out: if (op_mask == LNET_MD_OP_GET || - (ptl->ptl_options & LNET_PTL_LAZY) == 0) + !lnet_portal_is_lazy(ptl)) return LNET_MATCHMD_DROP; return LNET_MATCHMD_NONE; @@ -1046,7 +1052,7 @@ lnet_post_send_locked (lnet_msg_t *msg, int do_send) if (lp->lp_txcredits < 0) { msg->msg_delayed = 1; - cfs_list_add_tail (&msg->msg_list, &lp->lp_txq); + cfs_list_add_tail(&msg->msg_list, &lp->lp_txq); return EAGAIN; } } @@ -1063,7 +1069,7 @@ lnet_post_send_locked (lnet_msg_t *msg, int do_send) if (ni->ni_txcredits < 0) { msg->msg_delayed = 1; - cfs_list_add_tail (&msg->msg_list, &ni->ni_txq); + cfs_list_add_tail(&msg->msg_list, &ni->ni_txq); return EAGAIN; } } @@ -1561,6 +1567,34 @@ lnet_drop_delayed_put(lnet_msg_t *msg, char *reason) LNET_UNLOCK(); } +/** + * Turn on the lazy portal attribute. Use with caution! + * + * This portal attribute only affects incoming PUT requests to the portal, + * and is off by default. By default, if there's no matching MD for an + * incoming PUT request, it is simply dropped. With the lazy attribute on, + * such requests are queued indefinitely until either a matching MD is + * posted to the portal or the lazy attribute is turned off. + * + * It would prevent dropped requests, however it should be regarded as the + * last line of defense - i.e. users must keep a close watch on active + * buffers on a lazy portal and once it becomes too low post more buffers as + * soon as possible. This is because delayed requests usually have detrimental + * effects on underlying network connections. A few delayed requests often + * suffice to bring an underlying connection to a complete halt, due to flow + * control mechanisms. + * + * There's also a DOS attack risk. If users don't post match-all MDs on a + * lazy portal, a malicious peer can easily stop a service by sending some + * PUT requests with match bits that won't match any MD. A routed server is + * especially vulnerable since the connections to its neighbor routers are + * shared among all clients. + * + * \param portal Index of the portal to enable the lazy attribute on. + * + * \retval 0 On success. + * \retval -EINVAL If \a portal is not a valid index. + */ int LNetSetLazyPortal(int portal) { @@ -1572,14 +1606,21 @@ LNetSetLazyPortal(int portal) CDEBUG(D_NET, "Setting portal %d lazy\n", portal); LNET_LOCK(); - - ptl->ptl_options |= LNET_PTL_LAZY; - + lnet_portal_setopt(ptl, LNET_PTL_LAZY); LNET_UNLOCK(); return 0; } +/** + * Turn off the lazy portal attribute. Delayed requests on the portal, + * if any, will be all dropped when this function returns. + * + * \param portal Index of the portal to disable the lazy attribute on. + * + * \retval 0 On success. + * \retval -EINVAL If \a portal is not a valid index. + */ int LNetClearLazyPortal(int portal) { @@ -1592,7 +1633,7 @@ LNetClearLazyPortal(int portal) LNET_LOCK(); - if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) { + if (!lnet_portal_is_lazy(ptl)) { LNET_UNLOCK(); return 0; } @@ -1607,7 +1648,7 @@ LNetClearLazyPortal(int portal) cfs_list_del_init(&ptl->ptl_msgq); ptl->ptl_msgq_version++; - ptl->ptl_options &= ~LNET_PTL_LAZY; + lnet_portal_unsetopt(ptl, LNET_PTL_LAZY); LNET_UNLOCK(); @@ -1662,12 +1703,13 @@ lnet_match_blocked_msg(lnet_libmd_t *md) cfs_list_t *tmp; cfs_list_t *entry; lnet_msg_t *msg; + lnet_portal_t *ptl; lnet_me_t *me = md->md_me; - lnet_portal_t *ptl = &the_lnet.ln_portals[me->me_portal]; LASSERT (me->me_portal < (unsigned int)the_lnet.ln_nportals); - if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) { + ptl = &the_lnet.ln_portals[me->me_portal]; + if (!lnet_portal_is_lazy(ptl)) { LASSERT (cfs_list_empty(&ptl->ptl_msgq)); return; } @@ -1773,7 +1815,6 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset); index = hdr->msg.put.ptl_index; - ptl = &the_lnet.ln_portals[index]; LNET_LOCK(); @@ -1792,6 +1833,7 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) return 0; case LNET_MATCHMD_NONE: + ptl = &the_lnet.ln_portals[index]; version = ptl->ptl_ml_version; rc = 0; @@ -1800,7 +1842,7 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) if (rc == 0 && !the_lnet.ln_shutdown && - ((ptl->ptl_options & LNET_PTL_LAZY) != 0)) { + lnet_portal_is_lazy(ptl)) { if (version != ptl->ptl_ml_version) goto again; @@ -2356,6 +2398,50 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, return 0; } +/** + * Initiate an asynchronous PUT operation. + * + * There are several events associated with a PUT: completion of the send on + * the initiator node (LNET_EVENT_SEND), and when the send completes + * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating + * that the operation was accepted by the target. The event LNET_EVENT_PUT is + * used at the target node to indicate the completion of incoming data + * delivery. + * + * The local events will be logged in the EQ associated with the MD pointed to + * by \a mdh handle. Using a MD without an associated EQ results in these + * events being discarded. In this case, the caller must have another + * mechanism (e.g., a higher level protocol) for determining when it is safe + * to modify the memory region associated with the MD. + * + * Note that LNet does not guarantee the order of LNET_EVENT_SEND and + * LNET_EVENT_ACK, though intuitively ACK should happen after SEND. + * + * \param self Indicates the NID of a local interface through which to send + * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself. + * \param mdh A handle for the MD that describes the memory to be sent. The MD + * must be "free floating" (See LNetMDBind()). + * \param ack Controls whether an acknowledgment is requested. + * Acknowledgments are only sent when they are requested by the initiating + * process and the target MD enables them. + * \param target A process identifier for the target process. + * \param portal The index in the \a target's portal table. + * \param match_bits The match bits to use for MD selection at the target + * process. + * \param offset The offset into the target MD (only used when the target + * MD has the LNET_MD_MANAGE_REMOTE option set). + * \param hdr_data 64 bits of user data that can be included in the message + * header. This data is written to an event queue entry at the target if an + * EQ is present on the matching MD. + * + * \retval 0 Success, and only in this case events will be generated + * and logged to EQ (if it exists). + * \retval -EIO Simulated failure. + * \retval -ENOMEM Memory allocation failure. + * \retval -ENOENT Invalid MD object. + * + * \see lnet_event_t::hdr_data and lnet_event_kind_t. + */ int LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, lnet_process_id_t target, unsigned int portal, @@ -2383,6 +2469,7 @@ LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, libcfs_id2str(target)); return -ENOMEM; } + msg->msg_vmflush = !!cfs_memory_pressure_get(); LNET_LOCK(); @@ -2447,7 +2534,7 @@ LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, rc = lnet_send(self, msg); if (rc != 0) { - CERROR("Error sending PUT to %s: %d\n", + CNETERR( "Error sending PUT to %s: %d\n", libcfs_id2str(target), rc); lnet_finalize (NULL, msg, rc); } @@ -2542,6 +2629,26 @@ lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *reply, unsigned int len) reply->msg_ev.mlength = len; } +/** + * Initiate an asynchronous GET operation. + * + * On the initiator node, an LNET_EVENT_SEND is logged when the GET request + * is sent, and an LNET_EVENT_REPLY is logged when the data returned from + * the target node in the REPLY has been written to local MD. + * + * On the target node, an LNET_EVENT_GET is logged when the GET request + * arrives and is accepted into a MD. + * + * \param self,target,portal,match_bits,offset See the discussion in LNetPut(). + * \param mdh A handle for the MD that describes the memory into which the + * requested data will be received. The MD must be "free floating" (See LNetMDBind()). + * + * \retval 0 Success, and only in this case events will be generated + * and logged to EQ (if it exists) of the MD. + * \retval -EIO Simulated failure. + * \retval -ENOMEM Memory allocation failure. + * \retval -ENOENT Invalid MD object. + */ int LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, lnet_process_id_t target, unsigned int portal, @@ -2624,7 +2731,7 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, rc = lnet_send(self, msg); if (rc < 0) { - CERROR("error sending GET to %s: %d\n", + CNETERR( "Error sending GET to %s: %d\n", libcfs_id2str(target), rc); lnet_finalize (NULL, msg, rc); } @@ -2633,6 +2740,20 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, return 0; } +/** + * Calculate distance to node at \a dstnid. + * + * \param dstnid Target NID. + * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid + * is saved here. + * \param orderp If not NULL, order of the route to reach \a dstnid is saved + * here. + * + * \retval 0 If \a dstnid belongs to a local interface, and reserved option + * local_nid_dist_zero is set, which is the default. + * \retval positives Distance to target NID, i.e. number of hops plus one. + * \retval -EHOSTUNREACH If \a dstnid is not reachable. + */ int LNetDist (lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) { @@ -2714,6 +2835,23 @@ LNetDist (lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) return -EHOSTUNREACH; } +/** + * Set the number of asynchronous messages expected from a target process. + * + * This function is only meaningful for userspace callers. It's a no-op when + * called from kernel. + * + * Asynchronous messages are those that can come from a target when the + * userspace process is not waiting for IO to complete; e.g., AST callbacks + * from Lustre servers. Specifying the expected number of such messages + * allows them to be eagerly received when user process is not running in + * LNet; otherwise network errors may occur. + * + * \param id Process ID of the target process. + * \param nasync Number of asynchronous messages expected from the target. + * + * \return 0 on success, and an error code otherwise. + */ int LNetSetAsync(lnet_process_id_t id, int nasync) { @@ -2731,7 +2869,6 @@ LNetSetAsync(lnet_process_id_t id, int nasync) int rc2; /* Target on a local network? */ - ni = lnet_net2ni(LNET_NIDNET(id.nid)); if (ni != NULL) { if (ni->ni_lnd->lnd_setasync != NULL)