4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2011, 2012, Whamcloud, Inc.
27 * This file is part of Lustre, http://www.lustre.org/
28 * Lustre is a trademark of Sun Microsystems, Inc.
32 * portal & match routines
34 * Author: liang@whamcloud.com
37 #define DEBUG_SUBSYSTEM S_LNET
39 #include <lnet/lib-lnet.h>
/*
 * Check the matching style of an ME against portal \a index.
 *
 * The ME is "unique" when it ignores no match bits and names a concrete
 * NID and PID; any other combination is a wildcard ME.  A portal that has
 * neither type flag set yet is given one here; a portal that already has
 * a type is compared against the ME's style.
 *
 * NOTE(review): \a mbits is not referenced in the lines visible here.
 */
42 lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id,
43 __u64 mbits, __u64 ignore_bits)
45 struct lnet_portal *ptl = the_lnet.ln_portals[index];
/* unique == exact match bits coming from one specific peer */
48 unique = ignore_bits == 0 &&
49 match_id.nid != LNET_NID_ANY &&
50 match_id.pid != LNET_PID_ANY;
/* a portal must never carry both type flags at the same time */
52 LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
54 /* prefer to check w/o any lock */
55 if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
58 /* unset, new portal */
60 /* check again with lock */
61 if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
/* the first ME on the portal decides the portal's type; which of the two
 * flags is set is presumably selected by `unique` - TODO confirm */
68 lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
70 lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
/* the ME's style conflicts with the type already set on the portal */
77 if ((lnet_ptl_is_unique(ptl) && !unique) ||
78 (lnet_ptl_is_wildcard(ptl) && unique))
/*
 * Try to match the incoming request described by \a info against \a md
 * and the ME it hangs off (md->md_me).
 *
 * \retval LNET_MATCHMD_NONE  operation, peer id or match bits do not fit,
 *                            or the MD is already exhausted
 * \retval LNET_MATCHMD_DROP  the request is longer than the space allowed
 *                            and the MD does not permit truncation
 * \retval LNET_MATCHMD_OK    \a msg has been attached to \a md and the MD
 *                            offset has been advanced
 */
84 lnet_try_match_md(lnet_libmd_t *md,
85 struct lnet_match_info *info, struct lnet_msg *msg)
87 /* ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock;
88 * lnet_match_blocked_msg() relies on this to avoid races */
91 lnet_me_t *me = md->md_me;
93 /* mismatched MD op */
94 if ((md->md_options & info->mi_opc) == 0)
95 return LNET_MATCHMD_NONE;
/* an exhausted MD cannot take any further request */
98 if (lnet_md_exhausted(md))
99 return LNET_MATCHMD_NONE;
101 /* mismatched ME nid/pid? */
102 if (me->me_match_id.nid != LNET_NID_ANY &&
103 me->me_match_id.nid != info->mi_id.nid)
104 return LNET_MATCHMD_NONE;
106 if (me->me_match_id.pid != LNET_PID_ANY &&
107 me->me_match_id.pid != info->mi_id.pid)
108 return LNET_MATCHMD_NONE;
110 /* mismatched ME matchbits? */
/* XOR leaves the differing bits; bits listed in me_ignore_bits are masked
 * out before the comparison */
111 if (((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) != 0)
112 return LNET_MATCHMD_NONE;
114 /* Hurrah! This _is_ a match; check it out... */
/* the offset is the MD's own running offset unless LNET_MD_MANAGE_REMOTE
 * is set, in which case the sender-supplied offset is used (the second
 * assignment is presumably the else branch - TODO confirm) */
116 if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0)
117 offset = md->md_offset;
119 offset = info->mi_roffset;
/* space allowed for this request: the fixed md_max_size when
 * LNET_MD_MAX_SIZE is set, otherwise whatever is left after offset */
121 if ((md->md_options & LNET_MD_MAX_SIZE) != 0) {
122 mlength = md->md_max_size;
/* NOTE(review): this asserts on md->md_offset, not on the `offset` chosen
 * above; the two can differ when LNET_MD_MANAGE_REMOTE is set - confirm
 * this is intended */
123 LASSERT(md->md_offset + mlength <= md->md_length);
125 mlength = md->md_length - offset;
128 if (info->mi_rlength <= mlength) { /* fits in allowed space */
129 mlength = info->mi_rlength;
130 } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) {
131 /* this packet _really_ is too big */
132 CERROR("Matching packet from %s, match "LPU64
133 " length %d too big: %d left, %d allowed\n",
134 libcfs_id2str(info->mi_id), info->mi_mbits,
135 info->mi_rlength, md->md_length - offset, mlength);
137 return LNET_MATCHMD_DROP;
/* if neither branch above was taken the request is larger than mlength
 * but LNET_MD_TRUNCATE is set, so mlength stays at the allowed space */
140 /* Commit to this ME/MD */
141 CDEBUG(D_NET, "Incoming %s index %x from %s of "
142 "length %d/%d into md "LPX64" [%d] + %d\n",
143 (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
144 info->mi_portal, libcfs_id2str(info->mi_id), mlength,
145 info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
147 lnet_msg_attach_md(msg, md, offset, mlength);
/* advance the MD's running offset past the region just handed out */
148 md->md_offset = offset + mlength;
150 /* Auto-unlink NOW, so the ME gets unlinked if required.
151 * We bumped md->md_refcount above so the MD just gets flagged
152 * for unlink when it is finalized. */
153 if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 &&
154 lnet_md_exhausted(md)) {
158 return LNET_MATCHMD_OK;
/*
 * Select the match-table an ME should be attached to on portal \a index.
 *
 * The index is asserted to be inside the portals table, and the ME's
 * matching style is checked against the portal with
 * lnet_ptl_match_type().  On the path shown, the portal's single
 * match-table is returned.
 *
 * NOTE(review): \a pos is not referenced in the lines visible here.
 */
161 struct lnet_match_table *
162 lnet_mt_of_attach(unsigned int index, lnet_process_id_t id,
163 __u64 mbits, __u64 ignore_bits, lnet_ins_pos_t pos)
165 struct lnet_portal *ptl;
167 LASSERT(index < the_lnet.ln_nportals);
/* a zero result means the ME's style is incompatible with the portal */
169 if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
172 ptl = the_lnet.ln_portals[index];
173 /* NB: Now we only have one match-table for each portal,
174 * and will have match-table per CPT in upcoming changes,
175 * ME will be scattered to different match-tables based
176 * on attaching information */
177 return ptl->ptl_mtable;
/*
 * Select the match-table an incoming request on portal \a index should be
 * matched against.
 *
 * NOTE(review): \a id and \a mbits are not referenced in the lines visible
 * here; with one match-table per portal only \a index matters.
 */
180 struct lnet_match_table *
181 lnet_mt_of_match(unsigned int index, lnet_process_id_t id, __u64 mbits)
183 struct lnet_portal *ptl;
185 LASSERT(index < the_lnet.ln_nportals);
187 ptl = the_lnet.ln_portals[index];
/* the portal has no matching type set and is not lazy either; the caller
 * lnet_ptl_match_md() tests the result against NULL, so this case
 * presumably yields NULL - TODO confirm */
188 if (!lnet_ptl_is_unique(ptl) &&
189 !lnet_ptl_is_wildcard(ptl) && !lnet_ptl_is_lazy(ptl))
192 /* NB: Now we only have one match-table for each portal,
193 * and will have match-table per CPT in upcoming changes,
194 * request will be scattered to different match-tables based
195 * on matching information */
196 return ptl->ptl_mtable;
/*
 * Return the list head holding the MEs that a request with peer \a id and
 * match bits \a mbits has to be compared against.
 *
 * A wildcard portal keeps its MEs on the single list mt_mlist.  A unique
 * portal spreads them over the hash array mt_mhash; the bucket is derived
 * from mbits + nid + pid.
 */
200 lnet_mt_match_head(struct lnet_match_table *mtable,
201 lnet_process_id_t id, __u64 mbits)
203 struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
205 if (lnet_ptl_is_wildcard(ptl)) {
206 return &mtable->mt_mlist;
208 } else if (lnet_ptl_is_unique(ptl)) {
/* fold the three values into one key, then reduce it to a bucket index of
 * LNET_MT_HASH_BITS bits */
209 unsigned long hash = mbits + id.nid + id.pid;
211 hash = cfs_hash_long(hash, LNET_MT_HASH_BITS);
212 return &mtable->mt_mhash[hash];
/*
 * Walk the MEs of \a mtable that are candidates for the request in
 * \a info and try each ME's MD in turn with lnet_try_match_md().
 *
 * The first LNET_MATCHMD_OK or LNET_MATCHMD_DROP result ends the walk and
 * is returned.  If the walk finds no match, a GET, or any request on a
 * non-lazy portal, is dropped; the remaining case (a non-GET request on a
 * lazy portal) returns LNET_MATCHMD_NONE.
 */
219 lnet_mt_match_md(struct lnet_match_table *mtable,
220 struct lnet_match_info *info, struct lnet_msg *msg)
227 head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
228 if (head == NULL) /* nobody posted anything on this portal */
/* the _safe iterator is used because a successful match may unlink the
 * current ME (see the auto-unlink note in lnet_try_match_md()) */
231 cfs_list_for_each_entry_safe(me, tmp, head, me_list) {
232 /* ME attached but MD not attached yet */
233 if (me->me_md == NULL)
/* the ME and its MD must point at each other */
236 LASSERT(me == me->me_md->md_me);
238 rc = lnet_try_match_md(me->me_md, info, msg);
243 case LNET_MATCHMD_NONE:
246 case LNET_MATCHMD_OK:
247 return LNET_MATCHMD_OK;
249 case LNET_MATCHMD_DROP:
250 return LNET_MATCHMD_DROP;
/* no ME matched: only a non-GET request on a lazy portal is not dropped */
256 if (info->mi_opc == LNET_MD_OP_GET ||
257 !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
258 return LNET_MATCHMD_DROP;
260 return LNET_MATCHMD_NONE;
/*
 * Match an incoming request (\a info, \a msg) against the portal named by
 * info->mi_portal.
 *
 * The request is dropped when the portal index is out of range, when no
 * match-table is available for it, or when LNet is shutting down.
 * Otherwise the result of lnet_mt_match_md() decides; if that result is
 * LNET_MATCHMD_NONE the message can be marked msg_rx_delayed and queued
 * on the portal's ptl_msgq.
 */
264 lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
266 struct lnet_match_table *mtable;
267 struct lnet_portal *ptl;
270 CDEBUG(D_NET, "Request from %s of length %d into portal %d "
271 "MB="LPX64"\n", libcfs_id2str(info->mi_id),
272 info->mi_rlength, info->mi_portal, info->mi_mbits);
/* NOTE(review): valid indices are 0 .. ln_nportals - 1 given the >= test,
 * but the message below prints ln_nportals as the upper bound */
274 if (info->mi_portal >= the_lnet.ln_nportals) {
275 CERROR("Invalid portal %d not in [0-%d]\n",
276 info->mi_portal, the_lnet.ln_nportals);
277 return LNET_MATCHMD_DROP;
280 mtable = lnet_mt_of_match(info->mi_portal,
281 info->mi_id, info->mi_mbits);
/* no match-table: the request arrived before the portal was usable */
282 if (mtable == NULL) {
283 CDEBUG(D_NET, "Drop early message from %s of length %d into "
284 "portal %d MB="LPX64"\n",
285 libcfs_id2str(info->mi_id), info->mi_rlength,
286 info->mi_portal, info->mi_mbits);
287 return LNET_MATCHMD_DROP;
290 ptl = the_lnet.ln_portals[info->mi_portal];
293 if (the_lnet.ln_shutdown) {
294 rc = LNET_MATCHMD_DROP;
298 rc = lnet_mt_match_md(mtable, info, msg);
299 if (rc != LNET_MATCHMD_NONE) /* matched or dropping */
/* NOTE(review): what happens when the message is not ready to be delayed
 * is not visible in this excerpt */
302 if (!msg->msg_rx_ready_delay)
/* park the message on the portal until a matching MD is attached */
305 LASSERT(!msg->msg_rx_delayed);
306 msg->msg_rx_delayed = 1;
307 cfs_list_add_tail(&msg->msg_list, &ptl->ptl_msgq);
310 "Delaying %s from %s portal %d MB "LPX64" offset %d len %d\n",
311 info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
312 libcfs_id2str(info->mi_id), info->mi_portal,
313 info->mi_mbits, info->mi_roffset, info->mi_rlength);
/*
 * Detach \a md from \a me.  The two objects must currently reference each
 * other, which is asserted before anything else happens.
 */
320 lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md)
322 LASSERT(me->me_md == md && md->md_me == me);
/*
 * Attach the brand new \a md to \a me and offer it to the messages that
 * are currently delayed on the portal's ptl_msgq.
 *
 * Each delayed message is re-described as a PUT from its header fields
 * and tried against \a md.  Messages that match are moved to \a matches,
 * messages that must be dropped are moved to \a drops, and messages that
 * do not match stay on the portal queue.
 */
328 /* called with lnet_res_lock held */
330 lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
331 cfs_list_t *matches, cfs_list_t *drops)
333 struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
337 LASSERT(md->md_refcount == 0); /* a brand new MD */
/* the _safe iterator is needed because matched entries are unlinked from
 * ptl_msgq inside the loop */
342 cfs_list_for_each_entry_safe(msg, tmp, &ptl->ptl_msgq, msg_list) {
343 struct lnet_match_info info;
347 LASSERT(msg->msg_rx_delayed);
/* rebuild the match info from the message header; `hdr` is presumably the
 * header of `msg` - its assignment is not visible in this excerpt */
350 info.mi_id.nid = hdr->src_nid;
351 info.mi_id.pid = hdr->src_pid;
352 info.mi_opc = LNET_MD_OP_PUT;
353 info.mi_portal = hdr->msg.put.ptl_index;
354 info.mi_rlength = hdr->payload_length;
355 info.mi_roffset = hdr->msg.put.offset;
356 info.mi_mbits = hdr->msg.put.match_bits;
358 rc = lnet_try_match_md(md, &info, msg);
360 if (rc == LNET_MATCHMD_NONE)
363 /* Hurrah! This _is_ a match */
364 cfs_list_del(&msg->msg_list);
366 if (rc == LNET_MATCHMD_OK) {
367 cfs_list_add_tail(&msg->msg_list, matches);
369 CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
370 "match "LPU64" offset %d length %d.\n",
371 libcfs_id2str(info.mi_id),
372 info.mi_portal, info.mi_mbits,
373 info.mi_roffset, info.mi_rlength);
/* the only other non-NONE result is DROP */
375 LASSERT(rc == LNET_MATCHMD_DROP);
377 cfs_list_add_tail(&msg->msg_list, drops);
/* an exhausted MD cannot take more messages; presumably the scan stops
 * here - TODO confirm */
380 if (lnet_md_exhausted(md))
/*
 * Release the match-table of \a ptl.
 *
 * The portal's delayed-message queue must already be empty.  Any ME that
 * is still linked on the wildcard list or in a hash bucket is reported
 * with CERROR and unlinked; then the hash array and the match-table are
 * freed and ptl_mtable is reset to NULL.  The NULL checks on ptl_mtable
 * and mt_mhash let the function run on a portal that was never set up, or
 * only partially set up.
 */
386 lnet_ptl_cleanup(struct lnet_portal *ptl)
388 struct lnet_match_table *mtable;
390 LASSERT(cfs_list_empty(&ptl->ptl_msgq));
392 if (ptl->ptl_mtable == NULL) /* uninitialized portal */
395 do { /* iterate over match-tables when we have percpt match-table */
400 mtable = ptl->ptl_mtable;
402 if (mtable->mt_mhash == NULL) /* uninitialized match-table */
405 mhash = mtable->mt_mhash;
/* drain MEs left on the wildcard list */
407 while (!cfs_list_empty(&mtable->mt_mlist)) {
408 me = cfs_list_entry(mtable->mt_mlist.next,
410 CERROR("Active wildcard ME %p on exit\n", me);
411 cfs_list_del(&me->me_list);
/* drain MEs left in every hash bucket */
415 for (j = 0; j < LNET_MT_HASH_SIZE; j++) {
416 while (!cfs_list_empty(&mhash[j])) {
417 me = cfs_list_entry(mhash[j].next,
419 CERROR("Active unique ME %p on exit\n", me);
420 cfs_list_del(&me->me_list);
/* sizes passed to LIBCFS_FREE mirror the LIBCFS_ALLOC calls in
 * lnet_ptl_setup() */
425 LIBCFS_FREE(mhash, sizeof(*mhash) * LNET_MT_HASH_SIZE);
428 LIBCFS_FREE(ptl->ptl_mtable, sizeof(*mtable));
429 ptl->ptl_mtable = NULL;
/*
 * Initialise portal \a ptl as portal number \a index: record the index,
 * initialise the delayed-message queue, allocate the match-table and its
 * hash array of LNET_MT_HASH_SIZE list heads, and initialise the wildcard
 * list.  Allocation failures are reported with CERROR.
 */
433 lnet_ptl_setup(struct lnet_portal *ptl, int index)
435 struct lnet_match_table *mtable;
439 ptl->ptl_index = index;
440 CFS_INIT_LIST_HEAD(&ptl->ptl_msgq);
442 LIBCFS_ALLOC(mtable, sizeof(*mtable));
443 if (mtable == NULL) {
444 CERROR("Failed to create match table for portal %d\n", index);
/* publish the table on the portal before filling it in, so that
 * lnet_ptl_cleanup() can find and free it */
448 ptl->ptl_mtable = mtable;
449 do { /* iterate over match-tables when we have percpt match-table */
450 LIBCFS_ALLOC(mhash, sizeof(*mhash) * LNET_MT_HASH_SIZE);
452 CERROR("Failed to create match hash for portal %d\n",
457 mtable->mt_mhash = mhash;
/* every hash bucket starts out as an empty list */
458 for (j = 0; j < LNET_MT_HASH_SIZE; j++)
459 CFS_INIT_LIST_HEAD(&mhash[j]);
461 CFS_INIT_LIST_HEAD(&mtable->mt_mlist);
462 mtable->mt_portal = index;
/* presumably the failure path: undo whatever was set up - TODO confirm */
467 lnet_ptl_cleanup(ptl);
/*
 * Tear down the global portals table: clean up every portal, free the
 * array and reset the_lnet.ln_portals to NULL.  The NULL check makes the
 * call harmless when the table does not exist.
 */
472 lnet_portals_destroy(void)
/* nothing to do when the table was never allocated or is already gone */
476 if (the_lnet.ln_portals == NULL)
479 for (i = 0; i < the_lnet.ln_nportals; i++)
480 lnet_ptl_cleanup(the_lnet.ln_portals[i]);
482 cfs_array_free(the_lnet.ln_portals);
483 the_lnet.ln_portals = NULL;
/*
 * Build the global portals table: allocate an array of MAX_PORTALS
 * entries of struct lnet_portal and set up each one with
 * lnet_ptl_setup().  If any portal fails to set up, the whole table is
 * destroyed again.
 */
487 lnet_portals_create(void)
492 size = sizeof(struct lnet_portal);
494 the_lnet.ln_nportals = MAX_PORTALS;
495 the_lnet.ln_portals = cfs_array_alloc(the_lnet.ln_nportals, size);
496 if (the_lnet.ln_portals == NULL) {
497 CERROR("Failed to allocate portals table\n");
501 for (i = 0; i < the_lnet.ln_nportals; i++) {
/* a non-zero result from lnet_ptl_setup() is treated as failure */
502 if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
503 lnet_portals_destroy();
512 * Turn on the lazy portal attribute. Use with caution!
514 * This portal attribute only affects incoming PUT requests to the portal,
515 * and is off by default. By default, if there's no matching MD for an
516 * incoming PUT request, it is simply dropped. With the lazy attribute on,
517 * such requests are queued indefinitely until either a matching MD is
518 * posted to the portal or the lazy attribute is turned off.
520 * It would prevent dropped requests, however it should be regarded as the
521 * last line of defense - i.e. users must keep a close watch on active
522 * buffers on a lazy portal and once it becomes too low post more buffers as
523 * soon as possible. This is because delayed requests usually have detrimental
524 * effects on underlying network connections. A few delayed requests often
525 * suffice to bring an underlying connection to a complete halt, due to flow
526 * control mechanisms.
528 * There's also a DOS attack risk. If users don't post match-all MDs on a
529 * lazy portal, a malicious peer can easily stop a service by sending some
530 * PUT requests with match bits that won't match any MD. A routed server is
531 * especially vulnerable since the connections to its neighbor routers are
532 * shared among all clients.
534 * \param portal Index of the portal to enable the lazy attribute on.
536 * \retval 0 On success.
537 * \retval -EINVAL If \a portal is not a valid index.
540 LNetSetLazyPortal(int portal)
542 struct lnet_portal *ptl;
/* reject indices outside 0 .. ln_nportals - 1 */
544 if (portal < 0 || portal >= the_lnet.ln_nportals)
547 CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
548 ptl = the_lnet.ln_portals[portal];
/* mark the portal lazy by setting LNET_PTL_LAZY on it */
551 lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
558 * Turn off the lazy portal attribute. Delayed requests on the portal,
559 * if any, will be all dropped when this function returns.
561 * \param portal Index of the portal to disable the lazy attribute on.
563 * \retval 0 On success.
564 * \retval -EINVAL If \a portal is not a valid index.
567 LNetClearLazyPortal(int portal)
569 struct lnet_portal *ptl;
/* local list that receives the messages still delayed on the portal */
570 CFS_LIST_HEAD (zombies);
/* reject indices outside 0 .. ln_nportals - 1 */
572 if (portal < 0 || portal >= the_lnet.ln_nportals)
575 ptl = the_lnet.ln_portals[portal];
/* special-case a portal that is not lazy (handling not visible here) */
579 if (!lnet_ptl_is_lazy(ptl)) {
584 if (the_lnet.ln_shutdown)
585 CWARN("Active lazy portal %d on exit\n", portal);
587 CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
589 /* grab all the blocked messages atomically */
590 cfs_list_splice_init(&ptl->ptl_msgq, &zombies);
592 lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
/* the messages taken off the portal queue are dropped, with the reason
 * string passed along */
596 lnet_drop_delayed_msg_list(&zombies, "Clearing lazy portal attr");