4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2011, 2012, Whamcloud, Inc.
27 * This file is part of Lustre, http://www.lustre.org/
28 * Lustre is a trademark of Sun Microsystems, Inc.
32 * portal & match routines
34 * Author: liang@whamcloud.com
37 #define DEBUG_SUBSYSTEM S_LNET
39 #include <lnet/lib-lnet.h>
/*
 * Check whether an ME described by (match_id, mbits, ignore_bits) can be
 * attached to portal \a index, typing the portal on first use.
 *
 * An ME is "unique" when it ignores no match bits and names one specific
 * NID and PID.  A portal carries at most one of the UNIQUE / WILDCARD
 * options (asserted below); a still-untyped portal gets one of them set
 * here.
 *
 * NOTE(review): the return statements and the locking calls are not
 * visible in this listing.  lnet_mt_of_attach() treats a zero result as
 * failure -- confirm against the full source.
 */
42 lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id,
43 __u64 mbits, __u64 ignore_bits)
45 struct lnet_portal *ptl = the_lnet.ln_portals[index];
48 unique = ignore_bits == 0 &&
49 match_id.nid != LNET_NID_ANY &&
50 match_id.pid != LNET_PID_ANY;
52 LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));
54 /* prefer to check w/o any lock */
55 if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
58 /* unset, new portal */
60 /* check again with lock */
61 if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
68 lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
70 lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
/* the portal's established type disagrees with the type of this ME */
77 if ((lnet_ptl_is_unique(ptl) && !unique) ||
78 (lnet_ptl_is_wildcard(ptl) && unique))
/*
 * Try to match one incoming request against a single MD and the ME it
 * hangs off.
 *
 * Returns LNET_MATCHMD_NONE when the MD does not accept the operation in
 * \a op_mask, when the MD is exhausted, or when the ME's nid, pid or match
 * bits (outside its ignore bits) differ from the request.
 * Returns LNET_MATCHMD_DROP when the request is longer than the space the
 * MD allows and the MD does not have LNET_MD_TRUNCATE set.
 * Returns LNET_MATCHMD_OK after \a msg has been attached to the MD and the
 * MD's offset has been advanced past the consumed region.
 *
 * NOTE(review): several lines (else branches, the truncation path and the
 * body of the auto-unlink branch) are not visible in this listing.
 */
84 lnet_try_match_md(int index, int op_mask, lnet_process_id_t src,
85 unsigned int rlength, unsigned int roffset,
86 __u64 match_bits, lnet_libmd_t *md, lnet_msg_t *msg)
88 /* ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock;
89 * lnet_match_blocked_msg() relies on this to avoid races */
92 lnet_me_t *me = md->md_me;
94 /* mismatched MD op */
95 if ((md->md_options & op_mask) == 0)
96 return LNET_MATCHMD_NONE;
99 if (lnet_md_exhausted(md))
100 return LNET_MATCHMD_NONE;
102 /* mismatched ME nid/pid? */
103 if (me->me_match_id.nid != LNET_NID_ANY &&
104 me->me_match_id.nid != src.nid)
105 return LNET_MATCHMD_NONE;
107 if (me->me_match_id.pid != LNET_PID_ANY &&
108 me->me_match_id.pid != src.pid)
109 return LNET_MATCHMD_NONE;
111 /* mismatched ME matchbits? */
112 if (((me->me_match_bits ^ match_bits) & ~me->me_ignore_bits) != 0)
113 return LNET_MATCHMD_NONE;
115 /* Hurrah! This _is_ a match; check it out... */
/* without LNET_MD_MANAGE_REMOTE the MD's own current offset is used */
117 if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0)
118 offset = md->md_offset;
/* LNET_MD_MAX_SIZE: allow md_max_size bytes, which must still fit the MD */
122 if ((md->md_options & LNET_MD_MAX_SIZE) != 0) {
123 mlength = md->md_max_size;
124 LASSERT(md->md_offset + mlength <= md->md_length);
126 mlength = md->md_length - offset;
129 if (rlength <= mlength) { /* fits in allowed space */
131 } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) {
132 /* this packet _really_ is too big */
133 CERROR("Matching packet from %s, match "LPU64
134 " length %d too big: %d left, %d allowed\n",
135 libcfs_id2str(src), match_bits, rlength,
136 md->md_length - offset, mlength);
138 return LNET_MATCHMD_DROP;
141 /* Commit to this ME/MD */
142 CDEBUG(D_NET, "Incoming %s index %x from %s of "
143 "length %d/%d into md "LPX64" [%d] + %d\n",
144 (op_mask == LNET_MD_OP_PUT) ? "put" : "get",
145 index, libcfs_id2str(src), mlength, rlength,
146 md->md_lh.lh_cookie, md->md_niov, offset);
148 lnet_msg_attach_md(msg, md, offset, mlength);
/* advance the MD past the region just handed to this message */
149 md->md_offset = offset + mlength;
151 /* Auto-unlink NOW, so the ME gets unlinked if required.
152 * We bumped md->md_refcount above so the MD just gets flagged
153 * for unlink when it is finalized. */
154 if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 &&
155 lnet_md_exhausted(md)) {
159 return LNET_MATCHMD_OK;
/*
 * Pick the match table that an ME with the given id / match bits /
 * ignore bits should be attached to on portal \a index.
 *
 * \a index must be a valid portal (asserted).  The portal's matching type
 * is checked -- and set on first use -- via lnet_ptl_match_type().
 *
 * NOTE(review): what is returned when that check fails is not visible in
 * this listing (presumably NULL -- confirm).  \a pos is not referenced in
 * the visible lines.
 */
162 struct lnet_match_table *
163 lnet_mt_of_attach(unsigned int index, lnet_process_id_t id,
164 __u64 mbits, __u64 ignore_bits, lnet_ins_pos_t pos)
166 struct lnet_portal *ptl;
168 LASSERT(index < the_lnet.ln_nportals);
170 if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
173 ptl = the_lnet.ln_portals[index];
174 /* NB: Now we only have one match-table for each portal,
175 * and will have match-table per CPT in upcoming changes,
176 * ME will be scattered to different match-tables based
177 * on attaching information */
178 return ptl->ptl_mtable;
/*
 * Pick the match table on portal \a index that an incoming request with
 * the given source id and match bits should be matched against.
 *
 * \a index must be a valid portal (asserted).  A portal that is neither
 * unique, wildcard nor lazy is handled separately before the table is
 * returned.
 *
 * NOTE(review): the statement taken for such a portal is not visible in
 * this listing; lnet_ptl_match_md() drops the message when this function
 * yields NULL, so presumably NULL is returned -- confirm.
 */
181 struct lnet_match_table *
182 lnet_mt_of_match(unsigned int index, lnet_process_id_t id, __u64 mbits)
184 struct lnet_portal *ptl;
186 LASSERT(index < the_lnet.ln_nportals);
188 ptl = the_lnet.ln_portals[index];
189 if (!lnet_ptl_is_unique(ptl) &&
190 !lnet_ptl_is_wildcard(ptl) && !lnet_ptl_is_lazy(ptl))
193 /* NB: Now we only have one match-table for each portal,
194 * and will have match-table per CPT in upcoming changes,
195 * request will be scattered to different match-tables based
196 * on matching information */
197 return ptl->ptl_mtable;
/*
 * Return the list head in \a mtable on which MEs for (id, mbits) live.
 *
 * On a wildcard portal every ME sits on the single mt_mlist list.  On a
 * unique portal the list is the mt_mhash bucket selected by hashing
 * mbits + id.nid + id.pid down to LNET_MT_HASH_BITS bits.
 *
 * NOTE(review): the result for a portal with neither type is not visible
 * in this listing; lnet_mt_match_md() checks the result for NULL.
 */
201 lnet_mt_match_head(struct lnet_match_table *mtable,
202 lnet_process_id_t id, __u64 mbits)
204 struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
206 if (lnet_ptl_is_wildcard(ptl)) {
207 return &mtable->mt_mlist;
209 } else if (lnet_ptl_is_unique(ptl)) {
210 unsigned long hash = mbits + id.nid + id.pid;
212 hash = cfs_hash_long(hash, LNET_MT_HASH_BITS);
213 return &mtable->mt_mhash[hash];
/*
 * Walk the MEs of \a mtable that could match (src, match_bits) and try
 * each attached MD in turn with lnet_try_match_md().
 *
 * Returns LNET_MATCHMD_OK or LNET_MATCHMD_DROP as soon as an MD yields
 * that result.  When no MD matched, the request is dropped if it is a
 * GET or if the portal is not lazy; otherwise LNET_MATCHMD_NONE is
 * returned.
 *
 * NOTE(review): the statements taken for a NULL head, for an ME without
 * an MD, and for the LNET_MATCHMD_NONE case are not visible in this
 * listing.
 */
220 lnet_mt_match_md(struct lnet_match_table *mtable,
221 int op_mask, lnet_process_id_t src,
222 unsigned int rlength, unsigned int roffset,
223 __u64 match_bits, lnet_msg_t *msg)
230 head = lnet_mt_match_head(mtable, src, match_bits);
231 if (head == NULL) /* nobody posted anything on this portal */
234 cfs_list_for_each_entry_safe(me, tmp, head, me_list) {
235 /* ME attached but MD not attached yet */
236 if (me->me_md == NULL)
239 LASSERT(me == me->me_md->md_me);
241 rc = lnet_try_match_md(mtable->mt_portal,
242 op_mask, src, rlength, roffset,
243 match_bits, me->me_md, msg);
248 case LNET_MATCHMD_NONE:
251 case LNET_MATCHMD_OK:
252 return LNET_MATCHMD_OK;
254 case LNET_MATCHMD_DROP:
255 return LNET_MATCHMD_DROP;
/* nothing matched: GETs, and any request to a non-lazy portal, are dropped */
261 if (op_mask == LNET_MD_OP_GET ||
262 !lnet_ptl_is_lazy(the_lnet.ln_portals[mtable->mt_portal]))
263 return LNET_MATCHMD_DROP;
265 return LNET_MATCHMD_NONE;
/*
 * Match an incoming request against the MDs posted on portal \a index.
 *
 * The request is dropped (LNET_MATCHMD_DROP) when \a index is out of
 * range, when lnet_mt_of_match() yields no match table, or when LNet is
 * shutting down.  Otherwise the result of lnet_mt_match_md() is used; if
 * that is LNET_MATCHMD_NONE the message may be flagged msg_rx_delayed and
 * queued on the portal's ptl_msgq list.
 *
 * NOTE(review): locking, the exit path and the action taken when
 * msg_rx_ready_delay is clear are not visible in this listing.
 */
269 lnet_ptl_match_md(unsigned int index, int op_mask, lnet_process_id_t src,
270 unsigned int rlength, unsigned int roffset,
271 __u64 match_bits, lnet_msg_t *msg)
273 struct lnet_match_table *mtable;
274 struct lnet_portal *ptl;
277 CDEBUG(D_NET, "Request from %s of length %d into portal %d "
278 "MB="LPX64"\n", libcfs_id2str(src), rlength, index, match_bits);
280 if (index >= the_lnet.ln_nportals) {
281 CERROR("Invalid portal %d not in [0-%d]\n",
282 index, the_lnet.ln_nportals);
283 return LNET_MATCHMD_DROP;
286 mtable = lnet_mt_of_match(index, src, match_bits);
287 if (mtable == NULL) {
288 CDEBUG(D_NET, "Drop early message from %s of length %d into "
289 "portal %d MB="LPX64"\n",
290 libcfs_id2str(src), rlength, index, match_bits);
291 return LNET_MATCHMD_DROP;
294 ptl = the_lnet.ln_portals[index];
/* no new matches once shutdown has started */
297 if (the_lnet.ln_shutdown) {
298 rc = LNET_MATCHMD_DROP;
302 rc = lnet_mt_match_md(mtable, op_mask, src, rlength,
303 roffset, match_bits, msg);
304 if (rc != LNET_MATCHMD_NONE) /* matched or dropping */
307 if (!msg->msg_rx_ready_delay)
/* park the message on the portal; lnet_ptl_attach_md() rescans this queue */
310 LASSERT(!msg->msg_rx_delayed);
311 msg->msg_rx_delayed = 1;
312 cfs_list_add_tail(&msg->msg_list, &ptl->ptl_msgq);
315 "Delaying %s from %s portal %d MB "LPX64" offset %d len %d\n",
316 op_mask == LNET_MD_OP_PUT ? "PUT" : "GET",
317 libcfs_id2str(src), index, match_bits, roffset, rlength);
/*
 * Detach \a md from \a me.  The two must currently point at each other
 * (asserted).
 *
 * NOTE(review): only the assertion is visible in this listing; the
 * statements that actually break the link are not shown -- confirm
 * against the full source.
 */
324 lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md)
326 LASSERT(me->me_md == md && md->md_me == me);
332 /* called with lnet_res_lock held */
/*
 * Attach a brand-new MD (md_refcount == 0, asserted) to \a me and replay
 * the messages delayed on the ME's portal against it.
 *
 * Every message on the portal's ptl_msgq list is tried as a PUT with
 * lnet_try_match_md().  A message that matches is removed from the queue
 * and appended to \a matches; one that must be dropped is removed and
 * appended to \a drops; a message that does not match stays queued.
 *
 * NOTE(review): the statements linking \a me and \a md, the actions taken
 * for LNET_MATCHMD_NONE and for an exhausted MD (presumably continue /
 * break), and some call arguments are not visible in this listing.
 */
334 lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
335 cfs_list_t *matches, cfs_list_t *drops)
337 struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
341 LASSERT(md->md_refcount == 0); /* a brand new MD */
346 cfs_list_for_each_entry_safe(msg, tmp, &ptl->ptl_msgq, msg_list) {
350 lnet_process_id_t src;
352 LASSERT(msg->msg_rx_delayed);
355 index = hdr->msg.put.ptl_index;
/* rebuild the sender's process id from the message header */
357 src.nid = hdr->src_nid;
358 src.pid = hdr->src_pid;
360 rc = lnet_try_match_md(index, LNET_MD_OP_PUT, src,
363 hdr->msg.put.match_bits, md, msg);
365 if (rc == LNET_MATCHMD_NONE)
368 /* Hurrah! This _is_ a match */
369 cfs_list_del(&msg->msg_list);
371 if (rc == LNET_MATCHMD_OK) {
372 cfs_list_add_tail(&msg->msg_list, matches);
374 CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
375 "match "LPU64" offset %d length %d.\n",
377 hdr->msg.put.ptl_index,
378 hdr->msg.put.match_bits,
380 hdr->payload_length);
382 LASSERT(rc == LNET_MATCHMD_DROP);
384 cfs_list_add_tail(&msg->msg_list, drops);
387 if (lnet_md_exhausted(md))
/*
 * Tear down the match table of \a ptl.
 *
 * The portal must have no delayed messages left (asserted).  Any ME still
 * found on the wildcard list or in a hash bucket is reported with CERROR
 * and removed from its list.  The hash array and the match table are then
 * freed and ptl_mtable is reset to NULL.  A portal whose ptl_mtable is
 * already NULL is treated as uninitialized.
 *
 * NOTE(review): what happens to the removed MEs after cfs_list_del(), and
 * the early-exit statements, are not visible in this listing.
 */
393 lnet_ptl_cleanup(struct lnet_portal *ptl)
395 struct lnet_match_table *mtable;
397 LASSERT(cfs_list_empty(&ptl->ptl_msgq));
399 if (ptl->ptl_mtable == NULL) /* uninitialized portal */
402 do { /* iterate over match-tables when we have percpt match-table */
407 mtable = ptl->ptl_mtable;
409 if (mtable->mt_mhash == NULL) /* uninitialized match-table */
412 mhash = mtable->mt_mhash;
/* MEs left on the wildcard list */
414 while (!cfs_list_empty(&mtable->mt_mlist)) {
415 me = cfs_list_entry(mtable->mt_mlist.next,
417 CERROR("Active wildcard ME %p on exit\n", me);
418 cfs_list_del(&me->me_list);
/* MEs left in the unique-match hash buckets */
422 for (j = 0; j < LNET_MT_HASH_SIZE; j++) {
423 while (!cfs_list_empty(&mhash[j])) {
424 me = cfs_list_entry(mhash[j].next,
426 CERROR("Active unique ME %p on exit\n", me);
427 cfs_list_del(&me->me_list);
432 LIBCFS_FREE(mhash, sizeof(*mhash) * LNET_MT_HASH_SIZE);
435 LIBCFS_FREE(ptl->ptl_mtable, sizeof(*mtable));
436 ptl->ptl_mtable = NULL;
/*
 * Initialize portal \a ptl as portal number \a index.
 *
 * Records the index, initializes the delayed-message queue, allocates the
 * match table and its hash array of LNET_MT_HASH_SIZE list heads, and
 * initializes every bucket plus the wildcard list.  Allocation failures
 * are reported with CERROR; lnet_ptl_cleanup() is called on the failure
 * path.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * lnet_portals_create() treats a non-zero result as failure.
 */
440 lnet_ptl_setup(struct lnet_portal *ptl, int index)
442 struct lnet_match_table *mtable;
446 ptl->ptl_index = index;
447 CFS_INIT_LIST_HEAD(&ptl->ptl_msgq);
449 LIBCFS_ALLOC(mtable, sizeof(*mtable));
450 if (mtable == NULL) {
451 CERROR("Failed to create match table for portal %d\n", index);
455 ptl->ptl_mtable = mtable;
456 do { /* iterate over match-tables when we have percpt match-table */
457 LIBCFS_ALLOC(mhash, sizeof(*mhash) * LNET_MT_HASH_SIZE);
459 CERROR("Failed to create match hash for portal %d\n",
464 mtable->mt_mhash = mhash;
465 for (j = 0; j < LNET_MT_HASH_SIZE; j++)
466 CFS_INIT_LIST_HEAD(&mhash[j]);
468 CFS_INIT_LIST_HEAD(&mtable->mt_mlist);
469 mtable->mt_portal = index;
474 lnet_ptl_cleanup(ptl);
/*
 * Release the global portal table: clean up each of the ln_nportals
 * portals with lnet_ptl_cleanup(), free the array and reset
 * the_lnet.ln_portals to NULL.  A NULL table is checked for first
 * (the statement taken in that case is not visible in this listing).
 */
479 lnet_portals_destroy(void)
483 if (the_lnet.ln_portals == NULL)
486 for (i = 0; i < the_lnet.ln_nportals; i++)
487 lnet_ptl_cleanup(the_lnet.ln_portals[i]);
489 cfs_array_free(the_lnet.ln_portals);
490 the_lnet.ln_portals = NULL;
/*
 * Allocate the global portal table with MAX_PORTALS entries and set up
 * each portal with lnet_ptl_setup().  If setting up any portal fails, the
 * whole table is torn down again with lnet_portals_destroy().
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
494 lnet_portals_create(void)
499 size = sizeof(struct lnet_portal);
501 the_lnet.ln_nportals = MAX_PORTALS;
502 the_lnet.ln_portals = cfs_array_alloc(the_lnet.ln_nportals, size);
503 if (the_lnet.ln_portals == NULL) {
504 CERROR("Failed to allocate portals table\n");
508 for (i = 0; i < the_lnet.ln_nportals; i++) {
509 if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
510 lnet_portals_destroy();
519 * Turn on the lazy portal attribute. Use with caution!
521 * This portal attribute only affects incoming PUT requests to the portal,
522 * and is off by default. By default, if there's no matching MD for an
523 * incoming PUT request, it is simply dropped. With the lazy attribute on,
524 * such requests are queued indefinitely until either a matching MD is
525 * posted to the portal or the lazy attribute is turned off.
527 * This prevents dropped requests; however, it should be regarded as the
528 * last line of defense - i.e. users must keep a close watch on active
529 * buffers on a lazy portal and, once they run low, post more buffers as
530 * soon as possible. This is because delayed requests usually have detrimental
531 * effects on underlying network connections. A few delayed requests often
532 * suffice to bring an underlying connection to a complete halt, due to flow
533 * control mechanisms.
535 * There's also a DoS attack risk. If users don't post match-all MDs on a
536 * lazy portal, a malicious peer can easily stop a service by sending some
537 * PUT requests with match bits that won't match any MD. A routed server is
538 * especially vulnerable since the connections to its neighbor routers are
539 * shared among all clients.
541 * \param portal Index of the portal to enable the lazy attribute on.
543 * \retval 0 On success.
544 * \retval -EINVAL If \a portal is not a valid index.
/*
 * Implementation note: the index is range-checked against ln_nportals
 * before the portal table is touched, then LNET_PTL_LAZY is set on the
 * selected portal.
 * NOTE(review): return statements and locking are not visible in this
 * listing.
 */
547 LNetSetLazyPortal(int portal)
549 struct lnet_portal *ptl;
551 if (portal < 0 || portal >= the_lnet.ln_nportals)
554 CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
555 ptl = the_lnet.ln_portals[portal];
558 lnet_ptl_setopt(ptl, LNET_PTL_LAZY);
565 * Turn off the lazy portal attribute. Delayed requests on the portal,
566 * if any, will all be dropped when this function returns.
568 * \param portal Index of the portal to disable the lazy attribute on.
570 * \retval 0 On success.
571 * \retval -EINVAL If \a portal is not a valid index.
/*
 * Implementation note: after range-checking the index, a portal that is
 * not lazy is handled separately.  Otherwise all delayed messages are
 * moved from the portal's ptl_msgq onto the local "zombies" list, the
 * LNET_PTL_LAZY option is cleared, and the zombies are handed to
 * lnet_drop_delayed_msg_list().  A warning is logged if this happens
 * while LNet is shutting down.
 * NOTE(review): return statements, locking and the non-lazy branch body
 * are not visible in this listing.
 */
574 LNetClearLazyPortal(int portal)
576 struct lnet_portal *ptl;
577 CFS_LIST_HEAD (zombies);
579 if (portal < 0 || portal >= the_lnet.ln_nportals)
582 ptl = the_lnet.ln_portals[portal];
586 if (!lnet_ptl_is_lazy(ptl)) {
591 if (the_lnet.ln_shutdown)
592 CWARN("Active lazy portal %d on exit\n", portal);
594 CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
596 /* grab all the blocked messages atomically */
597 cfs_list_splice_init(&ptl->ptl_msgq, &zombies);
599 lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
603 lnet_drop_delayed_msg_list(&zombies, "Clearing lazy portal attr");