Whamcloud - gitweb
LU-7734 lnet: fix routing selection
[fs/lustre-release.git] / lnet / lnet / lib-ptl.c
index b55e993..0b31878 100644 (file)
@@ -21,7 +21,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2012, 2014, Intel Corporation.
+ * Copyright (c) 2012, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -39,9 +39,9 @@
 #include <lnet/lib-lnet.h>
 
 /* NB: add /proc interfaces in upcoming patches */
-int    portal_rotor    = LNET_PTL_ROTOR_HASH_RT;
-CFS_MODULE_PARM(portal_rotor, "i", int, 0644,
-               "redirect PUTs to different cpu-partitions");
+int portal_rotor = LNET_PTL_ROTOR_HASH_RT;
+module_param(portal_rotor, int, 0644);
+MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions");
 
 static int
 lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id,
@@ -184,7 +184,7 @@ lnet_try_match_md(lnet_libmd_t *md,
                mlength = info->mi_rlength;
        } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) {
                /* this packet _really_ is too big */
-               CERROR("Matching packet from %s, match "LPU64
+               CERROR("Matching packet from %s, match %llu"
                       " length %d too big: %d left, %d allowed\n",
                       libcfs_id2str(info->mi_id), info->mi_mbits,
                       info->mi_rlength, md->md_length - offset, mlength);
@@ -194,7 +194,7 @@ lnet_try_match_md(lnet_libmd_t *md,
 
        /* Commit to this ME/MD */
        CDEBUG(D_NET, "Incoming %s index %x from %s of "
-              "length %d/%d into md "LPX64" [%d] + %d\n",
+              "length %d/%d into md %#llx [%d] + %d\n",
               (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
               info->mi_portal, libcfs_id2str(info->mi_id), mlength,
               info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
@@ -222,7 +222,7 @@ lnet_match2mt(struct lnet_portal *ptl, lnet_process_id_t id, __u64 mbits)
 
        /* if it's a unique portal, return match-table hashed by NID */
        return lnet_ptl_is_unique(ptl) ?
-              ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
+              ptl->ptl_mtables[lnet_cpt_of_nid(id.nid, NULL)] : NULL;
 }
 
 struct lnet_match_table *
@@ -292,7 +292,7 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
 
        rotor = ptl->ptl_rotor++; /* get round-robin factor */
        if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
-               cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
+               cpt = info->mi_cpt;
        else
                cpt = rotor % LNET_CPT_NUMBER;
 
@@ -367,7 +367,7 @@ lnet_mt_match_head(struct lnet_match_table *mtable,
 
                LASSERT(lnet_ptl_is_unique(ptl));
                hash = hash_long(hash, LNET_MT_HASH_BITS);
-               return &mtable->mt_mhash[hash];
+               return &mtable->mt_mhash[hash & LNET_MT_HASH_MASK];
        }
 }
 
@@ -467,9 +467,13 @@ lnet_ptl_match_delay(struct lnet_portal *ptl,
        int     rc = 0;
        int     i;
 
-       /* steal buffer from other CPTs, and delay it if nothing to steal,
-        * this function is more expensive than a regular match, but we
-        * don't expect it can happen a lot */
+       /*
+        * Steal buffer from other CPTs, and delay msg if nothing to
+        * steal.  This function is more expensive than a regular
+        * match, but we don't expect it to happen often. The return
+        * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or
+        * LNET_MATCHMD_NONE.
+        */
        LASSERT(lnet_ptl_is_wildcard(ptl));
 
        for (i = 0; i < LNET_CPT_NUMBER; i++) {
@@ -484,50 +488,71 @@ lnet_ptl_match_delay(struct lnet_portal *ptl,
                lnet_res_lock(cpt);
                lnet_ptl_lock(ptl);
 
-               if (i == 0) { /* the first try, attach on stealing list */
+               if (i == 0) {
+                       /* The first try, add to stealing list. */
                        list_add_tail(&msg->msg_list,
                                      &ptl->ptl_msg_stealing);
                }
 
-               if (!list_empty(&msg->msg_list)) { /* on stealing list */
+               if (!list_empty(&msg->msg_list)) {
+                       /* On stealing list. */
                        rc = lnet_mt_match_md(mtable, info, msg);
 
                        if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 &&
                            mtable->mt_enabled)
                                lnet_ptl_disable_mt(ptl, cpt);
 
-                       if ((rc & LNET_MATCHMD_FINISH) != 0)
+                       if ((rc & LNET_MATCHMD_FINISH) != 0) {
+                               /* Match found, remove from stealing list. */
+                               list_del_init(&msg->msg_list);
+                       } else if (i == LNET_CPT_NUMBER - 1 || /* (1) */
+                                  ptl->ptl_mt_nmaps == 0 ||   /* (2) */
+                                  (ptl->ptl_mt_nmaps == 1 &&  /* (3) */
+                                   ptl->ptl_mt_maps[0] == cpt)) {
+                               /*
+                                * No match found, and this is either
+                                * (1) the last cpt to check, or
+                                * (2) there is no active cpt, or
+                                * (3) this is the only active cpt.
+                                * There is nothing to steal: delay or
+                                * drop the message.
+                                */
                                list_del_init(&msg->msg_list);
 
-               } else {
-                       /* could be matched by lnet_ptl_attach_md()
-                        * which is called by another thread */
-                       rc = msg->msg_md == NULL ?
-                            LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
-               }
-
-               if (!list_empty(&msg->msg_list) && /* not matched yet */
-                   (i == LNET_CPT_NUMBER - 1 || /* the last CPT */
-                    ptl->ptl_mt_nmaps == 0 ||   /* no active CPT */
-                    (ptl->ptl_mt_nmaps == 1 &&  /* the only active CPT */
-                     ptl->ptl_mt_maps[0] == cpt))) {
-                       /* nothing to steal, delay or drop */
-                       list_del_init(&msg->msg_list);
-
-                       if (lnet_ptl_is_lazy(ptl)) {
-                               msg->msg_rx_delayed = 1;
-                               list_add_tail(&msg->msg_list,
-                                             &ptl->ptl_msg_delayed);
-                               rc = LNET_MATCHMD_NONE;
+                               if (lnet_ptl_is_lazy(ptl)) {
+                                       msg->msg_rx_delayed = 1;
+                                       list_add_tail(&msg->msg_list,
+                                                     &ptl->ptl_msg_delayed);
+                                       rc = LNET_MATCHMD_NONE;
+                               } else {
+                                       rc = LNET_MATCHMD_DROP;
+                               }
                        } else {
-                               rc = LNET_MATCHMD_DROP;
+                               /* Do another iteration. */
+                               rc = 0;
                        }
+               } else {
+                       /*
+                        * No longer on stealing list: another thread
+                        * matched the message in lnet_ptl_attach_md().
+                        * We are now expected to handle the message.
+                        */
+                       rc = msg->msg_md == NULL ?
+                               LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
                }
 
                lnet_ptl_unlock(ptl);
                lnet_res_unlock(cpt);
 
-               if ((rc & LNET_MATCHMD_FINISH) != 0 || msg->msg_rx_delayed)
+               /*
+                * Note that test (1) above ensures that we always
+                * exit the loop through this break statement.
+                *
+                * LNET_MATCHMD_NONE means msg was added to the
+                * delayed queue, and we may no longer reference it
+                * after lnet_ptl_unlock() and lnet_res_unlock().
+                */
+               if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE))
                        break;
        }
 
@@ -542,7 +567,7 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
        int                     rc;
 
        CDEBUG(D_NET, "Request from %s of length %d into portal %d "
-              "MB="LPX64"\n", libcfs_id2str(info->mi_id),
+              "MB=%#llx\n", libcfs_id2str(info->mi_id),
               info->mi_rlength, info->mi_portal, info->mi_mbits);
 
        if (info->mi_portal >= the_lnet.ln_nportals) {
@@ -589,15 +614,16 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
 
                lnet_ptl_unlock(ptl);
                lnet_res_unlock(mtable->mt_cpt);
-
-       } else  {
+               rc = LNET_MATCHMD_NONE;
+       } else  {
                lnet_res_unlock(mtable->mt_cpt);
                rc = lnet_ptl_match_delay(ptl, info, msg);
        }
 
-       if (msg->msg_rx_delayed) {
+       /* LNET_MATCHMD_NONE means msg was added to the delayed queue */
+       if (rc & LNET_MATCHMD_NONE) {
                CDEBUG(D_NET,
-                      "Delaying %s from %s ptl %d MB "LPX64" off %d len %d\n",
+                      "Delaying %s from %s ptl %d MB %#llx off %d len %d\n",
                       info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
                       libcfs_id2str(info->mi_id), info->mi_portal,
                       info->mi_mbits, info->mi_roffset, info->mi_rlength);
@@ -656,7 +682,8 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
                LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
 
                hdr   = &msg->msg_hdr;
-               info.mi_id.nid  = hdr->src_nid;
+               /* Multi-Rail: Primary peer NID */
+               info.mi_id.nid  = msg->msg_initiator;
                info.mi_id.pid  = hdr->src_pid;
                info.mi_opc     = LNET_MD_OP_PUT;
                info.mi_portal  = hdr->msg.put.ptl_index;
@@ -688,7 +715,7 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
                        list_add_tail(&msg->msg_list, matches);
 
                        CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
-                              "match "LPU64" offset %d length %d.\n",
+                              "match %llu offset %d length %d.\n",
                               libcfs_id2str(info.mi_id),
                               info.mi_portal, info.mi_mbits,
                               info.mi_roffset, info.mi_rlength);
@@ -863,7 +890,7 @@ lnet_portals_create(void)
  *
  * \param portal Index of the portal to enable the lazy attribute on.
  *
- * \retval 0       On success.
+ * \retval 0      On success.
  * \retval -EINVAL If \a portal is not a valid index.
  */
 int
@@ -915,7 +942,7 @@ lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
                /* grab all messages which are on the NI passed in */
                list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
                                         msg_list) {
-                       if (msg->msg_rxpeer->lp_ni == ni)
+                       if (msg->msg_txni == ni || msg->msg_rxni == ni)
                                list_move(&msg->msg_list, &zombies);
                }
        } else {
@@ -944,7 +971,7 @@ lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
  *
  * \param portal Index of the portal to disable the lazy attribute on.
  *
- * \retval 0       On success.
+ * \retval 0      On success.
  * \retval -EINVAL If \a portal is not a valid index.
  */
 int