Whamcloud - gitweb
LU-7734 lnet: fix routing selection
[fs/lustre-release.git] / lnet / lnet / lib-ptl.c
index c8378d8..0b31878 100644 (file)
@@ -21,7 +21,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2012, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -39,9 +39,9 @@
 #include <lnet/lib-lnet.h>
 
 /* NB: add /proc interfaces in upcoming patches */
-int    portal_rotor    = LNET_PTL_ROTOR_HASH_RT;
-CFS_MODULE_PARM(portal_rotor, "i", int, 0644,
-               "redirect PUTs to different cpu-partitions");
+int portal_rotor = LNET_PTL_ROTOR_HASH_RT;
+module_param(portal_rotor, int, 0644);
+MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions");
 
 static int
 lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id,
@@ -184,7 +184,7 @@ lnet_try_match_md(lnet_libmd_t *md,
                mlength = info->mi_rlength;
        } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) {
                /* this packet _really_ is too big */
-               CERROR("Matching packet from %s, match "LPU64
+               CERROR("Matching packet from %s, match %llu"
                       " length %d too big: %d left, %d allowed\n",
                       libcfs_id2str(info->mi_id), info->mi_mbits,
                       info->mi_rlength, md->md_length - offset, mlength);
@@ -194,7 +194,7 @@ lnet_try_match_md(lnet_libmd_t *md,
 
        /* Commit to this ME/MD */
        CDEBUG(D_NET, "Incoming %s index %x from %s of "
-              "length %d/%d into md "LPX64" [%d] + %d\n",
+              "length %d/%d into md %#llx [%d] + %d\n",
               (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
               info->mi_portal, libcfs_id2str(info->mi_id), mlength,
               info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);
@@ -222,7 +222,7 @@ lnet_match2mt(struct lnet_portal *ptl, lnet_process_id_t id, __u64 mbits)
 
        /* if it's a unique portal, return match-table hashed by NID */
        return lnet_ptl_is_unique(ptl) ?
-              ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
+              ptl->ptl_mtables[lnet_cpt_of_nid(id.nid, NULL)] : NULL;
 }
 
 struct lnet_match_table *
@@ -264,10 +264,10 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
 {
        struct lnet_match_table *mtable;
        struct lnet_portal      *ptl;
-       int                     nmaps;
-       int                     rotor;
-       int                     routed;
-       int                     cpt;
+       unsigned int            nmaps;
+       unsigned int            rotor;
+       unsigned int            cpt;
+       bool                    routed;
 
        /* NB: called w/o lock */
        LASSERT(info->mi_portal < the_lnet.ln_nportals);
@@ -292,7 +292,7 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
 
        rotor = ptl->ptl_rotor++; /* get round-robin factor */
        if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
-               cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
+               cpt = info->mi_cpt;
        else
                cpt = rotor % LNET_CPT_NUMBER;
 
@@ -354,7 +354,7 @@ lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
                *bmap |= 1ULL << pos;
 }
 
-cfs_list_t *
+struct list_head *
 lnet_mt_match_head(struct lnet_match_table *mtable,
                   lnet_process_id_t id, __u64 mbits)
 {
@@ -366,8 +366,8 @@ lnet_mt_match_head(struct lnet_match_table *mtable,
                unsigned long hash = mbits + id.nid + id.pid;
 
                LASSERT(lnet_ptl_is_unique(ptl));
-               hash = cfs_hash_long(hash, LNET_MT_HASH_BITS);
-               return &mtable->mt_mhash[hash];
+               hash = hash_long(hash, LNET_MT_HASH_BITS);
+               return &mtable->mt_mhash[hash & LNET_MT_HASH_MASK];
        }
 }
 
@@ -375,14 +375,14 @@ int
 lnet_mt_match_md(struct lnet_match_table *mtable,
                 struct lnet_match_info *info, struct lnet_msg *msg)
 {
-       cfs_list_t              *head;
+       struct list_head        *head;
        lnet_me_t               *me;
        lnet_me_t               *tmp;
        int                     exhausted = 0;
        int                     rc;
 
        /* any ME with ignore bits? */
-       if (!cfs_list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
+       if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
                head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
        else
                head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
@@ -391,7 +391,7 @@ lnet_mt_match_md(struct lnet_match_table *mtable,
        if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
                exhausted = LNET_MATCHMD_EXHAUSTED;
 
-       cfs_list_for_each_entry_safe(me, tmp, head, me_list) {
+       list_for_each_entry_safe(me, tmp, head, me_list) {
                /* ME attached but MD not attached yet */
                if (me->me_md == NULL)
                        continue;
@@ -447,8 +447,8 @@ lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
        if (lnet_ptl_is_lazy(ptl)) {
                if (msg->msg_rx_ready_delay) {
                        msg->msg_rx_delayed = 1;
-                       cfs_list_add_tail(&msg->msg_list,
-                                         &ptl->ptl_msg_delayed);
+                       list_add_tail(&msg->msg_list,
+                                     &ptl->ptl_msg_delayed);
                }
                rc = LNET_MATCHMD_NONE;
        } else {
@@ -467,9 +467,13 @@ lnet_ptl_match_delay(struct lnet_portal *ptl,
        int     rc = 0;
        int     i;
 
-       /* steal buffer from other CPTs, and delay it if nothing to steal,
-        * this function is more expensive than a regular match, but we
-        * don't expect it can happen a lot */
+       /*
+        * Steal buffer from other CPTs, and delay msg if nothing to
+        * steal.  This function is more expensive than a regular
+        * match, but we don't expect it can happen a lot. The return
+        * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or
+        * LNET_MATCHMD_NONE.
+        */
        LASSERT(lnet_ptl_is_wildcard(ptl));
 
        for (i = 0; i < LNET_CPT_NUMBER; i++) {
@@ -484,50 +488,71 @@ lnet_ptl_match_delay(struct lnet_portal *ptl,
                lnet_res_lock(cpt);
                lnet_ptl_lock(ptl);
 
-               if (i == 0) { /* the first try, attach on stealing list */
-                       cfs_list_add_tail(&msg->msg_list,
-                                         &ptl->ptl_msg_stealing);
+               if (i == 0) {
+                       /* The first try, add to stealing list. */
+                       list_add_tail(&msg->msg_list,
+                                     &ptl->ptl_msg_stealing);
                }
 
-               if (!cfs_list_empty(&msg->msg_list)) { /* on stealing list */
+               if (!list_empty(&msg->msg_list)) {
+                       /* On stealing list. */
                        rc = lnet_mt_match_md(mtable, info, msg);
 
                        if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 &&
                            mtable->mt_enabled)
                                lnet_ptl_disable_mt(ptl, cpt);
 
-                       if ((rc & LNET_MATCHMD_FINISH) != 0)
-                               cfs_list_del_init(&msg->msg_list);
-
-               } else {
-                       /* could be matched by lnet_ptl_attach_md()
-                        * which is called by another thread */
-                       rc = msg->msg_md == NULL ?
-                            LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
-               }
-
-               if (!cfs_list_empty(&msg->msg_list) && /* not matched yet */
-                   (i == LNET_CPT_NUMBER - 1 || /* the last CPT */
-                    ptl->ptl_mt_nmaps == 0 ||   /* no active CPT */
-                    (ptl->ptl_mt_nmaps == 1 &&  /* the only active CPT */
-                     ptl->ptl_mt_maps[0] == cpt))) {
-                       /* nothing to steal, delay or drop */
-                       cfs_list_del_init(&msg->msg_list);
-
-                       if (lnet_ptl_is_lazy(ptl)) {
-                               msg->msg_rx_delayed = 1;
-                               cfs_list_add_tail(&msg->msg_list,
-                                                 &ptl->ptl_msg_delayed);
-                               rc = LNET_MATCHMD_NONE;
+                       if ((rc & LNET_MATCHMD_FINISH) != 0) {
+                               /* Match found, remove from stealing list. */
+                               list_del_init(&msg->msg_list);
+                       } else if (i == LNET_CPT_NUMBER - 1 || /* (1) */
+                                  ptl->ptl_mt_nmaps == 0 ||   /* (2) */
+                                  (ptl->ptl_mt_nmaps == 1 &&  /* (3) */
+                                   ptl->ptl_mt_maps[0] == cpt)) {
+                               /*
+                                * No match found, and this is either
+                                * (1) the last cpt to check, or
+                                * (2) there is no active cpt, or
+                                * (3) this is the only active cpt.
+                                * There is nothing to steal: delay or
+                                * drop the message.
+                                */
+                               list_del_init(&msg->msg_list);
+
+                               if (lnet_ptl_is_lazy(ptl)) {
+                                       msg->msg_rx_delayed = 1;
+                                       list_add_tail(&msg->msg_list,
+                                                     &ptl->ptl_msg_delayed);
+                                       rc = LNET_MATCHMD_NONE;
+                               } else {
+                                       rc = LNET_MATCHMD_DROP;
+                               }
                        } else {
-                               rc = LNET_MATCHMD_DROP;
+                               /* Do another iteration. */
+                               rc = 0;
                        }
+               } else {
+                       /*
+                        * No longer on stealing list: another thread
+                        * matched the message in lnet_ptl_attach_md().
+                        * We are now expected to handle the message.
+                        */
+                       rc = msg->msg_md == NULL ?
+                               LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
                }
 
                lnet_ptl_unlock(ptl);
                lnet_res_unlock(cpt);
 
-               if ((rc & LNET_MATCHMD_FINISH) != 0 || msg->msg_rx_delayed)
+               /*
+                * Note that test (1) above ensures that we always
+                * exit the loop through this break statement.
+                *
+                * LNET_MATCHMD_NONE means msg was added to the
+                * delayed queue, and we may no longer reference it
+                * after lnet_ptl_unlock() and lnet_res_unlock().
+                */
+               if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE))
                        break;
        }
 
@@ -542,7 +567,7 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
        int                     rc;
 
        CDEBUG(D_NET, "Request from %s of length %d into portal %d "
-              "MB="LPX64"\n", libcfs_id2str(info->mi_id),
+              "MB=%#llx\n", libcfs_id2str(info->mi_id),
               info->mi_rlength, info->mi_portal, info->mi_mbits);
 
        if (info->mi_portal >= the_lnet.ln_nportals) {
@@ -585,19 +610,20 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
                lnet_ptl_lock(ptl);
 
                msg->msg_rx_delayed = 1;
-               cfs_list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
+               list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);
 
                lnet_ptl_unlock(ptl);
                lnet_res_unlock(mtable->mt_cpt);
-
-       } else  {
+               rc = LNET_MATCHMD_NONE;
+       } else  {
                lnet_res_unlock(mtable->mt_cpt);
                rc = lnet_ptl_match_delay(ptl, info, msg);
        }
 
-       if (msg->msg_rx_delayed) {
+       /* LNET_MATCHMD_NONE means msg was added to the delay queue */
+       if (rc & LNET_MATCHMD_NONE) {
                CDEBUG(D_NET,
-                      "Delaying %s from %s ptl %d MB "LPX64" off %d len %d\n",
+                      "Delaying %s from %s ptl %d MB %#llx off %d len %d\n",
                       info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
                       libcfs_id2str(info->mi_id), info->mi_portal,
                       info->mi_mbits, info->mi_roffset, info->mi_rlength);
@@ -622,11 +648,11 @@ lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md)
 /* called with lnet_res_lock held */
 void
 lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
-                  cfs_list_t *matches, cfs_list_t *drops)
+                  struct list_head *matches, struct list_head *drops)
 {
        struct lnet_portal      *ptl = the_lnet.ln_portals[me->me_portal];
        struct lnet_match_table *mtable;
-       cfs_list_t              *head;
+       struct list_head        *head;
        lnet_msg_t              *tmp;
        lnet_msg_t              *msg;
        int                     exhausted = 0;
@@ -640,15 +666,15 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
        cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
        mtable = ptl->ptl_mtables[cpt];
 
-       if (cfs_list_empty(&ptl->ptl_msg_stealing) &&
-           cfs_list_empty(&ptl->ptl_msg_delayed) &&
+       if (list_empty(&ptl->ptl_msg_stealing) &&
+           list_empty(&ptl->ptl_msg_delayed) &&
            !lnet_mt_test_exhausted(mtable, me->me_pos))
                return;
 
        lnet_ptl_lock(ptl);
        head = &ptl->ptl_msg_stealing;
  again:
-       cfs_list_for_each_entry_safe(msg, tmp, head, msg_list) {
+       list_for_each_entry_safe(msg, tmp, head, msg_list) {
                struct lnet_match_info  info;
                lnet_hdr_t              *hdr;
                int                     rc;
@@ -656,7 +682,8 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
                LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);
 
                hdr   = &msg->msg_hdr;
-               info.mi_id.nid  = hdr->src_nid;
+               /* Multi-Rail: Primary peer NID */
+               info.mi_id.nid  = msg->msg_initiator;
                info.mi_id.pid  = hdr->src_pid;
                info.mi_opc     = LNET_MD_OP_PUT;
                info.mi_portal  = hdr->msg.put.ptl_index;
@@ -675,7 +702,7 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
 
                /* Hurrah! This _is_ a match */
                LASSERT((rc & LNET_MATCHMD_FINISH) != 0);
-               cfs_list_del_init(&msg->msg_list);
+               list_del_init(&msg->msg_list);
 
                if (head == &ptl->ptl_msg_stealing) {
                        if (exhausted)
@@ -685,15 +712,15 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
                }
 
                if ((rc & LNET_MATCHMD_OK) != 0) {
-                       cfs_list_add_tail(&msg->msg_list, matches);
+                       list_add_tail(&msg->msg_list, matches);
 
                        CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
-                              "match "LPU64" offset %d length %d.\n",
+                              "match %llu offset %d length %d.\n",
                               libcfs_id2str(info.mi_id),
                               info.mi_portal, info.mi_mbits,
                               info.mi_roffset, info.mi_rlength);
                } else {
-                       cfs_list_add_tail(&msg->msg_list, drops);
+                       list_add_tail(&msg->msg_list, drops);
                }
 
                if (exhausted)
@@ -714,7 +741,7 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
        lnet_ptl_unlock(ptl);
 }
 
-void
+static void
 lnet_ptl_cleanup(struct lnet_portal *ptl)
 {
        struct lnet_match_table *mtable;
@@ -723,17 +750,12 @@ lnet_ptl_cleanup(struct lnet_portal *ptl)
        if (ptl->ptl_mtables == NULL) /* uninitialized portal */
                return;
 
-       LASSERT(cfs_list_empty(&ptl->ptl_msg_delayed));
-       LASSERT(cfs_list_empty(&ptl->ptl_msg_stealing));
-#ifndef __KERNEL__
-# ifdef HAVE_LIBPTHREAD
-       pthread_mutex_destroy(&ptl->ptl_lock);
-# endif
-#endif
+       LASSERT(list_empty(&ptl->ptl_msg_delayed));
+       LASSERT(list_empty(&ptl->ptl_msg_stealing));
        cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
-               cfs_list_t      *mhash;
-               lnet_me_t       *me;
-               int             j;
+               struct list_head *mhash;
+               lnet_me_t        *me;
+               int               j;
 
                if (mtable->mt_mhash == NULL) /* uninitialized match-table */
                        continue;
@@ -741,11 +763,11 @@ lnet_ptl_cleanup(struct lnet_portal *ptl)
                mhash = mtable->mt_mhash;
                /* cleanup ME */
                for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
-                       while (!cfs_list_empty(&mhash[j])) {
-                               me = cfs_list_entry(mhash[j].next,
-                                                   lnet_me_t, me_list);
+                       while (!list_empty(&mhash[j])) {
+                               me = list_entry(mhash[j].next,
+                                               lnet_me_t, me_list);
                                CERROR("Active ME %p on exit\n", me);
-                               cfs_list_del(&me->me_list);
+                               list_del(&me->me_list);
                                lnet_me_free(me);
                        }
                }
@@ -757,11 +779,11 @@ lnet_ptl_cleanup(struct lnet_portal *ptl)
        ptl->ptl_mtables = NULL;
 }
 
-int
+static int
 lnet_ptl_setup(struct lnet_portal *ptl, int index)
 {
        struct lnet_match_table *mtable;
-       cfs_list_t              *mhash;
+       struct list_head        *mhash;
        int                     i;
        int                     j;
 
@@ -773,15 +795,9 @@ lnet_ptl_setup(struct lnet_portal *ptl, int index)
        }
 
        ptl->ptl_index = index;
-       CFS_INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
-       CFS_INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
-#ifdef __KERNEL__
+       INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
+       INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
        spin_lock_init(&ptl->ptl_lock);
-#else
-# ifdef HAVE_LIBPTHREAD
-       pthread_mutex_init(&ptl->ptl_lock, NULL);
-# endif
-#endif
        cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
                /* the extra entry is for MEs with ignore bits */
                LIBCFS_CPT_ALLOC(mhash, lnet_cpt_table(), i,
@@ -797,7 +813,7 @@ lnet_ptl_setup(struct lnet_portal *ptl, int index)
                       LNET_MT_EXHAUSTED_BMAP);
                mtable->mt_mhash = mhash;
                for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
-                       CFS_INIT_LIST_HEAD(&mhash[j]);
+                       INIT_LIST_HEAD(&mhash[j]);
 
                mtable->mt_portal = index;
                mtable->mt_cpt = i;
@@ -874,7 +890,7 @@ lnet_portals_create(void)
  *
  * \param portal Index of the portal to enable the lazy attribute on.
  *
- * \retval 0       On success.
+ * \retval 0      On success.
  * \retval -EINVAL If \a portal is not a valid index.
  */
 int
@@ -900,20 +916,11 @@ LNetSetLazyPortal(int portal)
 }
 EXPORT_SYMBOL(LNetSetLazyPortal);
 
-/**
- * Turn off the lazy portal attribute. Delayed requests on the portal,
- * if any, will be all dropped when this function returns.
- *
- * \param portal Index of the portal to disable the lazy attribute on.
- *
- * \retval 0       On success.
- * \retval -EINVAL If \a portal is not a valid index.
- */
 int
-LNetClearLazyPortal(int portal)
+lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
 {
        struct lnet_portal      *ptl;
-       CFS_LIST_HEAD           (zombies);
+       struct list_head        zombies = LIST_HEAD_INIT(zombies);
 
        if (portal < 0 || portal >= the_lnet.ln_nportals)
                return -EINVAL;
@@ -929,21 +936,48 @@ LNetClearLazyPortal(int portal)
                return 0;
        }
 
-       if (the_lnet.ln_shutdown)
-               CWARN("Active lazy portal %d on exit\n", portal);
-       else
-               CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
+       if (ni != NULL) {
+               struct lnet_msg *msg, *tmp;
+
+               /* grab all messages which are on the NI passed in */
+               list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
+                                        msg_list) {
+                       if (msg->msg_txni == ni || msg->msg_rxni == ni)
+                               list_move(&msg->msg_list, &zombies);
+               }
+       } else {
+               if (the_lnet.ln_shutdown)
+                       CWARN("Active lazy portal %d on exit\n", portal);
+               else
+                       CDEBUG(D_NET, "clearing portal %d lazy\n", portal);
 
-       /* grab all the blocked messages atomically */
-       cfs_list_splice_init(&ptl->ptl_msg_delayed, &zombies);
+               /* grab all the blocked messages atomically */
+               list_splice_init(&ptl->ptl_msg_delayed, &zombies);
 
-       lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
+               lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
+       }
 
        lnet_ptl_unlock(ptl);
        lnet_res_unlock(LNET_LOCK_EX);
 
-       lnet_drop_delayed_msg_list(&zombies, "Clearing lazy portal attr");
+       lnet_drop_delayed_msg_list(&zombies, reason);
 
        return 0;
 }
+
+/**
+ * Turn off the lazy portal attribute. Delayed requests on the portal,
+ * if any, will be all dropped when this function returns.
+ *
+ * This is a thin wrapper around lnet_clear_lazy_portal(). It passes a
+ * NULL NI, which selects the branch that splices every message off the
+ * portal's delayed list and clears LNET_PTL_LAZY, rather than the branch
+ * that only picks out messages belonging to one NI. The drop reason
+ * handed down is the fixed string "Clearing lazy portal attr", and the
+ * return value of lnet_clear_lazy_portal() is passed through unchanged.
+ *
+ * \param portal Index of the portal to disable the lazy attribute on.
+ *
+ * \retval 0      On success.
+ * \retval -EINVAL If \a portal is not a valid index.
+ */
+int
+LNetClearLazyPortal(int portal)
+{
+       return lnet_clear_lazy_portal(NULL, portal,
+                                     "Clearing lazy portal attr");
+}
 EXPORT_SYMBOL(LNetClearLazyPortal);