Whamcloud - gitweb
b=21619 hash MEs on RDMA portal
authorLiang Zhen <Zhen.Liang@Sun.COM>
Tue, 16 Mar 2010 20:14:58 +0000 (13:14 -0700)
committerJohann Lombardi <johann@sun.com>
Fri, 19 Mar 2010 10:54:14 +0000 (11:54 +0100)
i=isaac
i=maxim

An RDMA portal can have a very long ME list on the client side, and searching
such a long list can trigger a soft lockup. Hashing the MEs on RDMA portals
resolves this problem.

lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-types.h
lnet/lnet/api-ni.c
lnet/lnet/lib-me.c
lnet/lnet/lib-move.c

index f26a857..6f17060 100644 (file)
@@ -88,9 +88,27 @@ static inline int lnet_md_unlinkable (lnet_libmd_t *md)
                 lnet_md_exhausted(md));
 }
 
+/*
+ * Mix a match id and match bits down to a single unsigned int.
+ *
+ * mbits and id.nid are each added to themselves shifted right by 32 bits and
+ * truncated to unsigned int; id.pid is truncated the same way.  The three
+ * parts are then summed with ordinary unsigned wrap-around.
+ */
+static inline unsigned int
+lnet_match_hash_value(lnet_process_id_t id, __u64 mbits)
+{
+        unsigned int   bits_part = (unsigned int)(mbits + (mbits >> 32));
+        unsigned int   nid_part  = (unsigned int)(id.nid + (id.nid >> 32));
+        unsigned int   pid_part  = (unsigned int)(id.pid);
+
+        return bits_part + nid_part + pid_part;
+}
+
+/*
+ * Reduce lnet_match_hash_value() for (id, mbits) modulo
+ * LNET_PORTAL_HASH_SIZE, giving an index below LNET_PORTAL_HASH_SIZE.
+ */
+static inline unsigned int
+lnet_match_to_hash(lnet_process_id_t id, __u64 mbits)
+{
+        unsigned int   hash = lnet_match_hash_value(id, mbits);
+
+        return hash % LNET_PORTAL_HASH_SIZE;
+}
+
 #ifdef __KERNEL__
-#define LNET_LOCK()        spin_lock(&the_lnet.ln_lock)                 
-#define LNET_UNLOCK()      spin_unlock(&the_lnet.ln_lock)               
+#define LNET_LOCK()        spin_lock(&the_lnet.ln_lock)
+#define LNET_UNLOCK()      spin_unlock(&the_lnet.ln_lock)
 #define LNET_MUTEX_DOWN(m) mutex_down(m)
 #define LNET_MUTEX_UP(m)   mutex_up(m)
 #else
@@ -429,6 +447,62 @@ lnet_handle2me (lnet_handle_me_t *handle)
         return (lh_entry (lh, lnet_me_t, me_lh));
 }
 
+/* Return 1 if LNET_PTL_LAZY is set in the portal's options, otherwise 0. */
+static inline int
+lnet_portal_is_lazy(lnet_portal_t *ptl)
+{
+        return (ptl->ptl_options & LNET_PTL_LAZY) != 0;
+}
+
+/* Return 1 if LNET_PTL_MATCH_UNIQUE is set in the portal's options, otherwise 0. */
+static inline int
+lnet_portal_is_unique(lnet_portal_t *ptl)
+{
+        return (ptl->ptl_options & LNET_PTL_MATCH_UNIQUE) != 0;
+}
+
+/* Return 1 if LNET_PTL_MATCH_WILDCARD is set in the portal's options, otherwise 0. */
+static inline int
+lnet_portal_is_wildcard(lnet_portal_t *ptl)
+{
+        return (ptl->ptl_options & LNET_PTL_MATCH_WILDCARD) != 0;
+}
+
+/*
+ * Turn on the bits of opt in the portal's option word.
+ * No locking is done here; that is left to the caller.
+ */
+static inline void
+lnet_portal_setopt(lnet_portal_t *ptl, int opt)
+{
+        ptl->ptl_options = ptl->ptl_options | opt;
+}
+
+/*
+ * Turn off the bits of opt in the portal's option word.
+ * No locking is done here; that is left to the caller.
+ */
+static inline void
+lnet_portal_unsetopt(lnet_portal_t *ptl, int opt)
+{
+        ptl->ptl_options = ptl->ptl_options & ~opt;
+}
+
+/*
+ * Tell whether a match specification is "unique".
+ *
+ * Returns 1 when ignore_bits is zero and neither match_id.nid nor
+ * match_id.pid is the corresponding LNET_NID_ANY / LNET_PID_ANY value;
+ * returns 0 otherwise.  match_bits is accepted but not examined.
+ */
+static inline int
+lnet_match_is_unique(lnet_process_id_t match_id,
+                     __u64 match_bits, __u64 ignore_bits)
+{
+        if (ignore_bits != 0)
+                return 0;
+
+        if (match_id.nid == LNET_NID_ANY)
+                return 0;
+
+        return match_id.pid != LNET_PID_ANY;
+}
+
+/*
+ * Pick the list head under which MEs for (id, mbits) are kept on portal
+ * number index.
+ *
+ * A wildcard portal uses its single ptl_mlist.  A unique portal uses the
+ * ptl_mhash bucket chosen by lnet_match_to_hash(); ptl_mhash is asserted to
+ * be non-NULL in that case.  NULL is returned when the portal has neither
+ * option set.  index is not range-checked here.
+ */
+static inline struct list_head *
+lnet_portal_me_head(int index, lnet_process_id_t id, __u64 mbits)
+{
+        lnet_portal_t *ptl = &the_lnet.ln_portals[index];
+
+        /* the wildcard test comes first, as in the original ordering */
+        if (lnet_portal_is_wildcard(ptl))
+                return &ptl->ptl_mlist;
+
+        if (!lnet_portal_is_unique(ptl))
+                return NULL;
+
+        LASSERT (ptl->ptl_mhash != NULL);
+        return &ptl->ptl_mhash[lnet_match_to_hash(id, mbits)];
+}
+
+/* Allocate an array of LNET_PORTAL_HASH_SIZE initialised list heads; NULL on failure. */
+struct list_head *lnet_portal_mhash_alloc(void);
+/* Free any MEs still linked on the buckets of mhash, then the array itself. */
+void lnet_portal_mhash_free(struct list_head *mhash);
+
 static inline void
 lnet_peer_addref_locked(lnet_peer_t *lp)
 {
index f34ffb8..609cdb8 100644 (file)
@@ -502,12 +502,18 @@ typedef struct {
 
 /* Options for lnet_portal_t::ptl_options */
 #define LNET_PTL_LAZY               (1 << 0)
+#define LNET_PTL_MATCH_UNIQUE       (1 << 1)    /* unique match, for RDMA */
+#define LNET_PTL_MATCH_WILDCARD     (1 << 2)    /* wildcard match, request portal */
+
+#define LNET_PORTAL_HASH_SIZE        113        /* ME hash size of RDMA portal (prime) */
+
 typedef struct {
-        struct list_head ptl_ml;  /* match list */
-        struct list_head ptl_msgq; /* messages blocking for MD */
-        __u64            ptl_ml_version;    /* validity stamp, only changed for new attached MD */
-        __u64            ptl_msgq_version;  /* validity stamp */
-        unsigned int     ptl_options;
+        struct list_head *ptl_mhash;            /* match hash */
+        struct list_head  ptl_mlist;            /* match list */
+        struct list_head  ptl_msgq;             /* messages blocking for MD */
+        __u64             ptl_ml_version;       /* validity stamp, only changed for new attached MD */
+        __u64             ptl_msgq_version;     /* validity stamp */
+        unsigned int      ptl_options;
 } lnet_portal_t;
 
 /* Router Checker states */
index 2423c5c..94031c3 100644 (file)
@@ -586,6 +586,39 @@ lnet_invalidate_handle (lnet_libhandle_t *lh)
         list_del (&lh->lh_hash_chain);
 }
 
+/*
+ * Allocate the bucket array for a portal's ME hash.
+ *
+ * Returns a pointer to LNET_PORTAL_HASH_SIZE list heads, each initialised
+ * as an empty list, or NULL if LIBCFS_ALLOC left the pointer NULL.
+ */
+struct list_head *
+lnet_portal_mhash_alloc(void)
+{
+        struct list_head *mhash;
+        int               bucket = 0;
+
+        LIBCFS_ALLOC(mhash, sizeof(struct list_head) * LNET_PORTAL_HASH_SIZE);
+        if (mhash == NULL)
+                return NULL;
+
+        while (bucket < LNET_PORTAL_HASH_SIZE) {
+                CFS_INIT_LIST_HEAD(&mhash[bucket]);
+                bucket++;
+        }
+
+        return mhash;
+}
+
+/*
+ * Release a bucket array of LNET_PORTAL_HASH_SIZE list heads.
+ *
+ * Every ME still linked on a bucket is reported with CERROR, unlinked and
+ * freed before the array itself is released.  mhash is dereferenced without
+ * a NULL check, so callers must pass a valid array.
+ */
+void
+lnet_portal_mhash_free(struct list_head *mhash)
+{
+        struct list_head *bucket = mhash;
+        struct list_head *end    = mhash + LNET_PORTAL_HASH_SIZE;
+
+        for (; bucket != end; bucket++) {
+                /* always take the first entry until the bucket is empty */
+                while (!list_empty(bucket)) {
+                        lnet_me_t *me = list_entry (bucket->next,
+                                                    lnet_me_t, me_list);
+                        CERROR ("Active ME %p on exit portal mhash\n", me);
+                        list_del (&me->me_list);
+                        lnet_me_free (me);
+                }
+        }
+        LIBCFS_FREE(mhash, sizeof(struct list_head) * LNET_PORTAL_HASH_SIZE);
+}
+
 int
 lnet_init_finalizers(void)
 {
@@ -712,7 +745,7 @@ lnet_prepare(lnet_pid_t requested_pid)
         }
 
         for (i = 0; i < the_lnet.ln_nportals; i++) {
-                CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_ml));
+                CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_mlist));
                 CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_msgq));
                 the_lnet.ln_portals[i].ptl_options = 0;
         }
@@ -751,21 +784,26 @@ lnet_unprepare (void)
         for (idx = 0; idx < the_lnet.ln_nportals; idx++) {
                 LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq));
 
-                while (!list_empty (&the_lnet.ln_portals[idx].ptl_ml)) {
-                        lnet_me_t *me = list_entry (the_lnet.ln_portals[idx].ptl_ml.next,
+                while (!list_empty (&the_lnet.ln_portals[idx].ptl_mlist)) {
+                        lnet_me_t *me = list_entry (the_lnet.ln_portals[idx].ptl_mlist.next,
                                                     lnet_me_t, me_list);
 
-                        CERROR ("Active me %p on exit\n", me);
+                        CERROR ("Active ME %p on exit\n", me);
                         list_del (&me->me_list);
                         lnet_me_free (me);
                 }
+
+                if (the_lnet.ln_portals[idx].ptl_mhash != NULL) {
+                        LASSERT (lnet_portal_is_unique(&the_lnet.ln_portals[idx]));
+                        lnet_portal_mhash_free(the_lnet.ln_portals[idx].ptl_mhash);
+                }
         }
 
         while (!list_empty (&the_lnet.ln_active_mds)) {
                 lnet_libmd_t *md = list_entry (the_lnet.ln_active_mds.next,
                                                lnet_libmd_t, md_list);
 
-                CERROR ("Active md %p on exit\n", md);
+                CERROR ("Active MD %p on exit\n", md);
                 list_del_init (&md->md_list);
                 lnet_md_free (md);
         }
@@ -774,7 +812,7 @@ lnet_unprepare (void)
                 lnet_eq_t *eq = list_entry (the_lnet.ln_active_eqs.next,
                                             lnet_eq_t, eq_list);
 
-                CERROR ("Active eq %p on exit\n", eq);
+                CERROR ("Active EQ %p on exit\n", eq);
                 list_del (&eq->eq_list);
                 lnet_eq_free (eq);
         }
index 90131a1..52b0626 100644 (file)
 
 #include <lnet/lib-lnet.h>
 
+/*
+ * Check that an ME described by (id, match_bits, ignore_bits) is compatible
+ * with the matching type of portal ptl, fixing that type on first use.
+ *
+ * If neither LNET_PTL_MATCH_UNIQUE nor LNET_PTL_MATCH_WILDCARD is set yet,
+ * the portal is marked according to lnet_match_is_unique() for this ME; for
+ * a unique ME a hash table is allocated and installed as ptl->ptl_mhash.
+ *
+ * Returns 0 on success, -ENOMEM if the hash table cannot be allocated, or
+ * -EPERM if the ME's uniqueness disagrees with the type the portal already
+ * has.
+ */
+static int
+lnet_me_match_portal(lnet_portal_t *ptl, lnet_process_id_t id,
+                     __u64 match_bits, __u64 ignore_bits)
+{
+        struct list_head *mhash = NULL;
+        int               unique;
+
+        /* a portal must never carry both matching types at once */
+        LASSERT (!(lnet_portal_is_unique(ptl) &&
+                   lnet_portal_is_wildcard(ptl)));
+
+        /* prefer to check w/o any lock */
+        unique = lnet_match_is_unique(id, match_bits, ignore_bits);
+        if (likely(lnet_portal_is_unique(ptl) ||
+                   lnet_portal_is_wildcard(ptl)))
+                goto match;
+
+        /* unset, new portal */
+        /* allocate before LNET_LOCK() so nothing is allocated under the lock */
+        if (unique) {
+                mhash = lnet_portal_mhash_alloc();
+                if (mhash == NULL)
+                        return -ENOMEM;
+        }
+
+        /* repeat the type check now that the lock is held */
+        LNET_LOCK();
+        if (lnet_portal_is_unique(ptl) ||
+            lnet_portal_is_wildcard(ptl)) {
+                /* someone set it before me */
+                /* our speculative table was never installed; discard it */
+                if (mhash != NULL)
+                        lnet_portal_mhash_free(mhash);
+                LNET_UNLOCK();
+                goto match;
+        }
+
+        /* still not set */
+        LASSERT (ptl->ptl_mhash == NULL);
+        if (unique) {
+                /* install the table before setting the option bit */
+                ptl->ptl_mhash = mhash;
+                lnet_portal_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
+        } else {
+                lnet_portal_setopt(ptl, LNET_PTL_MATCH_WILDCARD);
+        }
+        LNET_UNLOCK();
+        return 0;
+
+ match:
+        /* portal type already fixed; reached with LNET_LOCK not held */
+        if (lnet_portal_is_unique(ptl) && !unique)
+                return -EPERM;
+
+        if (lnet_portal_is_wildcard(ptl) && unique)
+                return -EPERM;
+
+        return 0;
+}
+
 int
 LNetMEAttach(unsigned int portal,
              lnet_process_id_t match_id,
@@ -49,7 +103,10 @@ LNetMEAttach(unsigned int portal,
              lnet_unlink_t unlink, lnet_ins_pos_t pos,
              lnet_handle_me_t *handle)
 {
-        lnet_me_t     *me;
+        lnet_me_t        *me;
+        lnet_portal_t    *ptl;
+        struct list_head *head;
+        int               rc;
 
         LASSERT (the_lnet.ln_init);
         LASSERT (the_lnet.ln_refcount > 0);
@@ -57,6 +114,11 @@ LNetMEAttach(unsigned int portal,
         if (portal >= the_lnet.ln_nportals)
                 return -EINVAL;
 
+        ptl = &the_lnet.ln_portals[portal];
+        rc = lnet_me_match_portal(ptl, match_id, match_bits, ignore_bits);
+        if (rc != 0)
+                return rc;
+
         me = lnet_me_alloc();
         if (me == NULL)
                 return -ENOMEM;
@@ -72,10 +134,13 @@ LNetMEAttach(unsigned int portal,
 
         lnet_initialise_handle (&me->me_lh, LNET_COOKIE_TYPE_ME);
 
+        head = lnet_portal_me_head(portal, match_id, match_bits);
+        LASSERT (head != NULL);
+
         if (pos == LNET_INS_AFTER)
-                list_add_tail(&me->me_list, &(the_lnet.ln_portals[portal].ptl_ml));
+                list_add_tail(&me->me_list, head);
         else
-                list_add(&me->me_list, &(the_lnet.ln_portals[portal].ptl_ml));
+                list_add(&me->me_list, head);
 
         lnet_me2handle(handle, me);
 
@@ -93,6 +158,7 @@ LNetMEInsert(lnet_handle_me_t current_meh,
 {
         lnet_me_t     *current_me;
         lnet_me_t     *new_me;
+        lnet_portal_t *ptl;
 
         LASSERT (the_lnet.ln_init);
         LASSERT (the_lnet.ln_refcount > 0);
@@ -111,6 +177,16 @@ LNetMEInsert(lnet_handle_me_t current_meh,
                 return -ENOENT;
         }
 
+        LASSERT (current_me->me_portal < the_lnet.ln_nportals);
+
+        ptl = &the_lnet.ln_portals[current_me->me_portal];
+        if (lnet_portal_is_unique(ptl)) {
+                /* inserting relative to another ME makes no sense on a unique portal */
+                lnet_me_free (new_me);
+                LNET_UNLOCK();
+                return -EPERM;
+        }
+
         new_me->me_portal = current_me->me_portal;
         new_me->me_match_id = match_id;
         new_me->me_match_bits = match_bits;
index 43482fa..9e350eb 100644 (file)
@@ -155,6 +155,7 @@ lnet_match_md(int index, int op_mask, lnet_process_id_t src,
               lnet_libmd_t **md_out)
 {
         lnet_portal_t    *ptl = &the_lnet.ln_portals[index];
+        struct list_head *head;
         lnet_me_t        *me;
         lnet_me_t        *tmp;
         lnet_libmd_t     *md;
@@ -169,7 +170,11 @@ lnet_match_md(int index, int op_mask, lnet_process_id_t src,
                 return LNET_MATCHMD_DROP;
         }
 
-        list_for_each_entry_safe (me, tmp, &ptl->ptl_ml, me_list) {
+        head = lnet_portal_me_head(index, src, match_bits);
+        if (head == NULL) /* nobody posted anything on this portal */
+                goto out;
+
+        list_for_each_entry_safe (me, tmp, head, me_list) {
                 md = me->me_md;
 
                 /* ME attached but MD not attached yet */
@@ -198,8 +203,9 @@ lnet_match_md(int index, int op_mask, lnet_process_id_t src,
                 /* not reached */
         }
 
+ out:
         if (op_mask == LNET_MD_OP_GET ||
-            (ptl->ptl_options & LNET_PTL_LAZY) == 0)
+            !lnet_portal_is_lazy(ptl))
                 return LNET_MATCHMD_DROP;
 
         return LNET_MATCHMD_NONE;
@@ -1562,9 +1568,7 @@ LNetSetLazyPortal(int portal)
         CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
 
         LNET_LOCK();
-
-        ptl->ptl_options |= LNET_PTL_LAZY;
-
+        lnet_portal_setopt(ptl, LNET_PTL_LAZY);
         LNET_UNLOCK();
 
         return 0;
@@ -1582,7 +1586,7 @@ LNetClearLazyPortal(int portal)
 
         LNET_LOCK();
 
-        if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) {
+        if (!lnet_portal_is_lazy(ptl)) {
                 LNET_UNLOCK();
                 return 0;
         }
@@ -1597,7 +1601,7 @@ LNetClearLazyPortal(int portal)
         list_del_init(&ptl->ptl_msgq);
 
         ptl->ptl_msgq_version++;
-        ptl->ptl_options &= ~LNET_PTL_LAZY;
+        lnet_portal_unsetopt(ptl, LNET_PTL_LAZY);
 
         LNET_UNLOCK();
 
@@ -1652,12 +1656,13 @@ lnet_match_blocked_msg(lnet_libmd_t *md)
         struct list_head *tmp;
         struct list_head *entry;
         lnet_msg_t       *msg;
+        lnet_portal_t    *ptl;
         lnet_me_t        *me  = md->md_me;
-        lnet_portal_t    *ptl = &the_lnet.ln_portals[me->me_portal];
 
         LASSERT (me->me_portal < the_lnet.ln_nportals);
 
-        if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) {
+        ptl = &the_lnet.ln_portals[me->me_portal];
+        if (!lnet_portal_is_lazy(ptl)) {
                 LASSERT (list_empty(&ptl->ptl_msgq));
                 return;
         }
@@ -1761,7 +1766,6 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
         hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset);
 
         index = hdr->msg.put.ptl_index;
-        ptl = &the_lnet.ln_portals[index];
 
         LNET_LOCK();
 
@@ -1780,6 +1784,7 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
                 return 0;
 
         case LNET_MATCHMD_NONE:
+                ptl = &the_lnet.ln_portals[index];
                 version = ptl->ptl_ml_version;
 
                 rc = 0;
@@ -1788,7 +1793,7 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
 
                 if (rc == 0 &&
                     !the_lnet.ln_shutdown &&
-                    ((ptl->ptl_options & LNET_PTL_LAZY) != 0)) {
+                    lnet_portal_is_lazy(ptl)) {
                         if (version != ptl->ptl_ml_version)
                                 goto again;