Whamcloud - gitweb
LU-56 lnet: match-table for Portals
authorLiang Zhen <liang@whamcloud.com>
Tue, 5 Jun 2012 10:02:55 +0000 (18:02 +0800)
committerAndreas Dilger <adilger@whamcloud.com>
Fri, 22 Jun 2012 07:56:40 +0000 (03:56 -0400)
Create a sub-object named "match-table" for each Portal; MEs will
be attached to the match-table instead of directly to the Portal.
Although this patch has only one match-table per Portal, upcoming
changes will create multiple match-tables for each Portal:
- unique-match Portal
  MEs will be scattered across different match-tables according to
  their match information
- wildcard Portal
  LND threads just grab an ME/MD from the match-table corresponding
  to the current CPT (CPU partition).

This patch also cleans up the code that handles delayed messages.

Signed-off-by: Liang Zhen <liang@whamcloud.com>
Change-Id: I2b24723c3bd2a6664f2b241840de19d5f43be11f
Reviewed-on: http://review.whamcloud.com/3043
Reviewed-by: Doug Oucharek <doug@whamcloud.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Bobi Jam <bobijam@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-types.h
lnet/lnet/lib-md.c
lnet/lnet/lib-me.c
lnet/lnet/lib-move.c
lnet/lnet/lib-ptl.c

index 4c06475..ab5dea5 100644 (file)
@@ -86,13 +86,6 @@ static inline int lnet_md_unlinkable (lnet_libmd_t *md)
                 lnet_md_exhausted(md));
 }
 
                 lnet_md_exhausted(md));
 }
 
-static inline unsigned int
-lnet_match_to_hash(lnet_process_id_t id, __u64 mbits)
-{
-        mbits += id.nid + id.pid;
-        return cfs_hash_long((unsigned long)mbits, LNET_PORTAL_HASH_BITS);
-}
-
 #ifdef __KERNEL__
 #define LNET_LOCK()        cfs_spin_lock(&the_lnet.ln_lock)
 #define LNET_UNLOCK()      cfs_spin_unlock(&the_lnet.ln_lock)
 #ifdef __KERNEL__
 #define LNET_LOCK()        cfs_spin_lock(&the_lnet.ln_lock)
 #define LNET_UNLOCK()      cfs_spin_unlock(&the_lnet.ln_lock)
@@ -639,6 +632,7 @@ void lnet_return_tx_credits_locked(lnet_msg_t *msg);
 void lnet_return_rx_credits_locked(lnet_msg_t *msg);
 
 /* portals functions */
 void lnet_return_rx_credits_locked(lnet_msg_t *msg);
 
 /* portals functions */
+/* portals attributes */
 static inline int
 lnet_ptl_is_lazy(lnet_portal_t *ptl)
 {
 static inline int
 lnet_ptl_is_lazy(lnet_portal_t *ptl)
 {
@@ -669,30 +663,32 @@ lnet_ptl_unsetopt(lnet_portal_t *ptl, int opt)
        ptl->ptl_options &= ~opt;
 }
 
        ptl->ptl_options &= ~opt;
 }
 
-static inline cfs_list_t *
-lnet_ptl_me_head(int index, lnet_process_id_t id, __u64 mbits)
-{
-       lnet_portal_t *ptl = the_lnet.ln_portals[index];
-
-       if (lnet_ptl_is_wildcard(ptl)) {
-               return &ptl->ptl_mlist;
-       } else if (lnet_ptl_is_unique(ptl)) {
-               LASSERT(ptl->ptl_mhash != NULL);
-               return &ptl->ptl_mhash[lnet_match_to_hash(id, mbits)];
-       }
-       return NULL;
-}
-
+/* match-table functions */
+cfs_list_t *lnet_mt_match_head(struct lnet_match_table *mtable,
+                              lnet_process_id_t id, __u64 mbits);
+struct lnet_match_table *lnet_mt_of_attach(unsigned int index,
+                                          lnet_process_id_t id, __u64 mbits,
+                                          __u64 ignore_bits,
+                                          lnet_ins_pos_t pos);
+struct lnet_match_table *lnet_mt_of_match(unsigned int index,
+                                         lnet_process_id_t id, __u64 mbits);
+int lnet_mt_match_md(struct lnet_match_table *mtable,
+                    int op_mask, lnet_process_id_t src,
+                    unsigned int rlength, unsigned int roffset,
+                    __u64 match_bits, lnet_msg_t *msg);
+
+/* portals match/attach functions */
+void lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
+                       cfs_list_t *matches, cfs_list_t *drops);
+void lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md);
+int lnet_ptl_match_md(unsigned int index, int op_mask, lnet_process_id_t src,
+                     unsigned int rlength, unsigned int roffset,
+                     __u64 match_bits, lnet_msg_t *msg);
+
+/* initialized and finalize portals */
 int lnet_portals_create(void);
 void lnet_portals_destroy(void);
 
 int lnet_portals_create(void);
 void lnet_portals_destroy(void);
 
-int lnet_ptl_type_match(struct lnet_portal *ptl, lnet_process_id_t id,
-                       __u64 mbits, __u64 ignore_bits);
-void lnet_match_blocked_msg(lnet_libmd_t *md);
-int lnet_match_md(int index, int op_mask, lnet_process_id_t src,
-                 unsigned int rlength, unsigned int roffset,
-                 __u64 match_bits, lnet_msg_t *msg);
-
 /* message functions */
 int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr,
                 lnet_nid_t fromnid, void *private, int rdma_req);
 /* message functions */
 int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr,
                 lnet_nid_t fromnid, void *private, int rdma_req);
index 3cd61b8..a9f6322 100644 (file)
@@ -191,8 +191,16 @@ typedef struct lnet_msg {
        lnet_nid_t              msg_from;
        __u32                   msg_type;
 
        lnet_nid_t              msg_from;
        __u32                   msg_type;
 
-       unsigned int            msg_rx_committed:1;
+       /* commited for sending */
        unsigned int            msg_tx_committed:1;
        unsigned int            msg_tx_committed:1;
+       /* queued for tx credit */
+       unsigned int            msg_tx_delayed:1;
+       /* commited for receiving */
+       unsigned int            msg_rx_committed:1;
+       /* queued for RX buffer */
+       unsigned int            msg_rx_delayed:1;
+       /* ready for pending on RX delay list */
+       unsigned int            msg_rx_ready_delay:1;
 
         unsigned int          msg_vmflush:1;      /* VM trying to free memory */
         unsigned int          msg_target_is_router:1; /* sending to a router */
 
         unsigned int          msg_vmflush:1;      /* VM trying to free memory */
         unsigned int          msg_target_is_router:1; /* sending to a router */
@@ -200,7 +208,6 @@ typedef struct lnet_msg {
         unsigned int          msg_ack:1;          /* ack on finalize (PUT) */
         unsigned int          msg_sending:1;      /* outgoing message */
         unsigned int          msg_receiving:1;    /* being received */
         unsigned int          msg_ack:1;          /* ack on finalize (PUT) */
         unsigned int          msg_sending:1;      /* outgoing message */
         unsigned int          msg_receiving:1;    /* being received */
-        unsigned int          msg_delayed:1;      /* had to Q for buffer or tx credit */
         unsigned int          msg_txcredit:1;     /* taken an NI send credit */
         unsigned int          msg_peertxcredit:1; /* taken a peer send credit */
         unsigned int          msg_rtrcredit:1;    /* taken a globel router credit */
         unsigned int          msg_txcredit:1;     /* taken an NI send credit */
         unsigned int          msg_peertxcredit:1; /* taken a peer send credit */
         unsigned int          msg_rtrcredit:1;    /* taken a globel router credit */
@@ -550,17 +557,27 @@ enum {
 #define LNET_PTL_MATCH_WILDCARD     (1 << 2)    /* wildcard match, request portal */
 
 /* ME hash of RDMA portal */
 #define LNET_PTL_MATCH_WILDCARD     (1 << 2)    /* wildcard match, request portal */
 
 /* ME hash of RDMA portal */
-#define LNET_PORTAL_HASH_BITS        8
-#define LNET_PORTAL_HASH_SIZE       (1 << LNET_PORTAL_HASH_BITS)
+#define LNET_MT_HASH_BITS              8
+#define LNET_MT_HASH_SIZE              (1 << LNET_MT_HASH_BITS)
+
+/* portal match table */
+struct lnet_match_table {
+       /* reserved for upcoming patches, CPU partition ID */
+       unsigned int            mt_cpt;
+       unsigned int            mt_portal;      /* portal index */
+       cfs_list_t              mt_mlist;       /* matching list */
+       cfs_list_t              *mt_mhash;      /* matching hash */
+};
 
 typedef struct lnet_portal {
        unsigned int            ptl_index;      /* portal ID, reserved */
 
 typedef struct lnet_portal {
        unsigned int            ptl_index;      /* portal ID, reserved */
-        cfs_list_t       *ptl_mhash;            /* match hash */
-        cfs_list_t        ptl_mlist;            /* match list */
-        cfs_list_t        ptl_msgq;             /* messages blocking for MD */
-        __u64             ptl_ml_version;       /* validity stamp, only changed for new attached MD */
-        __u64             ptl_msgq_version;     /* validity stamp */
-        unsigned int      ptl_options;
+       /* flags on this portal: lazy, unique... */
+       unsigned int            ptl_options;
+       /* Now we only have single instance for each portal,
+        * will have instance per CPT in upcoming patches */
+       struct lnet_match_table *ptl_mtable;
+       /* messages blocking for MD */
+       cfs_list_t              ptl_msgq;
 } lnet_portal_t;
 
 #define LNET_LH_HASH_BITS      12
 } lnet_portal_t;
 
 #define LNET_LH_HASH_BITS      12
index 87b2613..c189fb9 100644 (file)
@@ -53,8 +53,8 @@ lnet_md_unlink(lnet_libmd_t *md)
                 /* Disassociate from ME (if any), and unlink it if it was created
                  * with LNET_UNLINK */
                 if (me != NULL) {
                 /* Disassociate from ME (if any), and unlink it if it was created
                  * with LNET_UNLINK */
                 if (me != NULL) {
-                        md->md_me = NULL;
-                        me->me_md = NULL;
+                       /* detach MD from portal */
+                       lnet_ptl_detach_md(me, md);
                         if (me->me_unlink == LNET_UNLINK)
                                 lnet_me_unlink(me);
                 }
                         if (me->me_unlink == LNET_UNLINK)
                                 lnet_me_unlink(me);
                 }
@@ -263,6 +263,8 @@ int
 LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
              lnet_unlink_t unlink, lnet_handle_md_t *handle)
 {
 LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
              lnet_unlink_t unlink, lnet_handle_md_t *handle)
 {
+       CFS_LIST_HEAD   (matches);
+       CFS_LIST_HEAD   (drops);
         lnet_me_t     *me;
         lnet_libmd_t  *md;
         int            rc;
         lnet_me_t     *me;
         lnet_libmd_t  *md;
         int            rc;
@@ -299,17 +301,17 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
        if (rc != 0)
                goto failed;
 
        if (rc != 0)
                goto failed;
 
-       the_lnet.ln_portals[me->me_portal]->ptl_ml_version++;
-
-       me->me_md = md;
-       md->md_me = me;
+       /* attach this MD to portal of ME and check if it matches any
+        * blocked msgs on this portal */
+       lnet_ptl_attach_md(me, md, &matches, &drops);
 
        lnet_md2handle(handle, md);
 
 
        lnet_md2handle(handle, md);
 
-       /* check if this MD matches any blocked msgs */
-       lnet_match_blocked_msg(md);   /* expects LNET_LOCK held */
-
        LNET_UNLOCK();
        LNET_UNLOCK();
+
+       lnet_drop_delayed_msg_list(&drops, "Bad match");
+       lnet_recv_delayed_msg_list(&matches);
+
        return 0;
 
  failed:
        return 0;
 
  failed:
index d798b19..72d4514 100644 (file)
@@ -77,10 +77,9 @@ LNetMEAttach(unsigned int portal,
              lnet_unlink_t unlink, lnet_ins_pos_t pos,
              lnet_handle_me_t *handle)
 {
              lnet_unlink_t unlink, lnet_ins_pos_t pos,
              lnet_handle_me_t *handle)
 {
+       struct lnet_match_table *mtable;
         lnet_me_t        *me;
         lnet_me_t        *me;
-        lnet_portal_t    *ptl;
         cfs_list_t       *head;
         cfs_list_t       *head;
-        int               rc;
 
         LASSERT (the_lnet.ln_init);
         LASSERT (the_lnet.ln_refcount > 0);
 
         LASSERT (the_lnet.ln_init);
         LASSERT (the_lnet.ln_refcount > 0);
@@ -88,9 +87,9 @@ LNetMEAttach(unsigned int portal,
         if ((int)portal >= the_lnet.ln_nportals)
                 return -EINVAL;
 
         if ((int)portal >= the_lnet.ln_nportals)
                 return -EINVAL;
 
-       ptl = the_lnet.ln_portals[portal];
-       rc = lnet_ptl_type_match(ptl, match_id, match_bits, ignore_bits);
-       if (!rc)
+       mtable = lnet_mt_of_attach(portal, match_id,
+                                  match_bits, ignore_bits, pos);
+       if (mtable == NULL) /* can't match portal type */
                return -EPERM;
 
         me = lnet_me_alloc();
                return -EPERM;
 
         me = lnet_me_alloc();
@@ -107,7 +106,7 @@ LNetMEAttach(unsigned int portal,
         me->me_md = NULL;
 
        lnet_res_lh_initialize(&the_lnet.ln_me_container, &me->me_lh);
         me->me_md = NULL;
 
        lnet_res_lh_initialize(&the_lnet.ln_me_container, &me->me_lh);
-       head = lnet_ptl_me_head(portal, match_id, match_bits);
+       head = lnet_mt_match_head(mtable, match_id, match_bits);
         LASSERT (head != NULL);
 
         if (pos == LNET_INS_AFTER)
         LASSERT (head != NULL);
 
         if (pos == LNET_INS_AFTER)
@@ -248,12 +247,15 @@ LNetMEUnlink(lnet_handle_me_t meh)
 void
 lnet_me_unlink(lnet_me_t *me)
 {
 void
 lnet_me_unlink(lnet_me_t *me)
 {
-        cfs_list_del (&me->me_list);
+       cfs_list_del(&me->me_list);
 
 
-        if (me->me_md != NULL) {
-                me->me_md->md_me = NULL;
-                lnet_md_unlink(me->me_md);
-        }
+       if (me->me_md != NULL) {
+               lnet_libmd_t *md = me->me_md;
+
+               /* detach MD from portal of this ME */
+               lnet_ptl_detach_md(me, md);
+               lnet_md_unlink(md);
+       }
 
        lnet_res_lh_invalidate(&me->me_lh);
        lnet_me_free_locked(me);
 
        lnet_res_lh_invalidate(&me->me_lh);
        lnet_me_free_locked(me);
index 3605f4c..ad3d45d 100644 (file)
@@ -712,38 +712,43 @@ lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg)
 }
 
 int
 }
 
 int
-lnet_eager_recv_locked(lnet_msg_t *msg)
+lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg)
 {
 {
-        lnet_peer_t *peer;
-        lnet_ni_t   *ni;
-        int          rc = 0;
-
-        LASSERT (!msg->msg_delayed);
-        msg->msg_delayed = 1;
-
-        LASSERT (msg->msg_receiving);
-        LASSERT (!msg->msg_sending);
+       int     rc;
+
+       LASSERT(!msg->msg_sending);
+       LASSERT(msg->msg_receiving);
+       LASSERT(ni->ni_lnd->lnd_eager_recv != NULL);
+
+       msg->msg_rx_ready_delay = 1;
+       rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
+                                         &msg->msg_private);
+       if (rc != 0) {
+               CERROR("recv from %s / send to %s aborted: "
+                      "eager_recv failed %d\n",
+                      libcfs_nid2str(msg->msg_rxpeer->lp_nid),
+                      libcfs_id2str(msg->msg_target), rc);
+               LASSERT(rc < 0); /* required by my callers */
+       }
 
 
-        peer = msg->msg_rxpeer;
-        ni   = peer->lp_ni;
+       return rc;
+}
 
 
-        if (ni->ni_lnd->lnd_eager_recv != NULL) {
-                LNET_UNLOCK();
+int
+lnet_ni_eager_recv_locked(lnet_ni_t *ni, lnet_msg_t *msg)
+{
+       int     rc;
 
 
-                rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
-                                                  &msg->msg_private);
-                if (rc != 0) {
-                        CERROR("recv from %s / send to %s aborted: "
-                               "eager_recv failed %d\n",
-                               libcfs_nid2str(peer->lp_nid),
-                               libcfs_id2str(msg->msg_target), rc);
-                        LASSERT (rc < 0); /* required by my callers */
-                }
+       if (ni->ni_lnd->lnd_eager_recv == NULL) {
+               msg->msg_rx_ready_delay = 1;
+               return 0;
+       }
 
 
-                LNET_LOCK();
-        }
+       LNET_UNLOCK();
+       rc = lnet_ni_eager_recv(ni, msg);
+       LNET_LOCK();
 
 
-        return rc;
+       return rc;
 }
 
 /* NB: caller shall hold a ref on 'lp' as I'd drop LNET_LOCK */
 }
 
 /* NB: caller shall hold a ref on 'lp' as I'd drop LNET_LOCK */
@@ -859,8 +864,8 @@ lnet_post_send_locked (lnet_msg_t *msg, int do_send)
         lnet_ni_t   *ni = lp->lp_ni;
 
         /* non-lnet_send() callers have checked before */
         lnet_ni_t   *ni = lp->lp_ni;
 
         /* non-lnet_send() callers have checked before */
-        LASSERT (!do_send || msg->msg_delayed);
-        LASSERT (!msg->msg_receiving);
+       LASSERT(!do_send || msg->msg_tx_delayed);
+       LASSERT(!msg->msg_receiving);
 
         /* NB 'lp' is always the next hop */
         if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
 
         /* NB 'lp' is always the next hop */
         if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
@@ -890,7 +895,7 @@ lnet_post_send_locked (lnet_msg_t *msg, int do_send)
                         lp->lp_mintxcredits = lp->lp_txcredits;
 
                 if (lp->lp_txcredits < 0) {
                         lp->lp_mintxcredits = lp->lp_txcredits;
 
                 if (lp->lp_txcredits < 0) {
-                        msg->msg_delayed = 1;
+                       msg->msg_tx_delayed = 1;
                         cfs_list_add_tail(&msg->msg_list, &lp->lp_txq);
                         return EAGAIN;
                 }
                         cfs_list_add_tail(&msg->msg_list, &lp->lp_txq);
                         return EAGAIN;
                 }
@@ -907,7 +912,7 @@ lnet_post_send_locked (lnet_msg_t *msg, int do_send)
                         ni->ni_mintxcredits = ni->ni_txcredits;
 
                 if (ni->ni_txcredits < 0) {
                         ni->ni_mintxcredits = ni->ni_txcredits;
 
                 if (ni->ni_txcredits < 0) {
-                        msg->msg_delayed = 1;
+                       msg->msg_tx_delayed = 1;
                         cfs_list_add_tail(&msg->msg_list, &ni->ni_txq);
                         return EAGAIN;
                 }
                         cfs_list_add_tail(&msg->msg_list, &ni->ni_txq);
                         return EAGAIN;
                 }
@@ -954,8 +959,8 @@ lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv)
         LASSERT (msg->msg_receiving);
         LASSERT (!msg->msg_sending);
 
         LASSERT (msg->msg_receiving);
         LASSERT (!msg->msg_sending);
 
-        /* non-lnet_parse callers only send delayed messages */
-        LASSERT (!do_recv || msg->msg_delayed);
+       /* non-lnet_parse callers only receive delayed messages */
+       LASSERT(!do_recv || msg->msg_rx_delayed);
 
         if (!msg->msg_peerrtrcredit) {
                 LASSERT ((lp->lp_rtrcredits < 0) ==
 
         if (!msg->msg_peerrtrcredit) {
                 LASSERT ((lp->lp_rtrcredits < 0) ==
@@ -968,7 +973,8 @@ lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv)
 
                 if (lp->lp_rtrcredits < 0) {
                         /* must have checked eager_recv before here */
 
                 if (lp->lp_rtrcredits < 0) {
                         /* must have checked eager_recv before here */
-                        LASSERT (msg->msg_delayed);
+                       LASSERT(msg->msg_rx_ready_delay);
+                       msg->msg_rx_delayed = 1;
                         cfs_list_add_tail(&msg->msg_list, &lp->lp_rtrq);
                         return EAGAIN;
                 }
                         cfs_list_add_tail(&msg->msg_list, &lp->lp_rtrq);
                         return EAGAIN;
                 }
@@ -987,7 +993,8 @@ lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv)
 
                 if (rbp->rbp_credits < 0) {
                         /* must have checked eager_recv before here */
 
                 if (rbp->rbp_credits < 0) {
                         /* must have checked eager_recv before here */
-                        LASSERT (msg->msg_delayed);
+                       LASSERT(msg->msg_rx_ready_delay);
+                       msg->msg_rx_delayed = 1;
                         cfs_list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
                         return EAGAIN;
                 }
                         cfs_list_add_tail(&msg->msg_list, &rbp->rbp_msgs);
                         return EAGAIN;
                 }
@@ -1031,7 +1038,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg)
                         cfs_list_del(&msg2->msg_list);
 
                         LASSERT(msg2->msg_txpeer->lp_ni == ni);
                         cfs_list_del(&msg2->msg_list);
 
                         LASSERT(msg2->msg_txpeer->lp_ni == ni);
-                        LASSERT(msg2->msg_delayed);
+                       LASSERT(msg2->msg_tx_delayed);
 
                         (void) lnet_post_send_locked(msg2, 1);
                 }
 
                         (void) lnet_post_send_locked(msg2, 1);
                 }
@@ -1053,8 +1060,8 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg)
                                               lnet_msg_t, msg_list);
                         cfs_list_del(&msg2->msg_list);
 
                                               lnet_msg_t, msg_list);
                         cfs_list_del(&msg2->msg_list);
 
-                        LASSERT (msg2->msg_txpeer == txpeer);
-                        LASSERT (msg2->msg_delayed);
+                       LASSERT(msg2->msg_txpeer == txpeer);
+                       LASSERT(msg2->msg_tx_delayed);
 
                         (void) lnet_post_send_locked(msg2, 1);
                 }
 
                         (void) lnet_post_send_locked(msg2, 1);
                 }
@@ -1366,7 +1373,7 @@ lnet_recv_put(lnet_ni_t *ni, lnet_msg_t *msg)
        msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
                        (msg->msg_md->md_options & LNET_MD_ACK_DISABLE) == 0);
 
        msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
                        (msg->msg_md->md_options & LNET_MD_ACK_DISABLE) == 0);
 
-       lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_delayed,
+       lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed,
                     msg->msg_offset, msg->msg_wanted, hdr->payload_length);
 }
 
                     msg->msg_offset, msg->msg_wanted, hdr->payload_length);
 }
 
@@ -1375,11 +1382,9 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
 {
         int               rc;
         int               index;
 {
         int               rc;
         int               index;
-        __u64             version;
         lnet_hdr_t       *hdr = &msg->msg_hdr;
         unsigned int      rlength = hdr->payload_length;
         lnet_process_id_t src= {0};
         lnet_hdr_t       *hdr = &msg->msg_hdr;
         unsigned int      rlength = hdr->payload_length;
         lnet_process_id_t src= {0};
-        lnet_portal_t    *ptl;
 
         src.nid = hdr->src_nid;
         src.pid = hdr->src_pid;
 
         src.nid = hdr->src_nid;
         src.pid = hdr->src_pid;
@@ -1391,46 +1396,27 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
 
         index = hdr->msg.put.ptl_index;
 
 
         index = hdr->msg.put.ptl_index;
 
-        LNET_LOCK();
+       msg->msg_rx_ready_delay = ni->ni_lnd->lnd_eager_recv == NULL;
 
  again:
 
  again:
-        rc = lnet_match_md(index, LNET_MD_OP_PUT, src,
-                           rlength, hdr->msg.put.offset,
-                          hdr->msg.put.match_bits, msg);
+       rc = lnet_ptl_match_md(index, LNET_MD_OP_PUT, src,
+                              rlength, hdr->msg.put.offset,
+                              hdr->msg.put.match_bits, msg);
         switch (rc) {
         default:
                 LBUG();
 
         case LNET_MATCHMD_OK:
         switch (rc) {
         default:
                 LBUG();
 
         case LNET_MATCHMD_OK:
-                LNET_UNLOCK();
                lnet_recv_put(ni, msg);
                 return 0;
 
         case LNET_MATCHMD_NONE:
                lnet_recv_put(ni, msg);
                 return 0;
 
         case LNET_MATCHMD_NONE:
-               ptl = the_lnet.ln_portals[index];
-                version = ptl->ptl_ml_version;
-
-                rc = 0;
-                if (!msg->msg_delayed)
-                        rc = lnet_eager_recv_locked(msg);
-
-                if (rc == 0 &&
-                    !the_lnet.ln_shutdown &&
-                   lnet_ptl_is_lazy(ptl)) {
-                        if (version != ptl->ptl_ml_version)
-                                goto again;
-
-                        cfs_list_add_tail(&msg->msg_list, &ptl->ptl_msgq);
-                        ptl->ptl_msgq_version++;
-                        LNET_UNLOCK();
+               if (msg->msg_rx_delayed) /* attached on delayed list */
+                       return 0;
 
 
-                        CDEBUG(D_NET, "Delaying PUT from %s portal %d match "
-                               LPU64" offset %d length %d: no match \n",
-                               libcfs_id2str(src), index,
-                               hdr->msg.put.match_bits,
-                               hdr->msg.put.offset, rlength);
-                        return 0;
-                }
+               rc = lnet_ni_eager_recv(ni, msg);
+               if (rc == 0)
+                       goto again;
                 /* fall through */
 
         case LNET_MATCHMD_DROP:
                 /* fall through */
 
         case LNET_MATCHMD_DROP:
@@ -1439,7 +1425,6 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
                         libcfs_id2str(src), index,
                         hdr->msg.put.match_bits,
                         hdr->msg.put.offset, rlength, rc);
                         libcfs_id2str(src), index,
                         hdr->msg.put.match_bits,
                         hdr->msg.put.offset, rlength, rc);
-                LNET_UNLOCK();
 
                 return ENOENT;          /* +ve: OK but no match */
         }
 
                 return ENOENT;          /* +ve: OK but no match */
         }
@@ -1462,11 +1447,10 @@ lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get)
         hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length);
         hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset);
 
         hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length);
         hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset);
 
-        LNET_LOCK();
-
-        rc = lnet_match_md(hdr->msg.get.ptl_index, LNET_MD_OP_GET, src,
-                           hdr->msg.get.sink_length, hdr->msg.get.src_offset,
-                          hdr->msg.get.match_bits, msg);
+       rc = lnet_ptl_match_md(hdr->msg.get.ptl_index, LNET_MD_OP_GET, src,
+                              hdr->msg.get.sink_length,
+                              hdr->msg.get.src_offset,
+                              hdr->msg.get.match_bits, msg);
         if (rc == LNET_MATCHMD_DROP) {
                 CNETERR("Dropping GET from %s portal %d match "LPU64
                         " offset %d length %d\n",
         if (rc == LNET_MATCHMD_DROP) {
                 CNETERR("Dropping GET from %s portal %d match "LPU64
                         " offset %d length %d\n",
@@ -1475,14 +1459,11 @@ lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get)
                         hdr->msg.get.match_bits,
                         hdr->msg.get.src_offset,
                         hdr->msg.get.sink_length);
                         hdr->msg.get.match_bits,
                         hdr->msg.get.src_offset,
                         hdr->msg.get.sink_length);
-                LNET_UNLOCK();
                 return ENOENT;                  /* +ve: OK but no match */
         }
 
         LASSERT (rc == LNET_MATCHMD_OK);
 
                 return ENOENT;                  /* +ve: OK but no match */
         }
 
         LASSERT (rc == LNET_MATCHMD_OK);
 
-       LNET_UNLOCK();
-
        lnet_build_msg_event(msg, LNET_EVENT_GET);
 
        reply_wmd = hdr->msg.get.return_wmd;
        lnet_build_msg_event(msg, LNET_EVENT_GET);
 
        reply_wmd = hdr->msg.get.return_wmd;
@@ -1879,7 +1860,7 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
                 LNET_LOCK();
                 if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
                     lnet_msg2bufpool(msg)->rbp_credits <= 0) {
                 LNET_LOCK();
                 if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
                     lnet_msg2bufpool(msg)->rbp_credits <= 0) {
-                        rc = lnet_eager_recv_locked(msg);
+                       rc = lnet_ni_eager_recv_locked(ni, msg);
                         if (rc != 0) {
                                 LNET_UNLOCK();
                                 goto free_drop;
                         if (rc != 0) {
                                 LNET_UNLOCK();
                                 goto free_drop;
@@ -1949,7 +1930,7 @@ lnet_drop_delayed_msg_list(cfs_list_t *head, char *reason)
                id.pid = msg->msg_hdr.src_pid;
 
                LASSERT(msg->msg_md == NULL);
                id.pid = msg->msg_hdr.src_pid;
 
                LASSERT(msg->msg_md == NULL);
-               LASSERT(msg->msg_delayed);
+               LASSERT(msg->msg_rx_delayed);
                LASSERT(msg->msg_rxpeer != NULL);
                LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
 
                LASSERT(msg->msg_rxpeer != NULL);
                LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
 
@@ -1992,7 +1973,7 @@ lnet_recv_delayed_msg_list(cfs_list_t *head)
                id.nid = msg->msg_hdr.src_nid;
                id.pid = msg->msg_hdr.src_pid;
 
                id.nid = msg->msg_hdr.src_nid;
                id.pid = msg->msg_hdr.src_pid;
 
-               LASSERT(msg->msg_delayed);
+               LASSERT(msg->msg_rx_delayed);
                LASSERT(msg->msg_md != NULL);
                LASSERT(msg->msg_rxpeer != NULL);
                LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
                LASSERT(msg->msg_md != NULL);
                LASSERT(msg->msg_rxpeer != NULL);
                LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
index 5932ad7..ca24586 100644 (file)
 
 #include <lnet/lib-lnet.h>
 
 
 #include <lnet/lib-lnet.h>
 
-int
-lnet_ptl_type_match(struct lnet_portal *ptl, lnet_process_id_t match_id,
+static int
+lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id,
                    __u64 mbits, __u64 ignore_bits)
 {
                    __u64 mbits, __u64 ignore_bits)
 {
-       int unique;
+       struct lnet_portal      *ptl = the_lnet.ln_portals[index];
+       int                     unique;
 
        unique = ignore_bits == 0 &&
                 match_id.nid != LNET_NID_ANY &&
 
        unique = ignore_bits == 0 &&
                 match_id.nid != LNET_NID_ANY &&
@@ -158,42 +159,88 @@ lnet_try_match_md(int index, int op_mask, lnet_process_id_t src,
        return LNET_MATCHMD_OK;
 }
 
        return LNET_MATCHMD_OK;
 }
 
+struct lnet_match_table *
+lnet_mt_of_attach(unsigned int index, lnet_process_id_t id,
+                 __u64 mbits, __u64 ignore_bits, lnet_ins_pos_t pos)
+{
+       struct lnet_portal *ptl;
+
+       LASSERT(index < the_lnet.ln_nportals);
+
+       if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
+               return NULL; /* lnet_ptl_match_type() returned 0 */
+
+       ptl = the_lnet.ln_portals[index];
+       /* NB: for now each portal has exactly one match-table, so it
+        * is returned unconditionally (@pos is not consulted yet);
+        * upcoming changes will add a match-table per CPT and scatter
+        * MEs across those tables based on the attach information */
+       return ptl->ptl_mtable;
+}
+
+struct lnet_match_table *
+lnet_mt_of_match(unsigned int index, lnet_process_id_t id, __u64 mbits)
+{
+       struct lnet_portal *ptl;
+
+       LASSERT(index < the_lnet.ln_nportals);
+
+       ptl = the_lnet.ln_portals[index];
+       if (!lnet_ptl_is_unique(ptl) &&
+           !lnet_ptl_is_wildcard(ptl) && !lnet_ptl_is_lazy(ptl))
+               return NULL; /* portal is neither unique, wildcard nor lazy */
+
+       /* NB: for now each portal has exactly one match-table, so
+        * @id and @mbits are not consulted yet; upcoming changes will
+        * add a match-table per CPT and scatter requests across those
+        * tables based on the matching information */
+       return ptl->ptl_mtable;
+}
+
+cfs_list_t *
+lnet_mt_match_head(struct lnet_match_table *mtable,
+                  lnet_process_id_t id, __u64 mbits)
+{
+       struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
+
+       if (lnet_ptl_is_wildcard(ptl)) {
+               return &mtable->mt_mlist; /* wildcard portal: use the table's mt_mlist */
+
+       } else if (lnet_ptl_is_unique(ptl)) {
+               unsigned long hash = mbits + id.nid + id.pid; /* combine match bits with @id's nid and pid */
+
+               hash = cfs_hash_long(hash, LNET_MT_HASH_BITS);
+               return &mtable->mt_mhash[hash]; /* unique portal: use the selected mt_mhash bucket */
+       }
+
+       return NULL; /* portal is neither wildcard nor unique: no list head to return */
+}
+
 int
 int
-lnet_match_md(int index, int op_mask, lnet_process_id_t src,
-             unsigned int rlength, unsigned int roffset,
-             __u64 match_bits, lnet_msg_t *msg)
+lnet_mt_match_md(struct lnet_match_table *mtable,
+                int op_mask, lnet_process_id_t src,
+                unsigned int rlength, unsigned int roffset,
+                __u64 match_bits, lnet_msg_t *msg)
 {
 {
-       struct lnet_portal      *ptl = the_lnet.ln_portals[index];
        cfs_list_t              *head;
        lnet_me_t               *me;
        lnet_me_t               *tmp;
        cfs_list_t              *head;
        lnet_me_t               *me;
        lnet_me_t               *tmp;
-       lnet_libmd_t            *md;
        int                     rc;
 
        int                     rc;
 
-       CDEBUG(D_NET, "Request from %s of length %d into portal %d "
-              "MB="LPX64"\n", libcfs_id2str(src), rlength, index, match_bits);
-
-       if (index < 0 || index >= the_lnet.ln_nportals) {
-               CERROR("Invalid portal %d not in [0-%d]\n",
-                      index, the_lnet.ln_nportals);
-               return LNET_MATCHMD_DROP;
-       }
-
-       head = lnet_ptl_me_head(index, src, match_bits);
+       head = lnet_mt_match_head(mtable, src, match_bits);
        if (head == NULL) /* nobody posted anything on this portal */
                goto out;
 
        cfs_list_for_each_entry_safe(me, tmp, head, me_list) {
        if (head == NULL) /* nobody posted anything on this portal */
                goto out;
 
        cfs_list_for_each_entry_safe(me, tmp, head, me_list) {
-               md = me->me_md;
-
                /* ME attached but MD not attached yet */
                /* ME attached but MD not attached yet */
-               if (md == NULL)
+               if (me->me_md == NULL)
                        continue;
 
                        continue;
 
-               LASSERT(me == md->md_me);
+               LASSERT(me == me->me_md->md_me);
 
 
-               rc = lnet_try_match_md(index, op_mask, src, rlength,
-                                      roffset, match_bits, md, msg);
+               rc = lnet_try_match_md(mtable->mt_portal,
+                                      op_mask, src, rlength, roffset,
+                                      match_bits, me->me_md, msg);
                switch (rc) {
                default:
                        LBUG();
                switch (rc) {
                default:
                        LBUG();
@@ -212,43 +259,97 @@ lnet_match_md(int index, int op_mask, lnet_process_id_t src,
 
  out:
        if (op_mask == LNET_MD_OP_GET ||
 
  out:
        if (op_mask == LNET_MD_OP_GET ||
-           !lnet_ptl_is_lazy(ptl))
+           !lnet_ptl_is_lazy(the_lnet.ln_portals[mtable->mt_portal]))
                return LNET_MATCHMD_DROP;
 
        return LNET_MATCHMD_NONE;
 }
 
                return LNET_MATCHMD_DROP;
 
        return LNET_MATCHMD_NONE;
 }
 
-/* called with LNET_LOCK held */
-void
-lnet_match_blocked_msg(lnet_libmd_t *md)
+int
+lnet_ptl_match_md(unsigned int index, int op_mask, lnet_process_id_t src,
+                 unsigned int rlength, unsigned int roffset,
+                 __u64 match_bits, lnet_msg_t *msg)
 {
 {
-       CFS_LIST_HEAD           (drops);
-       CFS_LIST_HEAD           (matches);
-       cfs_list_t              *tmp;
-       cfs_list_t              *entry;
-       lnet_msg_t              *msg;
+       struct lnet_match_table *mtable;
        struct lnet_portal      *ptl;
        struct lnet_portal      *ptl;
-       lnet_me_t               *me  = md->md_me;
+       int                     rc;
 
 
-       LASSERT(me->me_portal < (unsigned int)the_lnet.ln_nportals);
+       CDEBUG(D_NET, "Request from %s of length %d into portal %d "
+              "MB="LPX64"\n", libcfs_id2str(src), rlength, index, match_bits);
 
 
-       ptl = the_lnet.ln_portals[me->me_portal];
-       if (!lnet_ptl_is_lazy(ptl)) {
-               LASSERT(cfs_list_empty(&ptl->ptl_msgq));
-               return;
+       if (index >= the_lnet.ln_nportals) {
+               CERROR("Invalid portal %d not in [0-%d]\n",
+                      index, the_lnet.ln_nportals);
+               return LNET_MATCHMD_DROP;
        }
 
        }
 
+       mtable = lnet_mt_of_match(index, src, match_bits);
+       if (mtable == NULL) {
+               CDEBUG(D_NET, "Drop early message from %s of length %d into "
+                             "portal %d MB="LPX64"\n",
+                             libcfs_id2str(src), rlength, index, match_bits);
+               return LNET_MATCHMD_DROP;
+       }
+
+       ptl = the_lnet.ln_portals[index];
+       LNET_LOCK();
+
+       if (the_lnet.ln_shutdown) {
+               rc =  LNET_MATCHMD_DROP;
+               goto out;
+       }
+
+       rc = lnet_mt_match_md(mtable, op_mask, src, rlength,
+                             roffset, match_bits, msg);
+       if (rc != LNET_MATCHMD_NONE) /* matched or dropping */
+               goto out;
+
+       if (!msg->msg_rx_ready_delay)
+               goto out;
+
+       LASSERT(!msg->msg_rx_delayed);
+       msg->msg_rx_delayed = 1;
+       cfs_list_add_tail(&msg->msg_list, &ptl->ptl_msgq);
+
+       CDEBUG(D_NET,
+              "Delaying %s from %s portal %d MB "LPX64" offset %d len %d\n",
+              op_mask == LNET_MD_OP_PUT ? "PUT" : "GET",
+              libcfs_id2str(src), index, match_bits, roffset, rlength);
+ out:
+       LNET_UNLOCK();
+       return rc;
+}
+
+void
+lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md)
+{
+       LASSERT(me->me_md == md && md->md_me == me); /* @me and @md must currently point at each other */
+
+       me->me_md = NULL; /* break the link in both directions */
+       md->md_me = NULL;
+}
+
+/* called with LNET_LOCK held */
+void
+lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
+                  cfs_list_t *matches, cfs_list_t *drops)
+{
+       struct lnet_portal      *ptl = the_lnet.ln_portals[me->me_portal];
+       lnet_msg_t              *tmp;
+       lnet_msg_t              *msg;
+
        LASSERT(md->md_refcount == 0); /* a brand new MD */
 
        LASSERT(md->md_refcount == 0); /* a brand new MD */
 
-       cfs_list_for_each_safe(entry, tmp, &ptl->ptl_msgq) {
+       me->me_md = md;
+       md->md_me = me;
+
+       cfs_list_for_each_entry_safe(msg, tmp, &ptl->ptl_msgq, msg_list) {
                int               rc;
                int               index;
                lnet_hdr_t       *hdr;
                lnet_process_id_t src;
 
                int               rc;
                int               index;
                lnet_hdr_t       *hdr;
                lnet_process_id_t src;
 
-               msg = cfs_list_entry(entry, lnet_msg_t, msg_list);
-
-               LASSERT(msg->msg_delayed);
+               LASSERT(msg->msg_rx_delayed);
 
                hdr   = &msg->msg_hdr;
                index = hdr->msg.put.ptl_index;
 
                hdr   = &msg->msg_hdr;
                index = hdr->msg.put.ptl_index;
@@ -266,10 +367,9 @@ lnet_match_blocked_msg(lnet_libmd_t *md)
 
                /* Hurrah! This _is_ a match */
                cfs_list_del(&msg->msg_list);
 
                /* Hurrah! This _is_ a match */
                cfs_list_del(&msg->msg_list);
-               ptl->ptl_msgq_version++;
 
                if (rc == LNET_MATCHMD_OK) {
 
                if (rc == LNET_MATCHMD_OK) {
-                       cfs_list_add_tail(&msg->msg_list, &matches);
+                       cfs_list_add_tail(&msg->msg_list, matches);
 
                        CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
                               "match "LPU64" offset %d length %d.\n",
 
                        CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
                               "match "LPU64" offset %d length %d.\n",
@@ -281,79 +381,98 @@ lnet_match_blocked_msg(lnet_libmd_t *md)
                } else {
                        LASSERT(rc == LNET_MATCHMD_DROP);
 
                } else {
                        LASSERT(rc == LNET_MATCHMD_DROP);
 
-                       cfs_list_add_tail(&msg->msg_list, &drops);
+                       cfs_list_add_tail(&msg->msg_list, drops);
                }
 
                if (lnet_md_exhausted(md))
                        break;
        }
                }
 
                if (lnet_md_exhausted(md))
                        break;
        }
-
-       LNET_UNLOCK();
-
-       lnet_drop_delayed_msg_list(&drops, "Bad match");
-       lnet_recv_delayed_msg_list(&matches);
-
-       LNET_LOCK();
 }
 
 void
 lnet_ptl_cleanup(struct lnet_portal *ptl)
 {
 }
 
 void
 lnet_ptl_cleanup(struct lnet_portal *ptl)
 {
-       lnet_me_t               *me;
-       int                     j;
+       struct lnet_match_table *mtable;
 
        LASSERT(cfs_list_empty(&ptl->ptl_msgq));
 
        LASSERT(cfs_list_empty(&ptl->ptl_msgq));
-       LASSERT(cfs_list_empty(&ptl->ptl_mlist));
 
 
-       if (ptl->ptl_mhash == NULL) /* uninitialized portal */
+       if (ptl->ptl_mtable == NULL) /* uninitialized portal */
                return;
 
                return;
 
-       /* cleanup ME */
-       while (!cfs_list_empty(&ptl->ptl_mlist)) {
-               me = cfs_list_entry(ptl->ptl_mlist.next,
-                                   lnet_me_t, me_list);
-               CERROR("Active wildcard ME %p on exit\n", me);
-               cfs_list_del(&me->me_list);
-               lnet_me_free(me);
-       }
+       do { /* iterate over match-tables when we have percpt match-table */
+               cfs_list_t      *mhash;
+               lnet_me_t       *me;
+               int             j;
+
+               mtable = ptl->ptl_mtable;
+
+               if (mtable->mt_mhash == NULL) /* uninitialized match-table */
+                       continue;
 
 
-       for (j = 0; j < LNET_PORTAL_HASH_SIZE; j++) {
-               while (!cfs_list_empty(&ptl->ptl_mhash[j])) {
-                       me = cfs_list_entry(ptl->ptl_mhash[j].next,
-                                      lnet_me_t, me_list);
-                       CERROR("Active unique ME %p on exit\n", me);
+               mhash = mtable->mt_mhash;
+               /* cleanup ME */
+               while (!cfs_list_empty(&mtable->mt_mlist)) {
+                       me = cfs_list_entry(mtable->mt_mlist.next,
+                                           lnet_me_t, me_list);
+                       CERROR("Active wildcard ME %p on exit\n", me);
                        cfs_list_del(&me->me_list);
                        lnet_me_free(me);
                }
                        cfs_list_del(&me->me_list);
                        lnet_me_free(me);
                }
-       }
 
 
-       LIBCFS_FREE(ptl->ptl_mhash,
-                   LNET_PORTAL_HASH_SIZE * sizeof(ptl->ptl_mhash[0]));
-       ptl->ptl_mhash = NULL; /* mark it as finalized */
+               for (j = 0; j < LNET_MT_HASH_SIZE; j++) {
+                       while (!cfs_list_empty(&mhash[j])) {
+                               me = cfs_list_entry(mhash[j].next,
+                                                   lnet_me_t, me_list);
+                               CERROR("Active unique ME %p on exit\n", me);
+                               cfs_list_del(&me->me_list);
+                               lnet_me_free(me);
+                       }
+               }
+
+               LIBCFS_FREE(mhash, sizeof(*mhash) * LNET_MT_HASH_SIZE);
+       } while (0);
+
+       LIBCFS_FREE(ptl->ptl_mtable, sizeof(*mtable));
+       ptl->ptl_mtable = NULL;
 }
 
 int
 lnet_ptl_setup(struct lnet_portal *ptl, int index)
 {
 }
 
 int
 lnet_ptl_setup(struct lnet_portal *ptl, int index)
 {
+       struct lnet_match_table *mtable;
        cfs_list_t              *mhash;
        cfs_list_t              *mhash;
-       int                     i;
+       int                     j;
 
        ptl->ptl_index = index;
        CFS_INIT_LIST_HEAD(&ptl->ptl_msgq);
 
        ptl->ptl_index = index;
        CFS_INIT_LIST_HEAD(&ptl->ptl_msgq);
-       CFS_INIT_LIST_HEAD(&ptl->ptl_mlist);
 
 
-       LIBCFS_ALLOC(mhash, sizeof(*mhash) * LNET_PORTAL_HASH_SIZE);
-       if (mhash == NULL) {
+       LIBCFS_ALLOC(mtable, sizeof(*mtable));
+       if (mtable == NULL) {
                CERROR("Failed to create match table for portal %d\n", index);
                return -ENOMEM;
        }
 
                CERROR("Failed to create match table for portal %d\n", index);
                return -ENOMEM;
        }
 
-       for (i = 0; i < LNET_PORTAL_HASH_SIZE; i++)
-               CFS_INIT_LIST_HEAD(&mhash[i]);
+       ptl->ptl_mtable = mtable;
+       do { /* iterate over match-tables when we have percpt match-table */
+               LIBCFS_ALLOC(mhash, sizeof(*mhash) * LNET_MT_HASH_SIZE);
+               if (mhash == NULL) {
+                       CERROR("Failed to create match hash for portal %d\n",
+                              index);
+                       goto failed;
+               }
+
+               mtable->mt_mhash = mhash;
+               for (j = 0; j < LNET_MT_HASH_SIZE; j++)
+                       CFS_INIT_LIST_HEAD(&mhash[j]);
 
 
-       ptl->ptl_mhash = mhash; /* initialized */
+               CFS_INIT_LIST_HEAD(&mtable->mt_mlist);
+               mtable->mt_portal = index;
+       } while (0);
 
        return 0;
 
        return 0;
+ failed:
+       lnet_ptl_cleanup(ptl);
+       return -ENOMEM;
 }
 
 void
 }
 
 void
@@ -477,7 +596,6 @@ LNetClearLazyPortal(int portal)
        /* grab all the blocked messages atomically */
        cfs_list_splice_init(&ptl->ptl_msgq, &zombies);
 
        /* grab all the blocked messages atomically */
        cfs_list_splice_init(&ptl->ptl_msgq, &zombies);
 
-       ptl->ptl_msgq_version++;
        lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
 
        LNET_UNLOCK();
        lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);
 
        LNET_UNLOCK();