From 64a677269f5262702bd0e9c9de7378bd5b256bd0 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Thu, 12 Jul 2012 00:47:01 +0800 Subject: [PATCH 1/1] LU-1622 lnet: hash MEs on wildcard portal This patch addresses a few more issues from BZ21619. The first issue is that an ME with ignore-bits should always be attached to a list instead of the hash table, regardless of whether it is on a wildcard portal or a unique portal, because a message can match buffers with various match-bits if those buffers also have ignore-bits. In other words, if a user sets both match-bits and ignore-bits for MEs on a unique portal, an incoming message may never be able to find them, because the lookup only searches MEs hashed by the message's match-bits, even though MEs with different match-bits could also match after some bits are ignored. The reason nobody has complained about this is that Lustre has no such use case (posting MEs with ignore-bits on a unique portal). The second issue fixed by this patch is that if multiple services share one portal but use match-bits to identify different buffer types, we still have a performance problem, because all buffers are attached to one list and searching for a buffer of a given type can be long and expensive. So we should hash buffers on wildcard portals as well. This is why LNet selftest performance is poor when PING and BRW tests are mixed: PING and BRW share one portal, so their request buffers are attached to the same list; BRW requests are processed much more slowly than PING requests, so there will be a long search for a PING request buffer. 
Signed-off-by: Liang Zhen Change-Id: I0d2c6330dd231d369e2a86ced2b8374c0c96dbf9 Reviewed-on: http://review.whamcloud.com/3376 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Doug Oucharek Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-types.h | 16 ++++++- lnet/lnet/lib-me.c | 7 ++- lnet/lnet/lib-ptl.c | 101 ++++++++++++++++++++++++++++++++---------- 3 files changed, 98 insertions(+), 26 deletions(-) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index f4ef8ff..4d3d614 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -260,6 +260,7 @@ typedef struct lnet_me { lnet_libhandle_t me_lh; lnet_process_id_t me_match_id; unsigned int me_portal; + unsigned int me_pos; /* hash offset in mt_hash */ __u64 me_match_bits; __u64 me_ignore_bits; lnet_unlink_t me_unlink; @@ -601,6 +602,16 @@ struct lnet_match_info { /* ME hash of RDMA portal */ #define LNET_MT_HASH_BITS 8 #define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS) +#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1) +/* we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash, + * the last entry is reserved for MEs with ignore-bits */ +#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE +/* __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which + * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the + * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE] */ +#define LNET_MT_BITS_U64 6 /* 2^6 bits */ +#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64) +#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1) /* portal match table */ struct lnet_match_table { @@ -608,9 +619,10 @@ struct lnet_match_table { unsigned int mt_cpt; unsigned int mt_portal; /* portal index */ /* match table is set as "enabled" if there's non-exhausted MD - * attached on mt_mlist, it's only valide for wildcard portal */ + * attached on mt_mhash, it's only valide for 
wildcard portal */ unsigned int mt_enabled; - cfs_list_t mt_mlist; /* matching list */ + /* bitmap to flag whether MEs on mt_hash are exhausted or not */ + __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; cfs_list_t *mt_mhash; /* matching hash */ }; diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c index f203e30..8df9917 100644 --- a/lnet/lnet/lib-me.c +++ b/lnet/lnet/lib-me.c @@ -107,8 +107,12 @@ LNetMEAttach(unsigned int portal, lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt], &me->me_lh); - head = lnet_mt_match_head(mtable, match_id, match_bits); + if (ignore_bits != 0) + head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE]; + else + head = lnet_mt_match_head(mtable, match_id, match_bits); + me->me_pos = head - &mtable->mt_mhash[0]; if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL) cfs_list_add_tail(&me->me_list, head); else @@ -182,6 +186,7 @@ LNetMEInsert(lnet_handle_me_t current_meh, return -EPERM; } + new_me->me_pos = current_me->me_pos; new_me->me_portal = current_me->me_portal; new_me->me_match_id = match_id; new_me->me_match_bits = match_bits; diff --git a/lnet/lnet/lib-ptl.c b/lnet/lnet/lib-ptl.c index d68c4fc..527a28a 100644 --- a/lnet/lnet/lib-ptl.c +++ b/lnet/lnet/lib-ptl.c @@ -311,6 +311,49 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg) return ptl->ptl_mtables[cpt]; } +static int +lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos) +{ + __u64 *bmap; + int i; + + if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])) + return 0; + + if (pos < 0) { /* check all bits */ + for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) { + if (mtable->mt_exhausted[i] != (__u64)(-1)) + return 0; + } + return 1; + } + + LASSERT(pos <= LNET_MT_HASH_IGNORE); + /* mtable::mt_mhash[pos] is marked as exhausted or not */ + bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64]; + pos &= (1 << LNET_MT_BITS_U64) - 1; + + return ((*bmap) & (1ULL << pos)) != 0; +} + +static void +lnet_mt_set_exhausted(struct lnet_match_table 
*mtable, int pos, int exhausted) +{ + __u64 *bmap; + + LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])); + LASSERT(pos <= LNET_MT_HASH_IGNORE); + + /* set mtable::mt_mhash[pos] as exhausted/non-exhausted */ + bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64]; + pos &= (1 << LNET_MT_BITS_U64) - 1; + + if (!exhausted) + *bmap &= ~(1ULL << pos); + else + *bmap |= 1ULL << pos; +} + cfs_list_t * lnet_mt_match_head(struct lnet_match_table *mtable, lnet_process_id_t id, __u64 mbits) @@ -318,8 +361,7 @@ lnet_mt_match_head(struct lnet_match_table *mtable, struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal]; if (lnet_ptl_is_wildcard(ptl)) { - return &mtable->mt_mlist; - + return &mtable->mt_mhash[mbits & LNET_MT_HASH_MASK]; } else { unsigned long hash = mbits + id.nid + id.pid; @@ -339,12 +381,16 @@ lnet_mt_match_md(struct lnet_match_table *mtable, int exhausted = 0; int rc; - /* NB: only wildcard portal can return LNET_MATCHMD_EXHAUSTED */ + /* any ME with ignore bits? 
*/ + if (!cfs_list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE])) + head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE]; + else + head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); + again: + /* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */ if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])) exhausted = LNET_MATCHMD_EXHAUSTED; - head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); - cfs_list_for_each_entry_safe(me, tmp, head, me_list) { /* ME attached but MD not attached yet */ if (me->me_md == NULL) @@ -363,6 +409,17 @@ lnet_mt_match_md(struct lnet_match_table *mtable, } } + if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */ + lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1); + if (!lnet_mt_test_exhausted(mtable, -1)) + exhausted = 0; + } + + if (exhausted == 0 && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) { + head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); + goto again; /* re-check MEs w/o ignore-bits */ + } + if (info->mi_opc == LNET_MD_OP_GET || !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal])) return LNET_MATCHMD_DROP | exhausted; @@ -585,7 +642,7 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md, if (cfs_list_empty(&ptl->ptl_msg_stealing) && cfs_list_empty(&ptl->ptl_msg_delayed) && - mtable->mt_enabled) + !lnet_mt_test_exhausted(mtable, me->me_pos)) return; lnet_ptl_lock(ptl); @@ -648,8 +705,11 @@ lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md, goto again; } - if (lnet_ptl_is_wildcard(ptl) && !exhausted && !mtable->mt_enabled) - lnet_ptl_enable_mt(ptl, cpt); + if (lnet_ptl_is_wildcard(ptl) && !exhausted) { + lnet_mt_set_exhausted(mtable, me->me_pos, 0); + if (!mtable->mt_enabled) + lnet_ptl_enable_mt(ptl, cpt); + } lnet_ptl_unlock(ptl); } @@ -680,25 +740,17 @@ lnet_ptl_cleanup(struct lnet_portal *ptl) mhash = mtable->mt_mhash; /* cleanup ME */ - while (!cfs_list_empty(&mtable->mt_mlist)) { - me = cfs_list_entry(mtable->mt_mlist.next, - lnet_me_t, 
me_list); - CERROR("Active wildcard ME %p on exit\n", me); - cfs_list_del(&me->me_list); - lnet_me_free(me); - } - - for (j = 0; j < LNET_MT_HASH_SIZE; j++) { + for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) { while (!cfs_list_empty(&mhash[j])) { me = cfs_list_entry(mhash[j].next, lnet_me_t, me_list); - CERROR("Active unique ME %p on exit\n", me); + CERROR("Active ME %p on exit\n", me); cfs_list_del(&me->me_list); lnet_me_free(me); } } - - LIBCFS_FREE(mhash, sizeof(*mhash) * LNET_MT_HASH_SIZE); + /* the extra entry is for MEs with ignore bits */ + LIBCFS_FREE(mhash, sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1)); } cfs_percpt_free(ptl->ptl_mtables); @@ -731,19 +783,22 @@ lnet_ptl_setup(struct lnet_portal *ptl, int index) # endif #endif cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) { + /* the extra entry is for MEs with ignore bits */ LIBCFS_CPT_ALLOC(mhash, lnet_cpt_table(), i, - sizeof(*mhash) * LNET_MT_HASH_SIZE); + sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1)); if (mhash == NULL) { CERROR("Failed to create match hash for portal %d\n", index); goto failed; } + memset(&mtable->mt_exhausted[0], -1, + sizeof(mtable->mt_exhausted[0]) * + LNET_MT_EXHAUSTED_BMAP); mtable->mt_mhash = mhash; - for (j = 0; j < LNET_MT_HASH_SIZE; j++) + for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) CFS_INIT_LIST_HEAD(&mhash[j]); - CFS_INIT_LIST_HEAD(&mtable->mt_mlist); mtable->mt_portal = index; mtable->mt_cpt = i; } -- 1.8.3.1