From db49fbf00d24edc8347006241f314fb0f82f1b63 Mon Sep 17 00:00:00 2001 From: Mr NeilBrown Date: Fri, 17 Apr 2020 11:27:54 +1000 Subject: [PATCH] LU-10391 lnet: switch to large lnet_processid for matching Change lnet_me.me_match_id and lnet_match_info.mi_id to struct lnet_processid, so they support large nids. This requires changing LNetMEAttach(), lnet_mt_match_head(), lnet_mt_of_attach(), lnet_ptl_match_type(), and lnet_match2mt() to accept a pointer to lnet_processid rather than an lnet_process_id. Test-Parameters: trivial Test-Parameters: serverversion=2.12 serverdistro=el7.9 testlist=runtests Test-Parameters: clientversion=2.12 testlist=runtests Signed-off-by: Mr NeilBrown Change-Id: I6957b467bb9af84e20a4525db6351694f4d2a7af Reviewed-on: https://review.whamcloud.com/43597 Reviewed-by: James Simmons Reviewed-by: Chris Horn Tested-by: jenkins Tested-by: Maloo Reviewed-by: Amir Shehata Reviewed-by: Oleg Drokin --- lnet/include/lnet/api.h | 2 +- lnet/include/lnet/lib-lnet.h | 4 ++-- lnet/include/lnet/lib-types.h | 4 ++-- lnet/lnet/api-ni.c | 10 +++++----- lnet/lnet/lib-me.c | 4 ++-- lnet/lnet/lib-move.c | 10 +++++----- lnet/lnet/lib-ptl.c | 45 +++++++++++++++++++++++-------------------- lnet/selftest/rpc.c | 10 +++++++--- lustre/ptlrpc/niobuf.c | 21 ++++++++++++-------- 9 files changed, 61 insertions(+), 49 deletions(-) diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h index 8871124..c139be1 100644 --- a/lnet/include/lnet/api.h +++ b/lnet/include/lnet/api.h @@ -96,7 +96,7 @@ bool LNetIsPeerLocal(lnet_nid_t nid); * @{ */ struct lnet_me * LNetMEAttach(unsigned int portal, - struct lnet_process_id match_id_in, + struct lnet_processid *match_id_in, __u64 match_bits_in, __u64 ignore_bits_in, enum lnet_unlink unlink_in, diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 2c5b004..d5334c0 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -671,9 +671,9 @@ lnet_ptl_unsetopt(struct lnet_portal *ptl, int 
opt) /* match-table functions */ struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable, - struct lnet_process_id id, __u64 mbits); + struct lnet_processid *id, __u64 mbits); struct lnet_match_table *lnet_mt_of_attach(unsigned int index, - struct lnet_process_id id, + struct lnet_processid *id, __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos); int lnet_mt_match_md(struct lnet_match_table *mtable, diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 9150b74..45a420f 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -190,7 +190,7 @@ struct lnet_libhandle { struct lnet_me { struct list_head me_list; int me_cpt; - struct lnet_process_id me_match_id; + struct lnet_processid me_match_id; unsigned int me_portal; unsigned int me_pos; /* hash offset in mt_hash */ __u64 me_match_bits; @@ -959,7 +959,7 @@ enum lnet_match_flags { /* parameter for matching operations (GET, PUT) */ struct lnet_match_info { __u64 mi_mbits; - struct lnet_process_id mi_id; + struct lnet_processid mi_id; unsigned int mi_cpt; unsigned int mi_opc; unsigned int mi_portal; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index f86a31f..c4d6e6c 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -1918,8 +1918,8 @@ lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf, struct lnet_handle_md *ping_mdh, int ni_count, bool set_eq) { - struct lnet_process_id id = { - .nid = LNET_NID_ANY, + struct lnet_processid id = { + .nid = LNET_ANY_NID, .pid = LNET_PID_ANY }; struct lnet_me *me; @@ -1937,7 +1937,7 @@ lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf, } /* Ping target ME/MD */ - me = LNetMEAttach(LNET_RESERVED_PORTAL, id, + me = LNetMEAttach(LNET_RESERVED_PORTAL, &id, LNET_PROTO_PING_MATCHBITS, 0, LNET_UNLINK, LNET_INS_AFTER); if (IS_ERR(me)) { @@ -2135,12 +2135,12 @@ again: int lnet_push_target_post(struct lnet_ping_buffer *pbuf, struct lnet_handle_md *mdhp) { - struct lnet_process_id id = { LNET_NID_ANY, 
LNET_PID_ANY }; + struct lnet_processid id = { LNET_ANY_NID, LNET_PID_ANY }; struct lnet_md md = { NULL }; struct lnet_me *me; int rc; - me = LNetMEAttach(LNET_RESERVED_PORTAL, id, + me = LNetMEAttach(LNET_RESERVED_PORTAL, &id, LNET_PROTO_PING_MATCHBITS, 0, LNET_UNLINK, LNET_INS_AFTER); if (IS_ERR(me)) { diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c index b4231d0..8d7c9ee 100644 --- a/lnet/lnet/lib-me.c +++ b/lnet/lnet/lib-me.c @@ -66,7 +66,7 @@ */ struct lnet_me * LNetMEAttach(unsigned int portal, - struct lnet_process_id match_id, + struct lnet_processid *match_id, __u64 match_bits, __u64 ignore_bits, enum lnet_unlink unlink, enum lnet_ins_pos pos) { @@ -94,7 +94,7 @@ LNetMEAttach(unsigned int portal, lnet_res_lock(mtable->mt_cpt); me->me_portal = portal; - me->me_match_id = match_id; + me->me_match_id = *match_id; me->me_match_bits = match_bits; me->me_ignore_bits = ignore_bits; me->me_unlink = unlink; diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 0b3c922..3259946 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -4165,7 +4165,7 @@ lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg) hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset); /* Primary peer NID. 
*/ - info.mi_id.nid = msg->msg_initiator; + lnet_nid4_to_nid(msg->msg_initiator, &info.mi_id.nid); info.mi_id.pid = hdr->src_pid; info.mi_opc = LNET_MD_OP_PUT; info.mi_portal = hdr->msg.put.ptl_index; @@ -4203,7 +4203,7 @@ lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg) case LNET_MATCHMD_DROP: CNETERR("Dropping PUT from %s portal %d match %llu" " offset %d length %d: %d\n", - libcfs_id2str(info.mi_id), info.mi_portal, + libcfs_idstr(&info.mi_id), info.mi_portal, info.mi_mbits, info.mi_roffset, info.mi_rlength, rc); return -ENOENT; /* -ve: OK but no match */ @@ -4228,7 +4228,7 @@ lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get) source_id.nid = hdr->src_nid; source_id.pid = hdr->src_pid; /* Primary peer NID */ - info.mi_id.nid = msg->msg_initiator; + lnet_nid4_to_nid(msg->msg_initiator, &info.mi_id.nid); info.mi_id.pid = hdr->src_pid; info.mi_opc = LNET_MD_OP_GET; info.mi_portal = hdr->msg.get.ptl_index; @@ -4241,7 +4241,7 @@ lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get) if (rc == LNET_MATCHMD_DROP) { CNETERR("Dropping GET from %s portal %d match %llu" " offset %d length %d\n", - libcfs_id2str(info.mi_id), info.mi_portal, + libcfs_idstr(&info.mi_id), info.mi_portal, info.mi_mbits, info.mi_roffset, info.mi_rlength); return -ENOENT; /* -ve: OK but no match */ } @@ -4273,7 +4273,7 @@ lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get) /* didn't get as far as lnet_ni_send() */ CERROR("%s: Unable to send REPLY for GET from %s: %d\n", libcfs_nidstr(&ni->ni_nid), - libcfs_id2str(info.mi_id), rc); + libcfs_idstr(&info.mi_id), rc); lnet_finalize(msg, rc); } diff --git a/lnet/lnet/lib-ptl.c b/lnet/lnet/lib-ptl.c index 10238a4..9392e0e 100644 --- a/lnet/lnet/lib-ptl.c +++ b/lnet/lnet/lib-ptl.c @@ -43,15 +43,15 @@ module_param(portal_rotor, int, 0644); MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions"); static int -lnet_ptl_match_type(unsigned int index, struct lnet_process_id 
match_id, +lnet_ptl_match_type(unsigned int index, struct lnet_processid *match_id, __u64 mbits, __u64 ignore_bits) { struct lnet_portal *ptl = the_lnet.ln_portals[index]; int unique; - unique = ignore_bits == 0 && - match_id.nid != LNET_NID_ANY && - match_id.pid != LNET_PID_ANY; + unique = (ignore_bits == 0 && + !LNET_NID_IS_ANY(&match_id->nid) && + match_id->pid != LNET_PID_ANY); LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl)); @@ -153,8 +153,8 @@ lnet_try_match_md(struct lnet_libmd *md, return LNET_MATCHMD_NONE; /* mismatched ME nid/pid? */ - if (me->me_match_id.nid != LNET_NID_ANY && - me->me_match_id.nid != info->mi_id.nid) + if (!LNET_NID_IS_ANY(&me->me_match_id.nid) && + !nid_same(&me->me_match_id.nid, &info->mi_id.nid)) return LNET_MATCHMD_NONE; if (me->me_match_id.pid != LNET_PID_ANY && @@ -185,7 +185,7 @@ lnet_try_match_md(struct lnet_libmd *md, /* this packet _really_ is too big */ CERROR("Matching packet from %s, match %llu" " length %d too big: %d left, %d allowed\n", - libcfs_id2str(info->mi_id), info->mi_mbits, + libcfs_idstr(&info->mi_id), info->mi_mbits, info->mi_rlength, md->md_length - offset, mlength); return LNET_MATCHMD_DROP; @@ -195,7 +195,7 @@ lnet_try_match_md(struct lnet_libmd *md, CDEBUG(D_NET, "Incoming %s index %x from %s of " "length %d/%d into md %#llx [%d] + %d\n", (info->mi_opc == LNET_MD_OP_PUT) ? 
"put" : "get", - info->mi_portal, libcfs_id2str(info->mi_id), mlength, + info->mi_portal, libcfs_idstr(&info->mi_id), mlength, info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset); lnet_msg_attach_md(msg, md, offset, mlength); @@ -214,18 +214,18 @@ lnet_try_match_md(struct lnet_libmd *md, } static struct lnet_match_table * -lnet_match2mt(struct lnet_portal *ptl, struct lnet_process_id id, __u64 mbits) +lnet_match2mt(struct lnet_portal *ptl, struct lnet_processid *id, __u64 mbits) { if (LNET_CPT_NUMBER == 1) return ptl->ptl_mtables[0]; /* the only one */ /* if it's a unique portal, return match-table hashed by NID */ return lnet_ptl_is_unique(ptl) ? - ptl->ptl_mtables[lnet_cpt_of_nid(id.nid, NULL)] : NULL; + ptl->ptl_mtables[lnet_nid2cpt(&id->nid, NULL)] : NULL; } struct lnet_match_table * -lnet_mt_of_attach(unsigned int index, struct lnet_process_id id, +lnet_mt_of_attach(unsigned int index, struct lnet_processid *id, __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos) { struct lnet_portal *ptl; @@ -274,7 +274,7 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg) LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)); - mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits); + mtable = lnet_match2mt(ptl, &info->mi_id, info->mi_mbits); if (mtable != NULL) return mtable; @@ -355,14 +355,14 @@ lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted) struct list_head * lnet_mt_match_head(struct lnet_match_table *mtable, - struct lnet_process_id id, __u64 mbits) + struct lnet_processid *id, __u64 mbits) { struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal]; if (lnet_ptl_is_wildcard(ptl)) { return &mtable->mt_mhash[mbits & LNET_MT_HASH_MASK]; } else { - unsigned long hash = mbits + id.nid + id.pid; + unsigned long hash = mbits + nidhash(&id->nid) + id->pid; LASSERT(lnet_ptl_is_unique(ptl)); hash = hash_long(hash, LNET_MT_HASH_BITS); @@ -384,7 +384,8 @@ lnet_mt_match_md(struct lnet_match_table 
*mtable, if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE])) head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE]; else - head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); + head = lnet_mt_match_head(mtable, &info->mi_id, + info->mi_mbits); again: /* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */ if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])) @@ -415,7 +416,8 @@ lnet_mt_match_md(struct lnet_match_table *mtable, } if (exhausted == 0 && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) { - head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); + head = lnet_mt_match_head(mtable, &info->mi_id, + info->mi_mbits); goto again; /* re-check MEs w/o ignore-bits */ } @@ -565,8 +567,9 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg) struct lnet_portal *ptl; int rc; - CDEBUG(D_NET, "Request from %s of length %d into portal %d " - "MB=%#llx\n", libcfs_id2str(info->mi_id), + CDEBUG(D_NET, + "Request from %s of length %d into portal %d MB=%#llx\n", + libcfs_idstr(&info->mi_id), info->mi_rlength, info->mi_portal, info->mi_mbits); if (info->mi_portal >= the_lnet.ln_nportals) { @@ -624,7 +627,7 @@ lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg) CDEBUG(D_NET, "Delaying %s from %s ptl %d MB %#llx off %d len %d\n", info->mi_opc == LNET_MD_OP_PUT ? 
"PUT" : "GET", - libcfs_id2str(info->mi_id), info->mi_portal, + libcfs_idstr(&info->mi_id), info->mi_portal, info->mi_mbits, info->mi_roffset, info->mi_rlength); } goto out0; @@ -682,7 +685,7 @@ lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md, hdr = &msg->msg_hdr; /* Multi-Rail: Primary peer NID */ - info.mi_id.nid = msg->msg_initiator; + lnet_nid4_to_nid(msg->msg_initiator, &info.mi_id.nid); info.mi_id.pid = hdr->src_pid; info.mi_opc = LNET_MD_OP_PUT; info.mi_portal = hdr->msg.put.ptl_index; @@ -715,7 +718,7 @@ lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md, CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d " "match %llu offset %d length %d.\n", - libcfs_id2str(info.mi_id), + libcfs_idstr(&info.mi_id), info.mi_portal, info.mi_mbits, info.mi_roffset, info.mi_rlength); } else { diff --git a/lnet/selftest/rpc.c b/lnet/selftest/rpc.c index 8ed2d0f..85288d3 100644 --- a/lnet/selftest/rpc.c +++ b/lnet/selftest/rpc.c @@ -351,14 +351,18 @@ srpc_remove_service(struct srpc_service *sv) static int srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf, - int len, int options, struct lnet_process_id peer, + int len, int options, struct lnet_process_id peer4, struct lnet_handle_md *mdh, struct srpc_event *ev) { int rc; struct lnet_md md; struct lnet_me *me; + struct lnet_processid peer; - me = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK, + peer.pid = peer4.pid; + lnet_nid4_to_nid(peer4.nid, &peer.nid); + + me = LNetMEAttach(portal, &peer, matchbits, 0, LNET_UNLINK, local ? 
LNET_INS_LOCAL : LNET_INS_AFTER); if (IS_ERR(me)) { rc = PTR_ERR(me); @@ -384,7 +388,7 @@ srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf, CDEBUG(D_NET, "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n", - libcfs_id2str(peer), portal, matchbits); + libcfs_id2str(peer4), portal, matchbits); return 0; } diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index eea686e..bd8f0d4 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -314,7 +314,7 @@ void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc) int ptlrpc_register_bulk(struct ptlrpc_request *req) { struct ptlrpc_bulk_desc *desc = req->rq_bulk; - struct lnet_process_id peer; + struct lnet_processid peer; int rc = 0; int posted_md; int total_md; @@ -344,7 +344,9 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req) desc->bd_failure = 0; - peer = desc->bd_import->imp_connection->c_peer; + peer.pid = desc->bd_import->imp_connection->c_peer.pid; + lnet_nid4_to_nid(desc->bd_import->imp_connection->c_peer.nid, + &peer.nid); LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback); LASSERT(desc->bd_cbid.cbid_arg == desc); @@ -380,7 +382,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req) OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_ATTACH)) { rc = -ENOMEM; } else { - me = LNetMEAttach(desc->bd_portal, peer, mbits, 0, + me = LNetMEAttach(desc->bd_portal, &peer, mbits, 0, LNET_UNLINK, LNET_INS_AFTER); rc = PTR_ERR_OR_ZERO(me); } @@ -419,7 +421,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req) /* Holler if peer manages to touch buffers before he knows the mbits */ if (desc->bd_refs != total_md) CWARN("%s: Peer %s touched %d buffers while I registered\n", - desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer), + desc->bd_import->imp_obd->obd_name, libcfs_idstr(&peer), total_md - desc->bd_refs); spin_unlock(&desc->bd_lock); @@ -706,6 +708,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) int mpflag = 0; bool rep_mbits = false; struct 
lnet_handle_md bulk_cookie; + struct lnet_processid peer; struct ptlrpc_connection *connection; struct lnet_me *reply_me = NULL; struct lnet_md reply_md; @@ -845,12 +848,14 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) request->rq_repmsg = NULL; } + peer.pid = connection->c_peer.pid; + lnet_nid4_to_nid(connection->c_peer.nid, &peer.nid); if (request->rq_bulk && OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH)) { reply_me = ERR_PTR(-ENOMEM); } else { reply_me = LNetMEAttach(request->rq_reply_portal, - connection->c_peer, + &peer, rep_mbits ? request->rq_mbits : request->rq_xid, 0, LNET_UNLINK, LNET_INS_AFTER); @@ -973,8 +978,8 @@ EXPORT_SYMBOL(ptl_send_rpc); int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd) { struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service; - static struct lnet_process_id match_id = { - .nid = LNET_NID_ANY, + static struct lnet_processid match_id = { + .nid = LNET_ANY_NID, .pid = LNET_PID_ANY }; int rc; @@ -991,7 +996,7 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd) * which means buffer can only be attached on local CPT, and LND * threads can find it by grabbing a local lock */ me = LNetMEAttach(service->srv_req_portal, - match_id, 0, ~0, LNET_UNLINK, + &match_id, 0, ~0, LNET_UNLINK, rqbd->rqbd_svcpt->scp_cpt >= 0 ? LNET_INS_LOCAL : LNET_INS_AFTER); if (IS_ERR(me)) { -- 1.8.3.1