From 8f819877be20240028bc34eb5173e6dd90acdd29 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Thu, 8 May 2025 22:07:37 +0300 Subject: [PATCH] LU-18986 mgs: server part of new registration protocol Rework mgs_target_reg() to handle the new protocol along with the old one for older targets. It handles the old protocol with NIDs either in mti_nids or in mti_nidlist[], and the new protocol with NIDs in mtn_inline_list[] or in bulk. All NIDs are put in mti_nidlist[] as a result of request processing, which eliminates the need for extra changes further down the code path. Test-Parameters: testlist=runtests ossversion=EXA6.3.2 Signed-off-by: Mikhail Pershin Change-Id: I41dd487c37136e24328914e33c9ce056be013aae Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/59206 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/include/uapi/linux/lustre/lustre_idl.h | 1 + lustre/mgs/mgs_handler.c | 118 +++++++++++++++++++++----- lustre/ptlrpc/client.c | 4 +- 3 files changed, 103 insertions(+), 20 deletions(-) diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 093ccc1..382aeab 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -955,6 +955,7 @@ struct ptlrpc_body_v2 { OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | \ OBD_CONNECT_PINGLESS |\ OBD_CONNECT_BULK_MBITS | OBD_CONNECT_BARRIER | \ + OBD_CONNECT_MGS_NIDLIST | \ OBD_CONNECT_FLAGS2) #define MGS_CONNECT_SUPPORTED2 OBD_CONNECT2_REP_MBITS | \ diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 5a1799f..88470b0 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -330,13 +330,16 @@ static int mgs_target_reg(struct tgt_session_info *tsi) { struct obd_device *obd = tsi->tsi_exp->exp_obd; struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp); - struct mgs_target_info *mti, *rep_mti; + struct 
mgs_target_info *mti, *reply_mti, *request_mti; + struct mgs_target_nidlist *mtn = NULL; + struct ptlrpc_bulk_desc *desc = NULL; struct fs_db *b_fsdb = NULL; /* barrier fsdb */ struct fs_db *c_fsdb = NULL; /* config fsdb */ char barrier_name[20]; - size_t mti_len = 0; + size_t mti_buflen, mti_alloc = 0; int opc; int rc = 0; + bool nidlist; ENTRY; rc = lu_env_refill((struct lu_env *)tsi->tsi_env); @@ -345,16 +348,87 @@ static int mgs_target_reg(struct tgt_session_info *tsi) tgt_counter_incr(tsi->tsi_exp, LPROC_MGS_TARGET_REG); - mti = req_capsule_client_get(tsi->tsi_pill, &RMF_MGS_TARGET_INFO); - if (mti == NULL) { - DEBUG_REQ(D_HA, tgt_ses_req(tsi), "no mgs_send_param"); - RETURN(err_serious(-EFAULT)); + nidlist = exp_connect_flags(tsi->tsi_exp) & OBD_CONNECT_MGS_NIDLIST; + + request_mti = req_capsule_client_get(tsi->tsi_pill, + &RMF_MGS_TARGET_INFO); + if (!request_mti) { + DEBUG_REQ(D_HA, tgt_ses_req(tsi), "no mgs_target_info"); + RETURN(err_serious(-EPROTO)); + } + mti_buflen = req_capsule_get_size(tsi->tsi_pill, &RMF_MGS_TARGET_INFO, + RCL_CLIENT); + + /* Compatibility code for older targets, process mti as is */ + if (!nidlist) { + int limit; + + mti = request_mti; + /* sanity check for mti_nid_count */ + if (mti_buflen > sizeof(*mti)) + limit = (mti_buflen - sizeof(*mti)) / MTN_NIDSTR_SIZE; + else + limit = MTI_NIDS_MAX; + + if (mti->mti_nid_count > limit) { + CWARN("%s: bad NID count in mti: %d, req limit: %d\n", + mti->mti_svname, mti->mti_nid_count, limit); + mti->mti_nid_count = limit; + } + goto process; } + req_capsule_extend(tsi->tsi_pill, &RQF_MGS_TARGET_REG_NIDLIST); + if (!req_capsule_field_present(tsi->tsi_pill, &RMF_MGS_TARGET_NIDLIST, + RCL_CLIENT)) { + DEBUG_REQ(D_HA, tgt_ses_req(tsi), "no mgs_target_nidlist"); + RETURN(err_serious(-EPROTO)); + } + + /* new protocol with nidlist */ + mtn = req_capsule_client_get(tsi->tsi_pill, &RMF_MGS_TARGET_NIDLIST); + mti_alloc = sizeof(*mti) + NIDLIST_SIZE(mtn->mtn_nids); + OBD_ALLOC_LARGE(mti, mti_alloc); + 
if (!mti) + RETURN(err_serious(-ENOMEM)); + + if (mtn->mtn_flags & NIDLIST_IN_BULK) { + int pages; + size_t nidlist_size = NIDLIST_SIZE(mtn->mtn_nids); + + pages = DIV_ROUND_UP((sizeof(*mti) & ~PAGE_MASK) + + nidlist_size, PAGE_SIZE); + desc = ptlrpc_prep_bulk_exp(tsi->tsi_pill->rc_req, + pages, PTLRPC_BULK_OPS_COUNT, + PTLRPC_BULK_GET_SINK, + MGS_BULK_PORTAL, + &ptlrpc_bulk_kiov_nopin_ops); + if (!desc) + GOTO(out_mti_free, rc = err_serious(-ENOMEM)); + + desc->bd_frag_ops->add_iov_frag(desc, mti->mti_nidlist, + nidlist_size); + tsi->tsi_pill->rc_req->rq_bulk_write = 1; + rc = sptlrpc_svc_prep_bulk(tsi->tsi_pill->rc_req, desc); + if (rc != 0) + GOTO(out_free, rc = err_serious(rc)); + + rc = target_bulk_io(tsi->tsi_pill->rc_req->rq_export, desc); + if (rc < 0) + GOTO(out_free, rc = err_serious(rc)); + } else { + memcpy(mti->mti_nidlist, mtn->mtn_inline_list, + NIDLIST_SIZE(mtn->mtn_nids)); + } + *mti = *request_mti; + mti->mti_nid_count = mtn->mtn_nids; + mti->mti_flags |= LDD_F_LARGE_NID; + +process: + /* at this point all NIDs are in mti */ down_read(&mgs->mgs_barrier_rwsem); - if (OCD_HAS_FLAG(&tgt_ses_req(tsi)->rq_export->exp_connect_data, - IMP_RECOV)) + if (OCD_HAS_FLAG(&tsi->tsi_exp->exp_connect_data, IMP_RECOV)) opc = mti->mti_flags & LDD_F_OPC_MASK; else opc = LDD_F_OPC_REG; @@ -527,29 +601,35 @@ out_norevoke: if (rc) mti->mti_flags |= LDD_F_ERROR; - /* send back the whole mti in the reply */ - if (target_supports_large_nid(mti)) { - size_t len = offsetof(struct mgs_target_info, mti_nidlist); + /* Compatibility code: + * if large mti was received, send back the same buffer size as that + * MGC expects, so avoid buffer size mismatch errors on MGC side + */ + if (mti_buflen > sizeof(*mti)) { int err; - mti_len = mti->mti_nid_count * LNET_NIDSTR_SIZE; err = req_capsule_server_grow(tsi->tsi_pill, - &RMF_MGS_TARGET_INFO, - len + mti_len); + &RMF_MGS_TARGET_INFO, mti_buflen); if (err < 0) - RETURN(err); + GOTO(out_fsdb, rc = err_serious(err)); } - rep_mti = 
req_capsule_server_get(tsi->tsi_pill, &RMF_MGS_TARGET_INFO); - *rep_mti = *mti; - if (target_supports_large_nid(mti)) - memcpy(rep_mti->mti_nidlist, mti->mti_nidlist, mti_len); + reply_mti = req_capsule_server_get(tsi->tsi_pill, &RMF_MGS_TARGET_INFO); + *reply_mti = *mti; /* Flush logs to disk */ dt_sync(tsi->tsi_env, mgs->mgs_bottom); + +out_fsdb: if (b_fsdb) mgs_put_fsdb(mgs, b_fsdb); if (c_fsdb) mgs_put_fsdb(mgs, c_fsdb); +out_free: + ptlrpc_free_bulk(desc); +out_mti_free: + if (mti_alloc) + OBD_FREE_LARGE(mti, mti_alloc); + RETURN(rc); } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 6a84e11..483c392 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -306,7 +306,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) { ENTRY; - LASSERT(desc != NULL); + if (!desc) + return; + LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */ LASSERT(desc->bd_refs == 0); /* network hands off */ LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); -- 1.8.3.1