From 9d05024e3a1a8309835d19167448f05e904e2a3e Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Mon, 27 Feb 2023 15:41:19 -0800 Subject: [PATCH] LU-14668 lnet: add 'force' option to lnetctl peer del Add --force option to 'lnetctl peer del' command. If the peer has primary NID locked, this option allows for the peer to be deleted manually: lnetctl peer del --prim_nid <nid> --force Add --lock_prim option to 'lnetctl peer add' command. If specified, the primary NID of the peer is locked such that it is going to be the NID used to identify the peer in communications with Lustre layer. Lustre-change: https://review.whamcloud.com/50149 Lustre-commit: f1b2d8d60c593a670b36006bcf9b040549d8c13a Test-Parameters: trivial Signed-off-by: Serguei Smirnov Change-Id: Ia6001856cfbce7b0c3288cff9b244b569d259647 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50970 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage Reviewed-by: Andreas Dilger --- lnet/include/lnet/lib-lnet.h | 5 +++-- lnet/include/uapi/linux/lnet/lnet-dlc.h | 4 +++- lnet/lnet/api-ni.c | 6 ++++-- lnet/lnet/peer.c | 15 ++++++++++----- lnet/utils/lnetconfig/liblnetconfig.c | 24 ++++++++++++++---------- lnet/utils/lnetconfig/liblnetconfig.h | 5 +++-- lnet/utils/lnetctl.c | 29 +++++++++++++++++++++++++---- lustre/doc/lnetctl.8 | 6 ++++++ lustre/tests/sanity-lnet.sh | 18 ++++++++++++++++++ 9 files changed, 86 insertions(+), 26 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index f2bf44d..4cd4ccb 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -905,8 +905,9 @@ struct lnet_peer_net *lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id); bool lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid); int lnet_peer_ni_set_non_mr_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid); -int lnet_user_add_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid, bool mr); -int lnet_del_peer_ni(lnet_nid_t
key_nid, lnet_nid_t nid); +int lnet_user_add_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid, bool mr, + bool lock_prim); +int lnet_del_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid, int force); int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk); int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid, char alivness[LNET_MAX_STR_LEN], diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index 72fb847..7ee051f 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -269,7 +269,9 @@ struct lnet_ioctl_peer_cfg { struct libcfs_ioctl_hdr prcfg_hdr; lnet_nid_t prcfg_prim_nid; lnet_nid_t prcfg_cfg_nid; - __u32 prcfg_count; + __u32 prcfg_count; /* ADD_PEER_NI: used for 'lock_prim' option + * DEL_PEER_NI: used for 'force' option + */ __u32 prcfg_mr; __u32 prcfg_state; __u32 prcfg_size; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 6431b43..4666323 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -3928,7 +3928,8 @@ LNetCtl(unsigned int cmd, void *arg) mutex_lock(&the_lnet.ln_api_mutex); rc = lnet_user_add_peer_ni(cfg->prcfg_prim_nid, cfg->prcfg_cfg_nid, - cfg->prcfg_mr); + cfg->prcfg_mr, + cfg->prcfg_count == 1); mutex_unlock(&the_lnet.ln_api_mutex); return rc; } @@ -3941,7 +3942,8 @@ LNetCtl(unsigned int cmd, void *arg) mutex_lock(&the_lnet.ln_api_mutex); rc = lnet_del_peer_ni(cfg->prcfg_prim_nid, - cfg->prcfg_cfg_nid); + cfg->prcfg_cfg_nid, + cfg->prcfg_count); mutex_unlock(&the_lnet.ln_api_mutex); return rc; } diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 30f3aa2..f4f46ae 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -1517,7 +1517,9 @@ lnet_peer_add(lnet_nid_t nid, unsigned flags) * was discovered manually, but is recreated via Lustre * with PRIMARY_lock */ - lnet_peer_del(lp); + rc = lnet_peer_del(lp); + if (rc) + goto out; } /* Create peer, peer_net, and peer_ni. 
*/ @@ -1863,9 +1865,12 @@ __must_hold(&the_lnet.ln_api_mutex) return lnet_peer_add_nid(lp, nid, flags); } -int lnet_user_add_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, bool mr) +int lnet_user_add_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, bool mr, + bool lock_prim) { - return lnet_add_peer_ni(prim_nid, nid, mr, LNET_PEER_CONFIGURED); + int fl = LNET_PEER_CONFIGURED | (LNET_PEER_LOCK_PRIMARY * lock_prim); + + return lnet_add_peer_ni(prim_nid, nid, mr, fl); } static int @@ -1914,7 +1919,7 @@ lnet_reset_peer(struct lnet_peer *lp) * being modified/deleted by a different thread. */ int -lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid) +lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, int force) { struct lnet_peer *lp; struct lnet_peer_ni *lpni; @@ -1946,7 +1951,7 @@ lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid) lnet_net_unlock(LNET_LOCK_EX); if (nid == LNET_NID_ANY || nid == lp->lp_primary_nid) { - if (lp->lp_state & LNET_PEER_LOCK_PRIMARY) { + if (!force && lp->lp_state & LNET_PEER_LOCK_PRIMARY) { CERROR("peer %s created by Lustre. 
Must preserve primary NID, but will remove other NIDs\n", libcfs_nid2str(lp->lp_primary_nid)); return lnet_reset_peer(lp); diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 4ba97d4..dcdabdf 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -644,7 +644,7 @@ int lustre_lnet_discover_nid(char *ping_nids, int force, int seq_no, } static int lustre_lnet_handle_peer_nidlist(lnet_nid_t *nidlist, int num_nids, - bool is_mr, __u32 cmd, + bool is_mr, int option, __u32 cmd, char *cmd_type, char *err_str) { struct lnet_ioctl_peer_cfg data; @@ -659,6 +659,7 @@ static int lustre_lnet_handle_peer_nidlist(lnet_nid_t *nidlist, int num_nids, data.prcfg_mr = is_mr; data.prcfg_prim_nid = nidlist[0]; data.prcfg_cfg_nid = LNET_NID_ANY; + data.prcfg_count = option; rc = dispatch_peer_ni_cmd(cmd, &data, err_str, cmd_type); @@ -674,6 +675,7 @@ static int lustre_lnet_handle_peer_nidlist(lnet_nid_t *nidlist, int num_nids, data.prcfg_mr = is_mr; data.prcfg_prim_nid = nidlist[0]; data.prcfg_cfg_nid = nidlist[nid_idx]; + data.prcfg_count = option; rc = dispatch_peer_ni_cmd(cmd, &data, err_str, cmd_type); @@ -689,6 +691,7 @@ static int lustre_lnet_handle_peer_nidlist(lnet_nid_t *nidlist, int num_nids, LIBCFS_IOC_INIT_V2(data, prcfg_hdr); data.prcfg_prim_nid = nidlist[0]; data.prcfg_cfg_nid = LNET_NID_ANY; + data.prcfg_count = option; rc = dispatch_peer_ni_cmd(cmd, &data, err_str, cmd_type); } @@ -698,8 +701,8 @@ static int lustre_lnet_handle_peer_nidlist(lnet_nid_t *nidlist, int num_nids, static int lustre_lnet_mod_peer_nidlist(lnet_nid_t pnid, lnet_nid_t *lnet_nidlist, - int cmd, int num_nids, bool is_mr, int seq_no, - struct cYAML **err_rc) + int cmd, int num_nids, bool is_mr, int option, + int seq_no, struct cYAML **err_rc) { int rc = LUSTRE_CFG_RC_NO_ERR; char err_str[LNET_MAX_STR_LEN]; @@ -720,8 +723,8 @@ lustre_lnet_mod_peer_nidlist(lnet_nid_t pnid, lnet_nid_t *lnet_nidlist, (num_nids - 1)); rc = 
lustre_lnet_handle_peer_nidlist(lnet_nidlist2, - num_nids, is_mr, ioc_cmd, - cmd_str, err_str); + num_nids, is_mr, option, + ioc_cmd, cmd_str, err_str); out: if (lnet_nidlist2) free(lnet_nidlist2); @@ -748,8 +751,8 @@ replace_sep(char *str, char sep, char newsep) } } -int lustre_lnet_modify_peer(char *prim_nid, char *nids, bool is_mr, - int cmd, int seq_no, struct cYAML **err_rc) +int lustre_lnet_modify_peer(char *prim_nid, char *nids, bool is_mr, int cmd, + int option, int seq_no, struct cYAML **err_rc) { int num_nids, rc; char err_str[LNET_MAX_STR_LEN] = "Error"; @@ -788,7 +791,7 @@ int lustre_lnet_modify_peer(char *prim_nid, char *nids, bool is_mr, rc = lustre_lnet_mod_peer_nidlist(pnid, lnet_nidlist, cmd, num_nids, is_mr, - -1, err_rc); + option, -1, err_rc); out: if (rc != LUSTRE_CFG_RC_NO_ERR) @@ -4561,6 +4564,7 @@ static int handle_yaml_peer_common(struct cYAML *tree, struct cYAML **show_rc, struct cYAML *seq_no, *prim_nid, *mr, *peer_nis; lnet_nid_t lnet_nidlist[LNET_MAX_NIDS_PER_PEER]; lnet_nid_t pnid = LNET_NID_ANY; + int force = 0; seq_no = cYAML_get_object_item(tree, "seq_no"); seqn = seq_no ? seq_no->cy_valueint : -1; @@ -4627,8 +4631,8 @@ static int handle_yaml_peer_common(struct cYAML *tree, struct cYAML **show_rc, } rc = lustre_lnet_mod_peer_nidlist(pnid, lnet_nidlist, cmd, - num_nids, mr_value, seqn, - err_rc); + num_nids, mr_value, force, + seqn, err_rc); failed: if (nidstr) diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h index 055b4d3..1cae395 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.h +++ b/lnet/utils/lnetconfig/liblnetconfig.h @@ -581,11 +581,12 @@ int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc, * nids - a comma separated string of nids * is_mr - Specifies whether this peer is MR capable. 
* cmd - CONFIG or DELETE + * force - DELETE: force-delete a peer with locked primary nid; CONFIG: lock the primary nid * seq_no - sequence number of the command * err_rc - YAML structure of the resultant return code */ -int lustre_lnet_modify_peer(char *prim_nid, char *nids, bool is_mr, - int cmd, int seq_no, struct cYAML **err_rc); +int lustre_lnet_modify_peer(char *prim_nid, char *nids, bool is_mr, int cmd, + int force, int seq_no, struct cYAML **err_rc); /* * lustre_lnet_show_peer diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index 906d6e0..313b1d2 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -254,12 +254,14 @@ command_t peer_cmds[] = { "\t--prim_nid: Primary NID of the peer.\n" "\t--nid: one or more peer NIDs\n" "\t--non_mr: create this peer as not Multi-Rail capable\n" - "\t--ip2nets: specify a range of nids per peer"}, + "\t--ip2nets: specify a range of nids per peer\n" + "\t--lock_prim: lock primary nid\n"}, {"del", jt_del_peer_nid, 0, "delete a peer NID\n" "\t--prim_nid: Primary NID of the peer.\n" "\t--nid: list of NIDs to remove. If none provided,\n" "\t peer is deleted\n" - "\t--ip2nets: specify a range of nids per peer"}, + "\t--ip2nets: specify a range of nids per peer\n" + "\t--force: force-delete locked primary NID\n"}, {"show", jt_show_peer, 0, "show peer information\n" "\t--nid: NID of peer to filter on.\n" "\t--verbose: display detailed output per peer."
@@ -2023,12 +2025,15 @@ static int jt_peer_nid_common(int argc, char **argv, int cmd) char *prim_nid = NULL, *nidstr = NULL; char err_str[LNET_MAX_STR_LEN] = "Error"; struct cYAML *err_rc = NULL; + int force_lock = 0; - const char *const short_opts = "k:mn:"; + const char *const short_opts = "k:m:n:f:l"; const struct option long_opts[] = { { .name = "prim_nid", .has_arg = required_argument, .val = 'k' }, { .name = "non_mr", .has_arg = no_argument, .val = 'm' }, { .name = "nid", .has_arg = required_argument, .val = 'n' }, + { .name = "force", .has_arg = no_argument, .val = 'f' }, + { .name = "lock_prim", .has_arg = no_argument, .val = 'l' }, { .name = NULL } }; rc = check_cmd(peer_cmds, "peer", "add", 2, argc, argv); @@ -2053,6 +2058,22 @@ static int jt_peer_nid_common(int argc, char **argv, int cmd) } is_mr = false; break; + case 'f': + if (cmd == LNETCTL_ADD_CMD) { + rc = LUSTRE_CFG_RC_BAD_PARAM; + snprintf(err_str, LNET_MAX_STR_LEN, + "Unrecognized option '-%c'", opt); + } + force_lock = 1; + break; + case 'l': + if (cmd == LNETCTL_DEL_CMD) { + rc = LUSTRE_CFG_RC_BAD_PARAM; + snprintf(err_str, LNET_MAX_STR_LEN, + "Unrecognized option '-%c'", opt); + } + force_lock = 1; + break; case '?': print_help(peer_cmds, "peer", cmd == LNETCTL_ADD_CMD ? "add" : "del"); @@ -2062,7 +2083,7 @@ static int jt_peer_nid_common(int argc, char **argv, int cmd) } rc = lustre_lnet_modify_peer(prim_nid, nidstr, is_mr, cmd, - -1, &err_rc); + force_lock, -1, &err_rc); if (rc != LUSTRE_CFG_RC_NO_ERR) goto out; diff --git a/lustre/doc/lnetctl.8 b/lustre/doc/lnetctl.8 index 204424f..0a97585 100644 --- a/lustre/doc/lnetctl.8 +++ b/lustre/doc/lnetctl.8 @@ -125,6 +125,9 @@ Configure an LNET peer with at least one supplied NID\. The primary NID must be \-\-non_mr: create this peer as not Multi-Rail capable\. . .br +\-\-lock_prim: lock primary NID of the peer for the purpose of identification with Lustre\. +. +.br .TP \fBlnetctl peer\fR del @@ -139,6 +142,9 @@ Delete a peer NID. 
The primary NID must be specified. If the removed NID is th \-\-prim_nid: Primary NID of the peer\. . .br +\-\-force: optional, use to delete a peer with primary NID locked\. +. +.br .TP \fBlnetctl peer\fR show diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 0807942..5fdabba 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -972,6 +972,24 @@ EOF } run_test 25 "Delete all secondary nids from peer (tcp, gni and o2ib)" +test_26() { + reinit_dlc || return $? + + do_lnetctl peer add --prim_nid 1.1.1.1@tcp --lock_prim || + error "Peer add with --lock_prim option failed $?" + do_lnetctl peer del --prim_nid 1.1.1.1@tcp || + error "Peer del failed $?" + $LNETCTL peer show --nid 1.1.1.1@tcp | grep -q 1.1.1.1@tcp || + error "1.1.1.1@tcp is not listed" + do_lnetctl peer del --prim_nid 1.1.1.1@tcp --force || + error "Peer del --force failed $?" + do_lnetctl peer show --nid 1.1.1.1@tcp && + error "failed to delete 1.1.1.1@tcp" + + return 0 +} +run_test 26 "Delete peer with primary nid locked" + test_99a() { reinit_dlc || return $? -- 1.8.3.1