From 2be10428ac22426c5868b699b6c0b80c040465dc Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Fri, 12 Oct 2018 11:30:34 -0700 Subject: [PATCH 1/1] LU-11514 lnet: separate ni state from recovery To make the code more readable we make the ni_state an enumerated type, and create a separate bit field to track the recovery state. Both of these are protected by the lnet_ni_lock() Signed-off-by: Amir Shehata Change-Id: I5acfccecffd5dbb07c9ad3b1c7651cf291b85cb8 Reviewed-on: https://review.whamcloud.com/33361 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Sonia Sharma Reviewed-by: Doug Oucharek Reviewed-by: Olaf Weber Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-types.h | 24 ++++++++++++++++-------- lnet/lnet/api-ni.c | 8 +++----- lnet/lnet/config.c | 2 +- lnet/lnet/lib-move.c | 22 ++++++++++++---------- 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 7433239..bd1df1e 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -325,12 +325,17 @@ enum lnet_net_state { LNET_NET_STATE_DELETING }; -#define LNET_NI_STATE_INIT (1 << 0) -#define LNET_NI_STATE_ACTIVE (1 << 1) -#define LNET_NI_STATE_FAILED (1 << 2) -#define LNET_NI_STATE_RECOVERY_PENDING (1 << 3) -#define LNET_NI_STATE_RECOVERY_FAILED (1 << 4) -#define LNET_NI_STATE_DELETING (1 << 5) +enum lnet_ni_state { + /* initial state when NI is created */ + LNET_NI_STATE_INIT = 0, + /* set when NI is brought up */ + LNET_NI_STATE_ACTIVE, + /* set when NI is being shutdown */ + LNET_NI_STATE_DELETING, +}; + +#define LNET_NI_RECOVERY_PENDING BIT(0) +#define LNET_NI_RECOVERY_FAILED BIT(1) enum lnet_stats_type { LNET_STATS_TYPE_SEND = 0, @@ -454,8 +459,11 @@ struct lnet_ni { /* my health status */ struct lnet_ni_status *ni_status; - /* NI FSM */ - __u32 ni_state; + /* NI FSM. Protected by lnet_ni_lock() */ + enum lnet_ni_state ni_state; + + /* Recovery state. 
Protected by lnet_ni_lock() */ + __u32 ni_recovery_state; /* per NI LND tunables */ struct lnet_lnd_tunables ni_lnd_tunables; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index b12ed66..1b3aa1a4 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -1899,7 +1899,7 @@ lnet_clear_zombies_nis_locked(struct lnet_net *net) list_del_init(&ni->ni_netlist); /* the ni should be in deleting state. If it's not it's * a bug */ - LASSERT(ni->ni_state & LNET_NI_STATE_DELETING); + LASSERT(ni->ni_state == LNET_NI_STATE_DELETING); cfs_percpt_for_each(ref, j, ni->ni_refs) { if (*ref == 0) continue; @@ -1948,8 +1948,7 @@ lnet_shutdown_lndni(struct lnet_ni *ni) lnet_net_lock(LNET_LOCK_EX); lnet_ni_lock(ni); - ni->ni_state |= LNET_NI_STATE_DELETING; - ni->ni_state &= ~LNET_NI_STATE_ACTIVE; + ni->ni_state = LNET_NI_STATE_DELETING; lnet_ni_unlock(ni); lnet_ni_unlink_locked(ni); lnet_incr_dlc_seq(); @@ -2087,8 +2086,7 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun) } lnet_ni_lock(ni); - ni->ni_state |= LNET_NI_STATE_ACTIVE; - ni->ni_state &= ~LNET_NI_STATE_INIT; + ni->ni_state = LNET_NI_STATE_ACTIVE; lnet_ni_unlock(ni); /* We keep a reference on the loopback net through the loopback NI */ diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index 077e435..240a27d 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -482,7 +482,7 @@ lnet_ni_alloc_common(struct lnet_net *net, char *iface) ni->ni_net_ns = NULL; ni->ni_last_alive = ktime_get_real_seconds(); - ni->ni_state |= LNET_NI_STATE_INIT; + ni->ni_state = LNET_NI_STATE_INIT; list_add_tail(&ni->ni_netlist, &net->net_ni_added); /* diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 3e2624c..e217e95 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2890,7 +2890,8 @@ lnet_unlink_ni_recovery_mdh_locked(struct lnet_ni *ni, int cpt, bool force) LNetInvalidateMDHandle(&recovery_mdh); - if (ni->ni_state & LNET_NI_STATE_RECOVERY_PENDING || force) { + if (ni->ni_recovery_state 
& LNET_NI_RECOVERY_PENDING || + force) { recovery_mdh = ni->ni_ping_mdh; LNetInvalidateMDHandle(&ni->ni_ping_mdh); } @@ -2943,7 +2944,7 @@ lnet_recover_local_nis(void) lnet_net_lock(0); lnet_ni_lock(ni); - if (!(ni->ni_state & LNET_NI_STATE_ACTIVE) || + if (ni->ni_state != LNET_NI_STATE_ACTIVE || healthv == LNET_MAX_HEALTH_VALUE) { list_del_init(&ni->ni_recovery); lnet_unlink_ni_recovery_mdh_locked(ni, 0, false); @@ -2958,9 +2959,9 @@ lnet_recover_local_nis(void) * But we want to keep the local_ni on the recovery queue * so we can continue the attempts to recover it. */ - if (ni->ni_state & LNET_NI_STATE_RECOVERY_FAILED) { + if (ni->ni_recovery_state & LNET_NI_RECOVERY_FAILED) { lnet_unlink_ni_recovery_mdh_locked(ni, 0, true); - ni->ni_state &= ~LNET_NI_STATE_RECOVERY_FAILED; + ni->ni_recovery_state &= ~LNET_NI_RECOVERY_FAILED; } lnet_ni_unlock(ni); @@ -2971,8 +2972,8 @@ lnet_recover_local_nis(void) libcfs_nid2str(ni->ni_nid)); lnet_ni_lock(ni); - if (!(ni->ni_state & LNET_NI_STATE_RECOVERY_PENDING)) { - ni->ni_state |= LNET_NI_STATE_RECOVERY_PENDING; + if (!(ni->ni_recovery_state & LNET_NI_RECOVERY_PENDING)) { + ni->ni_recovery_state |= LNET_NI_RECOVERY_PENDING; lnet_ni_unlock(ni); LIBCFS_ALLOC(ev_info, sizeof(*ev_info)); @@ -2980,7 +2981,8 @@ lnet_recover_local_nis(void) CERROR("out of memory. 
Can't recover %s\n", libcfs_nid2str(ni->ni_nid)); lnet_ni_lock(ni); - ni->ni_state &= ~LNET_NI_STATE_RECOVERY_PENDING; + ni->ni_recovery_state &= + ~LNET_NI_RECOVERY_PENDING; lnet_ni_unlock(ni); continue; } @@ -3052,7 +3054,7 @@ lnet_recover_local_nis(void) lnet_ni_lock(ni); if (rc) - ni->ni_state &= ~LNET_NI_STATE_RECOVERY_PENDING; + ni->ni_recovery_state &= ~LNET_NI_RECOVERY_PENDING; } lnet_ni_unlock(ni); } @@ -3467,9 +3469,9 @@ lnet_handle_recovery_reply(struct lnet_mt_event_info *ev_info, return; } lnet_ni_lock(ni); - ni->ni_state &= ~LNET_NI_STATE_RECOVERY_PENDING; + ni->ni_recovery_state &= ~LNET_NI_RECOVERY_PENDING; if (status) - ni->ni_state |= LNET_NI_STATE_RECOVERY_FAILED; + ni->ni_recovery_state |= LNET_NI_RECOVERY_FAILED; lnet_ni_unlock(ni); lnet_net_unlock(0); -- 1.8.3.1