From: Mr NeilBrown Date: Mon, 22 Jun 2020 03:57:02 +0000 (+1000) Subject: LU-12678 lnet: clarify initialization of lpni_refcount X-Git-Tag: 2.13.56~127 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=5bcb17cf1d476982e3f2b2b4f43d7ff673bf9e1d LU-12678 lnet: clarify initialization of lpni_refcount This refcount is not explicitly initialized, so is implicitly initialized to zero. This prohibits the use lnet_peer_ni_addref_locked() for taking the first reference, so a couple of places open-code the atomic_inc just in case. There is code in lnet_peer_add_nid() which drops a reference before accessing the structure. This isn't actually wrong, but it looks weird. lnet_destroy_peer_ni_locked() makes assumptions about the content of the structure, so it cannot be used on a partially initialized structure. All these special cases make the code harder to understand. This patch cleans this up: - lpni_refcount is now initialized to one, so the called for lnet_peer_ni_alloc() now owns a reference and must be sure to release it. - lnet_peer_attach_peer_ni() now consumes a reference to the lpni. A pointer returned by lnet_peer_ni_alloc() is most often passed to lnet_peer_attach_peer_ni() so these to changes largely cancel each other out - not completely - The two 'atomic_inc' calls are changed to 'lnet_peer_ni_addref_locked(). - A LIBCFS_FREE() is replaced by lnet_peer_ni_decref_locked(), and that function is improved to cope with lpni_hashlist being empty, or ->lpni_net being NULL. - lnet_peer_add_nid() now holds a reference on the lpni until it don't need it any more, then explicity drops it. This should make no functional change, but should make the code a little less confusing. Signed-off-by: Mr NeilBrown Change-Id: Iec312e637d1e7b6eb14f2c363843403dd5cf8e8f Reviewed-on: https://review.whamcloud.com/39120 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Chris Horn Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 20f78fb..dedeac8 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -167,6 +167,7 @@ lnet_peer_ni_alloc(lnet_nid_t nid) INIT_LIST_HEAD(&lpni->lpni_recovery); INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list); LNetInvalidateMDHandle(&lpni->lpni_recovery_ping_mdh); + atomic_set(&lpni->lpni_refcount, 1); spin_lock_init(&lpni->lpni_lock); @@ -194,7 +195,7 @@ lnet_peer_ni_alloc(lnet_nid_t nid) * list so it can be easily found and revisited. */ /* FIXME: per-net implementation instead? */ - atomic_inc(&lpni->lpni_refcount); + lnet_peer_ni_addref_locked(lpni); list_add_tail(&lpni->lpni_on_remote_peer_ni_list, &the_lnet.ln_remote_peer_ni_list); } @@ -1236,9 +1237,9 @@ lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id) * may be attached to a different peer, in which case it will be * properly detached first. The whole operation is done atomically. * - * Always returns 0. This is the last function called from functions - * that do return an int, so returning 0 here allows the compiler to - * do a tail call. + * This function consumes the reference on lpni and Always returns 0. + * This is the last function called from functions that do return an + * int, so returning 0 here allows the compiler to do a tail call. */ static int lnet_peer_attach_peer_ni(struct lnet_peer *lp, @@ -1257,8 +1258,7 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp, ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt]; list_add_tail(&lpni->lpni_hashlist, &ptable->pt_hash[hash]); ptable->pt_version++; - /* This is the 1st refcount on lpni. */ - atomic_inc(&lpni->lpni_refcount); + lnet_peer_ni_addref_locked(lpni); } /* Detach the peer_ni from an existing peer, if necessary. */ @@ -1306,11 +1306,12 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp, spin_unlock(&lp->lp_lock); lp->lp_nnis++; - lnet_net_unlock(LNET_LOCK_EX); CDEBUG(D_NET, "peer %s NID %s flags %#x\n", libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(lpni->lpni_nid), flags); + lnet_peer_ni_decref_locked(lpni); + lnet_net_unlock(LNET_LOCK_EX); return 0; } @@ -1431,17 +1432,16 @@ lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) * it is not connected to this peer and was configured * by DLC. */ - lnet_peer_ni_decref_locked(lpni); if (lpni->lpni_peer_net->lpn_peer == lp) - goto out; + goto out_free_lpni; if (lnet_peer_ni_is_configured(lpni)) { rc = -EEXIST; - goto out; + goto out_free_lpni; } /* If this is the primary NID, destroy the peer. */ if (lnet_peer_ni_is_primary(lpni)) { struct lnet_peer *rtr_lp = - lpni->lpni_peer_net->lpn_peer; + lpni->lpni_peer_net->lpn_peer; int rtr_refcount = rtr_lp->lp_rtr_refcount; /* * if we're trying to delete a router it means @@ -1453,17 +1453,18 @@ lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) lnet_rtr_transfer_to_peer(rtr_lp, lp); } lnet_peer_del(lpni->lpni_peer_net->lpn_peer); + lnet_peer_ni_decref_locked(lpni); lpni = lnet_peer_ni_alloc(nid); if (!lpni) { rc = -ENOMEM; - goto out; + goto out_free_lpni; } } } else { lpni = lnet_peer_ni_alloc(nid); if (!lpni) { rc = -ENOMEM; - goto out; + goto out_free_lpni; } } @@ -1486,9 +1487,7 @@ lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags); out_free_lpni: - /* If the peer_ni was allocated above its peer_net pointer is NULL */ - if (!lpni->lpni_peer_net) - LIBCFS_FREE(lpni, sizeof(*lpni)); + lnet_peer_ni_decref_locked(lpni); out: CDEBUG(D_NET, "peer %s NID %s flags %#x: %d\n", libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid), @@ -1711,19 +1710,22 @@ lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni) lpni->lpni_peer_net = NULL; lpni->lpni_net = NULL; - /* remove the peer ni from the zombie list */ - ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt]; - spin_lock(&ptable->pt_zombie_lock); - list_del_init(&lpni->lpni_hashlist); - ptable->pt_zombies--; - spin_unlock(&ptable->pt_zombie_lock); + if (!list_empty(&lpni->lpni_hashlist)) { + /* remove the peer ni from the zombie list */ + ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt]; + spin_lock(&ptable->pt_zombie_lock); + list_del_init(&lpni->lpni_hashlist); + ptable->pt_zombies--; + spin_unlock(&ptable->pt_zombie_lock); + } if (lpni->lpni_pref_nnids > 1) CFS_FREE_PTR_ARRAY(lpni->lpni_pref.nids, lpni->lpni_pref_nnids); LIBCFS_FREE(lpni, sizeof(*lpni)); - lnet_peer_net_decref_locked(lpn); + if (lpn) + lnet_peer_net_decref_locked(lpn); } struct lnet_peer_ni *