Whamcloud - gitweb
git://git.whamcloud.com
/
fs
/
lustre-release.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
| inline |
side by side
LU-15509 lnet: Ping buffer ref leak in lnet_peer_data_present
[fs/lustre-release.git]
/
lnet
/
lnet
/
peer.c
diff --git
a/lnet/lnet/peer.c
b/lnet/lnet/peer.c
index
620d406
..
ae71e25
100644
(file)
--- a/
lnet/lnet/peer.c
+++ b/
lnet/lnet/peer.c
@@
-637,7
+637,7
@@
lnet_peer_table_del_rtrs_locked(struct lnet_net *net,
{
struct lnet_peer_ni *lp;
struct lnet_peer_ni *tmp;
-
lnet_nid_t
gw_nid;
+
struct lnet_nid
gw_nid;
int i;
for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
@@
-649,12
+649,10
@@
lnet_peer_table_del_rtrs_locked(struct lnet_net *net,
if (!lnet_isrouter(lp))
continue;
- /* FIXME handle large-addr nid */
- gw_nid = lnet_nid_to_nid4(
- &lp->lpni_peer_net->lpn_peer->lp_primary_nid);
+ gw_nid = lp->lpni_peer_net->lpn_peer->lp_primary_nid;
lnet_net_unlock(LNET_LOCK_EX);
- lnet_del_route(LNET_NET_ANY, gw_nid);
+ lnet_del_route(LNET_NET_ANY,
&
gw_nid);
lnet_net_lock(LNET_LOCK_EX);
}
}
@@
-1441,21
+1439,20
@@
unlock:
}
EXPORT_SYMBOL(LNetAddPeer);
-/* FIXME support large-addr nid */
-lnet_nid_t
-LNetPrimaryNID(lnet_nid_t nid)
+void LNetPrimaryNID(struct lnet_nid *nid)
{
struct lnet_peer *lp;
struct lnet_peer_ni *lpni;
-
lnet_nid_t primary_nid = nid
;
+
struct lnet_nid orig
;
int rc = 0;
int cpt;
- if (nid == LNET_NID_LO_0)
- return LNET_NID_LO_0;
+ if (!nid || nid_is_lo0(nid))
+ return;
+ orig = *nid;
cpt = lnet_net_lock_current();
- lpni = lnet_
nid2peerni_locked(nid, LNET_NID_ANY
, cpt);
+ lpni = lnet_
peerni_by_nid_locked(nid, NULL
, cpt);
if (IS_ERR(lpni)) {
rc = PTR_ERR(lpni);
goto out_unlock;
@@
-1482,7
+1479,7
@@
LNetPrimaryNID(lnet_nid_t nid)
* and lookup the lpni again
*/
lnet_peer_ni_decref_locked(lpni);
- lpni = lnet_
find_peer_ni
_locked(nid);
+ lpni = lnet_
peer_ni_find
_locked(nid);
if (!lpni) {
rc = -ENOENT;
goto out_unlock;
@@
-1497,15
+1494,14
@@
LNetPrimaryNID(lnet_nid_t nid)
if (lnet_is_discovery_disabled(lp))
break;
}
-
primary_nid = lnet_nid_to_nid4(&lp->lp_primary_nid)
;
+
*nid = lp->lp_primary_nid
;
out_decref:
lnet_peer_ni_decref_locked(lpni);
out_unlock:
lnet_net_unlock(cpt);
- CDEBUG(D_NET, "NID %s primary NID %s rc %d\n", libcfs_nid2str(nid),
- libcfs_nid2str(primary_nid), rc);
- return primary_nid;
+ CDEBUG(D_NET, "NID %s primary NID %s rc %d\n", libcfs_nidstr(&orig),
+ libcfs_nidstr(nid), rc);
}
EXPORT_SYMBOL(LNetPrimaryNID);
@@
-1848,19
+1844,22
@@
out:
/*
* lpni creation initiated due to traffic either sending or receiving.
+ * Callers must hold ln_api_mutex
+ * Ref taken on lnet_peer_ni returned by this function
*/
-static
int
+static
struct lnet_peer_ni *
lnet_peer_ni_traffic_add(struct lnet_nid *nid, struct lnet_nid *pref)
+__must_hold(&the_lnet.ln_api_mutex)
{
- struct lnet_peer *lp;
- struct lnet_peer_net *lpn;
+ struct lnet_peer *lp
= NULL
;
+ struct lnet_peer_net *lpn
= NULL
;
struct lnet_peer_ni *lpni;
unsigned flags = 0;
int rc = 0;
if (LNET_NID_IS_ANY(nid)) {
rc = -EINVAL;
- goto out;
+ goto out
_err
;
}
/* lnet_net_lock is not needed here because ln_api_lock is held */
@@
-1872,7
+1871,6
@@
lnet_peer_ni_traffic_add(struct lnet_nid *nid, struct lnet_nid *pref)
* traffic, we just assume everything is ok and
* return.
*/
- lnet_peer_ni_decref_locked(lpni);
goto out;
}
@@
-1880,24
+1878,31
@@
lnet_peer_ni_traffic_add(struct lnet_nid *nid, struct lnet_nid *pref)
rc = -ENOMEM;
lp = lnet_peer_alloc(nid);
if (!lp)
- goto out;
+ goto out
_err
;
lpn = lnet_peer_net_alloc(LNET_NID_NET(nid));
if (!lpn)
- goto out_
free_lp
;
+ goto out_
err
;
lpni = lnet_peer_ni_alloc(nid);
if (!lpni)
- goto out_
free_lpn
;
+ goto out_
err
;
lnet_peer_ni_set_non_mr_pref_nid(lpni, pref);
- return lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
+ /* lnet_peer_attach_peer_ni() always returns 0 */
+ rc = lnet_peer_attach_peer_ni(lp, lpn, lpni, flags);
-out_free_lpn:
- LIBCFS_FREE(lpn, sizeof(*lpn));
-out_free_lp:
- LIBCFS_FREE(lp, sizeof(*lp));
+ lnet_peer_ni_addref_locked(lpni);
+
+out_err:
+ if (rc) {
+ if (lpn)
+ LIBCFS_FREE(lpn, sizeof(*lpn));
+ if (lp)
+ LIBCFS_FREE(lp, sizeof(*lp));
+ lpni = ERR_PTR(rc);
+ }
out:
CDEBUG(D_NET, "peer %s: %d\n", libcfs_nidstr(nid), rc);
- return
rc
;
+ return
lpni
;
}
/*
@@
-2066,10
+2071,10
@@
lnet_destroy_peer_ni_locked(struct kref *ref)
}
struct lnet_peer_ni *
-lnet_nid2peerni_ex(struct lnet_nid *nid, int cpt)
+lnet_nid2peerni_ex(struct lnet_nid *nid)
+__must_hold(&the_lnet.ln_api_mutex)
{
struct lnet_peer_ni *lpni = NULL;
- int rc;
if (the_lnet.ln_state != LNET_STATE_RUNNING)
return ERR_PTR(-ESHUTDOWN);
@@
-2082,19
+2087,11
@@
lnet_nid2peerni_ex(struct lnet_nid *nid, int cpt)
if (lpni)
return lpni;
- lnet_net_unlock(
cpt
);
+ lnet_net_unlock(
LNET_LOCK_EX
);
- rc = lnet_peer_ni_traffic_add(nid, NULL);
- if (rc) {
- lpni = ERR_PTR(rc);
- goto out_net_relock;
- }
+ lpni = lnet_peer_ni_traffic_add(nid, NULL);
- lpni = lnet_peer_ni_find_locked(nid);
- LASSERT(lpni);
-
-out_net_relock:
- lnet_net_lock(cpt);
+ lnet_net_lock(LNET_LOCK_EX);
return lpni;
}
@@
-2108,7
+2105,6
@@
lnet_peerni_by_nid_locked(struct lnet_nid *nid,
struct lnet_nid *pref, int cpt)
{
struct lnet_peer_ni *lpni = NULL;
- int rc;
if (the_lnet.ln_state != LNET_STATE_RUNNING)
return ERR_PTR(-ESHUTDOWN);
@@
-2136,30
+2132,18
@@
lnet_peerni_by_nid_locked(struct lnet_nid *nid,
lnet_net_unlock(cpt);
mutex_lock(&the_lnet.ln_api_mutex);
/*
- *
Shutdown is only set
under the ln_api_lock, so a single
+ *
the_lnet.ln_state is only modified
under the ln_api_lock, so a single
* check here is sufficient.
*/
- if (the_lnet.ln_state != LNET_STATE_RUNNING) {
- lpni = ERR_PTR(-ESHUTDOWN);
- goto out_mutex_unlock;
- }
+ if (the_lnet.ln_state == LNET_STATE_RUNNING)
+ lpni = lnet_peer_ni_traffic_add(nid, pref);
- rc = lnet_peer_ni_traffic_add(nid, pref);
- if (rc) {
- lpni = ERR_PTR(rc);
- goto out_mutex_unlock;
- }
-
- lpni = lnet_peer_ni_find_locked(nid);
- LASSERT(lpni);
-
-out_mutex_unlock:
mutex_unlock(&the_lnet.ln_api_mutex);
lnet_net_lock(cpt);
/* Lock has been dropped, check again for shutdown. */
if (the_lnet.ln_state != LNET_STATE_RUNNING) {
- if (!IS_ERR(lpni))
+ if (!IS_ERR
_OR_NULL
(lpni))
lnet_peer_ni_decref_locked(lpni);
lpni = ERR_PTR(-ESHUTDOWN);
}
@@
-2595,6
+2579,8
@@
again:
break;
if (lnet_peer_is_uptodate(lp))
break;
+ if (lp->lp_state & LNET_PEER_MARK_DELETED)
+ break;
lnet_peer_queue_for_discovery(lp);
count++;
CDEBUG(D_NET, "Discovery attempt # %d\n", count);
@@
-2639,7
+2625,9
@@
again:
rc = lp->lp_dc_error;
else if (!block)
CDEBUG(D_NET, "non-blocking discovery\n");
- else if (!lnet_peer_is_uptodate(lp) && !lnet_is_discovery_disabled(lp))
+ else if (!lnet_peer_is_uptodate(lp) &&
+ !(lnet_is_discovery_disabled(lp) ||
+ (lp->lp_state & LNET_PEER_MARK_DELETED)))
goto again;
CDEBUG(D_NET, "peer %s NID %s: %d. %s\n",
@@
-3260,12
+3248,15
@@
__must_hold(&lp->lp_lock)
if (lp->lp_state & LNET_PEER_MARK_DELETED)
return 0;
- if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
- return -ESHUTDOWN;
-
spin_unlock(&lp->lp_lock);
mutex_lock(&the_lnet.ln_api_mutex);
+ if (the_lnet.ln_state != LNET_STATE_RUNNING ||
+ the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING) {
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ spin_lock(&lp->lp_lock);
+ return -ESHUTDOWN;
+ }
lnet_net_lock(LNET_LOCK_EX);
/* remove the peer from the discovery work
@@
-3343,8
+3334,10
@@
__must_hold(&lp->lp_lock)
* down, and our reference count may be all that is keeping it
* alive. Don't do any work on it.
*/
- if (list_empty(&lp->lp_peer_list))
+ if (list_empty(&lp->lp_peer_list)) {
+ lnet_ping_buffer_decref(pbuf);
goto out;
+ }
flags = LNET_PEER_DISCOVERED;
if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL)
@@
-3364,12
+3357,16
@@
__must_hold(&lp->lp_lock)
* primary NID to the correct value here. Moreover, this peer
* can show up with only the loopback NID in the ping buffer.
*/
- if (pbuf->pb_info.pi_nnis <= 1)
+ if (pbuf->pb_info.pi_nnis <= 1) {
+ lnet_ping_buffer_decref(pbuf);
goto out;
+ }
nid = pbuf->pb_info.pi_ni[1].ns_nid;
if (nid_is_lo0(&lp->lp_primary_nid)) {
rc = lnet_peer_set_primary_nid(lp, nid, flags);
- if (!rc)
+ if (rc)
+ lnet_ping_buffer_decref(pbuf);
+ else
rc = lnet_peer_merge_data(lp, pbuf);
/*
* if the primary nid of the peer is present in the ping info returned
@@
-3392,6
+3389,7
@@
__must_hold(&lp->lp_lock)
CERROR("Primary NID error %s versus %s: %d\n",
libcfs_nidstr(&lp->lp_primary_nid),
libcfs_nid2str(nid), rc);
+ lnet_ping_buffer_decref(pbuf);
} else {
rc = lnet_peer_merge_data(lp, pbuf);
}
@@
-3574,7
+3572,7
@@
static int lnet_peer_send_push(struct lnet_peer *lp)
__must_hold(&lp->lp_lock)
{
struct lnet_ping_buffer *pbuf;
- struct lnet_process
_
id id;
+ struct lnet_processid id;
struct lnet_md md;
int cpt;
int rc;
@@
-3621,13
+3619,13
@@
__must_hold(&lp->lp_lock)
lnet_peer_addref_locked(lp);
id.pid = LNET_PID_LUSTRE;
if (!LNET_NID_IS_ANY(&lp->lp_disc_dst_nid))
- id.nid = l
net_nid_to_nid4(&lp->lp_disc_dst_nid)
;
+ id.nid = l
p->lp_disc_dst_nid
;
else
- id.nid = l
net_nid_to_nid4(&lp->lp_primary_nid)
;
+ id.nid = l
p->lp_primary_nid
;
lnet_net_unlock(cpt);
- rc = LNetPut(
lnet_nid_to_nid4(&lp->lp_disc_src_nid)
, lp->lp_push_mdh,
- LNET_ACK_REQ, id, LNET_RESERVED_PORTAL,
+ rc = LNetPut(
&lp->lp_disc_src_nid
, lp->lp_push_mdh,
+ LNET_ACK_REQ,
&
id, LNET_RESERVED_PORTAL,
LNET_PROTO_PING_MATCHBITS, 0, 0);
/*
@@
-3954,8
+3952,10
@@
void lnet_peer_discovery_stop(void)
else
wake_up(&the_lnet.ln_dc_waitq);
+ mutex_unlock(&the_lnet.ln_api_mutex);
wait_event(the_lnet.ln_dc_waitq,
the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN);
+ mutex_lock(&the_lnet.ln_api_mutex);
LASSERT(list_empty(&the_lnet.ln_dc_request));
LASSERT(list_empty(&the_lnet.ln_dc_working));