Whamcloud - gitweb
git://git.whamcloud.com
/
fs
/
lustre-release.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
| inline |
side by side
LU-6245 libcfs: remove types abstraction from libcfs/LNet code
[fs/lustre-release.git]
/
lnet
/
klnds
/
gnilnd
/
gnilnd_conn.c
diff --git
a/lnet/klnds/gnilnd/gnilnd_conn.c
b/lnet/klnds/gnilnd/gnilnd_conn.c
index
948cc1c
..
066fe1e
100644
(file)
--- a/
lnet/klnds/gnilnd/gnilnd_conn.c
+++ b/
lnet/klnds/gnilnd/gnilnd_conn.c
@@
-38,11
+38,15
@@
kgnilnd_map_fmablk(kgn_device_t *device, kgn_fma_memblock_t *fma_blk)
{
gni_return_t rrc;
__u32 flags = GNI_MEM_READWRITE;
+ static unsigned long reg_to;
+ int rfto = *kgnilnd_tunables.kgn_reg_fail_timeout;
if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) {
flags |= GNI_MEM_PHYS_CONT;
}
+ fma_blk->gnm_hold_timeout = 0;
+
/* make sure we are mapping a clean block */
LASSERTF(fma_blk->gnm_hndl.qword1 == 0UL, "fma_blk %p dirty\n", fma_blk);
@@
-50,14
+54,25
@@
kgnilnd_map_fmablk(kgn_device_t *device, kgn_fma_memblock_t *fma_blk)
fma_blk->gnm_blk_size, device->gnd_rcv_fma_cqh,
flags, &fma_blk->gnm_hndl);
if (rrc != GNI_RC_SUCCESS) {
- /* XXX Nic: need a way to silence this for runtime stuff that is ok to fail
- * -- like when under MDD or GART pressure on big systems
- */
+ if (rfto != GNILND_REGFAILTO_DISABLE) {
+ if (reg_to == 0) {
+ reg_to = jiffies + cfs_time_seconds(rfto);
+ } else if (time_after(jiffies, reg_to)) {
+ CERROR("FATAL:fmablk registration has failed "
+ "for %ld seconds.\n",
+ cfs_duration_sec(jiffies - reg_to) +
+ rfto);
+ LBUG();
+ }
+ }
+
CNETERR("register fmablk failed 0x%p mbox_size %d flags %u\n",
fma_blk, fma_blk->gnm_mbox_size, flags);
RETURN(-ENOMEM);
}
+ reg_to = 0;
+
/* PHYS_CONT memory isn't really mapped, at least not in GART -
* but all mappings chew up a MDD
*/
@@
-81,6
+96,19
@@
kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys)
gni_smsg_attr_t smsg_attr;
unsigned long fmablk_vers;
+#if defined(CONFIG_CRAY_XT) && !defined(CONFIG_CRAY_COMPUTE)
+ /* We allocate large blocks of memory here potentially leading
+ * to memory exhaustion during massive reconnects during a network
+ * outage. Limit the amount of fma blocks to use by always keeping
+ * a percent of pages free initially set to 25% of total memory. */
+ if (global_page_state(NR_FREE_PAGES) < kgnilnd_data.free_pages_limit) {
+ LCONSOLE_INFO("Exceeding free page limit of %ld. "
+ "Free pages available %ld\n",
+ kgnilnd_data.free_pages_limit,
+ global_page_state(NR_FREE_PAGES));
+ return -ENOMEM;
+ }
+#endif
/* we'll use fmablk_vers and the gnd_fmablk_mutex to gate access
* to this allocation code. Everyone will sample the version
* before and after getting the mutex. If it has changed,
@@
-151,7
+179,7
@@
kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys)
num_mbox, fma_blk->gnm_blk_size, fma_blk->gnm_mbox_size,
*kgnilnd_tunables.kgn_mbox_per_block);
- LIBCFS_ALLOC(fma_blk->gnm_block, fma_blk->gnm_blk_size);
+ fma_blk->gnm_block = kgnilnd_vzalloc(fma_blk->gnm_blk_size);
if (fma_blk->gnm_block == NULL) {
CNETERR("could not allocate virtual SMSG mailbox memory, %d bytes\n", fma_blk->gnm_blk_size);
rc = -ENOMEM;
@@
-189,7
+217,7
@@
kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys)
fma_blk->gnm_avail_mboxs = fma_blk->gnm_num_mboxs = num_mbox;
CDEBUG(D_MALLOC, "alloc fmablk 0x%p num %d msg_maxsize %d credits %d "
- "mbox_size %d MDD "LPX64"."LPX64"\n",
+ "mbox_size %d MDD %#llx.%#llx\n",
fma_blk, num_mbox, smsg_attr.msg_maxsize, smsg_attr.mbox_maxcredit,
fma_blk->gnm_mbox_size, fma_blk->gnm_hndl.qword1,
fma_blk->gnm_hndl.qword2);
@@
-232,8
+260,11
@@
kgnilnd_unmap_fmablk(kgn_device_t *dev, kgn_fma_memblock_t *fma_blk)
gni_return_t rrc;
/* if some held, set hold_timeout from conn timeouts used in this block
- * but not during shutdown, then just nuke and pave */
- if (fma_blk->gnm_held_mboxs && (!kgnilnd_data.kgn_shutdown)) {
+ * but not during shutdown, then just nuke and pave
+ * During a stack reset, we need to deregister with a hold timeout
+ * set so we don't use the same mdd after reset is complete */
+ if ((fma_blk->gnm_held_mboxs && !kgnilnd_data.kgn_shutdown) ||
+ kgnilnd_data.kgn_in_reset) {
fma_blk->gnm_hold_timeout = GNILND_TIMEOUT2DEADMAN;
}
@@
-255,7
+286,9
@@
kgnilnd_unmap_fmablk(kgn_device_t *dev, kgn_fma_memblock_t *fma_blk)
"tried to double unmap or something bad, fma_blk %p (rrc %d)\n",
fma_blk, rrc);
- if (fma_blk->gnm_hold_timeout) {
+ if (fma_blk->gnm_hold_timeout &&
+ !(kgnilnd_data.kgn_in_reset &&
+ fma_blk->gnm_state == GNILND_FMABLK_PHYS)) {
atomic_inc(&dev->gnd_n_mdd_held);
} else {
atomic_dec(&dev->gnd_n_mdd);
@@
-382,7
+415,7
@@
kgnilnd_find_free_mbox(kgn_conn_t *conn)
CDEBUG(D_NET, "conn %p smsg %p fmablk %p "
"allocating SMSG mbox %d buf %p "
- "offset %u hndl "LPX64"."LPX64"\n",
+ "offset %u hndl %#llx.%#llx\n",
conn, smsg_attr, fma_blk, id,
smsg_attr->msg_buffer, smsg_attr->mbox_offset,
fma_blk->gnm_hndl.qword1,
@@
-472,14
+505,14
@@
kgnilnd_release_mbox(kgn_conn_t *conn, int purgatory_hold)
* > 0 - hold it for now */
if (purgatory_hold == 0) {
CDEBUG(D_NET, "conn %p smsg %p fmablk %p freeing SMSG mbox %d "
- "hndl "LPX64"."LPX64"\n",
+ "hndl %#llx.%#llx\n",
conn, smsg_attr, fma_blk, id,
fma_blk->gnm_hndl.qword1, fma_blk->gnm_hndl.qword2);
fma_blk->gnm_avail_mboxs++;
} else if (purgatory_hold > 0) {
CDEBUG(D_NET, "conn %p smsg %p fmablk %p holding SMSG mbox %d "
- "hndl "LPX64"."LPX64"\n",
+ "hndl %#llx.%#llx\n",
conn, smsg_attr, fma_blk, id,
fma_blk->gnm_hndl.qword1, fma_blk->gnm_hndl.qword2);
@@
-488,7
+521,7
@@
kgnilnd_release_mbox(kgn_conn_t *conn, int purgatory_hold)
conn->gnc_timeout);
} else {
CDEBUG(D_NET, "conn %p smsg %p fmablk %p release SMSG mbox %d "
- "hndl "LPX64"."LPX64"\n",
+ "hndl %#llx.%#llx\n",
conn, smsg_attr, fma_blk, id,
fma_blk->gnm_hndl.qword1, fma_blk->gnm_hndl.qword2);
@@
-908,7
+941,7
@@
kgnilnd_unpack_connreq(kgn_dgram_t *dgram)
}
if (connreq->gncr_peerstamp == 0 || connreq->gncr_connstamp == 0) {
- CERROR("Recived bad timestamps peer "LPU64" conn "LPU64"\n",
+ CERROR("Recived bad timestamps peer %llu conn %llu\n",
connreq->gncr_peerstamp, connreq->gncr_connstamp);
return -EPROTO;
}
@@
-1406,13
+1439,13
@@
kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
RETURN(0);
}
- CDEBUG(D_NET, "ready "LPX64" on device 0x%p\n",
+ CDEBUG(D_NET, "ready %#llx on device 0x%p\n",
readyid, dev);
dgram = (kgn_dgram_t *)readyid;
LASSERTF(dgram->gndg_magic == GNILND_DGRAM_MAGIC,
- "dgram 0x%p from id "LPX64" with bad magic %x\n",
+ "dgram 0x%p from id %#llx with bad magic %x\n",
dgram, readyid, dgram->gndg_magic);
LASSERTF(dgram->gndg_state == GNILND_DGRAM_POSTED ||
@@
-1446,7
+1479,7
@@
kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
spin_unlock(&dev->gnd_dgram_lock);
LASSERTF(grc != GNI_RC_NO_MATCH, "kgni lied! probe_by_id told us that"
- " id "LPU64" was ready\n", readyid);
+ " id %llu was ready\n", readyid);
CDEBUG(D_NET, "grc %d dgram 0x%p type %s post_state %d "
"remote_addr %u remote_id %u\n", grc, dgram,
@@
-1662,7
+1695,7
@@
kgnilnd_wait_for_canceled_dgrams(kgn_device_t *dev)
if (grc != GNI_RC_SUCCESS)
continue;
- CDEBUG(D_NET, "ready "LPX64" on device %d->0x%p\n",
+ CDEBUG(D_NET, "ready %#llx on device %d->0x%p\n",
readyid, dev->gnd_id, dev);
rc = kgnilnd_probe_for_dgram(dev, &dgram);
@@
-1817,8
+1850,8
@@
kgnilnd_finish_connect(kgn_dgram_t *dgram)
}
if (peer->gnp_down == GNILND_RCA_NODE_DOWN) {
- CNETERR("Received connection request from %s that RCA thinks is"
- " down.\n", libcfs_nid2str(her_nid));
+ CNETERR("Received connection request from down nid %s\n",
+ libcfs_nid2str(her_nid));
peer->gnp_down = GNILND_RCA_NODE_UP;
}
@@
-2170,7
+2203,7
@@
inform_peer:
/* now that we are outside the lock, tell Mommy */
if (peer != NULL) {
- kgnilnd_peer_notify(peer, rc);
+ kgnilnd_peer_notify(peer, rc, 0);
kgnilnd_peer_decref(peer);
}
}
@@
-2493,8
+2526,9
@@
kgnilnd_dgram_mover(void *arg)
/* last second chance for others to poke us */
did_something += xchg(&dev->gnd_dgram_ready, GNILND_DGRAM_IDLE);
- /* check flag variables before comittingi even if we did something;
- * if we are after the deadline call schedule */
+ /* check flag variables before committing even if we
+ * did something; if we are after the deadline call
+ * schedule */
if ((!did_something || time_after(jiffies, deadline)) &&
!kgnilnd_data.kgn_shutdown &&
!kgnilnd_data.kgn_quiesce_trigger) {