void cfs_timer_disarm(cfs_timer_t *t);
int cfs_timer_is_armed(cfs_timer_t *t);
cfs_time_t cfs_timer_deadline(cfs_timer_t *t);
+
+/*
+ * Memory
+ */
+#ifndef cfs_memory_pressure_get
+#define cfs_memory_pressure_get() (0)
+#endif
+#ifndef cfs_memory_pressure_set
+#define cfs_memory_pressure_set() do {} while (0)
+#endif
+#ifndef cfs_memory_pressure_clr
+#define cfs_memory_pressure_clr() do {} while (0)
+#endif
+
+/*
+ * Mark the current context as being under memory pressure (if it is not
+ * already) and return the previous state.  The returned value must be
+ * handed back to cfs_memory_pressure_restore() when the memory-tight
+ * section ends, so a nested caller does not clear a flag that an outer
+ * caller had already set.
+ */
+static inline int cfs_memory_pressure_get_and_set(void)
+{
+ int old = cfs_memory_pressure_get();
+
+ if (!old)
+ cfs_memory_pressure_set();
+ return old;
+}
+
+/*
+ * Undo cfs_memory_pressure_get_and_set(): put the memory-pressure flag
+ * back into the state captured in 'old' — re-assert it if it was set
+ * beforehand, clear it otherwise.
+ */
+static inline void cfs_memory_pressure_restore(int old)
+{
+ if (old)
+ cfs_memory_pressure_set();
+ else
+ cfs_memory_pressure_clr();
+ /* NOTE(review): this trailing return in a void function is redundant
+  * and could be dropped in a follow-up cleanup. */
+ return;
+}
#endif
#define __cfs_free_page(page) __cfs_free_pages(page, 0)
#define cfs_free_page(p) __free_pages(p, 0)
-#define libcfs_memory_pressure_get() (current->flags & PF_MEMALLOC)
-#define libcfs_memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0)
-#define libcfs_memory_pressure_clr() do { current->flags &= ~PF_MEMALLOC; } while (0)
+#define cfs_memory_pressure_get() (current->flags & PF_MEMALLOC)
+#define cfs_memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0)
+#define cfs_memory_pressure_clr() do { current->flags &= ~PF_MEMALLOC; } while (0)
#if BITS_PER_LONG == 32
/* limit to lowmem on 32-bit systems */
cfs_page_t *page;
struct cfs_trace_page *tage;
+ /* My caller is trying to free memory */
+ if (!cfs_in_interrupt() && cfs_memory_pressure_get())
+ return NULL;
+
/*
* Don't spam console with allocation failures: they will be reported
* by upper layer anyway.
lnet_process_id_t msg_target;
__u32 msg_type;
+ unsigned int msg_vmflush:1; /* VM trying to free memory */
unsigned int msg_target_is_router:1; /* sending to a router */
unsigned int msg_routing:1; /* being forwarded */
unsigned int msg_ack:1; /* ack on finalize (PUT) */
unsigned int payload_offset = lntmsg->msg_offset;
unsigned int payload_nob = lntmsg->msg_len;
kptl_net_t *net = ni->ni_data;
- kptl_peer_t *peer;
+ kptl_peer_t *peer = NULL;
+ int mpflag = 0;
kptl_tx_t *tx;
int nob;
int nfrag;
LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
LASSERT (!cfs_in_interrupt());
+ if (lntmsg->msg_vmflush)
+ mpflag = cfs_memory_pressure_get_and_set();
+
rc = kptllnd_find_target(net, target, &peer);
if (rc != 0)
- return rc;
-
+ goto out;
+
/* NB peer->peer_id does NOT always equal target, be careful with
* which one to use */
switch (type) {
kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov,
NULL, lntmsg->msg_md->md_iov.kiov,
0, lntmsg->msg_md->md_length);
-
+
tx->tx_lnet_msg = lntmsg;
tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr;
kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_GET,
payload_offset, payload_nob);
#endif
}
-
+
nob = offsetof(kptl_immediate_msg_t, kptlim_payload[payload_nob]);
kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_IMMEDIATE, target, nob);
kptllnd_tx_launch(peer, tx, nfrag);
out:
- kptllnd_peer_decref(peer);
+ if (lntmsg->msg_vmflush)
+ cfs_memory_pressure_restore(mpflag);
+ if (peer)
+ kptllnd_peer_decref(peer);
return rc;
}
hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen;
hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
*kptllnd_tunables.kptl_max_msg_size;
-
+
new_peer->peer_state = PEER_STATE_WAITING_HELLO;
new_peer->peer_last_matchbits_seen = last_matchbits_seen;
-
+
kptllnd_peer_add_peertable_locked(new_peer);
cfs_write_unlock_irqrestore(g_lock, flags);
- /* NB someone else could get in now and post a message before I post
- * the HELLO, but post_tx/check_sends take care of that! */
+ /* NB someone else could get in now and post a message before I post
+ * the HELLO, but post_tx/check_sends take care of that! */
CDEBUG(D_NETTRACE, "%s: post initial hello %p\n",
libcfs_id2str(new_peer->peer_id), hello_tx);
kptllnd_post_tx(new_peer, hello_tx, 0);
kptllnd_peer_check_sends(new_peer);
-
+
*peerp = new_peer;
return 0;
-
+
unwind_2:
kptllnd_peer_unreserve_buffers();
unwind_1:
int
ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
{
+ int mpflag = 0;
int type = lntmsg->msg_type;
lnet_process_id_t target = lntmsg->msg_target;
unsigned int payload_niov = lntmsg->msg_niov;
desc_size = offsetof(ksock_tx_t,
tx_frags.paged.kiov[payload_niov]);
+ if (lntmsg->msg_vmflush)
+ mpflag = cfs_memory_pressure_get_and_set();
tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size);
if (tx == NULL) {
CERROR("Can't allocate tx desc type %d size %d\n",
type, desc_size);
+ if (lntmsg->msg_vmflush)
+ cfs_memory_pressure_restore(mpflag);
return (-ENOMEM);
}
/* The first fragment will be set later in pro_pack */
rc = ksocknal_launch_packet(ni, tx, target);
+ if (lntmsg->msg_vmflush)
+ cfs_memory_pressure_restore(mpflag);
if (rc == 0)
return (0);
if (lp->lp_txcredits < 0) {
msg->msg_delayed = 1;
- cfs_list_add_tail (&msg->msg_list, &lp->lp_txq);
+ cfs_list_add_tail(&msg->msg_list, &lp->lp_txq);
return EAGAIN;
}
}
if (ni->ni_txcredits < 0) {
msg->msg_delayed = 1;
- cfs_list_add_tail (&msg->msg_list, &ni->ni_txq);
+ cfs_list_add_tail(&msg->msg_list, &ni->ni_txq);
return EAGAIN;
}
}
libcfs_id2str(target));
return -ENOMEM;
}
+ msg->msg_vmflush = !!cfs_memory_pressure_get();
LNET_LOCK();
#define might_sleep_if(c)
#define smp_mb()
-#define libcfs_memory_pressure_get() (0)
-#define libcfs_memory_pressure_put() do {} while (0)
-#define libcfs_memory_pressure_clr() do {} while (0)
-
/* FIXME sys/capability will finally included linux/fs.h thus
* cause numerous trouble on x86-64. as temporary solution for
* build broken at Cray, we copy definition we need from capability.h
rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
rq_early:1, rq_must_unlink:1,
rq_fake:1, /* this fake req */
+ rq_memalloc:1, /* req originated from "kswapd" */
/* server-side flags */
rq_packed_final:1, /* packed final reply */
rq_hp:1, /* high priority RPC */
OSC_FLAGS);
/*
* bug 18881: we can't just break out here when
- * error occurrs after cl_page_prep has been
+ * error occurs after cl_page_prep has been
* called against the page. The correct
* way is to call page's completion routine,
* as in osc_oap_interrupted. For simplicity,
oap->oap_page_off = opg->ops_from;
oap->oap_count = opg->ops_to - opg->ops_from;
/* Give a hint to OST that requests are coming from kswapd - bug19529 */
- if (libcfs_memory_pressure_get())
+ if (cfs_memory_pressure_get())
oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
oap->oap_brw_flags |= OBD_BRW_SYNC;
if (osc_io_srvlock(oio))
if (cmd & OBD_BRW_WRITE) {
/* trigger a write rpc stream as long as there are dirtiers
* waiting for space. as they're waiting, they're not going to
- * create more pages to coallesce with what's waiting.. */
+ * create more pages to coalesce with what's waiting.. */
if (!cfs_list_empty(&cli->cl_cache_waiters)) {
CDEBUG(D_CACHE, "cache waiters forcing RPC\n");
RETURN(1);
enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
struct ldlm_lock *lock = NULL;
struct cl_req_attr crattr;
- int i, rc;
+ int i, rc, mpflag = 0;
ENTRY;
LASSERT(!cfs_list_empty(rpc_list));
+ if (cmd & OBD_BRW_MEMALLOC)
+ mpflag = cfs_memory_pressure_get_and_set();
+
memset(&crattr, 0, sizeof crattr);
OBD_ALLOC(pga, sizeof(*pga) * page_count);
if (pga == NULL)
GOTO(out, req = ERR_PTR(rc));
}
+ if (cmd & OBD_BRW_MEMALLOC)
+ req->rq_memalloc = 1;
+
/* Need to update the timestamps after the request is built in case
* we race with setattr (locally or in queue at OST). If OST gets
* later setattr before earlier BRW (as determined by the request xid),
CFS_INIT_LIST_HEAD(rpc_list);
aa->aa_clerq = clerq;
out:
+ if (cmd & OBD_BRW_MEMALLOC)
+ cfs_memory_pressure_restore(mpflag);
+
capa_put(crattr.cra_capa);
if (IS_ERR(req)) {
if (oa)
* \param cmd OBD_BRW_* macroses
* \param lop pending pages
*
- * \return zero if pages successfully add to send queue.
- * \return not zere if error occurring.
+ * \return zero if no page added to send queue.
+ * \return 1 if pages successfully added to send queue.
+ * \return negative on errors.
*/
static int
osc_send_oap_rpc(const struct lu_env *env, struct client_obd *cli,
CFS_LIST_HEAD(tmp_list);
unsigned int ending_offset;
unsigned starting_offset = 0;
- int srvlock = 0;
+ int srvlock = 0, mem_tight = 0;
struct cl_object *clob = NULL;
ENTRY;
* until completion unlocks it. commit_write submits a page
* as not ready because its unlock will happen unconditionally
* as the call returns. if we race with commit_write giving
- * us that page we dont' want to create a hole in the page
+ * us that page we don't want to create a hole in the page
* stream, so we stop and leave the rpc to be fired by
* another dirtier or kupdated interval (the not ready page
* will still be on the dirty list). we could call in
/* now put the page back in our accounting */
cfs_list_add_tail(&oap->oap_rpc_item, &rpc_list);
+ if (oap->oap_brw_flags & OBD_BRW_MEMALLOC)
+ mem_tight = 1;
if (page_count == 0)
srvlock = !!(oap->oap_brw_flags & OBD_BRW_SRVLOCK);
if (++page_count >= cli->cl_max_pages_per_rpc)
RETURN(0);
}
- req = osc_build_req(env, cli, &rpc_list, page_count, cmd);
+ req = osc_build_req(env, cli, &rpc_list, page_count,
+ mem_tight ? (cmd | OBD_BRW_MEMALLOC) : cmd);
if (IS_ERR(req)) {
LASSERT(cfs_list_empty(&rpc_list));
loi_list_maint(cli, loi);
race_counter++;
}
- /* attempt some inter-object balancing by issueing rpcs
+ /* attempt some inter-object balancing by issuing rpcs
* for each object in turn */
if (!cfs_list_empty(&loi->loi_hp_ready_item))
cfs_list_del_init(&loi->loi_hp_ready_item);
oap->oap_count = count;
oap->oap_brw_flags = brw_flags;
/* Give a hint to OST that requests are coming from kswapd - bug19529 */
- if (libcfs_memory_pressure_get())
+ if (cfs_memory_pressure_get())
oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
cfs_spin_lock(&oap->oap_lock);
oap->oap_async_flags = async_flags;
if ((body->oa.o_flags & OBD_BRW_MEMALLOC) &&
(exp->exp_connection->c_peer.nid == exp->exp_connection->c_self))
- libcfs_memory_pressure_set();
+ cfs_memory_pressure_set();
objcount = req_capsule_get_size(&req->rq_pill, &RMF_OBD_IOOBJ,
RCL_CLIENT) / sizeof(*ioo);
exp->exp_connection->c_remote_uuid.uuid,
libcfs_id2str(req->rq_peer));
}
- libcfs_memory_pressure_clr();
+ cfs_memory_pressure_clr();
RETURN(rc);
}
{
int rc;
int rc2;
+ int mpflag = 0;
struct ptlrpc_connection *connection;
lnet_handle_me_t reply_me_h;
lnet_md_t reply_md;
if (request->rq_resend)
lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+ if (request->rq_memalloc)
+ mpflag = cfs_memory_pressure_get_and_set();
+
rc = sptlrpc_cli_wrap_request(request);
if (rc)
- RETURN(rc);
+ GOTO(out, rc);
/* bulk register should be done after wrap_request() */
if (request->rq_bulk != NULL) {
rc = ptlrpc_register_bulk (request);
if (rc != 0)
- RETURN(rc);
+ GOTO(out, rc);
}
if (!noreply) {
request->rq_request_portal,
request->rq_xid, 0);
if (rc == 0)
- RETURN(rc);
+ GOTO(out, rc);
ptlrpc_req_finished(request);
if (noreply)
- RETURN(rc);
+ GOTO(out, rc);
cleanup_me:
/* MEUnlink is safe; the PUT didn't even get off the ground, and
/* We do sync unlink here as there was no real transfer here so
* the chance to have long unlink to sluggish net is smaller here. */
ptlrpc_unregister_bulk(request, 0);
+ out:
+ if (request->rq_memalloc)
+ cfs_memory_pressure_restore(mpflag);
return rc;
}