/* do mds to ost setattr asynchronously */
if (!rqset) {
/* Do not wait for response. */
- ptlrpcd_add_req(req, PSCOPE_OTHER);
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
} else {
req->rq_interpret_reply =
(ptlrpc_interpterer_t)osc_setattr_interpret;
sa->sa_cookie = cookie;
if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PSCOPE_OTHER);
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
else
ptlrpc_set_add_req(rqset, req);
}
sa->sa_upcall = upcall;
sa->sa_cookie = cookie;
if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PSCOPE_OTHER);
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
else
ptlrpc_set_add_req(rqset, req);
}
/* Do not wait for response */
- ptlrpcd_add_req(req, PSCOPE_OTHER);
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
RETURN(0);
}
page_count, aa, cli->cl_r_in_flight, cli->cl_w_in_flight);
req->rq_interpret_reply = brw_interpret;
- ptlrpcd_add_req(req, PSCOPE_BRW);
+
+ /* XXX: Maybe the caller can check the RPC bulk descriptor to see which
+ * CPU/NUMA node the majority of pages were allocated on, and try
+ * to assign the async RPC to the CPU core (PDL_POLICY_PREFERRED)
+ * to reduce cross-CPU memory traffic.
+ *
+ * On the other hand, we expect multiple ptlrpcd threads and the
+ * initial write sponsor to run in parallel, especially when data
+ * checksumming is enabled: that is a CPU-bound operation which a
+ * single ptlrpcd thread cannot process in time. So more ptlrpcd
+ * threads sharing the BRW load (with PDL_POLICY_ROUND) seems better.
+ */
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
RETURN(1);
}
req->rq_interpret_reply =
(ptlrpc_interpterer_t)osc_enqueue_interpret;
if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PSCOPE_OTHER);
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
else
ptlrpc_set_add_req(rqset, req);
} else if (intent) {
ptlrpc_set_add_req(set, req);
ptlrpc_check_set(NULL, set);
} else
- ptlrpcd_add_req(req, PSCOPE_OTHER);
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
RETURN(0);
}