+/* XXX: We want multiple CPU cores to share the async RPC load. So we start many
+ * ptlrpcd threads. We also want to reduce the ptlrpcd overhead caused by
+ * data transfer cross-CPU cores. So we bind ptlrpcd thread to specified
+ * CPU core. But binding all ptlrpcd threads maybe cause response delay
+ * because of some CPU core(s) busy with other loads.
+ *
+ * For example: "ls -l", some async RPCs for statahead are assigned to
+ * ptlrpcd_0, and ptlrpcd_0 is bound to CPU_0, but CPU_0 may be quite busy
+ * with other non-ptlrpcd, like "ls -l" itself (we want to the "ls -l"
+ * thread, statahead thread, and ptlrpcd thread can run in parallel), under
+ * such case, the statahead async RPCs can not be processed in time, it is
+ * unexpected. If ptlrpcd_0 can be re-scheduled on other CPU core, it may
+ * be better. But it breaks former data transfer policy.
+ *
+ * So we shouldn't be blind for avoiding the data transfer. We make some
+ * compromise: divide the ptlrpcd threds pool into two parts. One part is
+ * for bound mode, each ptlrpcd thread in this part is bound to some CPU
+ * core. The other part is for free mode, all the ptlrpcd threads in the
+ * part can be scheduled on any CPU core. We specify some partnership
+ * between bound mode ptlrpcd thread(s) and free mode ptlrpcd thread(s),
+ * and the async RPC load within the partners are shared.
+ *
+ * It can partly avoid data transfer cross-CPU (if the bound mode ptlrpcd
+ * thread can be scheduled in time), and try to guarantee the async RPC
+ * processed ASAP (as long as the free mode ptlrpcd thread can be scheduled
+ * on any CPU core).
+ *
+ * As for how to specify the partnership between bound mode ptlrpcd
+ * thread(s) and free mode ptlrpcd thread(s), the simplest way is to use
+ * <free bound> pair. In future, we can specify some more complex
+ * partnership based on the patches for CPU partition. But before such
+ * patches are available, we prefer to use the simplest one.
+ */
+# ifdef CFS_CPU_MODE_NUMA
+# warning "fix ptlrpcd_bind() to use new CPU partition APIs"
+# endif
+static int ptlrpcd_bind(int index, int max)
+{
+ struct ptlrpcd_ctl *pc;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(index <= max - 1);
+ pc = &ptlrpcds->pd_threads[index];
+ switch (ptlrpcd_bind_policy) {
+ case PDB_POLICY_NONE:
+ pc->pc_npartners = -1;
+ break;
+ case PDB_POLICY_FULL:
+ pc->pc_npartners = 0;
+ cfs_set_bit(LIOD_BIND, &pc->pc_flags);
+ break;
+ case PDB_POLICY_PAIR:
+ LASSERT(max % 2 == 0);
+ pc->pc_npartners = 1;
+ break;
+ case PDB_POLICY_NEIGHBOR:
+ LASSERT(max >= 3);
+ pc->pc_npartners = 2;
+ break;
+ default:
+ CERROR("unknown ptlrpcd bind policy %d\n", ptlrpcd_bind_policy);
+ rc = -EINVAL;
+ }
+
+ if (rc == 0 && pc->pc_npartners > 0) {
+ OBD_ALLOC(pc->pc_partners,
+ sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners);
+ if (pc->pc_partners == NULL) {
+ pc->pc_npartners = 0;
+ rc = -ENOMEM;
+ } else {
+ if (index & 0x1)
+ cfs_set_bit(LIOD_BIND, &pc->pc_flags);
+
+ switch (ptlrpcd_bind_policy) {
+ case PDB_POLICY_PAIR:
+ if (index & 0x1) {
+ pc->pc_partners[0] = &ptlrpcds->
+ pd_threads[index - 1];
+ ptlrpcds->pd_threads[index - 1].
+ pc_partners[0] = pc;
+ }
+ break;
+ case PDB_POLICY_NEIGHBOR:
+ if (index > 0) {
+ pc->pc_partners[0] = &ptlrpcds->
+ pd_threads[index - 1];
+ ptlrpcds->pd_threads[index - 1].
+ pc_partners[1] = pc;
+ if (index == max - 1) {
+ pc->pc_partners[1] =
+ &ptlrpcds->pd_threads[0];
+ ptlrpcds->pd_threads[0].
+ pc_partners[0] = pc;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ RETURN(rc);
+}
+