+/* XXX: We want multiple CPU cores to share the async RPC load. So we start many
+ * ptlrpcd threads. We also want to reduce the ptlrpcd overhead caused by
+ * data transfer cross-CPU cores. So we bind ptlrpcd thread to specified
+ * CPU core. But binding all ptlrpcd threads maybe cause response delay
+ * because of some CPU core(s) busy with other loads.
+ *
+ * For example: "ls -l", some async RPCs for statahead are assigned to
+ * ptlrpcd_0, and ptlrpcd_0 is bound to CPU_0, but CPU_0 may be quite busy
+ * with other non-ptlrpcd, like "ls -l" itself (we want to the "ls -l"
+ * thread, statahead thread, and ptlrpcd thread can run in parallel), under
+ * such case, the statahead async RPCs can not be processed in time, it is
+ * unexpected. If ptlrpcd_0 can be re-scheduled on other CPU core, it may
+ * be better. But it breaks former data transfer policy.
+ *
+ * So we shouldn't be blind for avoiding the data transfer. We make some
+ * compromise: divide the ptlrpcd threds pool into two parts. One part is
+ * for bound mode, each ptlrpcd thread in this part is bound to some CPU
+ * core. The other part is for free mode, all the ptlrpcd threads in the
+ * part can be scheduled on any CPU core. We specify some partnership
+ * between bound mode ptlrpcd thread(s) and free mode ptlrpcd thread(s),
+ * and the async RPC load within the partners are shared.
+ *
+ * It can partly avoid data transfer cross-CPU (if the bound mode ptlrpcd
+ * thread can be scheduled in time), and try to guarantee the async RPC
+ * processed ASAP (as long as the free mode ptlrpcd thread can be scheduled
+ * on any CPU core).
+ *
+ * As for how to specify the partnership between bound mode ptlrpcd
+ * thread(s) and free mode ptlrpcd thread(s), the simplest way is to use
+ * <free bound> pair. In future, we can specify some more complex
+ * partnership based on the patches for CPU partition. But before such
+ * patches are available, we prefer to use the simplest one.
+ */
+# ifdef CFS_CPU_MODE_NUMA
+# warning "fix ptlrpcd_bind() to use new CPU partition APIs"
+# endif
+static int ptlrpcd_bind(int index, int max)
+{
+ struct ptlrpcd_ctl *pc;
+ int rc = 0;
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+ struct ptlrpcd_ctl *ppc;
+ int node, i, pidx;
+ cpumask_t mask;
+#endif
+ ENTRY;
+
+ LASSERT(index <= max - 1);
+ pc = &ptlrpcds->pd_threads[index];
+ switch (ptlrpcd_bind_policy) {
+ case PDB_POLICY_NONE:
+ pc->pc_npartners = -1;
+ break;
+ case PDB_POLICY_FULL:
+ pc->pc_npartners = 0;
+ cfs_set_bit(LIOD_BIND, &pc->pc_flags);
+ break;
+ case PDB_POLICY_PAIR:
+ LASSERT(max % 2 == 0);
+ pc->pc_npartners = 1;
+ break;
+ case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+ node = cpu_to_node(index);
+ mask = node_to_cpumask(node);
+ for (i = max; i < cfs_num_online_cpus(); i++)
+ cpu_clear(i, mask);
+ pc->pc_npartners = cpus_weight(mask) - 1;
+ cfs_set_bit(LIOD_BIND, &pc->pc_flags);
+#else
+ LASSERT(max >= 3);
+ pc->pc_npartners = 2;
+#endif
+ break;
+ default:
+ CERROR("unknown ptlrpcd bind policy %d\n", ptlrpcd_bind_policy);
+ rc = -EINVAL;
+ }
+
+ if (rc == 0 && pc->pc_npartners > 0) {
+ OBD_ALLOC(pc->pc_partners,
+ sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners);
+ if (pc->pc_partners == NULL) {
+ pc->pc_npartners = 0;
+ rc = -ENOMEM;
+ } else {
+ switch (ptlrpcd_bind_policy) {
+ case PDB_POLICY_PAIR:
+ if (index & 0x1) {
+ cfs_set_bit(LIOD_BIND, &pc->pc_flags);
+ pc->pc_partners[0] = &ptlrpcds->
+ pd_threads[index - 1];
+ ptlrpcds->pd_threads[index - 1].
+ pc_partners[0] = pc;
+ }
+ break;
+ case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+ /* partners are cores in the same NUMA node.
+ * setup partnership only with ptlrpcd threads
+ * that are already initialized
+ */
+ for (pidx = 0, i = 0; i < index; i++) {
+ if (cpu_isset(i, mask)) {
+ ppc = &ptlrpcds->pd_threads[i];
+ pc->pc_partners[pidx++] = ppc;
+ ppc->pc_partners[ppc->
+ pc_npartners++] = pc;
+ }
+ }
+ /* adjust number of partners to the number
+ * of partnership really setup */
+ pc->pc_npartners = pidx;
+#else
+ if (index & 0x1)
+ cfs_set_bit(LIOD_BIND, &pc->pc_flags);
+ if (index > 0) {
+ pc->pc_partners[0] = &ptlrpcds->
+ pd_threads[index - 1];
+ ptlrpcds->pd_threads[index - 1].
+ pc_partners[1] = pc;
+ if (index == max - 1) {
+ pc->pc_partners[1] =
+ &ptlrpcds->pd_threads[0];
+ ptlrpcds->pd_threads[0].
+ pc_partners[0] = pc;
+ }
+ }
+#endif
+ break;
+ }
+ }
+ }
+
+ RETURN(rc);
+}
+