Whamcloud - gitweb
LU-1144 ptlrpc: implement a NUMA aware ptlrpcd binding policy
[fs/lustre-release.git] / lustre / ptlrpc / ptlrpcd.c
index 69bd210..4f646a1 100644 (file)
@@ -28,9 +28,8 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
- */
-/*
- * Copyright (c) 2011 Whamcloud, Inc.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -95,7 +94,7 @@ CFS_MODULE_PARM(ptlrpcd_bind_policy, "i", int, 0644,
 #endif
 static struct ptlrpcd *ptlrpcds;
 
-cfs_semaphore_t ptlrpcd_sem;
+cfs_mutex_t ptlrpcd_mutex;
 static int ptlrpcd_users = 0;
 
 void ptlrpcd_wake(struct ptlrpc_request *req)
@@ -381,8 +380,8 @@ static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
                                         if (rc > 0)
                                                 CDEBUG(D_RPCTRACE, "transfer %d"
                                                        " async RPCs [%d->%d]\n",
-                                                        rc, pc->pc_index,
-                                                        partner->pc_index);
+                                                        rc, partner->pc_index,
+                                                        pc->pc_index);
                                 }
                                 ptlrpc_reqset_put(ps);
                         } while (rc == 0 && pc->pc_cursor != first);
@@ -414,7 +413,7 @@ static int ptlrpcd(void *arg)
                 int index = pc->pc_index;
 
                 if (index >= 0 && index < cfs_num_possible_cpus()) {
-                        while (!cfs_cpu_online(index)) {
+                        while (!cpu_online(index)) {
                                 if (++index >= cfs_num_possible_cpus())
                                         index = 0;
                         }
@@ -524,6 +523,11 @@ static int ptlrpcd_bind(int index, int max)
 {
         struct ptlrpcd_ctl *pc;
         int rc = 0;
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+        struct ptlrpcd_ctl *ppc;
+        int node, i, pidx;
+        cpumask_t mask;
+#endif
         ENTRY;
 
         LASSERT(index <= max - 1);
@@ -541,8 +545,17 @@ static int ptlrpcd_bind(int index, int max)
                 pc->pc_npartners = 1;
                 break;
         case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+                node = cpu_to_node(index);
+                mask = node_to_cpumask(node);
+                for (i = max; i < cfs_num_online_cpus(); i++)
+                        cpu_clear(i, mask);
+                pc->pc_npartners = cpus_weight(mask) - 1;
+                cfs_set_bit(LIOD_BIND, &pc->pc_flags);
+#else
                 LASSERT(max >= 3);
                 pc->pc_npartners = 2;
+#endif
                 break;
         default:
                 CERROR("unknown ptlrpcd bind policy %d\n", ptlrpcd_bind_policy);
@@ -556,12 +569,10 @@ static int ptlrpcd_bind(int index, int max)
                         pc->pc_npartners = 0;
                         rc = -ENOMEM;
                 } else {
-                        if (index & 0x1)
-                                cfs_set_bit(LIOD_BIND, &pc->pc_flags);
-
                         switch (ptlrpcd_bind_policy) {
                         case PDB_POLICY_PAIR:
                                 if (index & 0x1) {
+                                        cfs_set_bit(LIOD_BIND, &pc->pc_flags);
                                         pc->pc_partners[0] = &ptlrpcds->
                                                 pd_threads[index - 1];
                                         ptlrpcds->pd_threads[index - 1].
@@ -569,6 +580,25 @@ static int ptlrpcd_bind(int index, int max)
                                 }
                                 break;
                         case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+                                /* Partners are cores in the same NUMA node.
+                                 * Set up partnerships only with ptlrpcd
+                                 * threads that are already initialized.
+                                 */
+                                for (pidx = 0, i = 0; i < index; i++) {
+                                        if (cpu_isset(i, mask)) {
+                                                ppc = &ptlrpcds->pd_threads[i];
+                                                pc->pc_partners[pidx++] = ppc;
+                                                ppc->pc_partners[ppc->
+                                                          pc_npartners++] = pc;
+                                        }
+                                }
+                                /* Adjust the number of partners to the number
+                                 * of partnerships actually set up. */
+                                pc->pc_npartners = pidx;
+#else
+                                if (index & 0x1)
+                                        cfs_set_bit(LIOD_BIND, &pc->pc_flags);
                                 if (index > 0) {
                                         pc->pc_partners[0] = &ptlrpcds->
                                                 pd_threads[index - 1];
@@ -581,6 +611,7 @@ static int ptlrpcd_bind(int index, int max)
                                                 pc_partners[0] = pc;
                                         }
                                 }
+#endif
                                 break;
                         }
                 }
@@ -840,18 +871,18 @@ int ptlrpcd_addref(void)
         int rc = 0;
         ENTRY;
 
-        cfs_mutex_down(&ptlrpcd_sem);
+        cfs_mutex_lock(&ptlrpcd_mutex);
         if (++ptlrpcd_users == 1)
                 rc = ptlrpcd_init();
-        cfs_mutex_up(&ptlrpcd_sem);
+        cfs_mutex_unlock(&ptlrpcd_mutex);
         RETURN(rc);
 }
 
 void ptlrpcd_decref(void)
 {
-        cfs_mutex_down(&ptlrpcd_sem);
+        cfs_mutex_lock(&ptlrpcd_mutex);
         if (--ptlrpcd_users == 0)
                 ptlrpcd_fini();
-        cfs_mutex_up(&ptlrpcd_sem);
+        cfs_mutex_unlock(&ptlrpcd_mutex);
 }
 /** @} ptlrpcd */