#include <linux/cpu.h>
#include <linux/sched.h>
-#include <libcfs/libcfs_cpu.h>
#include <libcfs/libcfs.h>
+#include <libcfs/libcfs_cpu.h>
+
+/** CPU partition: one group of CPUs/NUMA nodes within a cfs_cpt_table */
+struct cfs_cpu_partition {
+ /* CPUs mask for this partition */
+ cpumask_var_t cpt_cpumask;
+ /* nodes mask for this partition */
+ nodemask_t *cpt_nodemask;
+ /* NUMA distance between CPTs */
+ unsigned int *cpt_distance;
+ /* spread rotor for NUMA allocator */
+ unsigned int cpt_spread_rotor;
+ /* NUMA node if cpt_nodemask is empty */
+ int cpt_node;
+};
+
+/** descriptor for CPU partitions */
+struct cfs_cpt_table {
+ /* spread rotor for NUMA allocator */
+ unsigned int ctb_spread_rotor;
+ /* maximum NUMA distance between all nodes in table */
+ unsigned int ctb_distance;
+ /* # of CPU partitions */
+ int ctb_nparts;
+ /* partitions tables */
+ struct cfs_cpu_partition *ctb_parts;
+ /* shadow HW CPU to CPU partition ID */
+ int *ctb_cpu2cpt;
+ /* all cpus in this partition table */
+ cpumask_var_t ctb_cpumask;
+ /* shadow HW node to CPU partition ID */
+ int *ctb_node2cpt;
+ /* all nodes in this partition table */
+ nodemask_t *ctb_nodemask;
+};
/** Global CPU partition table */
-struct cfs_cpt_table *cfs_cpt_table __read_mostly;
-EXPORT_SYMBOL(cfs_cpt_table);
+struct cfs_cpt_table *cfs_cpt_tab __read_mostly;
+EXPORT_SYMBOL(cfs_cpt_tab);
/**
* modparam for setting number of partitions
module_param(cpu_pattern, charp, 0444);
MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
-#ifdef CONFIG_SMP
struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
{
struct cfs_cpt_table *cptab;
cptab->ctb_nparts = ncpt;
- LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
- if (!cptab->ctb_cpumask)
+ if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS))
goto failed_alloc_cpumask;
LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
if (!cptab->ctb_nodemask)
goto failed_alloc_nodemask;
- LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
- nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
+ CFS_ALLOC_PTR_ARRAY(cptab->ctb_cpu2cpt, nr_cpu_ids);
if (!cptab->ctb_cpu2cpt)
goto failed_alloc_cpu2cpt;
memset(cptab->ctb_cpu2cpt, -1,
nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
- LIBCFS_ALLOC(cptab->ctb_node2cpt,
- nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
+ CFS_ALLOC_PTR_ARRAY(cptab->ctb_node2cpt, nr_node_ids);
if (!cptab->ctb_node2cpt)
goto failed_alloc_node2cpt;
memset(cptab->ctb_node2cpt, -1,
nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
- LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
+ CFS_ALLOC_PTR_ARRAY(cptab->ctb_parts, ncpt);
if (!cptab->ctb_parts)
goto failed_alloc_ctb_parts;
for (i = 0; i < ncpt; i++) {
struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
- LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
- if (!part->cpt_cpumask)
+ if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS))
goto failed_setting_ctb_parts;
LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
if (!part->cpt_nodemask)
goto failed_setting_ctb_parts;
- LIBCFS_ALLOC(part->cpt_distance,
- cptab->ctb_nparts * sizeof(part->cpt_distance[0]));
+ CFS_ALLOC_PTR_ARRAY(part->cpt_distance, cptab->ctb_nparts);
if (!part->cpt_distance)
goto failed_setting_ctb_parts;
sizeof(*part->cpt_nodemask));
}
- if (part->cpt_cpumask)
- LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
+ free_cpumask_var(part->cpt_cpumask);
if (part->cpt_distance) {
- LIBCFS_FREE(part->cpt_distance,
- cptab->ctb_nparts *
- sizeof(part->cpt_distance[0]));
+ CFS_FREE_PTR_ARRAY(part->cpt_distance,
+ cptab->ctb_nparts);
}
}
- if (cptab->ctb_parts) {
- LIBCFS_FREE(cptab->ctb_parts,
- cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
- }
+ if (cptab->ctb_parts)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_parts, cptab->ctb_nparts);
+
failed_alloc_ctb_parts:
- if (cptab->ctb_node2cpt) {
- LIBCFS_FREE(cptab->ctb_node2cpt,
- nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
- }
+ if (cptab->ctb_node2cpt)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_node2cpt, nr_node_ids);
+
failed_alloc_node2cpt:
- if (cptab->ctb_cpu2cpt) {
- LIBCFS_FREE(cptab->ctb_cpu2cpt,
- nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
- }
+ if (cptab->ctb_cpu2cpt)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_cpu2cpt, nr_cpu_ids);
+
failed_alloc_cpu2cpt:
if (cptab->ctb_nodemask)
LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
failed_alloc_nodemask:
- if (cptab->ctb_cpumask)
- LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
+ free_cpumask_var(cptab->ctb_cpumask);
failed_alloc_cpumask:
LIBCFS_FREE(cptab, sizeof(*cptab));
return NULL;
{
int i;
- if (cptab->ctb_cpu2cpt) {
- LIBCFS_FREE(cptab->ctb_cpu2cpt,
- nr_cpu_ids * sizeof(cptab->ctb_cpu2cpt[0]));
- }
+ if (cptab->ctb_cpu2cpt)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_cpu2cpt, nr_cpu_ids);
- if (cptab->ctb_node2cpt) {
- LIBCFS_FREE(cptab->ctb_node2cpt,
- nr_node_ids * sizeof(cptab->ctb_node2cpt[0]));
- }
+ if (cptab->ctb_node2cpt)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_node2cpt, nr_node_ids);
for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
sizeof(*part->cpt_nodemask));
}
- if (part->cpt_cpumask)
- LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
+ free_cpumask_var(part->cpt_cpumask);
- if (part->cpt_distance) {
- LIBCFS_FREE(part->cpt_distance,
- cptab->ctb_nparts *
- sizeof(part->cpt_distance[0]));
- }
+ if (part->cpt_distance)
+ CFS_FREE_PTR_ARRAY(part->cpt_distance,
+ cptab->ctb_nparts);
}
- if (cptab->ctb_parts) {
- LIBCFS_FREE(cptab->ctb_parts,
- cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
- }
+ if (cptab->ctb_parts)
+ CFS_FREE_PTR_ARRAY(cptab->ctb_parts, cptab->ctb_nparts);
if (cptab->ctb_nodemask)
LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
- if (cptab->ctb_cpumask)
- LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
+ free_cpumask_var(cptab->ctb_cpumask);
LIBCFS_FREE(cptab, sizeof(*cptab));
}
}
EXPORT_SYMBOL(cfs_cpt_online);
-cpumask_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
+cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
{
LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
return cpt == CFS_CPT_ANY ?
- cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
+ &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask;
}
EXPORT_SYMBOL(cfs_cpt_cpumask);
{
nodemask_t *mask;
int weight;
- int rotor;
+ unsigned int rotor;
int node = 0;
/* convert CPU partition ID to HW node id */
static int cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
cpumask_t *node_mask, int number)
{
- cpumask_t *socket_mask = NULL;
- cpumask_t *core_mask = NULL;
+ cpumask_var_t socket_mask;
+ cpumask_var_t core_mask;
int rc = 0;
int cpu;
int i;
return 0;
}
- /* allocate scratch buffer */
- LIBCFS_ALLOC(socket_mask, cpumask_size());
- LIBCFS_ALLOC(core_mask, cpumask_size());
- if (!socket_mask || !core_mask) {
+ /*
+ * Allocate both scratch buffers up front: a cpumask_var_t
+ * cannot be pre-initialized to a known-safe empty state, so
+ * both allocations must be attempted before the shared error
+ * path can safely free_cpumask_var() either of them.
+ */
+ if (!zalloc_cpumask_var(&socket_mask, GFP_NOFS))
+ rc = -ENOMEM;
+ if (!zalloc_cpumask_var(&core_mask, GFP_NOFS))
rc = -ENOMEM;
+ if (rc)
goto out;
- }
while (!cpumask_empty(node_mask)) {
cpu = cpumask_first(node_mask);
}
out:
- if (core_mask)
- LIBCFS_FREE(core_mask, cpumask_size());
- if (socket_mask)
- LIBCFS_FREE(socket_mask, cpumask_size());
+ free_cpumask_var(socket_mask);
+ free_cpumask_var(core_mask);
return rc;
}
-#define CPT_WEIGHT_MIN 4
+#define CPT_WEIGHT_MIN 4u
-static int cfs_cpt_num_estimate(void)
+static unsigned int cfs_cpt_num_estimate(void)
{
- int nthr = cpumask_weight(topology_sibling_cpumask(smp_processor_id()));
- int ncpu = num_online_cpus();
- int ncpt = 1;
+ unsigned int nthr;
+ unsigned int ncpu = num_online_cpus();
+ unsigned int ncpt = 1;
+
+ preempt_disable();
+ nthr = cpumask_weight(topology_sibling_cpumask(smp_processor_id()));
+ preempt_enable();
if (ncpu > CPT_WEIGHT_MIN)
for (ncpt = 2; ncpu > 2 * nthr * ncpt; ncpt++)
/* config many CPU partitions on 32-bit system could consume
* too much memory
*/
- ncpt = min(2, ncpt);
+ ncpt = min(2U, ncpt);
#endif
while (ncpu % ncpt)
ncpt--; /* worst case is 1 */
static struct cfs_cpt_table *cfs_cpt_table_create(int ncpt)
{
struct cfs_cpt_table *cptab = NULL;
- cpumask_t *node_mask = NULL;
+ cpumask_var_t node_mask;
int cpt = 0;
int node;
int num;
if (ncpt <= 0)
ncpt = num;
- if (ncpt > num_online_cpus() || ncpt > 4 * num) {
+ if (ncpt > num_online_cpus()) {
+ rc = -EINVAL;
+ CERROR("libcfs: CPU partition count %d > cores %d: rc = %d\n",
+ ncpt, num_online_cpus(), rc);
+ goto failed;
+ }
+
+ if (ncpt > 4 * num) {
CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
ncpt, num);
}
goto failed;
}
- LIBCFS_ALLOC(node_mask, cpumask_size());
- if (!node_mask) {
+ if (!zalloc_cpumask_var(&node_mask, GFP_NOFS)) {
CERROR("Failed to allocate scratch cpumask\n");
rc = -ENOMEM;
goto failed;
int ncpu = cpumask_weight(part->cpt_cpumask);
rc = cfs_cpt_choose_ncpus(cptab, cpt, node_mask,
- num - ncpu);
+ (rem > 0) + num - ncpu);
if (rc < 0) {
rc = -EINVAL;
goto failed_mask;
}
}
- LIBCFS_FREE(node_mask, cpumask_size());
+ free_cpumask_var(node_mask);
return cptab;
failed_mask:
- if (node_mask)
- LIBCFS_FREE(node_mask, cpumask_size());
+ free_cpumask_var(node_mask);
failed:
CERROR("Failed (rc = %d) to setup CPU partition table with %d partitions, online HW NUMA nodes: %d, HW CPU cores: %d.\n",
rc, ncpt, num_online_nodes(), num_online_cpus());
void cfs_cpu_fini(void)
{
- if (!IS_ERR_OR_NULL(cfs_cpt_table))
- cfs_cpt_table_free(cfs_cpt_table);
+ if (!IS_ERR_OR_NULL(cfs_cpt_tab))
+ cfs_cpt_table_free(cfs_cpt_tab);
#ifdef CONFIG_HOTPLUG_CPU
#ifdef HAVE_HOTPLUG_STATE_MACHINE
{
int ret;
- LASSERT(!cfs_cpt_table);
+ LASSERT(!cfs_cpt_tab);
#ifdef CONFIG_HOTPLUG_CPU
#ifdef HAVE_HOTPLUG_STATE_MACHINE
get_online_cpus();
if (*cpu_pattern) {
- cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
- if (IS_ERR(cfs_cpt_table)) {
+ cfs_cpt_tab = cfs_cpt_table_create_pattern(cpu_pattern);
+ if (IS_ERR(cfs_cpt_tab)) {
CERROR("Failed to create cptab from pattern '%s'\n",
cpu_pattern);
- ret = PTR_ERR(cfs_cpt_table);
+ ret = PTR_ERR(cfs_cpt_tab);
goto failed_alloc_table;
}
} else {
- cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
- if (IS_ERR(cfs_cpt_table)) {
+ cfs_cpt_tab = cfs_cpt_table_create(cpu_npartitions);
+ if (IS_ERR(cfs_cpt_tab)) {
CERROR("Failed to create cptab with npartitions %d\n",
cpu_npartitions);
- ret = PTR_ERR(cfs_cpt_table);
+ ret = PTR_ERR(cfs_cpt_tab);
goto failed_alloc_table;
}
}
LCONSOLE(0, "HW NUMA nodes: %d, HW CPU cores: %d, npartitions: %d\n",
num_online_nodes(), num_online_cpus(),
- cfs_cpt_number(cfs_cpt_table));
+ cfs_cpt_number(cfs_cpt_tab));
return 0;
failed_alloc_table:
put_online_cpus();
- if (cfs_cpt_table)
- cfs_cpt_table_free(cfs_cpt_table);
+ if (!IS_ERR_OR_NULL(cfs_cpt_tab))
+ cfs_cpt_table_free(cfs_cpt_tab);
#ifdef CONFIG_HOTPLUG_CPU
#ifdef HAVE_HOTPLUG_STATE_MACHINE
#endif /* CONFIG_HOTPLUG_CPU */
return ret;
}
-
-#else /* ! CONFIG_SMP */
-
-struct cfs_cpt_table *cfs_cpt_table_alloc(int ncpt)
-{
- struct cfs_cpt_table *cptab;
-
- if (ncpt != 1) {
- CERROR("Can't support cpu partition number %d\n", ncpt);
- return NULL;
- }
-
- LIBCFS_ALLOC(cptab, sizeof(*cptab));
- if (!cptab)
- return NULL;
-
- cpumask_set_cpu(0, cptab->ctb_cpumask);
- node_set(0, cptab->ctb_nodemask);
-
- return cptab;
-}
-EXPORT_SYMBOL(cfs_cpt_table_alloc);
-
-int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
- int rc;
-
- rc = snprintf(buf, len, "0\t: 0\n");
- len -= rc;
- if (len <= 0)
- return -EFBIG;
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_table_print);
-
-int cfs_cpt_distance_print(struct cfs_cpt_table *cptab, char *buf, int len)
-{
- int rc;
-
- rc = snprintf(buf, len, "0\t: 0:1\n");
- len -= rc;
- if (len <= 0)
- return -EFBIG;
-
- return rc;
-}
-EXPORT_SYMBOL(cfs_cpt_distance_print);
-
-void cfs_cpu_fini(void)
-{
- if (cfs_cpt_table) {
- cfs_cpt_table_free(cfs_cpt_table);
- cfs_cpt_table = NULL;
- }
-}
-
-int cfs_cpu_init(void)
-{
- cfs_cpt_table = cfs_cpt_table_alloc(1);
-
- return cfs_cpt_table ? 0 : -1;
-}
-
-#endif /* !CONFIG_SMP */