int srv_watchdog_factor;
/** under unregister_service */
unsigned srv_is_stopping:1;
+ /** Whether or not to restrict service threads to CPUs in this CPT */
+ unsigned srv_cpt_bind:1;
/** max # request buffers */
int srv_nrqbds_max;
/** max # request buffers in history per partition */
int srv_hist_nrqbds_cpt_max;
- /** number of CPTs this service bound on */
+ /** number of CPTs this service associated with */
int srv_ncpts;
- /** CPTs array this service bound on */
+ /** CPTs array this service associated with */
__u32 *srv_cpts;
/** 2^srv_cpt_bits >= cfs_cpt_number(srv_cptable) */
int srv_cpt_bits;
/* user specified threads number, it will be validated due to
* other members of this structure. */
unsigned int tc_nthrs_user;
- /* set NUMA node affinity for service threads */
- unsigned int tc_cpu_affinity;
+ /* bind service threads to only CPUs in their associated CPT */
+ unsigned int tc_cpu_bind;
/* Tags for lu_context associated with service thread */
__u32 tc_ctx_tags;
};
struct cfs_cpt_table *cc_cptable;
/* string pattern to describe CPTs for a service */
char *cc_pattern;
+ /* whether or not to have per-CPT service partitions */
+ bool cc_affinity;
};
struct ptlrpc_service_conf {
module_param(ldlm_num_threads, int, 0444);
MODULE_PARM_DESC(ldlm_num_threads, "number of DLM service threads to start");
+static unsigned int ldlm_cpu_bind = 1;
+module_param(ldlm_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(ldlm_cpu_bind,
+ "bind DLM service threads to particular CPU partitions");
+
static char *ldlm_cpts;
module_param(ldlm_cpts, charp, 0444);
MODULE_PARM_DESC(ldlm_cpts, "CPU partitions ldlm threads should run on");
.tc_nthrs_base = LDLM_NTHRS_BASE,
.tc_nthrs_max = LDLM_NTHRS_MAX,
.tc_nthrs_user = ldlm_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = ldlm_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD,
},
.psc_cpt = {
.cc_pattern = ldlm_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = ldlm_callback_handler,
.tc_nthrs_base = LDLM_NTHRS_BASE,
.tc_nthrs_max = LDLM_NTHRS_MAX,
.tc_nthrs_user = ldlm_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = ldlm_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD | \
LCT_DT_THREAD | \
LCT_CL_THREAD,
},
.psc_cpt = {
.cc_pattern = ldlm_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = ldlm_cancel_handler,
module_param(mds_num_threads, ulong, 0444);
MODULE_PARM_DESC(mds_num_threads, "number of MDS service threads to start");
+static unsigned int mds_cpu_bind = 1;
+module_param(mds_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(mds_cpu_bind,
+ "bind MDS threads to particular CPU partitions");
+
int mds_max_io_threads = 512;
module_param(mds_max_io_threads, int, 0444);
-MODULE_PARM_DESC(mds_max_io_threads, "maximum number of MDS IO service threads");
+MODULE_PARM_DESC(mds_max_io_threads,
+ "maximum number of MDS IO service threads");
+
+static unsigned int mds_io_cpu_bind = 1;
+module_param(mds_io_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(mds_io_cpu_bind,
+ "bind MDS IO threads to particular CPU partitions");
static char *mds_io_num_cpts;
module_param(mds_io_num_cpts, charp, 0444);
MODULE_PARM_DESC(mds_rdpg_num_threads,
"number of MDS readpage service threads to start");
+static unsigned int mds_rdpg_cpu_bind = 1;
+module_param(mds_rdpg_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(mds_rdpg_cpu_bind,
+ "bind MDS readpage threads to particular CPU partitions");
+
static char *mds_rdpg_num_cpts;
module_param(mds_rdpg_num_cpts, charp, 0444);
MODULE_PARM_DESC(mds_rdpg_num_cpts,
MODULE_PARM_DESC(mds_attr_num_threads,
"number of MDS setattr service threads to start");
+static unsigned int mds_attr_cpu_bind = 1;
+module_param(mds_attr_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(mds_attr_cpu_bind,
+ "bind MDS setattr threads to particular CPU partitions");
+
static char *mds_attr_num_cpts;
module_param(mds_attr_num_cpts, charp, 0444);
MODULE_PARM_DESC(mds_attr_num_cpts,
.tc_nthrs_base = MDS_NTHRS_BASE,
.tc_nthrs_max = MDS_NTHRS_MAX,
.tc_nthrs_user = mds_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = mds_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD,
},
.psc_cpt = {
.cc_pattern = mds_num_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.tc_nthrs_base = MDS_RDPG_NTHRS_BASE,
.tc_nthrs_max = MDS_RDPG_NTHRS_MAX,
.tc_nthrs_user = mds_rdpg_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = mds_rdpg_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD,
},
.psc_cpt = {
.cc_pattern = mds_rdpg_num_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.tc_nthrs_base = MDS_SETA_NTHRS_BASE,
.tc_nthrs_max = MDS_SETA_NTHRS_MAX,
.tc_nthrs_user = mds_attr_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = mds_attr_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD,
},
.psc_cpt = {
.cc_pattern = mds_attr_num_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.tc_nthrs_base = MDS_NTHRS_BASE,
.tc_nthrs_max = MDS_NTHRS_MAX,
.tc_nthrs_user = mds_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = mds_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD |
LCT_DT_THREAD,
},
.psc_cpt = {
.cc_pattern = mds_num_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.tc_nthrs_base = OSS_NTHRS_BASE,
.tc_nthrs_max = mds_max_io_threads,
.tc_nthrs_user = mds_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = mds_io_cpu_bind,
.tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD,
},
.psc_cpt = {
.cc_cptable = mdt_io_cptable,
.cc_pattern = mdt_io_cptable == NULL ?
mds_io_num_cpts : NULL,
+ .cc_affinity = true,
},
.psc_ops = {
.so_thr_init = tgt_io_thread_init,
module_param(oss_num_threads, int, 0444);
MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");
+static unsigned int oss_cpu_bind = 1;
+module_param(oss_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(oss_cpu_bind,
+ "bind OSS service threads to particular CPU partitions");
+
static int oss_num_create_threads;
module_param(oss_num_create_threads, int, 0444);
MODULE_PARM_DESC(oss_num_create_threads, "number of OSS create threads to start");
+static unsigned int oss_create_cpu_bind = 1;
+module_param(oss_create_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(oss_create_cpu_bind,
+ "bind OSS create threads to particular CPU partitions");
+
static char *oss_cpts;
module_param(oss_cpts, charp, 0444);
MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");
.tc_nthrs_base = OSS_NTHRS_BASE,
.tc_nthrs_max = oss_max_threads,
.tc_nthrs_user = oss_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = oss_cpu_bind,
.tc_ctx_tags = LCT_DT_THREAD,
},
.psc_cpt = {
.cc_pattern = oss_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.tc_nthrs_base = OSS_CR_NTHRS_BASE,
.tc_nthrs_max = OSS_CR_NTHRS_MAX,
.tc_nthrs_user = oss_num_create_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = oss_create_cpu_bind,
.tc_ctx_tags = LCT_DT_THREAD,
},
.psc_cpt = {
.cc_pattern = oss_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.tc_nthrs_base = OSS_NTHRS_BASE,
.tc_nthrs_max = oss_max_threads,
.tc_nthrs_user = oss_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = oss_cpu_bind,
.tc_ctx_tags = LCT_DT_THREAD,
},
.psc_cpt = {
.cc_cptable = ost_io_cptable,
.cc_pattern = ost_io_cptable == NULL ?
oss_io_cpts : NULL,
+ .cc_affinity = true,
},
.psc_ops = {
.so_thr_init = tgt_io_thread_init,
.tc_nthrs_base = OSS_CR_NTHRS_BASE,
.tc_nthrs_max = OSS_CR_NTHRS_MAX,
.tc_nthrs_user = oss_num_create_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = oss_create_cpu_bind,
.tc_ctx_tags = LCT_DT_THREAD,
},
.psc_cpt = {
- .cc_pattern = oss_cpts,
+ .cc_pattern = oss_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.tc_nthrs_base = OSS_CR_NTHRS_BASE,
.tc_nthrs_max = OSS_CR_NTHRS_MAX,
.tc_nthrs_user = oss_num_create_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = oss_create_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD |
LCT_DT_THREAD,
},
.psc_cpt = {
.cc_pattern = oss_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
if (cptable == NULL)
cptable = cfs_cpt_table;
- if (!conf->psc_thr.tc_cpu_affinity) {
+ if (conf->psc_thr.tc_cpu_bind > 1) {
+ CERROR("%s: Invalid cpu bind value %d, only 1 or 0 allowed\n",
+ conf->psc_name, conf->psc_thr.tc_cpu_bind);
+ RETURN(ERR_PTR(-EINVAL));
+ }
+
+ if (!cconf->cc_affinity) {
ncpts = 1;
} else {
ncpts = cfs_cpt_number(cptable);
service->srv_cptable = cptable;
service->srv_cpts = cpts;
service->srv_ncpts = ncpts;
+ service->srv_cpt_bind = conf->psc_thr.tc_cpu_bind;
service->srv_cpt_bits = 0; /* it's zero already, easy to read... */
while ((1 << service->srv_cpt_bits) < cfs_cpt_number(cptable))
service->srv_ops = conf->psc_ops;
for (i = 0; i < ncpts; i++) {
- if (!conf->psc_thr.tc_cpu_affinity)
+ if (!cconf->cc_affinity)
cpt = CFS_CPT_ANY;
else
cpt = cpts != NULL ? cpts[i] : i;
thread->t_pid = current_pid();
unshare_fs_struct();
- /* NB: we will call cfs_cpt_bind() for all threads, because we
- * might want to run lustre server only on a subset of system CPUs,
- * in that case ->scp_cpt is CFS_CPT_ANY */
- rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt);
- if (rc != 0) {
- CWARN("%s: failed to bind %s on CPT %d\n",
- svc->srv_name, thread->t_name, svcpt->scp_cpt);
+ if (svc->srv_cpt_bind) {
+ rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt);
+ if (rc != 0) {
+ CWARN("%s: failed to bind %s on CPT %d\n",
+ svc->srv_name, thread->t_name, svcpt->scp_cpt);
+ }
}
ginfo = groups_alloc(0);