* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2013, Intel Corporation.
+ * Copyright (c) 2013, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include "mdt_internal.h"
#include <lustre_quota.h>
#include <lustre_acl.h>
-#include <lustre_param.h>
+#include <uapi/linux/lustre/lustre_param.h>
struct mds_device {
/* super-class */
- struct md_device mds_md_dev;
- struct ptlrpc_service *mds_regular_service;
- struct ptlrpc_service *mds_readpage_service;
- struct ptlrpc_service *mds_out_service;
- struct ptlrpc_service *mds_setattr_service;
- struct ptlrpc_service *mds_mdsc_service;
- struct ptlrpc_service *mds_mdss_service;
- struct ptlrpc_service *mds_fld_service;
+ struct md_device mds_md_dev;
+ struct ptlrpc_service *mds_regular_service;
+ struct ptlrpc_service *mds_readpage_service;
+ struct ptlrpc_service *mds_out_service;
+ struct ptlrpc_service *mds_setattr_service;
+ struct ptlrpc_service *mds_mdsc_service;
+ struct ptlrpc_service *mds_mdss_service;
+ struct ptlrpc_service *mds_fld_service;
+ struct ptlrpc_service *mds_io_service; /* MDT I/O service; NULL when not registered */
+ struct mutex mds_health_mutex; /* held by mds_stop_ptlrpc_service() while unregistering and by mds_health_check() while probing */
};
/*
- * * Initialized in mdt_mod_init().
+ * * Module parameters; set at module load time, not by mds_mod_init().
* */
-static unsigned long mdt_num_threads;
-CFS_MODULE_PARM(mdt_num_threads, "ul", ulong, 0444,
- "number of MDS service threads to start "
- "(deprecated in favor of mds_num_threads)");
-
static unsigned long mds_num_threads;
-CFS_MODULE_PARM(mds_num_threads, "ul", ulong, 0444,
- "number of MDS service threads to start");
+module_param(mds_num_threads, ulong, 0444); /* tc_nthrs_user of the regular, out and MDT I/O services */
+MODULE_PARM_DESC(mds_num_threads, "number of MDS service threads to start");
+
+static unsigned int mds_cpu_bind = 1; /* tc_cpu_bind of the regular and out services */
+module_param(mds_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(mds_cpu_bind,
+ "bind MDS threads to particular CPU partitions");
+
+int mds_max_io_threads = 512; /* tc_nthrs_max of the MDT I/O service; NOTE(review): not static, unlike its siblings - confirm an external user exists */
+module_param(mds_max_io_threads, int, 0444);
+MODULE_PARM_DESC(mds_max_io_threads,
+ "maximum number of MDS IO service threads");
+
+static unsigned int mds_io_cpu_bind = 1; /* tc_cpu_bind of the MDT I/O service */
+module_param(mds_io_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(mds_io_cpu_bind,
+ "bind MDS IO threads to particular CPU partitions");
+
+static char *mds_io_num_cpts; /* used as cc_pattern of the I/O service only when mdt_io_cptable is NULL */
+module_param(mds_io_num_cpts, charp, 0444);
+MODULE_PARM_DESC(mds_io_num_cpts,
+ "CPU partitions MDS IO threads should run on");
+
+static struct cfs_cpt_table *mdt_io_cptable; /* private CPT table handed to the I/O service as cc_cptable; freed in mds_stop_ptlrpc_service() */
static char *mds_num_cpts;
-CFS_MODULE_PARM(mds_num_cpts, "c", charp, 0444,
- "CPU partitions MDS threads should run on");
+module_param(mds_num_cpts, charp, 0444); /* cc_pattern of the regular and out services */
+MODULE_PARM_DESC(mds_num_cpts, "CPU partitions MDS threads should run on");
static unsigned long mds_rdpg_num_threads;
-CFS_MODULE_PARM(mds_rdpg_num_threads, "ul", ulong, 0444,
- "number of MDS readpage service threads to start");
+module_param(mds_rdpg_num_threads, ulong, 0444);
+MODULE_PARM_DESC(mds_rdpg_num_threads,
+ "number of MDS readpage service threads to start");
+
+static unsigned int mds_rdpg_cpu_bind = 1; /* tc_cpu_bind of the readpage service */
+module_param(mds_rdpg_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(mds_rdpg_cpu_bind,
+ "bind MDS readpage threads to particular CPU partitions");
static char *mds_rdpg_num_cpts;
-CFS_MODULE_PARM(mds_rdpg_num_cpts, "c", charp, 0444,
- "CPU partitions MDS readpage threads should run on");
+module_param(mds_rdpg_num_cpts, charp, 0444);
+MODULE_PARM_DESC(mds_rdpg_num_cpts,
+ "CPU partitions MDS readpage threads should run on");
/* NB: these two should be removed along with setattr service in the future */
static unsigned long mds_attr_num_threads;
-CFS_MODULE_PARM(mds_attr_num_threads, "ul", ulong, 0444,
- "number of MDS setattr service threads to start");
+module_param(mds_attr_num_threads, ulong, 0444);
+MODULE_PARM_DESC(mds_attr_num_threads,
+ "number of MDS setattr service threads to start");
+
+static unsigned int mds_attr_cpu_bind = 1; /* tc_cpu_bind of the setattr service */
+module_param(mds_attr_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(mds_attr_cpu_bind,
+ "bind MDS setattr threads to particular CPU partitions");
static char *mds_attr_num_cpts;
-CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444,
- "CPU partitions MDS setattr threads should run on");
+module_param(mds_attr_num_cpts, charp, 0444);
+MODULE_PARM_DESC(mds_attr_num_cpts,
+ "CPU partitions MDS setattr threads should run on");
/* device init/fini methods */
static void mds_stop_ptlrpc_service(struct mds_device *m)
{
ENTRY;
+
+ mutex_lock(&m->mds_health_mutex); /* same mutex mds_health_check() holds while it probes the service pointers */
if (m->mds_regular_service != NULL) {
ptlrpc_unregister_service(m->mds_regular_service);
m->mds_regular_service = NULL;
ptlrpc_unregister_service(m->mds_fld_service);
m->mds_fld_service = NULL;
}
+ if (m->mds_io_service != NULL) {
+ ptlrpc_unregister_service(m->mds_io_service);
+ m->mds_io_service = NULL; /* cleared before the mutex is dropped */
+ }
+ mutex_unlock(&m->mds_health_mutex);
+
+ if (mdt_io_cptable != NULL) { /* file-scope CPT table, released after the mutex is dropped */
+ cfs_cpt_table_free(mdt_io_cptable);
+ mdt_io_cptable = NULL; /* reset so a later call does not free it again */
+ }
+
EXIT;
}
{
static struct ptlrpc_service_conf conf;
struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd;
- cfs_proc_dir_entry_t *procfs_entry;
+ nodemask_t *mask;
int rc = 0;
- ENTRY;
- procfs_entry = obd->obd_proc_entry;
- LASSERT(procfs_entry != NULL);
+ ENTRY;
conf = (typeof(conf)) {
.psc_name = LUSTRE_MDT_NAME,
.tc_nthrs_base = MDS_NTHRS_BASE,
.tc_nthrs_max = MDS_NTHRS_MAX,
.tc_nthrs_user = mds_num_threads,
- .tc_cpu_affinity = 1,
- .tc_ctx_tags = LCT_MD_THREAD,
+ .tc_cpu_bind = mds_cpu_bind,
+ /* LCT_DT_THREAD is required as MDT threads may scan
+ * all LDLM namespaces (including OFD-originated) to
+ * cancel LDLM locks */
+ .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD,
},
.psc_cpt = {
.cc_pattern = mds_num_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.so_hpreq_handler = ptlrpc_hpreq_handler,
},
};
- m->mds_regular_service = ptlrpc_register_service(&conf, procfs_entry);
+ m->mds_regular_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
if (IS_ERR(m->mds_regular_service)) {
rc = PTR_ERR(m->mds_regular_service);
CERROR("failed to start regular mdt service: %d\n", rc);
.tc_nthrs_base = MDS_RDPG_NTHRS_BASE,
.tc_nthrs_max = MDS_RDPG_NTHRS_MAX,
.tc_nthrs_user = mds_rdpg_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = mds_rdpg_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD,
},
.psc_cpt = {
.cc_pattern = mds_rdpg_num_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.so_req_printer = target_print_req,
},
};
- m->mds_readpage_service = ptlrpc_register_service(&conf, procfs_entry);
+ m->mds_readpage_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
if (IS_ERR(m->mds_readpage_service)) {
rc = PTR_ERR(m->mds_readpage_service);
CERROR("failed to start readpage service: %d\n", rc);
.tc_nthrs_base = MDS_SETA_NTHRS_BASE,
.tc_nthrs_max = MDS_SETA_NTHRS_MAX,
.tc_nthrs_user = mds_attr_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = mds_attr_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD,
},
.psc_cpt = {
.cc_pattern = mds_attr_num_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.so_hpreq_handler = NULL,
},
};
- m->mds_setattr_service = ptlrpc_register_service(&conf, procfs_entry);
+ m->mds_setattr_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
if (IS_ERR(m->mds_setattr_service)) {
rc = PTR_ERR(m->mds_setattr_service);
CERROR("failed to start setattr service: %d\n", rc);
.tc_nthrs_base = MDS_NTHRS_BASE,
.tc_nthrs_max = MDS_NTHRS_MAX,
.tc_nthrs_user = mds_num_threads,
- .tc_cpu_affinity = 1,
+ .tc_cpu_bind = mds_cpu_bind,
.tc_ctx_tags = LCT_MD_THREAD |
LCT_DT_THREAD,
},
.psc_cpt = {
.cc_pattern = mds_num_cpts,
+ .cc_affinity = true,
},
.psc_ops = {
.so_req_handler = tgt_request_handle,
.so_hpreq_handler = NULL,
},
};
- m->mds_out_service = ptlrpc_register_service(&conf, procfs_entry);
+ m->mds_out_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
if (IS_ERR(m->mds_out_service)) {
rc = PTR_ERR(m->mds_out_service);
CERROR("failed to start out service: %d\n", rc);
.so_hpreq_handler = NULL,
},
};
- m->mds_mdsc_service = ptlrpc_register_service(&conf, procfs_entry);
+ m->mds_mdsc_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
if (IS_ERR(m->mds_mdsc_service)) {
rc = PTR_ERR(m->mds_mdsc_service);
CERROR("failed to start seq controller service: %d\n", rc);
.so_hpreq_handler = NULL,
},
};
- m->mds_mdss_service = ptlrpc_register_service(&conf, procfs_entry);
+ m->mds_mdss_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
if (IS_ERR(m->mds_mdss_service)) {
rc = PTR_ERR(m->mds_mdss_service);
CERROR("failed to start metadata seq server service: %d\n", rc);
.so_hpreq_handler = NULL,
},
};
- m->mds_fld_service = ptlrpc_register_service(&conf, procfs_entry);
+ m->mds_fld_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
if (IS_ERR(m->mds_fld_service)) {
rc = PTR_ERR(m->mds_fld_service);
CERROR("failed to start fld service: %d\n", rc);
GOTO(err_mds_svc, rc);
}
+
+ mask = cfs_cpt_nodemask(cfs_cpt_tab, CFS_CPT_ANY);
+ /* even if the CPT feature is disabled at the libcfs level by setting the
+ * partition number to 1, we still want to set node affinity for the IO
+ * service */
+ if (cfs_cpt_number(cfs_cpt_tab) == 1 && nodes_weight(*mask) > 1) {
+ int cpt = 0;
+ int i;
+
+ mdt_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
+ for_each_node_mask(i, *mask) {
+ if (mdt_io_cptable == NULL) {
+ CWARN("MDS failed to create CPT table\n");
+ break;
+ }
+
+ rc = cfs_cpt_set_node(mdt_io_cptable, cpt++, i);
+ if (!rc) {
+ CWARN("MDS Failed to set node %d for IO CPT table\n",
+ i);
+ cfs_cpt_table_free(mdt_io_cptable);
+ mdt_io_cptable = NULL;
+ break;
+ }
+ }
+ }
+
+ memset(&conf, 0, sizeof(conf));
+ conf = (typeof(conf)) {
+ .psc_name = LUSTRE_MDT_NAME "_io",
+ .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
+ .psc_buf = {
+ .bc_nbufs = OST_NBUFS,
+ .bc_buf_size = OST_IO_BUFSIZE,
+ .bc_req_max_size = OST_IO_MAXREQSIZE,
+ .bc_rep_max_size = OST_IO_MAXREPSIZE,
+ .bc_req_portal = MDS_IO_PORTAL,
+ .bc_rep_portal = MDC_REPLY_PORTAL,
+ },
+ .psc_thr = {
+ .tc_thr_name = LUSTRE_MDT_NAME "_io",
+ .tc_thr_factor = OSS_THR_FACTOR,
+ .tc_nthrs_init = OSS_NTHRS_INIT,
+ .tc_nthrs_base = OSS_NTHRS_BASE,
+ .tc_nthrs_max = mds_max_io_threads,
+ .tc_nthrs_user = mds_num_threads,
+ .tc_cpu_bind = mds_io_cpu_bind,
+ .tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD,
+ },
+ .psc_cpt = {
+ .cc_cptable = mdt_io_cptable,
+ .cc_pattern = mdt_io_cptable == NULL ?
+ mds_io_num_cpts : NULL,
+ .cc_affinity = true,
+ },
+ .psc_ops = {
+ .so_thr_init = tgt_io_thread_init,
+ .so_thr_done = tgt_io_thread_done,
+ .so_req_handler = tgt_request_handle,
+ .so_req_printer = target_print_req,
+ .so_hpreq_handler = tgt_hpreq_handler,
+ },
+ };
+ m->mds_io_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
+ if (IS_ERR(m->mds_io_service)) {
+ rc = PTR_ERR(m->mds_io_service);
+ CERROR("failed to start MDT I/O service: %d\n", rc);
+ m->mds_io_service = NULL;
+ GOTO(err_mds_svc, rc);
+ }
+
EXIT;
err_mds_svc:
if (rc)
RETURN(NULL);
}
-LPROC_SEQ_FOPS_RO_TYPE(mds, uuid);
-
-static struct lprocfs_seq_vars lprocfs_mds_obd_vars[] = {
- { "uuid", &mds_uuid_fops },
- { 0 }
-};
-
static struct lu_device *mds_device_alloc(const struct lu_env *env,
struct lu_device_type *t,
struct lustre_cfg *cfg)
/* set this lu_device to obd, because error handling need it */
obd->obd_lu_dev = l;
- obd->obd_vars = lprocfs_mds_obd_vars;
- rc = lprocfs_obd_setup(obd);
+ rc = lprocfs_obd_setup(obd, true);
if (rc != 0) {
mds_device_free(env, l);
l = ERR_PTR(rc);
return l;
}
- rc = mds_start_ptlrpc_service(m);
+ mutex_init(&m->mds_health_mutex);
+ rc = mds_start_ptlrpc_service(m);
if (rc != 0) {
+ lprocfs_obd_cleanup(obd);
mds_device_free(env, l);
l = ERR_PTR(rc);
return l;
.ldt_ctx_tags = LCT_MD_THREAD
};
-static struct obd_ops mds_obd_device_ops = {
+/*
+ * Health-check method of the MDS obd device.
+ *
+ * Calls ptlrpc_service_health_check() on each service pointer stored in the
+ * mds_device, in declaration order, while holding mds_health_mutex (the
+ * mutex mds_stop_ptlrpc_service() also takes), and ORs the results.
+ * The env argument is not used.
+ *
+ * Returns 0 when every check returned zero, 1 otherwise.
+ */
+static int mds_health_check(const struct lu_env *env, struct obd_device *obd)
+{
+	struct mds_device *m = mds_dev(obd->obd_lu_dev);
+	unsigned int idx;
+	int bad = 0;
+
+	mutex_lock(&m->mds_health_mutex);
+	{
+		/* pointers are read only once the mutex is held */
+		struct ptlrpc_service *svcs[] = {
+			m->mds_regular_service,
+			m->mds_readpage_service,
+			m->mds_out_service,
+			m->mds_setattr_service,
+			m->mds_mdsc_service,
+			m->mds_mdss_service,
+			m->mds_fld_service,
+			m->mds_io_service,
+		};
+
+		/* no short-circuit: every service is probed even after a failure */
+		for (idx = 0; idx < sizeof(svcs) / sizeof(svcs[0]); idx++)
+			bad |= ptlrpc_service_health_check(svcs[idx]);
+	}
+	mutex_unlock(&m->mds_health_mutex);
+
+	if (bad != 0)
+		return 1;
+
+	return 0;
+}
+
+static const struct obd_ops mds_obd_device_ops = {
.o_owner = THIS_MODULE,
+ .o_health_check = mds_health_check, /* ORs ptlrpc_service_health_check() over the MDS service pointers */
};
int mds_mod_init(void)
{
- if (mdt_num_threads != 0 && mds_num_threads == 0) {
- LCONSOLE_INFO("mdt_num_threads module parameter is deprecated, "
- "use mds_num_threads instead or unset both for "
- "dynamic thread startup\n");
- mds_num_threads = mdt_num_threads;
- }
-
- return class_register_type(&mds_obd_device_ops, NULL, true, NULL,
+ return class_register_type(&mds_obd_device_ops, NULL, false, NULL, /* NOTE(review): third argument is presumably the proc-setup flag - confirm against class_register_type() */
LUSTRE_MDS_NAME, &mds_device_type);
}