X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_mds.c;h=d7408b14e901a46f42a02ab8183f6f4e1e020d5a;hb=refs%2Fchanges%2F15%2F35815%2F2;hp=078051a31d89eceabff5b6fb08103ca242dd5645;hpb=2bcc5ad0ed6a440e15233b454191e7f66fcb1921;p=fs%2Flustre-release.git diff --git a/lustre/mdt/mdt_mds.c b/lustre/mdt/mdt_mds.c index 078051a..d7408b1 100644 --- a/lustre/mdt/mdt_mds.c +++ b/lustre/mdt/mdt_mds.c @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2013, 2015, Intel Corporation. + * Copyright (c) 2013, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -66,7 +66,6 @@ struct mds_device { struct ptlrpc_service *mds_fld_service; struct ptlrpc_service *mds_io_service; struct mutex mds_health_mutex; - struct kset *mds_kset; }; /* @@ -76,9 +75,27 @@ static unsigned long mds_num_threads; module_param(mds_num_threads, ulong, 0444); MODULE_PARM_DESC(mds_num_threads, "number of MDS service threads to start"); +static unsigned int mds_cpu_bind = 1; +module_param(mds_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_cpu_bind, + "bind MDS threads to particular CPU partitions"); + int mds_max_io_threads = 512; module_param(mds_max_io_threads, int, 0444); -MODULE_PARM_DESC(mds_max_io_threads, "maximum number of MDS IO service threads"); +MODULE_PARM_DESC(mds_max_io_threads, + "maximum number of MDS IO service threads"); + +static unsigned int mds_io_cpu_bind = 1; +module_param(mds_io_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_io_cpu_bind, + "bind MDS IO threads to particular CPU partitions"); + +static char *mds_io_num_cpts; +module_param(mds_io_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_io_num_cpts, + "CPU partitions MDS IO threads should run on"); + +static struct cfs_cpt_table *mdt_io_cptable; static char *mds_num_cpts; module_param(mds_num_cpts, charp, 0444); @@ -89,6 +106,11 @@ module_param(mds_rdpg_num_threads, ulong, 0444); MODULE_PARM_DESC(mds_rdpg_num_threads, "number of MDS readpage service threads to start"); +static unsigned int mds_rdpg_cpu_bind = 1; +module_param(mds_rdpg_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_rdpg_cpu_bind, + "bind MDS readpage threads to particular CPU partitions"); + static char *mds_rdpg_num_cpts; module_param(mds_rdpg_num_cpts, charp, 0444); MODULE_PARM_DESC(mds_rdpg_num_cpts, @@ -100,6 +122,11 @@ module_param(mds_attr_num_threads, ulong, 0444); MODULE_PARM_DESC(mds_attr_num_threads, "number of MDS setattr service threads to start"); +static unsigned int mds_attr_cpu_bind = 1; +module_param(mds_attr_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_attr_cpu_bind, + "bind MDS setattr threads to particular CPU partitions"); + static char *mds_attr_num_cpts; module_param(mds_attr_num_cpts, charp, 0444); MODULE_PARM_DESC(mds_attr_num_cpts, @@ -145,6 +172,11 @@ static void mds_stop_ptlrpc_service(struct mds_device *m) } mutex_unlock(&m->mds_health_mutex); + if (mdt_io_cptable != NULL) { + cfs_cpt_table_free(mdt_io_cptable); + mdt_io_cptable = NULL; + } + EXIT; } @@ -152,12 +184,10 @@ static int mds_start_ptlrpc_service(struct mds_device *m) { static struct ptlrpc_service_conf conf; struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd; - struct proc_dir_entry *procfs_entry; + nodemask_t *mask; int rc = 0; - ENTRY; - procfs_entry = obd->obd_proc_entry; - LASSERT(procfs_entry != NULL); + ENTRY; conf = (typeof(conf)) { .psc_name = LUSTRE_MDT_NAME, @@ -181,11 +211,15 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_NTHRS_BASE, .tc_nthrs_max = MDS_NTHRS_MAX, .tc_nthrs_user = mds_num_threads, - .tc_cpu_affinity = 1, - .tc_ctx_tags = LCT_MD_THREAD, + .tc_cpu_bind = mds_cpu_bind, + /* LCT_DT_THREAD is required as MDT threads may scan + * all LDLM namespaces (including OFD-originated) to + * cancel LDLM locks */ + .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD, }, .psc_cpt = { .cc_pattern = mds_num_cpts, + .cc_affinity = true, }, .psc_ops = { .so_req_handler = tgt_request_handle, @@ -193,8 +227,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = ptlrpc_hpreq_handler, }, }; - m->mds_regular_service = ptlrpc_register_service(&conf, m->mds_kset, - procfs_entry); + m->mds_regular_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_regular_service)) { rc = PTR_ERR(m->mds_regular_service); CERROR("failed to start regular mdt service: %d\n", rc); @@ -226,19 +260,20 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_RDPG_NTHRS_BASE, .tc_nthrs_max = MDS_RDPG_NTHRS_MAX, .tc_nthrs_user = mds_rdpg_num_threads, - .tc_cpu_affinity = 1, + .tc_cpu_bind = mds_rdpg_cpu_bind, .tc_ctx_tags = LCT_MD_THREAD, }, .psc_cpt = { .cc_pattern = mds_rdpg_num_cpts, + .cc_affinity = true, }, .psc_ops = { .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, }, }; - m->mds_readpage_service = ptlrpc_register_service(&conf, m->mds_kset, - procfs_entry); + m->mds_readpage_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_readpage_service)) { rc = PTR_ERR(m->mds_readpage_service); CERROR("failed to start readpage service: %d\n", rc); @@ -273,11 +308,12 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_SETA_NTHRS_BASE, .tc_nthrs_max = MDS_SETA_NTHRS_MAX, .tc_nthrs_user = mds_attr_num_threads, - .tc_cpu_affinity = 1, + .tc_cpu_bind = mds_attr_cpu_bind, .tc_ctx_tags = LCT_MD_THREAD, }, .psc_cpt = { .cc_pattern = mds_attr_num_cpts, + .cc_affinity = true, }, .psc_ops = { .so_req_handler = tgt_request_handle, @@ -285,8 +321,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_setattr_service = ptlrpc_register_service(&conf, m->mds_kset, - procfs_entry); + m->mds_setattr_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_setattr_service)) { rc = PTR_ERR(m->mds_setattr_service); CERROR("failed to start setattr service: %d\n", rc); @@ -318,12 +354,13 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_NTHRS_BASE, .tc_nthrs_max = MDS_NTHRS_MAX, .tc_nthrs_user = mds_num_threads, - .tc_cpu_affinity = 1, + .tc_cpu_bind = mds_cpu_bind, .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD, }, .psc_cpt = { .cc_pattern = mds_num_cpts, + .cc_affinity = true, }, .psc_ops = { .so_req_handler = tgt_request_handle, @@ -331,8 +368,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_out_service = ptlrpc_register_service(&conf, m->mds_kset, - procfs_entry); + m->mds_out_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_out_service)) { rc = PTR_ERR(m->mds_out_service); CERROR("failed to start out service: %d\n", rc); @@ -367,8 +404,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_mdsc_service = ptlrpc_register_service(&conf, m->mds_kset, - procfs_entry); + m->mds_mdsc_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_mdsc_service)) { rc = PTR_ERR(m->mds_mdsc_service); CERROR("failed to start seq controller service: %d\n", rc); @@ -404,8 +441,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_mdss_service = ptlrpc_register_service(&conf, m->mds_kset, - procfs_entry); + m->mds_mdss_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_mdss_service)) { rc = PTR_ERR(m->mds_mdss_service); CERROR("failed to start metadata seq server service: %d\n", rc); @@ -439,8 +476,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_fld_service = ptlrpc_register_service(&conf, m->mds_kset, - procfs_entry); + m->mds_fld_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_fld_service)) { rc = PTR_ERR(m->mds_fld_service); CERROR("failed to start fld service: %d\n", rc); @@ -449,6 +486,32 @@ static int mds_start_ptlrpc_service(struct mds_device *m) GOTO(err_mds_svc, rc); } + + mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY); + /* event CPT feature is disabled in libcfs level by set partition + * number to 1, we still want to set node affinity for io service */ + if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) { + int cpt = 0; + int i; + + mdt_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask)); + for_each_node_mask(i, *mask) { + if (mdt_io_cptable == NULL) { + CWARN("MDS failed to create CPT table\n"); + break; + } + + rc = cfs_cpt_set_node(mdt_io_cptable, cpt++, i); + if (!rc) { + CWARN("MDS Failed to set node %d for" + "IO CPT table\n", i); + cfs_cpt_table_free(mdt_io_cptable); + mdt_io_cptable = NULL; + break; + } + } + } + memset(&conf, 0, sizeof(conf)); conf = (typeof(conf)) { .psc_name = LUSTRE_MDT_NAME "_io", @@ -462,23 +525,31 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .bc_rep_portal = MDC_REPLY_PORTAL, }, .psc_thr = { - .tc_thr_name = "ll_mdt_io", + .tc_thr_name = LUSTRE_MDT_NAME "_io", .tc_thr_factor = OSS_THR_FACTOR, .tc_nthrs_init = OSS_NTHRS_INIT, .tc_nthrs_base = OSS_NTHRS_BASE, .tc_nthrs_max = mds_max_io_threads, - .tc_cpu_affinity = 1, + .tc_nthrs_user = mds_num_threads, + .tc_cpu_bind = mds_io_cpu_bind, .tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD, }, + .psc_cpt = { + .cc_cptable = mdt_io_cptable, + .cc_pattern = mdt_io_cptable == NULL ? + mds_io_num_cpts : NULL, + .cc_affinity = true, + }, .psc_ops = { .so_thr_init = tgt_io_thread_init, .so_thr_done = tgt_io_thread_done, .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = tgt_hpreq_handler, }, }; - m->mds_io_service = ptlrpc_register_service(&conf, m->mds_kset, - procfs_entry); + m->mds_io_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_io_service)) { rc = PTR_ERR(m->mds_io_service); CERROR("failed to start MDT I/O service: %d\n", rc); @@ -507,7 +578,7 @@ static struct lu_device *mds_device_fini(const struct lu_env *env, ENTRY; mds_stop_ptlrpc_service(m); - lprocfs_kset_unregister(obd, m->mds_kset); + lprocfs_obd_cleanup(obd); RETURN(NULL); } @@ -545,7 +616,7 @@ static struct lu_device *mds_device_alloc(const struct lu_env *env, /* set this lu_device to obd, because error handling need it */ obd->obd_lu_dev = l; - rc = lprocfs_kset_register(obd, &m->mds_kset); + rc = lprocfs_obd_setup(obd, true); if (rc != 0) { mds_device_free(env, l); l = ERR_PTR(rc); @@ -556,7 +627,7 @@ static struct lu_device *mds_device_alloc(const struct lu_env *env, rc = mds_start_ptlrpc_service(m); if (rc != 0) { - lprocfs_kset_unregister(obd, m->mds_kset); + lprocfs_obd_cleanup(obd); mds_device_free(env, l); l = ERR_PTR(rc); return l; @@ -613,7 +684,7 @@ static struct obd_ops mds_obd_device_ops = { int mds_mod_init(void) { - return class_register_type(&mds_obd_device_ops, NULL, true, NULL, + return class_register_type(&mds_obd_device_ops, NULL, false, NULL, LUSTRE_MDS_NAME, &mds_device_type); }