X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_mds.c;h=860cdf9711e1793049c774dfe40b948a3235ad70;hp=1c0677aef75a74ab7940700662c1c64bad3e5eda;hb=cc3643908d6c902db3d6c95647fff007bad0ff53;hpb=bafa12c06d87c05bb263eed37ad8af1a2df99894 diff --git a/lustre/mdt/mdt_mds.c b/lustre/mdt/mdt_mds.c index 1c0677a..860cdf9 100644 --- a/lustre/mdt/mdt_mds.c +++ b/lustre/mdt/mdt_mds.c @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2013, Intel Corporation. + * Copyright (c) 2013, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -52,57 +52,92 @@ #include "mdt_internal.h" #include #include -#include +#include struct mds_device { /* super-class */ - struct md_device mds_md_dev; - struct ptlrpc_service *mds_regular_service; - struct ptlrpc_service *mds_readpage_service; - struct ptlrpc_service *mds_out_service; - struct ptlrpc_service *mds_setattr_service; - struct ptlrpc_service *mds_mdsc_service; - struct ptlrpc_service *mds_mdss_service; - struct ptlrpc_service *mds_fld_service; + struct md_device mds_md_dev; + struct ptlrpc_service *mds_regular_service; + struct ptlrpc_service *mds_readpage_service; + struct ptlrpc_service *mds_out_service; + struct ptlrpc_service *mds_setattr_service; + struct ptlrpc_service *mds_mdsc_service; + struct ptlrpc_service *mds_mdss_service; + struct ptlrpc_service *mds_fld_service; + struct ptlrpc_service *mds_io_service; + struct mutex mds_health_mutex; }; /* - * * Initialized in mdt_mod_init(). + * * Initialized in mds_mod_init(). * */ -static unsigned long mdt_num_threads; -CFS_MODULE_PARM(mdt_num_threads, "ul", ulong, 0444, - "number of MDS service threads to start " - "(deprecated in favor of mds_num_threads)"); - static unsigned long mds_num_threads; -CFS_MODULE_PARM(mds_num_threads, "ul", ulong, 0444, - "number of MDS service threads to start"); +module_param(mds_num_threads, ulong, 0444); +MODULE_PARM_DESC(mds_num_threads, "number of MDS service threads to start"); + +static unsigned int mds_cpu_bind = 1; +module_param(mds_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_cpu_bind, + "bind MDS threads to particular CPU partitions"); + +int mds_max_io_threads = 512; +module_param(mds_max_io_threads, int, 0444); +MODULE_PARM_DESC(mds_max_io_threads, + "maximum number of MDS IO service threads"); + +static unsigned int mds_io_cpu_bind = 1; +module_param(mds_io_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_io_cpu_bind, + "bind MDS IO threads to particular CPU partitions"); + +static char *mds_io_num_cpts; +module_param(mds_io_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_io_num_cpts, + "CPU partitions MDS IO threads should run on"); + +static struct cfs_cpt_table *mdt_io_cptable; static char *mds_num_cpts; -CFS_MODULE_PARM(mds_num_cpts, "c", charp, 0444, - "CPU partitions MDS threads should run on"); +module_param(mds_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_num_cpts, "CPU partitions MDS threads should run on"); static unsigned long mds_rdpg_num_threads; -CFS_MODULE_PARM(mds_rdpg_num_threads, "ul", ulong, 0444, - "number of MDS readpage service threads to start"); +module_param(mds_rdpg_num_threads, ulong, 0444); +MODULE_PARM_DESC(mds_rdpg_num_threads, + "number of MDS readpage service threads to start"); + +static unsigned int mds_rdpg_cpu_bind = 1; +module_param(mds_rdpg_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_rdpg_cpu_bind, + "bind MDS readpage threads to particular CPU partitions"); static char *mds_rdpg_num_cpts; -CFS_MODULE_PARM(mds_rdpg_num_cpts, "c", charp, 0444, - "CPU partitions MDS readpage threads should run on"); +module_param(mds_rdpg_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_rdpg_num_cpts, + "CPU partitions MDS readpage threads should run on"); /* NB: these two should be removed along with setattr service in the future */ static unsigned long mds_attr_num_threads; -CFS_MODULE_PARM(mds_attr_num_threads, "ul", ulong, 0444, - "number of MDS setattr service threads to start"); +module_param(mds_attr_num_threads, ulong, 0444); +MODULE_PARM_DESC(mds_attr_num_threads, + "number of MDS setattr service threads to start"); + +static unsigned int mds_attr_cpu_bind = 1; +module_param(mds_attr_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_attr_cpu_bind, + "bind MDS setattr threads to particular CPU partitions"); static char *mds_attr_num_cpts; -CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444, - "CPU partitions MDS setattr threads should run on"); +module_param(mds_attr_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_attr_num_cpts, + "CPU partitions MDS setattr threads should run on"); /* device init/fini methods */ static void mds_stop_ptlrpc_service(struct mds_device *m) { ENTRY; + + mutex_lock(&m->mds_health_mutex); if (m->mds_regular_service != NULL) { ptlrpc_unregister_service(m->mds_regular_service); m->mds_regular_service = NULL; @@ -131,6 +166,17 @@ static void mds_stop_ptlrpc_service(struct mds_device *m) ptlrpc_unregister_service(m->mds_fld_service); m->mds_fld_service = NULL; } + if (m->mds_io_service != NULL) { + ptlrpc_unregister_service(m->mds_io_service); + m->mds_io_service = NULL; + } + mutex_unlock(&m->mds_health_mutex); + + if (mdt_io_cptable != NULL) { + cfs_cpt_table_free(mdt_io_cptable); + mdt_io_cptable = NULL; + } + EXIT; } @@ -138,12 +184,10 @@ static int mds_start_ptlrpc_service(struct mds_device *m) { static struct ptlrpc_service_conf conf; struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd; - cfs_proc_dir_entry_t *procfs_entry; + nodemask_t *mask; int rc = 0; - ENTRY; - procfs_entry = obd->obd_proc_entry; - LASSERT(procfs_entry != NULL); + ENTRY; conf = (typeof(conf)) { .psc_name = LUSTRE_MDT_NAME, @@ -167,11 +211,15 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_NTHRS_BASE, .tc_nthrs_max = MDS_NTHRS_MAX, .tc_nthrs_user = mds_num_threads, - .tc_cpu_affinity = 1, - .tc_ctx_tags = LCT_MD_THREAD, + .tc_cpu_bind = mds_cpu_bind, + /* LCT_DT_THREAD is required as MDT threads may scan + * all LDLM namespaces (including OFD-originated) to + * cancel LDLM locks */ + .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD, }, .psc_cpt = { .cc_pattern = mds_num_cpts, + .cc_affinity = true, }, .psc_ops = { .so_req_handler = tgt_request_handle, @@ -179,7 +227,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = ptlrpc_hpreq_handler, }, }; - m->mds_regular_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_regular_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_regular_service)) { rc = PTR_ERR(m->mds_regular_service); CERROR("failed to start regular mdt service: %d\n", rc); @@ -211,18 +260,20 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_RDPG_NTHRS_BASE, .tc_nthrs_max = MDS_RDPG_NTHRS_MAX, .tc_nthrs_user = mds_rdpg_num_threads, - .tc_cpu_affinity = 1, + .tc_cpu_bind = mds_rdpg_cpu_bind, .tc_ctx_tags = LCT_MD_THREAD, }, .psc_cpt = { .cc_pattern = mds_rdpg_num_cpts, + .cc_affinity = true, }, .psc_ops = { .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, }, }; - m->mds_readpage_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_readpage_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_readpage_service)) { rc = PTR_ERR(m->mds_readpage_service); CERROR("failed to start readpage service: %d\n", rc); @@ -257,11 +308,12 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_SETA_NTHRS_BASE, .tc_nthrs_max = MDS_SETA_NTHRS_MAX, .tc_nthrs_user = mds_attr_num_threads, - .tc_cpu_affinity = 1, + .tc_cpu_bind = mds_attr_cpu_bind, .tc_ctx_tags = LCT_MD_THREAD, }, .psc_cpt = { .cc_pattern = mds_attr_num_cpts, + .cc_affinity = true, }, .psc_ops = { .so_req_handler = tgt_request_handle, @@ -269,7 +321,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_setattr_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_setattr_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_setattr_service)) { rc = PTR_ERR(m->mds_setattr_service); CERROR("failed to start setattr service: %d\n", rc); @@ -301,12 +354,13 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_NTHRS_BASE, .tc_nthrs_max = MDS_NTHRS_MAX, .tc_nthrs_user = mds_num_threads, - .tc_cpu_affinity = 1, + .tc_cpu_bind = mds_cpu_bind, .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD, }, .psc_cpt = { .cc_pattern = mds_num_cpts, + .cc_affinity = true, }, .psc_ops = { .so_req_handler = tgt_request_handle, @@ -314,7 +368,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_out_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_out_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_out_service)) { rc = PTR_ERR(m->mds_out_service); CERROR("failed to start out service: %d\n", rc); @@ -349,7 +404,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_mdsc_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_mdsc_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_mdsc_service)) { rc = PTR_ERR(m->mds_mdsc_service); CERROR("failed to start seq controller service: %d\n", rc); @@ -385,7 +441,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_mdss_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_mdss_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_mdss_service)) { rc = PTR_ERR(m->mds_mdss_service); CERROR("failed to start metadata seq server service: %d\n", rc); @@ -419,7 +476,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .so_hpreq_handler = NULL, }, }; - m->mds_fld_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_fld_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_fld_service)) { rc = PTR_ERR(m->mds_fld_service); CERROR("failed to start fld service: %d\n", rc); @@ -428,6 +486,77 @@ static int mds_start_ptlrpc_service(struct mds_device *m) GOTO(err_mds_svc, rc); } + + mask = cfs_cpt_nodemask(cfs_cpt_tab, CFS_CPT_ANY); + /* event CPT feature is disabled in libcfs level by set partition + * number to 1, we still want to set node affinity for io service */ + if (cfs_cpt_number(cfs_cpt_tab) == 1 && nodes_weight(*mask) > 1) { + int cpt = 0; + int i; + + mdt_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask)); + for_each_node_mask(i, *mask) { + if (mdt_io_cptable == NULL) { + CWARN("MDS failed to create CPT table\n"); + break; + } + + rc = cfs_cpt_set_node(mdt_io_cptable, cpt++, i); + if (!rc) { + CWARN("MDS Failed to set node %d for IO CPT table\n", + i); + cfs_cpt_table_free(mdt_io_cptable); + mdt_io_cptable = NULL; + break; + } + } + } + + memset(&conf, 0, sizeof(conf)); + conf = (typeof(conf)) { + .psc_name = LUSTRE_MDT_NAME "_io", + .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR, + .psc_buf = { + .bc_nbufs = OST_NBUFS, + .bc_buf_size = OST_IO_BUFSIZE, + .bc_req_max_size = OST_IO_MAXREQSIZE, + .bc_rep_max_size = OST_IO_MAXREPSIZE, + .bc_req_portal = MDS_IO_PORTAL, + .bc_rep_portal = MDC_REPLY_PORTAL, + }, + .psc_thr = { + .tc_thr_name = LUSTRE_MDT_NAME "_io", + .tc_thr_factor = OSS_THR_FACTOR, + .tc_nthrs_init = OSS_NTHRS_INIT, + .tc_nthrs_base = OSS_NTHRS_BASE, + .tc_nthrs_max = mds_max_io_threads, + .tc_nthrs_user = mds_num_threads, + .tc_cpu_bind = mds_io_cpu_bind, + .tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD, + }, + .psc_cpt = { + .cc_cptable = mdt_io_cptable, + .cc_pattern = mdt_io_cptable == NULL ? + mds_io_num_cpts : NULL, + .cc_affinity = true, + }, + .psc_ops = { + .so_thr_init = tgt_io_thread_init, + .so_thr_done = tgt_io_thread_done, + .so_req_handler = tgt_request_handle, + .so_req_printer = target_print_req, + .so_hpreq_handler = tgt_hpreq_handler, + }, + }; + m->mds_io_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); + if (IS_ERR(m->mds_io_service)) { + rc = PTR_ERR(m->mds_io_service); + CERROR("failed to start MDT I/O service: %d\n", rc); + m->mds_io_service = NULL; + GOTO(err_mds_svc, rc); + } + EXIT; err_mds_svc: if (rc) @@ -464,13 +593,6 @@ static struct lu_device *mds_device_free(const struct lu_env *env, RETURN(NULL); } -LPROC_SEQ_FOPS_RO_TYPE(mds, uuid); - -static struct lprocfs_seq_vars lprocfs_mds_obd_vars[] = { - { "uuid", &mds_uuid_fops }, - { 0 } -}; - static struct lu_device *mds_device_alloc(const struct lu_env *env, struct lu_device_type *t, struct lustre_cfg *cfg) @@ -494,17 +616,18 @@ static struct lu_device *mds_device_alloc(const struct lu_env *env, /* set this lu_device to obd, because error handling need it */ obd->obd_lu_dev = l; - obd->obd_vars = lprocfs_mds_obd_vars; - rc = lprocfs_obd_setup(obd); + rc = lprocfs_obd_setup(obd, true); if (rc != 0) { mds_device_free(env, l); l = ERR_PTR(rc); return l; } - rc = mds_start_ptlrpc_service(m); + mutex_init(&m->mds_health_mutex); + rc = mds_start_ptlrpc_service(m); if (rc != 0) { + lprocfs_obd_cleanup(obd); mds_device_free(env, l); l = ERR_PTR(rc); return l; @@ -534,23 +657,34 @@ static struct lu_device_type mds_device_type = { .ldt_ctx_tags = LCT_MD_THREAD }; -static struct obd_ops mds_obd_device_ops = { +static int mds_health_check(const struct lu_env *env, struct obd_device *obd) +{ + struct mds_device *mds = mds_dev(obd->obd_lu_dev); + int rc = 0; + + + mutex_lock(&mds->mds_health_mutex); + rc |= ptlrpc_service_health_check(mds->mds_regular_service); + rc |= ptlrpc_service_health_check(mds->mds_readpage_service); + rc |= ptlrpc_service_health_check(mds->mds_out_service); + rc |= ptlrpc_service_health_check(mds->mds_setattr_service); + rc |= ptlrpc_service_health_check(mds->mds_mdsc_service); + rc |= ptlrpc_service_health_check(mds->mds_mdss_service); + rc |= ptlrpc_service_health_check(mds->mds_fld_service); + rc |= ptlrpc_service_health_check(mds->mds_io_service); + mutex_unlock(&mds->mds_health_mutex); + + return rc != 0 ? 1 : 0; +} + +static const struct obd_ops mds_obd_device_ops = { .o_owner = THIS_MODULE, + .o_health_check = mds_health_check, }; int mds_mod_init(void) { - if (mdt_num_threads != 0 && mds_num_threads == 0) { - LCONSOLE_INFO("mdt_num_threads module parameter is deprecated, " - "use mds_num_threads instead or unset both for " - "dynamic thread startup\n"); - mds_num_threads = mdt_num_threads; - } - - return class_register_type(&mds_obd_device_ops, NULL, true, NULL, -#ifndef HAVE_ONLY_PROCFS_SEQ - NULL, -#endif + return class_register_type(&mds_obd_device_ops, NULL, false, NULL, LUSTRE_MDS_NAME, &mds_device_type); }