X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_mds.c;h=860cdf9711e1793049c774dfe40b948a3235ad70;hp=cbdf73abbef018faba7ec58bfeb95d40188ce64f;hb=cc3643908d6c902db3d6c95647fff007bad0ff53;hpb=6b8967aa9545fbf5942cc79438d27cd38e919f70 diff --git a/lustre/mdt/mdt_mds.c b/lustre/mdt/mdt_mds.c index cbdf73a..860cdf9 100644 --- a/lustre/mdt/mdt_mds.c +++ b/lustre/mdt/mdt_mds.c @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012 Intel Corporation + * Copyright (c) 2013, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -49,339 +49,95 @@ /* lu2dt_dev() */ #include #include -#include #include "mdt_internal.h" #include #include -#include -#include +#include struct mds_device { /* super-class */ - struct md_device mds_md_dev; - struct ptlrpc_service *mds_regular_service; - struct ptlrpc_service *mds_readpage_service; - struct ptlrpc_service *mds_out_service; - struct ptlrpc_service *mds_setattr_service; - struct ptlrpc_service *mds_mdsc_service; - struct ptlrpc_service *mds_mdss_service; - struct ptlrpc_service *mds_fld_service; + struct md_device mds_md_dev; + struct ptlrpc_service *mds_regular_service; + struct ptlrpc_service *mds_readpage_service; + struct ptlrpc_service *mds_out_service; + struct ptlrpc_service *mds_setattr_service; + struct ptlrpc_service *mds_mdsc_service; + struct ptlrpc_service *mds_mdss_service; + struct ptlrpc_service *mds_fld_service; + struct ptlrpc_service *mds_io_service; + struct mutex mds_health_mutex; }; /* - * * Initialized in mdt_mod_init(). + * * Initialized in mds_mod_init(). 
* */ -static unsigned long mdt_num_threads; -CFS_MODULE_PARM(mdt_num_threads, "ul", ulong, 0444, - "number of MDS service threads to start " - "(deprecated in favor of mds_num_threads)"); - static unsigned long mds_num_threads; -CFS_MODULE_PARM(mds_num_threads, "ul", ulong, 0444, - "number of MDS service threads to start"); - -static char *mds_num_cpts; -CFS_MODULE_PARM(mds_num_cpts, "c", charp, 0444, - "CPU partitions MDS threads should run on"); - -static unsigned long mds_rdpg_num_threads; -CFS_MODULE_PARM(mds_rdpg_num_threads, "ul", ulong, 0444, - "number of MDS readpage service threads to start"); +module_param(mds_num_threads, ulong, 0444); +MODULE_PARM_DESC(mds_num_threads, "number of MDS service threads to start"); -static char *mds_rdpg_num_cpts; -CFS_MODULE_PARM(mds_rdpg_num_cpts, "c", charp, 0444, - "CPU partitions MDS readpage threads should run on"); +static unsigned int mds_cpu_bind = 1; +module_param(mds_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_cpu_bind, + "bind MDS threads to particular CPU partitions"); -/* NB: these two should be removed along with setattr service in the future */ -static unsigned long mds_attr_num_threads; -CFS_MODULE_PARM(mds_attr_num_threads, "ul", ulong, 0444, - "number of MDS setattr service threads to start"); +int mds_max_io_threads = 512; +module_param(mds_max_io_threads, int, 0444); +MODULE_PARM_DESC(mds_max_io_threads, + "maximum number of MDS IO service threads"); -static char *mds_attr_num_cpts; -CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444, - "CPU partitions MDS setattr threads should run on"); - -#define DEFINE_RPC_HANDLER(base, flags, opc, fn, fmt) \ -[opc - base] = { \ - .mh_name = #opc, \ - .mh_fail_id = OBD_FAIL_ ## opc ## _NET, \ - .mh_opc = opc, \ - .mh_flags = flags, \ - .mh_act = fn, \ - .mh_fmt = fmt \ -} - -/* Request with a format known in advance */ -#define DEF_MDT_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(MDS_GETATTR, flags, name, fn, &RQF_ ## name) - -/* Request with a format we do not yet 
know */ -#define DEF_MDT_HDL_VAR(flags, name, fn) \ - DEFINE_RPC_HANDLER(MDS_GETATTR, flags, name, fn, NULL) - -/* Map one non-standard request format handler. This should probably get - * a common OBD_SET_INFO RPC opcode instead of this mismatch. */ -#define RQF_MDS_SET_INFO RQF_OBD_SET_INFO - -static struct mdt_handler mdt_mds_ops[] = { -DEF_MDT_HDL(0, MDS_CONNECT, mdt_connect), -DEF_MDT_HDL(0, MDS_DISCONNECT, mdt_disconnect), -DEF_MDT_HDL(0, MDS_SET_INFO, mdt_set_info), -DEF_MDT_HDL(0, MDS_GET_INFO, mdt_get_info), -DEF_MDT_HDL(0 | HABEO_REFERO, MDS_GETSTATUS, mdt_getstatus), -DEF_MDT_HDL(HABEO_CORPUS, MDS_GETATTR, mdt_getattr), -DEF_MDT_HDL(HABEO_CORPUS| HABEO_REFERO, MDS_GETATTR_NAME, mdt_getattr_name), -DEF_MDT_HDL(HABEO_CORPUS, MDS_GETXATTR, mdt_getxattr), -DEF_MDT_HDL(0 | HABEO_REFERO, MDS_STATFS, mdt_statfs), -DEF_MDT_HDL(0 | MUTABOR, MDS_REINT, mdt_reint), -DEF_MDT_HDL(HABEO_CORPUS, MDS_CLOSE, mdt_close), -DEF_MDT_HDL(HABEO_CORPUS, MDS_DONE_WRITING, mdt_done_writing), -DEF_MDT_HDL(0 | HABEO_REFERO, MDS_PIN, mdt_pin), -DEF_MDT_HDL_VAR(0, MDS_SYNC, mdt_sync), -DEF_MDT_HDL(HABEO_CORPUS| HABEO_REFERO, MDS_IS_SUBDIR, mdt_is_subdir), -DEF_MDT_HDL(0, MDS_QUOTACHECK, mdt_quotacheck), -DEF_MDT_HDL(0, MDS_QUOTACTL, mdt_quotactl), -DEF_MDT_HDL(0 | HABEO_REFERO, MDS_HSM_PROGRESS, mdt_hsm_progress), -DEF_MDT_HDL(0 | HABEO_REFERO, MDS_HSM_CT_REGISTER, - mdt_hsm_ct_register), -DEF_MDT_HDL(0 | HABEO_REFERO, MDS_HSM_CT_UNREGISTER, - mdt_hsm_ct_unregister), -DEF_MDT_HDL(HABEO_CORPUS| HABEO_REFERO, MDS_HSM_STATE_GET, - mdt_hsm_state_get), -DEF_MDT_HDL(HABEO_CORPUS| HABEO_REFERO, MDS_HSM_STATE_SET, - mdt_hsm_state_set), -DEF_MDT_HDL(HABEO_CORPUS| HABEO_REFERO, MDS_HSM_ACTION, mdt_hsm_action), -DEF_MDT_HDL(0 | HABEO_REFERO, MDS_HSM_REQUEST, mdt_hsm_request), -DEF_MDT_HDL(HABEO_CORPUS|HABEO_REFERO, MDS_SWAP_LAYOUTS, mdt_swap_layouts) -}; - -#define DEF_OBD_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(OBD_PING, flags, name, fn, NULL) - -static struct mdt_handler mdt_obd_ops[] = { 
-DEF_OBD_HDL(0, OBD_PING, mdt_obd_ping), -DEF_OBD_HDL(0, OBD_LOG_CANCEL, mdt_obd_log_cancel), -DEF_OBD_HDL(0, OBD_QC_CALLBACK, mdt_obd_qc_callback), -DEF_OBD_HDL(0, OBD_IDX_READ, mdt_obd_idx_read) -}; +static unsigned int mds_io_cpu_bind = 1; +module_param(mds_io_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_io_cpu_bind, + "bind MDS IO threads to particular CPU partitions"); -#define DEF_DLM_HDL_VAR(flags, name, fn) \ - DEFINE_RPC_HANDLER(LDLM_ENQUEUE, flags, name, fn, NULL) -#define DEF_DLM_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(LDLM_ENQUEUE, flags, name, fn, &RQF_ ## name) +static char *mds_io_num_cpts; +module_param(mds_io_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_io_num_cpts, + "CPU partitions MDS IO threads should run on"); -static struct mdt_handler mdt_dlm_ops[] = { -DEF_DLM_HDL (HABEO_CLAVIS, LDLM_ENQUEUE, mdt_enqueue), -DEF_DLM_HDL_VAR(HABEO_CLAVIS, LDLM_CONVERT, mdt_convert), -DEF_DLM_HDL_VAR(0, LDLM_BL_CALLBACK, mdt_bl_callback), -DEF_DLM_HDL_VAR(0, LDLM_CP_CALLBACK, mdt_cp_callback) -}; - -#define DEF_LLOG_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(LLOG_ORIGIN_HANDLE_CREATE, flags, name, fn, NULL) - -static struct mdt_handler mdt_llog_ops[] = { -DEF_LLOG_HDL(0, LLOG_ORIGIN_HANDLE_CREATE, mdt_llog_create), -DEF_LLOG_HDL(0, LLOG_ORIGIN_HANDLE_NEXT_BLOCK, mdt_llog_next_block), -DEF_LLOG_HDL(0, LLOG_ORIGIN_HANDLE_READ_HEADER, mdt_llog_read_header), -DEF_LLOG_HDL(0, LLOG_ORIGIN_HANDLE_WRITE_REC, NULL), -DEF_LLOG_HDL(0, LLOG_ORIGIN_HANDLE_CLOSE, NULL), -DEF_LLOG_HDL(0, LLOG_ORIGIN_CONNECT, NULL), -DEF_LLOG_HDL(0, LLOG_CATINFO, NULL), -DEF_LLOG_HDL(0, LLOG_ORIGIN_HANDLE_PREV_BLOCK, mdt_llog_prev_block), -DEF_LLOG_HDL(0, LLOG_ORIGIN_HANDLE_DESTROY, mdt_llog_destroy), -}; - -#define DEF_SEC_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(SEC_CTX_INIT, flags, name, fn, NULL) - -static struct mdt_handler mdt_sec_ctx_ops[] = { -DEF_SEC_HDL(0, SEC_CTX_INIT, mdt_sec_ctx_handle), -DEF_SEC_HDL(0, SEC_CTX_INIT_CONT,mdt_sec_ctx_handle), -DEF_SEC_HDL(0, SEC_CTX_FINI, 
mdt_sec_ctx_handle) -}; - -#define DEF_QUOTA_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(QUOTA_DQACQ, flags, name, fn, &RQF_ ## name) - -static struct mdt_handler mdt_quota_ops[] = { -DEF_QUOTA_HDL(HABEO_REFERO, QUOTA_DQACQ, mdt_quota_dqacq), -}; - -struct mdt_opc_slice mdt_regular_handlers[] = { - { - .mos_opc_start = MDS_GETATTR, - .mos_opc_end = MDS_LAST_OPC, - .mos_hs = mdt_mds_ops - }, - { - .mos_opc_start = OBD_PING, - .mos_opc_end = OBD_LAST_OPC, - .mos_hs = mdt_obd_ops - }, - { - .mos_opc_start = LDLM_ENQUEUE, - .mos_opc_end = LDLM_LAST_OPC, - .mos_hs = mdt_dlm_ops - }, - { - .mos_opc_start = LLOG_ORIGIN_HANDLE_CREATE, - .mos_opc_end = LLOG_LAST_OPC, - .mos_hs = mdt_llog_ops - }, - { - .mos_opc_start = SEC_CTX_INIT, - .mos_opc_end = SEC_LAST_OPC, - .mos_hs = mdt_sec_ctx_ops - }, - { - .mos_opc_start = QUOTA_DQACQ, - .mos_opc_end = QUOTA_LAST_OPC, - .mos_hs = mdt_quota_ops - }, - { - .mos_hs = NULL - } -}; - -/* Readpage/readdir handlers */ -static struct mdt_handler mdt_readpage_ops[] = { -DEF_MDT_HDL(0, MDS_CONNECT, mdt_connect), -DEF_MDT_HDL(HABEO_CORPUS | HABEO_REFERO, MDS_READPAGE, mdt_readpage), -/* XXX: this is ugly and should be fixed one day, see mdc_close() for - * detailed comments. 
--umka */ -DEF_MDT_HDL(HABEO_CORPUS, MDS_CLOSE, mdt_close), -DEF_MDT_HDL(HABEO_CORPUS, MDS_DONE_WRITING, mdt_done_writing), -}; +static struct cfs_cpt_table *mdt_io_cptable; -static struct mdt_opc_slice mdt_readpage_handlers[] = { - { - .mos_opc_start = MDS_GETATTR, - .mos_opc_end = MDS_LAST_OPC, - .mos_hs = mdt_readpage_ops - }, - { - .mos_opc_start = OBD_FIRST_OPC, - .mos_opc_end = OBD_LAST_OPC, - .mos_hs = mdt_obd_ops - }, - { - .mos_hs = NULL - } -}; - -/* Sequence service handlers */ -#define DEF_SEQ_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(SEQ_QUERY, flags, name, fn, &RQF_ ## name) - -static struct mdt_handler mdt_seq_ops[] = { -DEF_SEQ_HDL(0, SEQ_QUERY, (void *)seq_query), -}; - -struct mdt_opc_slice mdt_seq_handlers[] = { - { - .mos_opc_start = SEQ_QUERY, - .mos_opc_end = SEQ_LAST_OPC, - .mos_hs = mdt_seq_ops - }, - { - .mos_hs = NULL - } -}; - -/* FID Location Database handlers */ -#define DEF_FLD_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(FLD_QUERY, flags, name, fn, &RQF_ ## name) - -static struct mdt_handler mdt_fld_ops[] = { -DEF_FLD_HDL(0, FLD_QUERY, (void *)fld_query), -}; - -struct mdt_opc_slice mdt_fld_handlers[] = { - { - .mos_opc_start = FLD_QUERY, - .mos_opc_end = FLD_LAST_OPC, - .mos_hs = mdt_fld_ops - }, - { - .mos_hs = NULL - } -}; - -/* Request with a format known in advance */ -#define DEF_UPDATE_HDL(flags, name, fn) \ - DEFINE_RPC_HANDLER(UPDATE_OBJ, flags, name, fn, &RQF_ ## name) - -#define target_handler mdt_handler -static struct target_handler out_ops[] = { - DEF_UPDATE_HDL(MUTABOR, UPDATE_OBJ, out_handle), -}; - -static struct mdt_opc_slice update_handlers[] = { - { - .mos_opc_start = MDS_GETATTR, - .mos_opc_end = MDS_LAST_OPC, - .mos_hs = mdt_mds_ops - }, - { - .mos_opc_start = OBD_PING, - .mos_opc_end = OBD_LAST_OPC, - .mos_hs = mdt_obd_ops - }, - { - .mos_opc_start = LDLM_ENQUEUE, - .mos_opc_end = LDLM_LAST_OPC, - .mos_hs = mdt_dlm_ops - }, - { - .mos_opc_start = SEC_CTX_INIT, - .mos_opc_end = SEC_LAST_OPC, - .mos_hs = 
mdt_sec_ctx_ops - }, - { - .mos_opc_start = UPDATE_OBJ, - .mos_opc_end = UPDATE_LAST_OPC, - .mos_hs = out_ops - }, - { - .mos_hs = NULL - } -}; +static char *mds_num_cpts; +module_param(mds_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_num_cpts, "CPU partitions MDS threads should run on"); -static int mds_regular_handle(struct ptlrpc_request *req) -{ - return mdt_handle_common(req, mdt_regular_handlers); -} +static unsigned long mds_rdpg_num_threads; +module_param(mds_rdpg_num_threads, ulong, 0444); +MODULE_PARM_DESC(mds_rdpg_num_threads, + "number of MDS readpage service threads to start"); -static int mds_readpage_handle(struct ptlrpc_request *req) -{ - return mdt_handle_common(req, mdt_readpage_handlers); -} +static unsigned int mds_rdpg_cpu_bind = 1; +module_param(mds_rdpg_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_rdpg_cpu_bind, + "bind MDS readpage threads to particular CPU partitions"); -static int mds_mdsc_handle(struct ptlrpc_request *req) -{ - return mdt_handle_common(req, mdt_seq_handlers); -} +static char *mds_rdpg_num_cpts; +module_param(mds_rdpg_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_rdpg_num_cpts, + "CPU partitions MDS readpage threads should run on"); -static int mdt_out_handle(struct ptlrpc_request *req) -{ - return mdt_handle_common(req, update_handlers); -} +/* NB: these two should be removed along with setattr service in the future */ +static unsigned long mds_attr_num_threads; +module_param(mds_attr_num_threads, ulong, 0444); +MODULE_PARM_DESC(mds_attr_num_threads, + "number of MDS setattr service threads to start"); -static int mds_mdss_handle(struct ptlrpc_request *req) -{ - return mdt_handle_common(req, mdt_seq_handlers); -} +static unsigned int mds_attr_cpu_bind = 1; +module_param(mds_attr_cpu_bind, uint, 0444); +MODULE_PARM_DESC(mds_attr_cpu_bind, + "bind MDS setattr threads to particular CPU partitions"); -static int mds_fld_handle(struct ptlrpc_request *req) -{ - return mdt_handle_common(req, mdt_fld_handlers); -} +static char 
*mds_attr_num_cpts; +module_param(mds_attr_num_cpts, charp, 0444); +MODULE_PARM_DESC(mds_attr_num_cpts, + "CPU partitions MDS setattr threads should run on"); /* device init/fini methods */ static void mds_stop_ptlrpc_service(struct mds_device *m) { ENTRY; + + mutex_lock(&m->mds_health_mutex); if (m->mds_regular_service != NULL) { ptlrpc_unregister_service(m->mds_regular_service); m->mds_regular_service = NULL; @@ -410,6 +166,17 @@ static void mds_stop_ptlrpc_service(struct mds_device *m) ptlrpc_unregister_service(m->mds_fld_service); m->mds_fld_service = NULL; } + if (m->mds_io_service != NULL) { + ptlrpc_unregister_service(m->mds_io_service); + m->mds_io_service = NULL; + } + mutex_unlock(&m->mds_health_mutex); + + if (mdt_io_cptable != NULL) { + cfs_cpt_table_free(mdt_io_cptable); + mdt_io_cptable = NULL; + } + EXIT; } @@ -417,21 +184,19 @@ static int mds_start_ptlrpc_service(struct mds_device *m) { static struct ptlrpc_service_conf conf; struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd; - cfs_proc_dir_entry_t *procfs_entry; + nodemask_t *mask; int rc = 0; - ENTRY; - procfs_entry = obd->obd_proc_entry; - LASSERT(procfs_entry != NULL); + ENTRY; conf = (typeof(conf)) { .psc_name = LUSTRE_MDT_NAME, .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR, .psc_buf = { .bc_nbufs = MDS_NBUFS, - .bc_buf_size = MDS_LOV_BUFSIZE, - .bc_req_max_size = MDS_LOV_MAXREQSIZE, - .bc_rep_max_size = MDS_LOV_MAXREPSIZE, + .bc_buf_size = MDS_REG_BUFSIZE, + .bc_req_max_size = MDS_REG_MAXREQSIZE, + .bc_rep_max_size = MDS_REG_MAXREPSIZE, .bc_req_portal = MDS_REQUEST_PORTAL, .bc_rep_portal = MDC_REPLY_PORTAL, }, @@ -446,19 +211,24 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_NTHRS_BASE, .tc_nthrs_max = MDS_NTHRS_MAX, .tc_nthrs_user = mds_num_threads, - .tc_cpu_affinity = 1, - .tc_ctx_tags = LCT_MD_THREAD, + .tc_cpu_bind = mds_cpu_bind, + /* LCT_DT_THREAD is required as MDT threads may scan + * all LDLM namespaces (including OFD-originated) to + * 
cancel LDLM locks */ + .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD, }, .psc_cpt = { .cc_pattern = mds_num_cpts, + .cc_affinity = true, }, .psc_ops = { - .so_req_handler = mds_regular_handle, + .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, .so_hpreq_handler = ptlrpc_hpreq_handler, }, }; - m->mds_regular_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_regular_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_regular_service)) { rc = PTR_ERR(m->mds_regular_service); CERROR("failed to start regular mdt service: %d\n", rc); @@ -490,18 +260,20 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_RDPG_NTHRS_BASE, .tc_nthrs_max = MDS_RDPG_NTHRS_MAX, .tc_nthrs_user = mds_rdpg_num_threads, - .tc_cpu_affinity = 1, + .tc_cpu_bind = mds_rdpg_cpu_bind, .tc_ctx_tags = LCT_MD_THREAD, }, .psc_cpt = { .cc_pattern = mds_rdpg_num_cpts, + .cc_affinity = true, }, .psc_ops = { - .so_req_handler = mds_readpage_handle, + .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, }, }; - m->mds_readpage_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_readpage_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_readpage_service)) { rc = PTR_ERR(m->mds_readpage_service); CERROR("failed to start readpage service: %d\n", rc); @@ -536,19 +308,21 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_SETA_NTHRS_BASE, .tc_nthrs_max = MDS_SETA_NTHRS_MAX, .tc_nthrs_user = mds_attr_num_threads, - .tc_cpu_affinity = 1, + .tc_cpu_bind = mds_attr_cpu_bind, .tc_ctx_tags = LCT_MD_THREAD, }, .psc_cpt = { .cc_pattern = mds_attr_num_cpts, + .cc_affinity = true, }, .psc_ops = { - .so_req_handler = mds_regular_handle, + .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, .so_hpreq_handler = NULL, }, }; - m->mds_setattr_service = 
ptlrpc_register_service(&conf, procfs_entry); + m->mds_setattr_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_setattr_service)) { rc = PTR_ERR(m->mds_setattr_service); CERROR("failed to start setattr service: %d\n", rc); @@ -563,11 +337,11 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR, .psc_buf = { .bc_nbufs = MDS_NBUFS, - .bc_buf_size = MDS_OUT_BUFSIZE, - .bc_req_max_size = MDS_OUT_MAXREQSIZE, - .bc_rep_max_size = MDS_OUT_MAXREPSIZE, - .bc_req_portal = MDS_MDS_PORTAL, - .bc_rep_portal = MDC_REPLY_PORTAL, + .bc_buf_size = OUT_BUFSIZE, + .bc_req_max_size = OUT_MAXREQSIZE, + .bc_rep_max_size = OUT_MAXREPSIZE, + .bc_req_portal = OUT_PORTAL, + .bc_rep_portal = OSC_REPLY_PORTAL, }, /* * We'd like to have a mechanism to set this on a per-device @@ -580,19 +354,22 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_nthrs_base = MDS_NTHRS_BASE, .tc_nthrs_max = MDS_NTHRS_MAX, .tc_nthrs_user = mds_num_threads, - .tc_cpu_affinity = 1, - .tc_ctx_tags = LCT_MD_THREAD, + .tc_cpu_bind = mds_cpu_bind, + .tc_ctx_tags = LCT_MD_THREAD | + LCT_DT_THREAD, }, .psc_cpt = { .cc_pattern = mds_num_cpts, + .cc_affinity = true, }, .psc_ops = { - .so_req_handler = mdt_out_handle, + .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, .so_hpreq_handler = NULL, }, }; - m->mds_out_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_out_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_out_service)) { rc = PTR_ERR(m->mds_out_service); CERROR("failed to start out service: %d\n", rc); @@ -622,12 +399,13 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_ctx_tags = LCT_MD_THREAD, }, .psc_ops = { - .so_req_handler = mds_mdsc_handle, + .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, .so_hpreq_handler = NULL, }, }; - m->mds_mdsc_service = 
ptlrpc_register_service(&conf, procfs_entry); + m->mds_mdsc_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_mdsc_service)) { rc = PTR_ERR(m->mds_mdsc_service); CERROR("failed to start seq controller service: %d\n", rc); @@ -658,12 +436,13 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD }, .psc_ops = { - .so_req_handler = mds_mdss_handle, + .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, .so_hpreq_handler = NULL, }, }; - m->mds_mdss_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_mdss_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_mdss_service)) { rc = PTR_ERR(m->mds_mdss_service); CERROR("failed to start metadata seq server service: %d\n", rc); @@ -689,15 +468,16 @@ static int mds_start_ptlrpc_service(struct mds_device *m) .tc_thr_name = LUSTRE_MDT_NAME "_fld", .tc_nthrs_init = MDS_OTHR_NTHRS_INIT, .tc_nthrs_max = MDS_OTHR_NTHRS_MAX, - .tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD + .tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD, }, .psc_ops = { - .so_req_handler = mds_fld_handle, + .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, .so_hpreq_handler = NULL, }, }; - m->mds_fld_service = ptlrpc_register_service(&conf, procfs_entry); + m->mds_fld_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(m->mds_fld_service)) { rc = PTR_ERR(m->mds_fld_service); CERROR("failed to start fld service: %d\n", rc); @@ -706,6 +486,77 @@ static int mds_start_ptlrpc_service(struct mds_device *m) GOTO(err_mds_svc, rc); } + + mask = cfs_cpt_nodemask(cfs_cpt_tab, CFS_CPT_ANY); + /* even if CPT is disabled at the libcfs level by setting the partition + * number to 1, we still want to set node affinity for io service */ + if (cfs_cpt_number(cfs_cpt_tab) == 1 && nodes_weight(*mask) > 1) { + int cpt = 0; + int i; + + 
mdt_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask)); + for_each_node_mask(i, *mask) { + if (mdt_io_cptable == NULL) { + CWARN("MDS failed to create CPT table\n"); + break; + } + + rc = cfs_cpt_set_node(mdt_io_cptable, cpt++, i); + if (!rc) { + CWARN("MDS Failed to set node %d for IO CPT table\n", + i); + cfs_cpt_table_free(mdt_io_cptable); + mdt_io_cptable = NULL; + break; + } + } + } + + memset(&conf, 0, sizeof(conf)); + conf = (typeof(conf)) { + .psc_name = LUSTRE_MDT_NAME "_io", + .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR, + .psc_buf = { + .bc_nbufs = OST_NBUFS, + .bc_buf_size = OST_IO_BUFSIZE, + .bc_req_max_size = OST_IO_MAXREQSIZE, + .bc_rep_max_size = OST_IO_MAXREPSIZE, + .bc_req_portal = MDS_IO_PORTAL, + .bc_rep_portal = MDC_REPLY_PORTAL, + }, + .psc_thr = { + .tc_thr_name = LUSTRE_MDT_NAME "_io", + .tc_thr_factor = OSS_THR_FACTOR, + .tc_nthrs_init = OSS_NTHRS_INIT, + .tc_nthrs_base = OSS_NTHRS_BASE, + .tc_nthrs_max = mds_max_io_threads, + .tc_nthrs_user = mds_num_threads, + .tc_cpu_bind = mds_io_cpu_bind, + .tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD, + }, + .psc_cpt = { + .cc_cptable = mdt_io_cptable, + .cc_pattern = mdt_io_cptable == NULL ? 
+ mds_io_num_cpts : NULL, + .cc_affinity = true, + }, + .psc_ops = { + .so_thr_init = tgt_io_thread_init, + .so_thr_done = tgt_io_thread_done, + .so_req_handler = tgt_request_handle, + .so_req_printer = target_print_req, + .so_hpreq_handler = tgt_hpreq_handler, + }, + }; + m->mds_io_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); + if (IS_ERR(m->mds_io_service)) { + rc = PTR_ERR(m->mds_io_service); + CERROR("failed to start MDT I/O service: %d\n", rc); + m->mds_io_service = NULL; + GOTO(err_mds_svc, rc); + } + EXIT; err_mds_svc: if (rc) @@ -765,21 +616,22 @@ static struct lu_device *mds_device_alloc(const struct lu_env *env, /* set this lu_device to obd, because error handling need it */ obd->obd_lu_dev = l; - rc = lprocfs_obd_setup(obd, lprocfs_mds_obd_vars); + rc = lprocfs_obd_setup(obd, true); if (rc != 0) { mds_device_free(env, l); l = ERR_PTR(rc); return l; } - rc = mds_start_ptlrpc_service(m); + mutex_init(&m->mds_health_mutex); + rc = mds_start_ptlrpc_service(m); if (rc != 0) { + lprocfs_obd_cleanup(obd); mds_device_free(env, l); l = ERR_PTR(rc); return l; } - return l; } @@ -805,25 +657,35 @@ static struct lu_device_type mds_device_type = { .ldt_ctx_tags = LCT_MD_THREAD }; -static struct obd_ops mds_obd_device_ops = { +static int mds_health_check(const struct lu_env *env, struct obd_device *obd) +{ + struct mds_device *mds = mds_dev(obd->obd_lu_dev); + int rc = 0; + + + mutex_lock(&mds->mds_health_mutex); + rc |= ptlrpc_service_health_check(mds->mds_regular_service); + rc |= ptlrpc_service_health_check(mds->mds_readpage_service); + rc |= ptlrpc_service_health_check(mds->mds_out_service); + rc |= ptlrpc_service_health_check(mds->mds_setattr_service); + rc |= ptlrpc_service_health_check(mds->mds_mdsc_service); + rc |= ptlrpc_service_health_check(mds->mds_mdss_service); + rc |= ptlrpc_service_health_check(mds->mds_fld_service); + rc |= ptlrpc_service_health_check(mds->mds_io_service); + mutex_unlock(&mds->mds_health_mutex); 
+ + return rc != 0 ? 1 : 0; +} + +static const struct obd_ops mds_obd_device_ops = { .o_owner = THIS_MODULE, + .o_health_check = mds_health_check, }; int mds_mod_init(void) { - int rc; - - if (mdt_num_threads != 0 && mds_num_threads == 0) { - LCONSOLE_INFO("mdt_num_threads module parameter is deprecated, " - "use mds_num_threads instead or unset both for " - "dynamic thread startup\n"); - mds_num_threads = mdt_num_threads; - } - - rc = class_register_type(&mds_obd_device_ops, NULL, - lprocfs_mds_module_vars, LUSTRE_MDS_NAME, - &mds_device_type); - return rc; + return class_register_type(&mds_obd_device_ops, NULL, false, NULL, + LUSTRE_MDS_NAME, &mds_device_type); } void mds_mod_exit(void)