4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2013, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * lustre/mdt/mdt_mds.c
33 * Lustre Metadata Service Layer
35 * Author: Di Wang <di.wang@whamcloud.com>
38 #define DEBUG_SUBSYSTEM S_MDS
40 #include <linux/module.h>
42 #include <obd_support.h>
43 /* struct ptlrpc_request */
44 #include <lustre_net.h>
45 /* struct obd_export */
46 #include <lustre_export.h>
47 /* struct obd_device */
50 #include <dt_object.h>
51 #include <lustre_mds.h>
52 #include "mdt_internal.h"
53 #include <lustre_quota.h>
54 #include <lustre_acl.h>
55 #include <lustre_param.h>
/*
 * Per-device MDS state, accessed through struct mds_device pointers in
 * this file (the line opening the structure is not visible in this view):
 * the embedded md_device, one handle per registered ptlrpc service, and
 * the mutex held while the services are unregistered or health-checked.
 */
59 struct md_device mds_md_dev;
60 struct ptlrpc_service *mds_regular_service;
61 struct ptlrpc_service *mds_readpage_service;
62 struct ptlrpc_service *mds_out_service;
63 struct ptlrpc_service *mds_setattr_service;
64 struct ptlrpc_service *mds_mdsc_service;
65 struct ptlrpc_service *mds_mdss_service;
66 struct ptlrpc_service *mds_fld_service;
67 struct mutex mds_health_mutex;
71 * Initialized in mdt_mod_init().
/*
 * Module parameters, all declared with mode 0444.
 *
 * Deprecated thread-count knob: mds_mod_init() copies it into
 * mds_num_threads when mds_num_threads itself was left at 0.
 */
73 static unsigned long mdt_num_threads;
74 CFS_MODULE_PARM(mdt_num_threads, "ul", ulong, 0444,
75 "number of MDS service threads to start "
76 "(deprecated in favor of mds_num_threads)");
/*
 * Passed as tc_nthrs_user for both the regular and the out service
 * in mds_start_ptlrpc_service().
 */
78 static unsigned long mds_num_threads;
79 CFS_MODULE_PARM(mds_num_threads, "ul", ulong, 0444,
80 "number of MDS service threads to start");
/* Passed as cc_pattern for both the regular and the out service. */
82 static char *mds_num_cpts;
83 CFS_MODULE_PARM(mds_num_cpts, "c", charp, 0444,
84 "CPU partitions MDS threads should run on");
/* Passed as tc_nthrs_user for the readpage service. */
86 static unsigned long mds_rdpg_num_threads;
87 CFS_MODULE_PARM(mds_rdpg_num_threads, "ul", ulong, 0444,
88 "number of MDS readpage service threads to start");
/* Passed as cc_pattern for the readpage service. */
90 static char *mds_rdpg_num_cpts;
91 CFS_MODULE_PARM(mds_rdpg_num_cpts, "c", charp, 0444,
92 "CPU partitions MDS readpage threads should run on");
94 /* NB: these two should be removed along with setattr service in the future */
/* Passed as tc_nthrs_user for the setattr service. */
95 static unsigned long mds_attr_num_threads;
96 CFS_MODULE_PARM(mds_attr_num_threads, "ul", ulong, 0444,
97 "number of MDS setattr service threads to start");
/* Passed as cc_pattern for the setattr service. */
99 static char *mds_attr_num_cpts;
100 CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444,
101 "CPU partitions MDS setattr threads should run on");
103 /* device init/fini methods */
/*
 * Unregister every ptlrpc service that is currently registered on @m and
 * reset its pointer to NULL.
 *
 * Pointers that are already NULL are skipped, so the function can run on
 * a device whose services were only partly started; it is called both
 * from mds_start_ptlrpc_service() and from mds_device_fini().
 *
 * The whole teardown runs under m->mds_health_mutex, the same mutex that
 * mds_health_check() takes before it looks at the service pointers.
 */
104 static void mds_stop_ptlrpc_service(struct mds_device *m)
108 mutex_lock(&m->mds_health_mutex);
109 if (m->mds_regular_service != NULL) {
110 ptlrpc_unregister_service(m->mds_regular_service);
111 m->mds_regular_service = NULL;
113 if (m->mds_readpage_service != NULL) {
114 ptlrpc_unregister_service(m->mds_readpage_service);
115 m->mds_readpage_service = NULL;
117 if (m->mds_out_service != NULL) {
118 ptlrpc_unregister_service(m->mds_out_service);
119 m->mds_out_service = NULL;
121 if (m->mds_setattr_service != NULL) {
122 ptlrpc_unregister_service(m->mds_setattr_service);
123 m->mds_setattr_service = NULL;
125 if (m->mds_mdsc_service != NULL) {
126 ptlrpc_unregister_service(m->mds_mdsc_service);
127 m->mds_mdsc_service = NULL;
129 if (m->mds_mdss_service != NULL) {
130 ptlrpc_unregister_service(m->mds_mdss_service);
131 m->mds_mdss_service = NULL;
133 if (m->mds_fld_service != NULL) {
134 ptlrpc_unregister_service(m->mds_fld_service);
135 m->mds_fld_service = NULL;
137 mutex_unlock(&m->mds_health_mutex);
/*
 * Register the ptlrpc services of MDS device @m, in this order:
 *
 *   service name suffix   request portal           stored in
 *   (none, regular)       MDS_REQUEST_PORTAL       m->mds_regular_service
 *   _readpage             MDS_READPAGE_PORTAL      m->mds_readpage_service
 *   _setattr              MDS_SETATTR_PORTAL       m->mds_setattr_service
 *   _out                  OUT_PORTAL               m->mds_out_service
 *   _seqs                 SEQ_CONTROLLER_PORTAL    m->mds_mdsc_service
 *   _seqm                 SEQ_METADATA_PORTAL      m->mds_mdss_service
 *   _fld                  FLD_REQUEST_PORTAL       m->mds_fld_service
 *
 * Each service is described by rewriting the single static conf and
 * passing it, together with the procfs entry of the obd, to
 * ptlrpc_register_service().  Every service uses tgt_request_handle as
 * request handler and target_print_req as request printer; only the
 * regular service installs ptlrpc_hpreq_handler as so_hpreq_handler.
 * The regular, readpage, setattr and out services take their user thread
 * count and CPT pattern from the module parameters; the seqs, seqm and
 * fld services set neither.
 *
 * When a registration fails, the error code is taken from PTR_ERR(),
 * logged with CERROR(), and the affected pointer is reset to NULL so that
 * mds_stop_ptlrpc_service() skips it.
 *
 * NOTE(review): conf is a function-level static that is rewritten for
 * every service; this presumably relies on callers being serialized -
 * confirm.
 * NOTE(review): the memset() calls that precede most conf assignments
 * look redundant, because the compound-literal assignment replaces the
 * whole structure (the out service already omits the memset) - confirm
 * before removing.
 */
142 static int mds_start_ptlrpc_service(struct mds_device *m)
144 static struct ptlrpc_service_conf conf;
145 struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd;
146 struct proc_dir_entry *procfs_entry;
150 procfs_entry = obd->obd_proc_entry;
151 LASSERT(procfs_entry != NULL);
153 conf = (typeof(conf)) {
154 .psc_name = LUSTRE_MDT_NAME,
155 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
157 .bc_nbufs = MDS_NBUFS,
158 .bc_buf_size = MDS_REG_BUFSIZE,
159 .bc_req_max_size = MDS_REG_MAXREQSIZE,
160 .bc_rep_max_size = MDS_REG_MAXREPSIZE,
161 .bc_req_portal = MDS_REQUEST_PORTAL,
162 .bc_rep_portal = MDC_REPLY_PORTAL,
165 * We'd like to have a mechanism to set this on a per-device
169 .tc_thr_name = LUSTRE_MDT_NAME,
170 .tc_thr_factor = MDS_THR_FACTOR,
171 .tc_nthrs_init = MDS_NTHRS_INIT,
172 .tc_nthrs_base = MDS_NTHRS_BASE,
173 .tc_nthrs_max = MDS_NTHRS_MAX,
174 .tc_nthrs_user = mds_num_threads,
175 .tc_cpu_affinity = 1,
176 .tc_ctx_tags = LCT_MD_THREAD,
179 .cc_pattern = mds_num_cpts,
182 .so_req_handler = tgt_request_handle,
183 .so_req_printer = target_print_req,
184 .so_hpreq_handler = ptlrpc_hpreq_handler,
187 m->mds_regular_service = ptlrpc_register_service(&conf, procfs_entry);
188 if (IS_ERR(m->mds_regular_service)) {
189 rc = PTR_ERR(m->mds_regular_service);
190 CERROR("failed to start regular mdt service: %d\n", rc);
191 m->mds_regular_service = NULL;
197 * readpage service configuration. Parameters have to be adjusted,
200 memset(&conf, 0, sizeof(conf));
201 conf = (typeof(conf)) {
202 .psc_name = LUSTRE_MDT_NAME "_readpage",
203 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
205 .bc_nbufs = MDS_NBUFS,
206 .bc_buf_size = MDS_BUFSIZE,
207 .bc_req_max_size = MDS_MAXREQSIZE,
208 .bc_rep_max_size = MDS_MAXREPSIZE,
209 .bc_req_portal = MDS_READPAGE_PORTAL,
210 .bc_rep_portal = MDC_REPLY_PORTAL,
213 .tc_thr_name = LUSTRE_MDT_NAME "_rdpg",
214 .tc_thr_factor = MDS_RDPG_THR_FACTOR,
215 .tc_nthrs_init = MDS_RDPG_NTHRS_INIT,
216 .tc_nthrs_base = MDS_RDPG_NTHRS_BASE,
217 .tc_nthrs_max = MDS_RDPG_NTHRS_MAX,
218 .tc_nthrs_user = mds_rdpg_num_threads,
219 .tc_cpu_affinity = 1,
220 .tc_ctx_tags = LCT_MD_THREAD,
223 .cc_pattern = mds_rdpg_num_cpts,
226 .so_req_handler = tgt_request_handle,
227 .so_req_printer = target_print_req,
230 m->mds_readpage_service = ptlrpc_register_service(&conf, procfs_entry);
231 if (IS_ERR(m->mds_readpage_service)) {
232 rc = PTR_ERR(m->mds_readpage_service);
233 CERROR("failed to start readpage service: %d\n", rc);
234 m->mds_readpage_service = NULL;
236 GOTO(err_mds_svc, rc);
240 * setattr service configuration.
242 * XXX To keep the compatibility with old client(< 2.2), we need to
243 * preserve this portal for a certain time, it should be removed
244 * eventually. LU-617.
246 memset(&conf, 0, sizeof(conf));
247 conf = (typeof(conf)) {
248 .psc_name = LUSTRE_MDT_NAME "_setattr",
249 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
251 .bc_nbufs = MDS_NBUFS,
252 .bc_buf_size = MDS_BUFSIZE,
253 .bc_req_max_size = MDS_MAXREQSIZE,
254 .bc_rep_max_size = MDS_LOV_MAXREPSIZE,
255 .bc_req_portal = MDS_SETATTR_PORTAL,
256 .bc_rep_portal = MDC_REPLY_PORTAL,
259 .tc_thr_name = LUSTRE_MDT_NAME "_attr",
260 .tc_thr_factor = MDS_SETA_THR_FACTOR,
261 .tc_nthrs_init = MDS_SETA_NTHRS_INIT,
262 .tc_nthrs_base = MDS_SETA_NTHRS_BASE,
263 .tc_nthrs_max = MDS_SETA_NTHRS_MAX,
264 .tc_nthrs_user = mds_attr_num_threads,
265 .tc_cpu_affinity = 1,
266 .tc_ctx_tags = LCT_MD_THREAD,
269 .cc_pattern = mds_attr_num_cpts,
272 .so_req_handler = tgt_request_handle,
273 .so_req_printer = target_print_req,
274 .so_hpreq_handler = NULL,
277 m->mds_setattr_service = ptlrpc_register_service(&conf, procfs_entry);
278 if (IS_ERR(m->mds_setattr_service)) {
279 rc = PTR_ERR(m->mds_setattr_service);
280 CERROR("failed to start setattr service: %d\n", rc);
281 m->mds_setattr_service = NULL;
283 GOTO(err_mds_svc, rc);
286 /* Object update service */
287 conf = (typeof(conf)) {
288 .psc_name = LUSTRE_MDT_NAME "_out",
289 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
291 .bc_nbufs = MDS_NBUFS,
292 .bc_buf_size = OUT_BUFSIZE,
293 .bc_req_max_size = OUT_MAXREQSIZE,
294 .bc_rep_max_size = OUT_MAXREPSIZE,
295 .bc_req_portal = OUT_PORTAL,
296 .bc_rep_portal = OSC_REPLY_PORTAL,
299 * We'd like to have a mechanism to set this on a per-device
303 .tc_thr_name = LUSTRE_MDT_NAME "_out",
304 .tc_thr_factor = MDS_THR_FACTOR,
305 .tc_nthrs_init = MDS_NTHRS_INIT,
306 .tc_nthrs_base = MDS_NTHRS_BASE,
307 .tc_nthrs_max = MDS_NTHRS_MAX,
308 .tc_nthrs_user = mds_num_threads,
309 .tc_cpu_affinity = 1,
310 .tc_ctx_tags = LCT_MD_THREAD |
314 .cc_pattern = mds_num_cpts,
317 .so_req_handler = tgt_request_handle,
318 .so_req_printer = target_print_req,
319 .so_hpreq_handler = NULL,
322 m->mds_out_service = ptlrpc_register_service(&conf, procfs_entry);
323 if (IS_ERR(m->mds_out_service)) {
324 rc = PTR_ERR(m->mds_out_service);
325 CERROR("failed to start out service: %d\n", rc);
326 m->mds_out_service = NULL;
327 GOTO(err_mds_svc, rc);
331 * sequence controller service configuration
333 memset(&conf, 0, sizeof(conf));
334 conf = (typeof(conf)) {
335 .psc_name = LUSTRE_MDT_NAME "_seqs",
336 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
338 .bc_nbufs = MDS_NBUFS,
339 .bc_buf_size = SEQ_BUFSIZE,
340 .bc_req_max_size = SEQ_MAXREQSIZE,
341 .bc_rep_max_size = SEQ_MAXREPSIZE,
342 .bc_req_portal = SEQ_CONTROLLER_PORTAL,
343 .bc_rep_portal = MDC_REPLY_PORTAL,
346 .tc_thr_name = LUSTRE_MDT_NAME "_seqs",
347 .tc_nthrs_init = MDS_OTHR_NTHRS_INIT,
348 .tc_nthrs_max = MDS_OTHR_NTHRS_MAX,
349 .tc_ctx_tags = LCT_MD_THREAD,
352 .so_req_handler = tgt_request_handle,
353 .so_req_printer = target_print_req,
354 .so_hpreq_handler = NULL,
357 m->mds_mdsc_service = ptlrpc_register_service(&conf, procfs_entry);
358 if (IS_ERR(m->mds_mdsc_service)) {
359 rc = PTR_ERR(m->mds_mdsc_service);
360 CERROR("failed to start seq controller service: %d\n", rc);
361 m->mds_mdsc_service = NULL;
363 GOTO(err_mds_svc, rc);
367 * metadata sequence server service configuration
369 memset(&conf, 0, sizeof(conf));
370 conf = (typeof(conf)) {
371 .psc_name = LUSTRE_MDT_NAME "_seqm",
372 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
374 .bc_nbufs = MDS_NBUFS,
375 .bc_buf_size = SEQ_BUFSIZE,
376 .bc_req_max_size = SEQ_MAXREQSIZE,
377 .bc_rep_max_size = SEQ_MAXREPSIZE,
378 .bc_req_portal = SEQ_METADATA_PORTAL,
379 .bc_rep_portal = MDC_REPLY_PORTAL,
382 .tc_thr_name = LUSTRE_MDT_NAME "_seqm",
383 .tc_nthrs_init = MDS_OTHR_NTHRS_INIT,
384 .tc_nthrs_max = MDS_OTHR_NTHRS_MAX,
385 .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD
388 .so_req_handler = tgt_request_handle,
389 .so_req_printer = target_print_req,
390 .so_hpreq_handler = NULL,
393 m->mds_mdss_service = ptlrpc_register_service(&conf, procfs_entry);
394 if (IS_ERR(m->mds_mdss_service)) {
395 rc = PTR_ERR(m->mds_mdss_service);
396 CERROR("failed to start metadata seq server service: %d\n", rc);
397 m->mds_mdss_service = NULL;
399 GOTO(err_mds_svc, rc);
402 /* FLD service start */
403 memset(&conf, 0, sizeof(conf));
404 conf = (typeof(conf)) {
405 .psc_name = LUSTRE_MDT_NAME "_fld",
406 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
408 .bc_nbufs = MDS_NBUFS,
409 .bc_buf_size = FLD_BUFSIZE,
410 .bc_req_max_size = FLD_MAXREQSIZE,
411 .bc_rep_max_size = FLD_MAXREPSIZE,
412 .bc_req_portal = FLD_REQUEST_PORTAL,
413 .bc_rep_portal = MDC_REPLY_PORTAL,
416 .tc_thr_name = LUSTRE_MDT_NAME "_fld",
417 .tc_nthrs_init = MDS_OTHR_NTHRS_INIT,
418 .tc_nthrs_max = MDS_OTHR_NTHRS_MAX,
419 .tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD,
422 .so_req_handler = tgt_request_handle,
423 .so_req_printer = target_print_req,
424 .so_hpreq_handler = NULL,
427 m->mds_fld_service = ptlrpc_register_service(&conf, procfs_entry);
428 if (IS_ERR(m->mds_fld_service)) {
429 rc = PTR_ERR(m->mds_fld_service);
430 CERROR("failed to start fld service: %d\n", rc);
431 m->mds_fld_service = NULL;
433 GOTO(err_mds_svc, rc);
/*
 * Presumably the err_mds_svc target of the GOTO()s above (the label is
 * not visible in this view): unregister whatever was started so far.
 */
439 mds_stop_ptlrpc_service(m);
/*
 * Map lu_device @d to the mds_device that embeds it as
 * mds_md_dev.md_lu_dev.
 */
444 static inline struct mds_device *mds_dev(struct lu_device *d)
446 return container_of0(d, struct mds_device, mds_md_dev.md_lu_dev);
/*
 * Device fini hook (installed as ldto_device_fini): stop all ptlrpc
 * services of the device, then call lprocfs_obd_cleanup() on its obd,
 * presumably undoing the lprocfs_obd_setup() done in mds_device_alloc().
 * The return statement is not visible in this view.
 */
449 static struct lu_device *mds_device_fini(const struct lu_env *env,
452 struct mds_device *m = mds_dev(d);
453 struct obd_device *obd = d->ld_obd;
456 mds_stop_ptlrpc_service(m);
457 lprocfs_obd_cleanup(obd);
/*
 * Device free hook (installed as ldto_device_free, and also called
 * directly from the failure paths of mds_device_alloc()): finalizes the
 * embedded md_device.  Any release of the mds_device memory itself is
 * not visible in this view.
 */
461 static struct lu_device *mds_device_free(const struct lu_env *env,
464 struct mds_device *m = mds_dev(d);
467 md_device_fini(&m->mds_md_dev);
/*
 * procfs variables of the MDS obd; the table is installed as
 * obd->obd_vars in mds_device_alloc().  mds_uuid_fops is presumably
 * generated by the LPROC_SEQ_FOPS_RO_TYPE() invocation below.
 */
472 LPROC_SEQ_FOPS_RO_TYPE(mds, uuid);
474 static struct lprocfs_vars lprocfs_mds_obd_vars[] = {
475 { "uuid", &mds_uuid_fops },
/*
 * Device alloc hook (installed as ldto_device_alloc).
 *
 * Visible steps: initialize the embedded md_device for type @t, look up
 * the obd named by string 0 of @cfg (asserted to exist), install
 * lprocfs_mds_obd_vars and call lprocfs_obd_setup(), initialize
 * mds_health_mutex, then start the ptlrpc services.
 *
 * Returns ERR_PTR(-ENOMEM) on one early path, presumably when allocating
 * the device fails.  The device is released with mds_device_free() when
 * either procfs setup or service start fails.
 */
479 static struct lu_device *mds_device_alloc(const struct lu_env *env,
480 struct lu_device_type *t,
481 struct lustre_cfg *cfg)
483 struct mds_device *m;
484 struct obd_device *obd;
490 return ERR_PTR(-ENOMEM);
492 md_device_init(&m->mds_md_dev, t);
493 l = &m->mds_md_dev.md_lu_dev;
495 obd = class_name2obd(lustre_cfg_string(cfg, 0));
496 LASSERT(obd != NULL);
499 /* set this lu_device to obd, because error handling need it */
502 obd->obd_vars = lprocfs_mds_obd_vars;
503 rc = lprocfs_obd_setup(obd);
505 mds_device_free(env, l);
/* The mutex must be ready before the services start; the start path
 * takes it in mds_stop_ptlrpc_service() when it has to unwind. */
510 mutex_init(&m->mds_health_mutex);
512 rc = mds_start_ptlrpc_service(m);
/* NOTE(review): no lprocfs_obd_cleanup() call is visible on this failure
 * path although lprocfs_obd_setup() succeeded above - confirm. */
515 mds_device_free(env, l);
522 /* type constructor/destructor: mds_type_init, mds_type_fini */
/*
 * Presumably also the source of mds_type_start and mds_type_stop, which
 * mds_device_type_ops references and which are not defined elsewhere in
 * the visible part of this file.
 */
523 LU_TYPE_INIT_FINI(mds, &mdt_thread_key);
/*
 * lu_device_type operations of the MDS device type: the four type-level
 * hooks plus the device alloc/free/fini functions defined in this file.
 */
525 static struct lu_device_type_operations mds_device_type_ops = {
526 .ldto_init = mds_type_init,
527 .ldto_fini = mds_type_fini,
529 .ldto_start = mds_type_start,
530 .ldto_stop = mds_type_stop,
532 .ldto_device_alloc = mds_device_alloc,
533 .ldto_device_free = mds_device_free,
534 .ldto_device_fini = mds_device_fini
/*
 * The MDS lu_device_type, named LUSTRE_MDS_NAME and tagged LU_DEVICE_MD;
 * handed to class_register_type() by mds_mod_init().
 */
537 static struct lu_device_type mds_device_type = {
538 .ldt_tags = LU_DEVICE_MD,
539 .ldt_name = LUSTRE_MDS_NAME,
540 .ldt_ops = &mds_device_type_ops,
541 .ldt_ctx_tags = LCT_MD_THREAD
/*
 * obd health-check hook (installed as o_health_check).
 *
 * ORs together the ptlrpc_service_health_check() results of all seven
 * services of the device while holding mds_health_mutex, the mutex that
 * mds_stop_ptlrpc_service() holds while it unregisters them.
 *
 * Returns 1 if the accumulated result is non-zero, 0 otherwise.
 */
544 static int mds_health_check(const struct lu_env *env, struct obd_device *obd)
546 struct mds_device *mds = mds_dev(obd->obd_lu_dev);
550 mutex_lock(&mds->mds_health_mutex);
551 rc |= ptlrpc_service_health_check(mds->mds_regular_service);
552 rc |= ptlrpc_service_health_check(mds->mds_readpage_service);
553 rc |= ptlrpc_service_health_check(mds->mds_out_service);
554 rc |= ptlrpc_service_health_check(mds->mds_setattr_service);
555 rc |= ptlrpc_service_health_check(mds->mds_mdsc_service);
556 rc |= ptlrpc_service_health_check(mds->mds_mdss_service);
557 rc |= ptlrpc_service_health_check(mds->mds_fld_service);
558 mutex_unlock(&mds->mds_health_mutex);
/* Collapse the OR-ed results into a plain 0/1 answer. */
560 return rc != 0 ? 1 : 0;
/*
 * obd operations of the MDS obd type; only the owner and the health-check
 * hook are set in the visible lines.
 */
563 static struct obd_ops mds_obd_device_ops = {
564 .o_owner = THIS_MODULE,
565 .o_health_check = mds_health_check,
/*
 * Module-level init for the MDS obd type.
 *
 * If only the deprecated mdt_num_threads parameter was set, print a
 * console notice and carry its value over to mds_num_threads.  Then
 * register the LUSTRE_MDS_NAME obd type with mds_obd_device_ops and
 * mds_device_type; the result of class_register_type() is returned to
 * the caller.
 */
568 int mds_mod_init(void)
570 if (mdt_num_threads != 0 && mds_num_threads == 0) {
571 LCONSOLE_INFO("mdt_num_threads module parameter is deprecated, "
572 "use mds_num_threads instead or unset both for "
573 "dynamic thread startup\n");
574 mds_num_threads = mdt_num_threads;
577 return class_register_type(&mds_obd_device_ops, NULL, true, NULL,
578 LUSTRE_MDS_NAME, &mds_device_type);
/*
 * Module-level exit: unregister the LUSTRE_MDS_NAME obd type that
 * mds_mod_init() registered.
 */
581 void mds_mod_exit(void)
583 class_unregister_type(LUSTRE_MDS_NAME);