4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2013, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * lustre/mdt/mdt_mds.c
33 * Lustre Metadata Service Layer
35 * Author: Di Wang <di.wang@whamcloud.com>
38 #define DEBUG_SUBSYSTEM S_MDS
40 #include <linux/module.h>
42 #include <obd_support.h>
43 /* struct ptlrpc_request */
44 #include <lustre_net.h>
45 /* struct obd_export */
46 #include <lustre_export.h>
47 /* struct obd_device */
50 #include <dt_object.h>
51 #include <lustre_mds.h>
52 #include "mdt_internal.h"
53 #include <lustre_quota.h>
54 #include <lustre_acl.h>
55 #include <lustre_param.h>
/*
 * NOTE(review): these declarations are reached as m->mds_* through a
 * struct mds_device pointer elsewhere in this file, so they are presumably
 * the members of struct mds_device; the enclosing struct header is not
 * visible here -- confirm.
 */
/* Embedded md_device; mds_dev() recovers the owner from its md_lu_dev. */
59 struct md_device mds_md_dev;
/*
 * Service handles, one per request portal registered in
 * mds_start_ptlrpc_service(); each is reset to NULL when registration fails
 * and after it has been unregistered in mds_stop_ptlrpc_service().
 */
60 struct ptlrpc_service *mds_regular_service; /* MDS_REQUEST_PORTAL */
61 struct ptlrpc_service *mds_readpage_service; /* MDS_READPAGE_PORTAL */
62 struct ptlrpc_service *mds_out_service; /* OUT_PORTAL */
63 struct ptlrpc_service *mds_setattr_service; /* MDS_SETATTR_PORTAL */
64 struct ptlrpc_service *mds_mdsc_service; /* SEQ_CONTROLLER_PORTAL */
65 struct ptlrpc_service *mds_mdss_service; /* SEQ_METADATA_PORTAL */
66 struct ptlrpc_service *mds_fld_service; /* FLD_REQUEST_PORTAL */
70 * Initialized in mdt_mod_init().
/*
 * Deprecated spelling of mds_num_threads: mds_mod_init() copies it into
 * mds_num_threads when it is non-zero and mds_num_threads is still 0.
 */
72 static unsigned long mdt_num_threads;
73 CFS_MODULE_PARM(mdt_num_threads, "ul", ulong, 0444,
74 "number of MDS service threads to start "
75 "(deprecated in favor of mds_num_threads)");
/* Passed as tc_nthrs_user for the regular and the object update (out) services. */
77 static unsigned long mds_num_threads;
78 CFS_MODULE_PARM(mds_num_threads, "ul", ulong, 0444,
79 "number of MDS service threads to start");
/* Passed as cc_pattern for the regular and the object update (out) services. */
81 static char *mds_num_cpts;
82 CFS_MODULE_PARM(mds_num_cpts, "c", charp, 0444,
83 "CPU partitions MDS threads should run on");
/* Passed as tc_nthrs_user for the readpage service. */
85 static unsigned long mds_rdpg_num_threads;
86 CFS_MODULE_PARM(mds_rdpg_num_threads, "ul", ulong, 0444,
87 "number of MDS readpage service threads to start");
/* Passed as cc_pattern for the readpage service. */
89 static char *mds_rdpg_num_cpts;
90 CFS_MODULE_PARM(mds_rdpg_num_cpts, "c", charp, 0444,
91 "CPU partitions MDS readpage threads should run on");
93 /* NB: these two should be removed along with setattr service in the future */
/* Passed as tc_nthrs_user for the setattr service. */
94 static unsigned long mds_attr_num_threads;
95 CFS_MODULE_PARM(mds_attr_num_threads, "ul", ulong, 0444,
96 "number of MDS setattr service threads to start");
/* Passed as cc_pattern for the setattr service. */
98 static char *mds_attr_num_cpts;
99 CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444,
100 "CPU partitions MDS setattr threads should run on");
102 /* device init/fini methods */
/*
 * Unregister every ptlrpc service that is currently set on \a m.
 *
 * Each handle is tested against NULL before ptlrpc_unregister_service() is
 * called and is cleared afterwards, so handles that were never registered
 * are skipped and a second call finds nothing left to do.  Called from
 * mds_device_fini() and from the tail of mds_start_ptlrpc_service().
 *
 * \param[in] m	MDS device whose service handles are torn down
 */
103 static void mds_stop_ptlrpc_service(struct mds_device *m)
106 if (m->mds_regular_service != NULL) {
107 ptlrpc_unregister_service(m->mds_regular_service);
108 m->mds_regular_service = NULL;
110 if (m->mds_readpage_service != NULL) {
111 ptlrpc_unregister_service(m->mds_readpage_service);
112 m->mds_readpage_service = NULL;
114 if (m->mds_out_service != NULL) {
115 ptlrpc_unregister_service(m->mds_out_service);
116 m->mds_out_service = NULL;
118 if (m->mds_setattr_service != NULL) {
119 ptlrpc_unregister_service(m->mds_setattr_service);
120 m->mds_setattr_service = NULL;
122 if (m->mds_mdsc_service != NULL) {
123 ptlrpc_unregister_service(m->mds_mdsc_service);
124 m->mds_mdsc_service = NULL;
126 if (m->mds_mdss_service != NULL) {
127 ptlrpc_unregister_service(m->mds_mdss_service);
128 m->mds_mdss_service = NULL;
130 if (m->mds_fld_service != NULL) {
131 ptlrpc_unregister_service(m->mds_fld_service);
132 m->mds_fld_service = NULL;
/*
 * Register the ptlrpc services of an MDS device, in this order: regular
 * request, readpage, setattr, object update (out), sequence controller,
 * metadata sequence server and FLD.
 *
 * For every service the shared `conf` is refilled from a compound literal
 * and handed to ptlrpc_register_service() together with the procfs entry of
 * the obd.  When registration returns an error pointer, the code takes rc
 * from PTR_ERR(), logs it with CERROR() and resets the handle to NULL; the
 * failure branches visible here then jump to err_mds_svc.
 *
 * NOTE(review): the err_mds_svc label and the return statements are not
 * visible in this view; presumably the label precedes the final
 * mds_stop_ptlrpc_service(m) call and rc is returned -- confirm.
 *
 * \param[in] m	MDS device that receives the service handles
 */
137 static int mds_start_ptlrpc_service(struct mds_device *m)
/* Function-local static: one instance shared by all calls, reused per service. */
139 static struct ptlrpc_service_conf conf;
140 struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd;
141 struct proc_dir_entry *procfs_entry;
/* All services are registered under the obd's procfs entry, which must exist. */
145 procfs_entry = obd->obd_proc_entry;
146 LASSERT(procfs_entry != NULL);
/* Regular request service: thread count and CPT pattern come from mds_num_*. */
148 conf = (typeof(conf)) {
149 .psc_name = LUSTRE_MDT_NAME,
150 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
152 .bc_nbufs = MDS_NBUFS,
153 .bc_buf_size = MDS_REG_BUFSIZE,
154 .bc_req_max_size = MDS_REG_MAXREQSIZE,
155 .bc_rep_max_size = MDS_REG_MAXREPSIZE,
156 .bc_req_portal = MDS_REQUEST_PORTAL,
157 .bc_rep_portal = MDC_REPLY_PORTAL,
160 * We'd like to have a mechanism to set this on a per-device
164 .tc_thr_name = LUSTRE_MDT_NAME,
165 .tc_thr_factor = MDS_THR_FACTOR,
166 .tc_nthrs_init = MDS_NTHRS_INIT,
167 .tc_nthrs_base = MDS_NTHRS_BASE,
168 .tc_nthrs_max = MDS_NTHRS_MAX,
169 .tc_nthrs_user = mds_num_threads,
170 .tc_cpu_affinity = 1,
171 .tc_ctx_tags = LCT_MD_THREAD,
174 .cc_pattern = mds_num_cpts,
177 .so_req_handler = tgt_request_handle,
178 .so_req_printer = target_print_req,
179 .so_hpreq_handler = ptlrpc_hpreq_handler,
182 m->mds_regular_service = ptlrpc_register_service(&conf, procfs_entry);
/* Never leave an error pointer in the handle: the stop path only checks for NULL. */
183 if (IS_ERR(m->mds_regular_service)) {
184 rc = PTR_ERR(m->mds_regular_service);
185 CERROR("failed to start regular mdt service: %d\n", rc);
186 m->mds_regular_service = NULL;
192 * readpage service configuration. Parameters have to be adjusted,
/*
 * NOTE(review): the memset() looks redundant, since the compound-literal
 * assignment that follows overwrites every member of conf (members that are
 * not named are zero-initialized); the object update service below is set
 * up without it.
 */
195 memset(&conf, 0, sizeof(conf));
196 conf = (typeof(conf)) {
197 .psc_name = LUSTRE_MDT_NAME "_readpage",
198 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
200 .bc_nbufs = MDS_NBUFS,
201 .bc_buf_size = MDS_BUFSIZE,
202 .bc_req_max_size = MDS_MAXREQSIZE,
203 .bc_rep_max_size = MDS_MAXREPSIZE,
204 .bc_req_portal = MDS_READPAGE_PORTAL,
205 .bc_rep_portal = MDC_REPLY_PORTAL,
208 .tc_thr_name = LUSTRE_MDT_NAME "_rdpg",
209 .tc_thr_factor = MDS_RDPG_THR_FACTOR,
210 .tc_nthrs_init = MDS_RDPG_NTHRS_INIT,
211 .tc_nthrs_base = MDS_RDPG_NTHRS_BASE,
212 .tc_nthrs_max = MDS_RDPG_NTHRS_MAX,
213 .tc_nthrs_user = mds_rdpg_num_threads,
214 .tc_cpu_affinity = 1,
215 .tc_ctx_tags = LCT_MD_THREAD,
218 .cc_pattern = mds_rdpg_num_cpts,
221 .so_req_handler = tgt_request_handle,
222 .so_req_printer = target_print_req,
225 m->mds_readpage_service = ptlrpc_register_service(&conf, procfs_entry);
226 if (IS_ERR(m->mds_readpage_service)) {
227 rc = PTR_ERR(m->mds_readpage_service);
228 CERROR("failed to start readpage service: %d\n", rc);
229 m->mds_readpage_service = NULL;
231 GOTO(err_mds_svc, rc);
235 * setattr service configuration.
237 * XXX To keep the compatibility with old client(< 2.2), we need to
238 * preserve this portal for a certain time, it should be removed
239 * eventually. LU-617.
241 memset(&conf, 0, sizeof(conf));
242 conf = (typeof(conf)) {
243 .psc_name = LUSTRE_MDT_NAME "_setattr",
244 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
246 .bc_nbufs = MDS_NBUFS,
247 .bc_buf_size = MDS_BUFSIZE,
248 .bc_req_max_size = MDS_MAXREQSIZE,
249 .bc_rep_max_size = MDS_LOV_MAXREPSIZE,
250 .bc_req_portal = MDS_SETATTR_PORTAL,
251 .bc_rep_portal = MDC_REPLY_PORTAL,
254 .tc_thr_name = LUSTRE_MDT_NAME "_attr",
255 .tc_thr_factor = MDS_SETA_THR_FACTOR,
256 .tc_nthrs_init = MDS_SETA_NTHRS_INIT,
257 .tc_nthrs_base = MDS_SETA_NTHRS_BASE,
258 .tc_nthrs_max = MDS_SETA_NTHRS_MAX,
259 .tc_nthrs_user = mds_attr_num_threads,
260 .tc_cpu_affinity = 1,
261 .tc_ctx_tags = LCT_MD_THREAD,
264 .cc_pattern = mds_attr_num_cpts,
267 .so_req_handler = tgt_request_handle,
268 .so_req_printer = target_print_req,
269 .so_hpreq_handler = NULL,
272 m->mds_setattr_service = ptlrpc_register_service(&conf, procfs_entry);
273 if (IS_ERR(m->mds_setattr_service)) {
274 rc = PTR_ERR(m->mds_setattr_service);
275 CERROR("failed to start setattr service: %d\n", rc);
276 m->mds_setattr_service = NULL;
278 GOTO(err_mds_svc, rc);
281 /* Object update service */
/*
 * Reuses the thread tunables and the mds_num_threads / mds_num_cpts module
 * parameters of the regular service, but replies on OSC_REPLY_PORTAL.
 */
282 conf = (typeof(conf)) {
283 .psc_name = LUSTRE_MDT_NAME "_out",
284 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
286 .bc_nbufs = MDS_NBUFS,
287 .bc_buf_size = OUT_BUFSIZE,
288 .bc_req_max_size = OUT_MAXREQSIZE,
289 .bc_rep_max_size = OUT_MAXREPSIZE,
290 .bc_req_portal = OUT_PORTAL,
291 .bc_rep_portal = OSC_REPLY_PORTAL,
294 * We'd like to have a mechanism to set this on a per-device
298 .tc_thr_name = LUSTRE_MDT_NAME "_out",
299 .tc_thr_factor = MDS_THR_FACTOR,
300 .tc_nthrs_init = MDS_NTHRS_INIT,
301 .tc_nthrs_base = MDS_NTHRS_BASE,
302 .tc_nthrs_max = MDS_NTHRS_MAX,
303 .tc_nthrs_user = mds_num_threads,
304 .tc_cpu_affinity = 1,
305 .tc_ctx_tags = LCT_MD_THREAD | /* NOTE(review): rest of expression not visible here */
309 .cc_pattern = mds_num_cpts,
312 .so_req_handler = tgt_request_handle,
313 .so_req_printer = target_print_req,
314 .so_hpreq_handler = NULL,
317 m->mds_out_service = ptlrpc_register_service(&conf, procfs_entry);
318 if (IS_ERR(m->mds_out_service)) {
319 rc = PTR_ERR(m->mds_out_service);
320 CERROR("failed to start out service: %d\n", rc);
321 m->mds_out_service = NULL;
322 GOTO(err_mds_svc, rc);
326 * sequence controller service configuration
/*
 * The three services below (seqs, seqm, fld) set only tc_nthrs_init and
 * tc_nthrs_max from the MDS_OTHR_* constants; no user thread count, CPU
 * affinity or CPT pattern is given for them.
 */
328 memset(&conf, 0, sizeof(conf));
329 conf = (typeof(conf)) {
330 .psc_name = LUSTRE_MDT_NAME "_seqs",
331 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
333 .bc_nbufs = MDS_NBUFS,
334 .bc_buf_size = SEQ_BUFSIZE,
335 .bc_req_max_size = SEQ_MAXREQSIZE,
336 .bc_rep_max_size = SEQ_MAXREPSIZE,
337 .bc_req_portal = SEQ_CONTROLLER_PORTAL,
338 .bc_rep_portal = MDC_REPLY_PORTAL,
341 .tc_thr_name = LUSTRE_MDT_NAME "_seqs",
342 .tc_nthrs_init = MDS_OTHR_NTHRS_INIT,
343 .tc_nthrs_max = MDS_OTHR_NTHRS_MAX,
344 .tc_ctx_tags = LCT_MD_THREAD,
347 .so_req_handler = tgt_request_handle,
348 .so_req_printer = target_print_req,
349 .so_hpreq_handler = NULL,
352 m->mds_mdsc_service = ptlrpc_register_service(&conf, procfs_entry);
353 if (IS_ERR(m->mds_mdsc_service)) {
354 rc = PTR_ERR(m->mds_mdsc_service);
355 CERROR("failed to start seq controller service: %d\n", rc);
356 m->mds_mdsc_service = NULL;
358 GOTO(err_mds_svc, rc);
362 * metadata sequence server service configuration
364 memset(&conf, 0, sizeof(conf));
365 conf = (typeof(conf)) {
366 .psc_name = LUSTRE_MDT_NAME "_seqm",
367 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
369 .bc_nbufs = MDS_NBUFS,
370 .bc_buf_size = SEQ_BUFSIZE,
371 .bc_req_max_size = SEQ_MAXREQSIZE,
372 .bc_rep_max_size = SEQ_MAXREPSIZE,
373 .bc_req_portal = SEQ_METADATA_PORTAL,
374 .bc_rep_portal = MDC_REPLY_PORTAL,
377 .tc_thr_name = LUSTRE_MDT_NAME "_seqm",
378 .tc_nthrs_init = MDS_OTHR_NTHRS_INIT,
379 .tc_nthrs_max = MDS_OTHR_NTHRS_MAX,
380 .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD
383 .so_req_handler = tgt_request_handle,
384 .so_req_printer = target_print_req,
385 .so_hpreq_handler = NULL,
388 m->mds_mdss_service = ptlrpc_register_service(&conf, procfs_entry);
389 if (IS_ERR(m->mds_mdss_service)) {
390 rc = PTR_ERR(m->mds_mdss_service);
391 CERROR("failed to start metadata seq server service: %d\n", rc);
392 m->mds_mdss_service = NULL;
394 GOTO(err_mds_svc, rc);
397 /* FLD service start */
398 memset(&conf, 0, sizeof(conf));
399 conf = (typeof(conf)) {
400 .psc_name = LUSTRE_MDT_NAME "_fld",
401 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
403 .bc_nbufs = MDS_NBUFS,
404 .bc_buf_size = FLD_BUFSIZE,
405 .bc_req_max_size = FLD_MAXREQSIZE,
406 .bc_rep_max_size = FLD_MAXREPSIZE,
407 .bc_req_portal = FLD_REQUEST_PORTAL,
408 .bc_rep_portal = MDC_REPLY_PORTAL,
411 .tc_thr_name = LUSTRE_MDT_NAME "_fld",
412 .tc_nthrs_init = MDS_OTHR_NTHRS_INIT,
413 .tc_nthrs_max = MDS_OTHR_NTHRS_MAX,
414 .tc_ctx_tags = LCT_DT_THREAD | LCT_MD_THREAD,
417 .so_req_handler = tgt_request_handle,
418 .so_req_printer = target_print_req,
419 .so_hpreq_handler = NULL,
422 m->mds_fld_service = ptlrpc_register_service(&conf, procfs_entry);
423 if (IS_ERR(m->mds_fld_service)) {
424 rc = PTR_ERR(m->mds_fld_service);
425 CERROR("failed to start fld service: %d\n", rc);
426 m->mds_fld_service = NULL;
428 GOTO(err_mds_svc, rc);
/* Unregisters whatever was registered so far; unset handles are skipped. */
434 mds_stop_ptlrpc_service(m);
/*
 * Convert a lu_device pointer into the mds_device that contains it as
 * mds_md_dev.md_lu_dev, using container_of0().
 *
 * \param[in] d	lu_device embedded in an mds_device
 */
439 static inline struct mds_device *mds_dev(struct lu_device *d)
441 return container_of0(d, struct mds_device, mds_md_dev.md_lu_dev);
/*
 * Device fini method (installed as .ldto_device_fini): stop all ptlrpc
 * services of the device, then clean up the lprocfs state of its obd.
 *
 * NOTE(review): the remaining parameter list and the return statement are
 * not visible in this view; `d` is the lu_device being finalized.
 */
444 static struct lu_device *mds_device_fini(const struct lu_env *env,
447 struct mds_device *m = mds_dev(d);
448 struct obd_device *obd = d->ld_obd;
/* Services go first, then the obd's lprocfs entries. */
451 mds_stop_ptlrpc_service(m);
452 lprocfs_obd_cleanup(obd);
/*
 * Device free method (installed as .ldto_device_free, also used by the
 * error paths of mds_device_alloc()): finalize the embedded md_device.
 *
 * NOTE(review): the release of the mds_device memory and the return
 * statement are not visible in this view -- confirm against the full file.
 */
456 static struct lu_device *mds_device_free(const struct lu_env *env,
459 struct mds_device *m = mds_dev(d);
462 md_device_fini(&m->mds_md_dev);
/* Presumably generates mds_uuid_fops, which the table below refers to. */
467 LPROC_SEQ_FOPS_RO_TYPE(mds, uuid);
/*
 * lprocfs variable table of the MDS obd; mds_device_alloc() stores it in
 * obd->obd_vars before calling lprocfs_obd_setup().
 */
469 static struct lprocfs_seq_vars lprocfs_mds_obd_vars[] = {
470 { "uuid", &mds_uuid_fops },
/*
 * Device alloc method (installed as .ldto_device_alloc).
 *
 * Initializes the md_device embedded in a new mds_device, looks up the obd
 * named by string 0 of \a cfg, installs lprocfs_mds_obd_vars and runs
 * lprocfs_obd_setup(), then starts the ptlrpc services.
 *
 * ERR_PTR(-ENOMEM) is returned on the allocation-failure path.  The two
 * later failure branches both call mds_device_free(env, l).
 *
 * NOTE(review): the allocation itself, the rc checks and the final return
 * are not visible in this view -- confirm against the full file.
 *
 * \param[in] env	execution environment, passed through to mds_device_free()
 * \param[in] t		device type handed to md_device_init()
 * \param[in] cfg	configuration record; string 0 names the obd
 */
474 static struct lu_device *mds_device_alloc(const struct lu_env *env,
475 struct lu_device_type *t,
476 struct lustre_cfg *cfg)
478 struct mds_device *m;
479 struct obd_device *obd;
485 return ERR_PTR(-ENOMEM);
487 md_device_init(&m->mds_md_dev, t);
488 l = &m->mds_md_dev.md_lu_dev;
/* The obd must already exist under the configured name. */
490 obd = class_name2obd(lustre_cfg_string(cfg, 0));
491 LASSERT(obd != NULL);
494 /* set this lu_device to obd, because error handling needs it */
497 obd->obd_vars = lprocfs_mds_obd_vars;
498 rc = lprocfs_obd_setup(obd);
500 mds_device_free(env, l);
/* mds_start_ptlrpc_service() needs obd->obd_proc_entry, hence after the setup above. */
505 rc = mds_start_ptlrpc_service(m);
508 mds_device_free(env, l);
515 /* type constructor/destructor: mds_type_init, mds_type_fini (the names used in mds_device_type_ops; presumably generated by this macro, along with mds_type_start/mds_type_stop) */
516 LU_TYPE_INIT_FINI(mds, &mdt_thread_key);
/*
 * Operations of the MDS device type: type-level init/fini/start/stop hooks
 * plus the per-device alloc/free/fini methods defined in this file.
 */
518 static struct lu_device_type_operations mds_device_type_ops = {
519 .ldto_init = mds_type_init,
520 .ldto_fini = mds_type_fini,
522 .ldto_start = mds_type_start,
523 .ldto_stop = mds_type_stop,
525 .ldto_device_alloc = mds_device_alloc,
526 .ldto_device_free = mds_device_free,
527 .ldto_device_fini = mds_device_fini
/*
 * MDS device type descriptor, registered under LUSTRE_MDS_NAME by
 * mds_mod_init(); it binds mds_device_type_ops to that name.
 */
530 static struct lu_device_type mds_device_type = {
531 .ldt_tags = LU_DEVICE_MD,
532 .ldt_name = LUSTRE_MDS_NAME,
533 .ldt_ops = &mds_device_type_ops,
534 .ldt_ctx_tags = LCT_MD_THREAD
/* obd operations of the MDS type; only the owning module is set here. */
537 static struct obd_ops mds_obd_device_ops = {
538 .o_owner = THIS_MODULE,
/*
 * Register the MDS obd type.
 *
 * The deprecated mdt_num_threads module parameter is honoured first: when
 * it is non-zero and mds_num_threads is 0, an informational console message
 * is printed and its value is copied into mds_num_threads.
 *
 * \retval	result of class_register_type() for LUSTRE_MDS_NAME
 */
541 int mds_mod_init(void)
/* An explicitly set mds_num_threads takes precedence over the old name. */
543 if (mdt_num_threads != 0 && mds_num_threads == 0) {
544 LCONSOLE_INFO("mdt_num_threads module parameter is deprecated, "
545 "use mds_num_threads instead or unset both for "
546 "dynamic thread startup\n");
547 mds_num_threads = mdt_num_threads;
550 return class_register_type(&mds_obd_device_ops, NULL, true, NULL,
551 LUSTRE_MDS_NAME, &mds_device_type);
/* Unregister the obd type that mds_mod_init() registered under LUSTRE_MDS_NAME. */
554 void mds_mod_exit(void)
556 class_unregister_type(LUSTRE_MDS_NAME);