Whamcloud - gitweb
43d8e729473bbbaef8fe53104b3512d195365bd0
[fs/lustre-release.git] / lustre / mdt / mdt_mds.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2013, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/mdt/mdt_mds.c
32  *
33  * Lustre Metadata Service Layer
34  *
35  * Author: Di Wang <di.wang@whamcloud.com>
36  **/
37
38 #define DEBUG_SUBSYSTEM S_MDS
39
40 #include <linux/module.h>
41
42 #include <obd_support.h>
43 /* struct ptlrpc_request */
44 #include <lustre_net.h>
45 /* struct obd_export */
46 #include <lustre_export.h>
47 /* struct obd_device */
48 #include <obd.h>
49 /* lu2dt_dev() */
50 #include <dt_object.h>
51 #include <lustre_mds.h>
52 #include "mdt_internal.h"
53 #include <lustre_quota.h>
54 #include <lustre_acl.h>
55 #include <uapi/linux/lustre/lustre_param.h>
56
57 struct mds_device {
58         /* super-class */
59         struct md_device         mds_md_dev;
60         struct ptlrpc_service   *mds_regular_service;
61         struct ptlrpc_service   *mds_readpage_service;
62         struct ptlrpc_service   *mds_out_service;
63         struct ptlrpc_service   *mds_setattr_service;
64         struct ptlrpc_service   *mds_mdsc_service;
65         struct ptlrpc_service   *mds_mdss_service;
66         struct ptlrpc_service   *mds_fld_service;
67         struct ptlrpc_service   *mds_io_service;
68         struct mutex             mds_health_mutex;
69 };
70
71 /*
72  *  * Initialized in mds_mod_init().
73  *   */
74 static unsigned long mds_num_threads;
75 module_param(mds_num_threads, ulong, 0444);
76 MODULE_PARM_DESC(mds_num_threads, "number of MDS service threads to start");
77
78 static unsigned int mds_cpu_bind = 1;
79 module_param(mds_cpu_bind, uint, 0444);
80 MODULE_PARM_DESC(mds_cpu_bind,
81                  "bind MDS threads to particular CPU partitions");
82
83 int mds_max_io_threads = 512;
84 module_param(mds_max_io_threads, int, 0444);
85 MODULE_PARM_DESC(mds_max_io_threads,
86                  "maximum number of MDS IO service threads");
87
88 static unsigned int mds_io_cpu_bind = 1;
89 module_param(mds_io_cpu_bind, uint, 0444);
90 MODULE_PARM_DESC(mds_io_cpu_bind,
91                  "bind MDS IO threads to particular CPU partitions");
92
93 static char *mds_io_num_cpts;
94 module_param(mds_io_num_cpts, charp, 0444);
95 MODULE_PARM_DESC(mds_io_num_cpts,
96                  "CPU partitions MDS IO threads should run on");
97
98 static struct cfs_cpt_table *mdt_io_cptable;
99
100 static char *mds_num_cpts;
101 module_param(mds_num_cpts, charp, 0444);
102 MODULE_PARM_DESC(mds_num_cpts, "CPU partitions MDS threads should run on");
103
104 static unsigned long mds_rdpg_num_threads;
105 module_param(mds_rdpg_num_threads, ulong, 0444);
106 MODULE_PARM_DESC(mds_rdpg_num_threads,
107                  "number of MDS readpage service threads to start");
108
109 static unsigned int mds_rdpg_cpu_bind = 1;
110 module_param(mds_rdpg_cpu_bind, uint, 0444);
111 MODULE_PARM_DESC(mds_rdpg_cpu_bind,
112                  "bind MDS readpage threads to particular CPU partitions");
113
114 static char *mds_rdpg_num_cpts;
115 module_param(mds_rdpg_num_cpts, charp, 0444);
116 MODULE_PARM_DESC(mds_rdpg_num_cpts,
117                  "CPU partitions MDS readpage threads should run on");
118
119 /* NB: these two should be removed along with setattr service in the future */
120 static unsigned long mds_attr_num_threads;
121 module_param(mds_attr_num_threads, ulong, 0444);
122 MODULE_PARM_DESC(mds_attr_num_threads,
123                  "number of MDS setattr service threads to start");
124
125 static unsigned int mds_attr_cpu_bind = 1;
126 module_param(mds_attr_cpu_bind, uint, 0444);
127 MODULE_PARM_DESC(mds_attr_cpu_bind,
128                  "bind MDS setattr threads to particular CPU partitions");
129
130 static char *mds_attr_num_cpts;
131 module_param(mds_attr_num_cpts, charp, 0444);
132 MODULE_PARM_DESC(mds_attr_num_cpts,
133                  "CPU partitions MDS setattr threads should run on");
134
135 /* device init/fini methods */
136 static void mds_stop_ptlrpc_service(struct mds_device *m)
137 {
138         ENTRY;
139
140         mutex_lock(&m->mds_health_mutex);
141         if (m->mds_regular_service != NULL) {
142                 ptlrpc_unregister_service(m->mds_regular_service);
143                 m->mds_regular_service = NULL;
144         }
145         if (m->mds_readpage_service != NULL) {
146                 ptlrpc_unregister_service(m->mds_readpage_service);
147                 m->mds_readpage_service = NULL;
148         }
149         if (m->mds_out_service != NULL) {
150                 ptlrpc_unregister_service(m->mds_out_service);
151                 m->mds_out_service = NULL;
152         }
153         if (m->mds_setattr_service != NULL) {
154                 ptlrpc_unregister_service(m->mds_setattr_service);
155                 m->mds_setattr_service = NULL;
156         }
157         if (m->mds_mdsc_service != NULL) {
158                 ptlrpc_unregister_service(m->mds_mdsc_service);
159                 m->mds_mdsc_service = NULL;
160         }
161         if (m->mds_mdss_service != NULL) {
162                 ptlrpc_unregister_service(m->mds_mdss_service);
163                 m->mds_mdss_service = NULL;
164         }
165         if (m->mds_fld_service != NULL) {
166                 ptlrpc_unregister_service(m->mds_fld_service);
167                 m->mds_fld_service = NULL;
168         }
169         if (m->mds_io_service != NULL) {
170                 ptlrpc_unregister_service(m->mds_io_service);
171                 m->mds_io_service = NULL;
172         }
173         mutex_unlock(&m->mds_health_mutex);
174
175         if (mdt_io_cptable != NULL) {
176                 cfs_cpt_table_free(mdt_io_cptable);
177                 mdt_io_cptable = NULL;
178         }
179
180         EXIT;
181 }
182
183 static int mds_start_ptlrpc_service(struct mds_device *m)
184 {
185         static struct ptlrpc_service_conf conf;
186         struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd;
187         nodemask_t *mask;
188         int rc = 0;
189
190         ENTRY;
191
192         conf = (typeof(conf)) {
193                 .psc_name               = LUSTRE_MDT_NAME,
194                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
195                 .psc_buf                = {
196                         .bc_nbufs               = MDS_NBUFS,
197                         .bc_buf_size            = MDS_REG_BUFSIZE,
198                         .bc_req_max_size        = MDS_REG_MAXREQSIZE,
199                         .bc_rep_max_size        = MDS_REG_MAXREPSIZE,
200                         .bc_req_portal          = MDS_REQUEST_PORTAL,
201                         .bc_rep_portal          = MDC_REPLY_PORTAL,
202                 },
203                 /*
204                  * We'd like to have a mechanism to set this on a per-device
205                  * basis, but alas...
206                  */
207                 .psc_thr                = {
208                         .tc_thr_name            = LUSTRE_MDT_NAME,
209                         .tc_thr_factor          = MDS_THR_FACTOR,
210                         .tc_nthrs_init          = MDS_NTHRS_INIT,
211                         .tc_nthrs_base          = MDS_NTHRS_BASE,
212                         .tc_nthrs_max           = MDS_NTHRS_MAX,
213                         .tc_nthrs_user          = mds_num_threads,
214                         .tc_cpu_bind            = mds_cpu_bind,
215                         .tc_ctx_tags            = LCT_MD_THREAD,
216                 },
217                 .psc_cpt                = {
218                         .cc_pattern             = mds_num_cpts,
219                         .cc_affinity            = true,
220                 },
221                 .psc_ops                = {
222                         .so_req_handler         = tgt_request_handle,
223                         .so_req_printer         = target_print_req,
224                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
225                 },
226         };
227         m->mds_regular_service = ptlrpc_register_service(&conf, &obd->obd_kset,
228                                                          obd->obd_debugfs_entry);
229         if (IS_ERR(m->mds_regular_service)) {
230                 rc = PTR_ERR(m->mds_regular_service);
231                 CERROR("failed to start regular mdt service: %d\n", rc);
232                 m->mds_regular_service = NULL;
233
234                 RETURN(rc);
235         }
236
237         /*
238          * readpage service configuration. Parameters have to be adjusted,
239          * ideally.
240          */
241         memset(&conf, 0, sizeof(conf));
242         conf = (typeof(conf)) {
243                 .psc_name               = LUSTRE_MDT_NAME "_readpage",
244                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
245                 .psc_buf                = {
246                         .bc_nbufs               = MDS_NBUFS,
247                         .bc_buf_size            = MDS_BUFSIZE,
248                         .bc_req_max_size        = MDS_MAXREQSIZE,
249                         .bc_rep_max_size        = MDS_MAXREPSIZE,
250                         .bc_req_portal          = MDS_READPAGE_PORTAL,
251                         .bc_rep_portal          = MDC_REPLY_PORTAL,
252                 },
253                 .psc_thr                = {
254                         .tc_thr_name            = LUSTRE_MDT_NAME "_rdpg",
255                         .tc_thr_factor          = MDS_RDPG_THR_FACTOR,
256                         .tc_nthrs_init          = MDS_RDPG_NTHRS_INIT,
257                         .tc_nthrs_base          = MDS_RDPG_NTHRS_BASE,
258                         .tc_nthrs_max           = MDS_RDPG_NTHRS_MAX,
259                         .tc_nthrs_user          = mds_rdpg_num_threads,
260                         .tc_cpu_bind            = mds_rdpg_cpu_bind,
261                         .tc_ctx_tags            = LCT_MD_THREAD,
262                 },
263                 .psc_cpt                = {
264                         .cc_pattern             = mds_rdpg_num_cpts,
265                         .cc_affinity            = true,
266                 },
267                 .psc_ops                = {
268                         .so_req_handler         = tgt_request_handle,
269                         .so_req_printer         = target_print_req,
270                 },
271         };
272         m->mds_readpage_service = ptlrpc_register_service(&conf, &obd->obd_kset,
273                                                           obd->obd_debugfs_entry);
274         if (IS_ERR(m->mds_readpage_service)) {
275                 rc = PTR_ERR(m->mds_readpage_service);
276                 CERROR("failed to start readpage service: %d\n", rc);
277                 m->mds_readpage_service = NULL;
278
279                 GOTO(err_mds_svc, rc);
280         }
281
282         /*
283          * setattr service configuration.
284          *
285          * XXX To keep the compatibility with old client(< 2.2), we need to
286          * preserve this portal for a certain time, it should be removed
287          * eventually. LU-617.
288          */
289         memset(&conf, 0, sizeof(conf));
290         conf = (typeof(conf)) {
291                 .psc_name               = LUSTRE_MDT_NAME "_setattr",
292                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
293                 .psc_buf                = {
294                         .bc_nbufs               = MDS_NBUFS,
295                         .bc_buf_size            = MDS_BUFSIZE,
296                         .bc_req_max_size        = MDS_MAXREQSIZE,
297                         .bc_rep_max_size        = MDS_LOV_MAXREPSIZE,
298                         .bc_req_portal          = MDS_SETATTR_PORTAL,
299                         .bc_rep_portal          = MDC_REPLY_PORTAL,
300                 },
301                 .psc_thr                = {
302                         .tc_thr_name            = LUSTRE_MDT_NAME "_attr",
303                         .tc_thr_factor          = MDS_SETA_THR_FACTOR,
304                         .tc_nthrs_init          = MDS_SETA_NTHRS_INIT,
305                         .tc_nthrs_base          = MDS_SETA_NTHRS_BASE,
306                         .tc_nthrs_max           = MDS_SETA_NTHRS_MAX,
307                         .tc_nthrs_user          = mds_attr_num_threads,
308                         .tc_cpu_bind            = mds_attr_cpu_bind,
309                         .tc_ctx_tags            = LCT_MD_THREAD,
310                 },
311                 .psc_cpt                = {
312                         .cc_pattern             = mds_attr_num_cpts,
313                         .cc_affinity            = true,
314                 },
315                 .psc_ops                = {
316                         .so_req_handler         = tgt_request_handle,
317                         .so_req_printer         = target_print_req,
318                         .so_hpreq_handler       = NULL,
319                 },
320         };
321         m->mds_setattr_service = ptlrpc_register_service(&conf, &obd->obd_kset,
322                                                          obd->obd_debugfs_entry);
323         if (IS_ERR(m->mds_setattr_service)) {
324                 rc = PTR_ERR(m->mds_setattr_service);
325                 CERROR("failed to start setattr service: %d\n", rc);
326                 m->mds_setattr_service = NULL;
327
328                 GOTO(err_mds_svc, rc);
329         }
330
331         /* Object update service */
332         conf = (typeof(conf)) {
333                 .psc_name               = LUSTRE_MDT_NAME "_out",
334                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
335                 .psc_buf                = {
336                         .bc_nbufs               = MDS_NBUFS,
337                         .bc_buf_size            = OUT_BUFSIZE,
338                         .bc_req_max_size        = OUT_MAXREQSIZE,
339                         .bc_rep_max_size        = OUT_MAXREPSIZE,
340                         .bc_req_portal          = OUT_PORTAL,
341                         .bc_rep_portal          = OSC_REPLY_PORTAL,
342                 },
343                 /*
344                  * We'd like to have a mechanism to set this on a per-device
345                  * basis, but alas...
346                  */
347                 .psc_thr                = {
348                         .tc_thr_name            = LUSTRE_MDT_NAME "_out",
349                         .tc_thr_factor          = MDS_THR_FACTOR,
350                         .tc_nthrs_init          = MDS_NTHRS_INIT,
351                         .tc_nthrs_base          = MDS_NTHRS_BASE,
352                         .tc_nthrs_max           = MDS_NTHRS_MAX,
353                         .tc_nthrs_user          = mds_num_threads,
354                         .tc_cpu_bind            = mds_cpu_bind,
355                         .tc_ctx_tags            = LCT_MD_THREAD |
356                                                   LCT_DT_THREAD,
357                 },
358                 .psc_cpt                = {
359                         .cc_pattern             = mds_num_cpts,
360                         .cc_affinity            = true,
361                 },
362                 .psc_ops                = {
363                         .so_req_handler         = tgt_request_handle,
364                         .so_req_printer         = target_print_req,
365                         .so_hpreq_handler       = NULL,
366                 },
367         };
368         m->mds_out_service = ptlrpc_register_service(&conf, &obd->obd_kset,
369                                                      obd->obd_debugfs_entry);
370         if (IS_ERR(m->mds_out_service)) {
371                 rc = PTR_ERR(m->mds_out_service);
372                 CERROR("failed to start out service: %d\n", rc);
373                 m->mds_out_service = NULL;
374                 GOTO(err_mds_svc, rc);
375         }
376
377         /*
378          * sequence controller service configuration
379          */
380         memset(&conf, 0, sizeof(conf));
381         conf = (typeof(conf)) {
382                 .psc_name               = LUSTRE_MDT_NAME "_seqs",
383                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
384                 .psc_buf                = {
385                         .bc_nbufs               = MDS_NBUFS,
386                         .bc_buf_size            = SEQ_BUFSIZE,
387                         .bc_req_max_size        = SEQ_MAXREQSIZE,
388                         .bc_rep_max_size        = SEQ_MAXREPSIZE,
389                         .bc_req_portal          = SEQ_CONTROLLER_PORTAL,
390                         .bc_rep_portal          = MDC_REPLY_PORTAL,
391                 },
392                 .psc_thr                = {
393                         .tc_thr_name            = LUSTRE_MDT_NAME "_seqs",
394                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
395                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
396                         .tc_ctx_tags            = LCT_MD_THREAD,
397                 },
398                 .psc_ops                = {
399                         .so_req_handler         = tgt_request_handle,
400                         .so_req_printer         = target_print_req,
401                         .so_hpreq_handler       = NULL,
402                 },
403         };
404         m->mds_mdsc_service = ptlrpc_register_service(&conf, &obd->obd_kset,
405                                                       obd->obd_debugfs_entry);
406         if (IS_ERR(m->mds_mdsc_service)) {
407                 rc = PTR_ERR(m->mds_mdsc_service);
408                 CERROR("failed to start seq controller service: %d\n", rc);
409                 m->mds_mdsc_service = NULL;
410
411                 GOTO(err_mds_svc, rc);
412         }
413
414         /*
415          * metadata sequence server service configuration
416          */
417         memset(&conf, 0, sizeof(conf));
418         conf = (typeof(conf)) {
419                 .psc_name               = LUSTRE_MDT_NAME "_seqm",
420                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
421                 .psc_buf                = {
422                         .bc_nbufs               = MDS_NBUFS,
423                         .bc_buf_size            = SEQ_BUFSIZE,
424                         .bc_req_max_size        = SEQ_MAXREQSIZE,
425                         .bc_rep_max_size        = SEQ_MAXREPSIZE,
426                         .bc_req_portal          = SEQ_METADATA_PORTAL,
427                         .bc_rep_portal          = MDC_REPLY_PORTAL,
428                 },
429                 .psc_thr                = {
430                         .tc_thr_name            = LUSTRE_MDT_NAME "_seqm",
431                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
432                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
433                         .tc_ctx_tags            = LCT_MD_THREAD | LCT_DT_THREAD
434                 },
435                 .psc_ops                = {
436                         .so_req_handler         = tgt_request_handle,
437                         .so_req_printer         = target_print_req,
438                         .so_hpreq_handler       = NULL,
439                 },
440         };
441         m->mds_mdss_service = ptlrpc_register_service(&conf, &obd->obd_kset,
442                                                       obd->obd_debugfs_entry);
443         if (IS_ERR(m->mds_mdss_service)) {
444                 rc = PTR_ERR(m->mds_mdss_service);
445                 CERROR("failed to start metadata seq server service: %d\n", rc);
446                 m->mds_mdss_service = NULL;
447
448                 GOTO(err_mds_svc, rc);
449         }
450
451         /* FLD service start */
452         memset(&conf, 0, sizeof(conf));
453         conf = (typeof(conf)) {
454                 .psc_name            = LUSTRE_MDT_NAME "_fld",
455                 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
456                 .psc_buf                = {
457                         .bc_nbufs               = MDS_NBUFS,
458                         .bc_buf_size            = FLD_BUFSIZE,
459                         .bc_req_max_size        = FLD_MAXREQSIZE,
460                         .bc_rep_max_size        = FLD_MAXREPSIZE,
461                         .bc_req_portal          = FLD_REQUEST_PORTAL,
462                         .bc_rep_portal          = MDC_REPLY_PORTAL,
463                 },
464                 .psc_thr                = {
465                         .tc_thr_name            = LUSTRE_MDT_NAME "_fld",
466                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
467                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
468                         .tc_ctx_tags            = LCT_DT_THREAD | LCT_MD_THREAD,
469                 },
470                 .psc_ops                = {
471                         .so_req_handler         = tgt_request_handle,
472                         .so_req_printer         = target_print_req,
473                         .so_hpreq_handler       = NULL,
474                 },
475         };
476         m->mds_fld_service = ptlrpc_register_service(&conf, &obd->obd_kset,
477                                                      obd->obd_debugfs_entry);
478         if (IS_ERR(m->mds_fld_service)) {
479                 rc = PTR_ERR(m->mds_fld_service);
480                 CERROR("failed to start fld service: %d\n", rc);
481                 m->mds_fld_service = NULL;
482
483                 GOTO(err_mds_svc, rc);
484         }
485
486
487         mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
488         /* event CPT feature is disabled in libcfs level by set partition
489          * number to 1, we still want to set node affinity for io service */
490         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
491                 int cpt = 0;
492                 int i;
493
494                 mdt_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
495                 for_each_node_mask(i, *mask) {
496                         if (mdt_io_cptable == NULL) {
497                                 CWARN("MDS failed to create CPT table\n");
498                                 break;
499                         }
500
501                         rc = cfs_cpt_set_node(mdt_io_cptable, cpt++, i);
502                         if (!rc) {
503                                 CWARN("MDS Failed to set node %d for"
504                                       "IO CPT table\n", i);
505                                 cfs_cpt_table_free(mdt_io_cptable);
506                                 mdt_io_cptable = NULL;
507                                 break;
508                         }
509                 }
510         }
511
512         memset(&conf, 0, sizeof(conf));
513         conf = (typeof(conf)) {
514                 .psc_name               = LUSTRE_MDT_NAME "_io",
515                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
516                 .psc_buf                = {
517                         .bc_nbufs               = OST_NBUFS,
518                         .bc_buf_size            = OST_IO_BUFSIZE,
519                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
520                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
521                         .bc_req_portal          = MDS_IO_PORTAL,
522                         .bc_rep_portal          = MDC_REPLY_PORTAL,
523                 },
524                 .psc_thr                = {
525                         .tc_thr_name            = LUSTRE_MDT_NAME "_io",
526                         .tc_thr_factor          = OSS_THR_FACTOR,
527                         .tc_nthrs_init          = OSS_NTHRS_INIT,
528                         .tc_nthrs_base          = OSS_NTHRS_BASE,
529                         .tc_nthrs_max           = mds_max_io_threads,
530                         .tc_nthrs_user          = mds_num_threads,
531                         .tc_cpu_bind            = mds_io_cpu_bind,
532                         .tc_ctx_tags            = LCT_DT_THREAD | LCT_MD_THREAD,
533                 },
534                 .psc_cpt                = {
535                         .cc_cptable             = mdt_io_cptable,
536                         .cc_pattern             = mdt_io_cptable == NULL ?
537                                                   mds_io_num_cpts : NULL,
538                         .cc_affinity            = true,
539                 },
540                 .psc_ops                = {
541                         .so_thr_init            = tgt_io_thread_init,
542                         .so_thr_done            = tgt_io_thread_done,
543                         .so_req_handler         = tgt_request_handle,
544                         .so_req_printer         = target_print_req,
545                         .so_hpreq_handler       = tgt_hpreq_handler,
546                 },
547         };
548         m->mds_io_service = ptlrpc_register_service(&conf, &obd->obd_kset,
549                                                     obd->obd_debugfs_entry);
550         if (IS_ERR(m->mds_io_service)) {
551                 rc = PTR_ERR(m->mds_io_service);
552                 CERROR("failed to start MDT I/O service: %d\n", rc);
553                 m->mds_io_service = NULL;
554                 GOTO(err_mds_svc, rc);
555         }
556
557         EXIT;
558 err_mds_svc:
559         if (rc)
560                 mds_stop_ptlrpc_service(m);
561
562         return rc;
563 }
564
565 static inline struct mds_device *mds_dev(struct lu_device *d)
566 {
567         return container_of0(d, struct mds_device, mds_md_dev.md_lu_dev);
568 }
569
570 static struct lu_device *mds_device_fini(const struct lu_env *env,
571                                          struct lu_device *d)
572 {
573         struct mds_device *m = mds_dev(d);
574         struct obd_device *obd = d->ld_obd;
575         ENTRY;
576
577         mds_stop_ptlrpc_service(m);
578         lprocfs_obd_cleanup(obd);
579         RETURN(NULL);
580 }
581
582 static struct lu_device *mds_device_free(const struct lu_env *env,
583                                          struct lu_device *d)
584 {
585         struct mds_device *m = mds_dev(d);
586         ENTRY;
587
588         md_device_fini(&m->mds_md_dev);
589         OBD_FREE_PTR(m);
590         RETURN(NULL);
591 }
592
593 static struct lu_device *mds_device_alloc(const struct lu_env *env,
594                                           struct lu_device_type *t,
595                                           struct lustre_cfg *cfg)
596 {
597         struct mds_device        *m;
598         struct obd_device        *obd;
599         struct lu_device          *l;
600         int rc;
601
602         OBD_ALLOC_PTR(m);
603         if (m == NULL)
604                 return ERR_PTR(-ENOMEM);
605
606         md_device_init(&m->mds_md_dev, t);
607         l = &m->mds_md_dev.md_lu_dev;
608
609         obd = class_name2obd(lustre_cfg_string(cfg, 0));
610         LASSERT(obd != NULL);
611
612         l->ld_obd = obd;
613         /* set this lu_device to obd, because error handling need it */
614         obd->obd_lu_dev = l;
615
616         rc = lprocfs_obd_setup(obd, true);
617         if (rc != 0) {
618                 mds_device_free(env, l);
619                 l = ERR_PTR(rc);
620                 return l;
621         }
622
623         mutex_init(&m->mds_health_mutex);
624
625         rc = mds_start_ptlrpc_service(m);
626         if (rc != 0) {
627                 lprocfs_obd_cleanup(obd);
628                 mds_device_free(env, l);
629                 l = ERR_PTR(rc);
630                 return l;
631         }
632         return l;
633 }
634
635 /* type constructor/destructor: mdt_type_init, mdt_type_fini */
636 LU_TYPE_INIT_FINI(mds, &mdt_thread_key);
637
638 static struct lu_device_type_operations mds_device_type_ops = {
639         .ldto_init = mds_type_init,
640         .ldto_fini = mds_type_fini,
641
642         .ldto_start = mds_type_start,
643         .ldto_stop  = mds_type_stop,
644
645         .ldto_device_alloc = mds_device_alloc,
646         .ldto_device_free  = mds_device_free,
647         .ldto_device_fini  = mds_device_fini
648 };
649
650 static struct lu_device_type mds_device_type = {
651         .ldt_tags     = LU_DEVICE_MD,
652         .ldt_name     = LUSTRE_MDS_NAME,
653         .ldt_ops      = &mds_device_type_ops,
654         .ldt_ctx_tags = LCT_MD_THREAD
655 };
656
657 static int mds_health_check(const struct lu_env *env, struct obd_device *obd)
658 {
659         struct mds_device *mds = mds_dev(obd->obd_lu_dev);
660         int rc = 0;
661
662
663         mutex_lock(&mds->mds_health_mutex);
664         rc |= ptlrpc_service_health_check(mds->mds_regular_service);
665         rc |= ptlrpc_service_health_check(mds->mds_readpage_service);
666         rc |= ptlrpc_service_health_check(mds->mds_out_service);
667         rc |= ptlrpc_service_health_check(mds->mds_setattr_service);
668         rc |= ptlrpc_service_health_check(mds->mds_mdsc_service);
669         rc |= ptlrpc_service_health_check(mds->mds_mdss_service);
670         rc |= ptlrpc_service_health_check(mds->mds_fld_service);
671         rc |= ptlrpc_service_health_check(mds->mds_io_service);
672         mutex_unlock(&mds->mds_health_mutex);
673
674         return rc != 0 ? 1 : 0;
675 }
676
677 static struct obd_ops mds_obd_device_ops = {
678         .o_owner           = THIS_MODULE,
679         .o_health_check    = mds_health_check,
680 };
681
682 int mds_mod_init(void)
683 {
684         return class_register_type(&mds_obd_device_ops, NULL, false, NULL,
685                                    LUSTRE_MDS_NAME, &mds_device_type);
686 }
687
688 void mds_mod_exit(void)
689 {
690         class_unregister_type(LUSTRE_MDS_NAME);
691 }