Whamcloud - gitweb
LU-17705 ptlrpc: replace synchronize_rcu() with rcu_barrier()
[fs/lustre-release.git] / lustre / mdt / mdt_mds.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2013, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/mdt/mdt_mds.c
32  *
33  * Lustre Metadata Service Layer
34  *
35  * Author: Di Wang <di.wang@whamcloud.com>
36  **/
37
38 #define DEBUG_SUBSYSTEM S_MDS
39
40 #include <linux/module.h>
41
42 #include <obd_support.h>
43 /* struct ptlrpc_request */
44 #include <lustre_net.h>
45 /* struct obd_export */
46 #include <lustre_export.h>
47 /* struct obd_device */
48 #include <obd.h>
49 /* lu2dt_dev() */
50 #include <dt_object.h>
51 #include <lustre_mds.h>
52 #include "mdt_internal.h"
53 #include <lustre_quota.h>
54 #include <lustre_acl.h>
55 #include <uapi/linux/lustre/lustre_param.h>
56
/*
 * Per-device state of the MDS layer: the md_device super-class plus one
 * handle for every ptlrpc service started by mds_start_ptlrpc_service().
 * A service pointer is NULL when that service is not running.
 */
struct mds_device {
        /* super-class */
        struct md_device         mds_md_dev;
        /* regular metadata requests (MDS_REQUEST_PORTAL) */
        struct ptlrpc_service   *mds_regular_service;
        /* readpage requests (MDS_READPAGE_PORTAL) */
        struct ptlrpc_service   *mds_readpage_service;
        /* object update requests (OUT_PORTAL) */
        struct ptlrpc_service   *mds_out_service;
        /* sequence controller (SEQ_CONTROLLER_PORTAL) */
        struct ptlrpc_service   *mds_mdsc_service;
        /* metadata sequence server (SEQ_METADATA_PORTAL) */
        struct ptlrpc_service   *mds_mdss_service;
        /* FLD requests (FLD_REQUEST_PORTAL) */
        struct ptlrpc_service   *mds_fld_service;
        /* MDT I/O requests (MDS_IO_PORTAL) */
        struct ptlrpc_service   *mds_io_service;
        /* held while the services are unregistered in
         * mds_stop_ptlrpc_service() and while they are examined in
         * mds_health_check() */
        struct mutex             mds_health_mutex;
};
69
/*
 * Module parameters. All of them are registered with mode 0444 and are
 * consumed by mds_start_ptlrpc_service() when the services are configured.
 */
static unsigned long mds_num_threads;
module_param(mds_num_threads, ulong, 0444);
MODULE_PARM_DESC(mds_num_threads, "number of MDS service threads to start");

static unsigned int mds_cpu_bind = 1;
module_param(mds_cpu_bind, uint, 0444);
MODULE_PARM_DESC(mds_cpu_bind,
                 "bind MDS threads to particular CPU partitions");

/* not static: NOTE(review) presumably referenced from another file — confirm */
int mds_max_io_threads = 512;
module_param(mds_max_io_threads, int, 0444);
MODULE_PARM_DESC(mds_max_io_threads,
                 "maximum number of MDS IO service threads");

static unsigned int mds_io_cpu_bind = 1;
module_param(mds_io_cpu_bind, uint, 0444);
MODULE_PARM_DESC(mds_io_cpu_bind,
                 "bind MDS IO threads to particular CPU partitions");

static char *mds_io_num_cpts;
module_param(mds_io_num_cpts, charp, 0444);
MODULE_PARM_DESC(mds_io_num_cpts,
                 "CPU partitions MDS IO threads should run on");

/* CPT table for the MDT I/O service; allocated in
 * mds_start_ptlrpc_service() and freed in mds_stop_ptlrpc_service() */
static struct cfs_cpt_table *mdt_io_cptable;

static char *mds_num_cpts;
module_param(mds_num_cpts, charp, 0444);
MODULE_PARM_DESC(mds_num_cpts, "CPU partitions MDS threads should run on");

static unsigned long mds_rdpg_num_threads;
module_param(mds_rdpg_num_threads, ulong, 0444);
MODULE_PARM_DESC(mds_rdpg_num_threads,
                 "number of MDS readpage service threads to start");

static unsigned int mds_rdpg_cpu_bind = 1;
module_param(mds_rdpg_cpu_bind, uint, 0444);
MODULE_PARM_DESC(mds_rdpg_cpu_bind,
                 "bind MDS readpage threads to particular CPU partitions");

static char *mds_rdpg_num_cpts;
module_param(mds_rdpg_num_cpts, charp, 0444);
MODULE_PARM_DESC(mds_rdpg_num_cpts,
                 "CPU partitions MDS readpage threads should run on");
117
118 /* device init/fini methods */
119 static void mds_stop_ptlrpc_service(struct mds_device *m)
120 {
121         ENTRY;
122
123         mutex_lock(&m->mds_health_mutex);
124         if (m->mds_regular_service != NULL) {
125                 ptlrpc_unregister_service(m->mds_regular_service);
126                 m->mds_regular_service = NULL;
127         }
128         if (m->mds_readpage_service != NULL) {
129                 ptlrpc_unregister_service(m->mds_readpage_service);
130                 m->mds_readpage_service = NULL;
131         }
132         if (m->mds_out_service != NULL) {
133                 ptlrpc_unregister_service(m->mds_out_service);
134                 m->mds_out_service = NULL;
135         }
136         if (m->mds_mdsc_service != NULL) {
137                 ptlrpc_unregister_service(m->mds_mdsc_service);
138                 m->mds_mdsc_service = NULL;
139         }
140         if (m->mds_mdss_service != NULL) {
141                 ptlrpc_unregister_service(m->mds_mdss_service);
142                 m->mds_mdss_service = NULL;
143         }
144         if (m->mds_fld_service != NULL) {
145                 ptlrpc_unregister_service(m->mds_fld_service);
146                 m->mds_fld_service = NULL;
147         }
148         if (m->mds_io_service != NULL) {
149                 ptlrpc_unregister_service(m->mds_io_service);
150                 m->mds_io_service = NULL;
151         }
152         mutex_unlock(&m->mds_health_mutex);
153
154         if (mdt_io_cptable != NULL) {
155                 cfs_cpt_table_free(mdt_io_cptable);
156                 mdt_io_cptable = NULL;
157         }
158
159         EXIT;
160 }
161
162 static int ldlm_enqueue_hpreq_check(struct ptlrpc_request *req)
163 {
164         struct ldlm_request *dlm_req;
165         int rc = 0;
166         ENTRY;
167
168         if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_REPLAY|MSG_RESENT)) !=
169             MSG_RESENT)
170                 RETURN(0);
171
172         req_capsule_init(&req->rq_pill, req, RCL_SERVER);
173         req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE);
174         dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
175         if (dlm_req == NULL)
176                 RETURN(-EFAULT);
177
178         if (dlm_req->lock_count > 0) {
179                 struct ldlm_lock *lock;
180
181                 lock = cfs_hash_lookup(req->rq_export->exp_lock_hash,
182                                        (void *)&dlm_req->lock_handle[0]);
183
184                 DEBUG_REQ(D_RPCTRACE, req, "lock %p cookie 0x%llx",
185                         lock, dlm_req->lock_handle[0].cookie);
186                 if (lock != NULL) {
187                         rc = lock->l_granted_mode == lock->l_req_mode;
188                         if (rc)
189                                 LDLM_DEBUG(lock, "hpreq resend");
190                         LDLM_LOCK_RELEASE(lock);
191                 }
192         }
193
194         RETURN(rc);
195 }
196
/*
 * High-priority operations installed on LDLM_ENQUEUE requests by
 * mds_hpreq_handler(); only the check callback is provided.
 */
static struct ptlrpc_hpreq_ops ldlm_enqueue_hpreq_ops = {
        .hpreq_lock_match = NULL,
        .hpreq_check      = ldlm_enqueue_hpreq_check,
        .hpreq_fini       = NULL,
};
202
203 static int mds_hpreq_handler(struct ptlrpc_request *req)
204 {
205         if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_ENQUEUE)
206                 req->rq_ops = &ldlm_enqueue_hpreq_ops;
207         else
208                 ptlrpc_hpreq_handler(req);
209         return 0;
210 }
211
212 static int mds_start_ptlrpc_service(struct mds_device *m)
213 {
214         static struct ptlrpc_service_conf conf;
215         struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd;
216         nodemask_t *mask;
217         int rc = 0;
218
219         ENTRY;
220
221         conf = (typeof(conf)) {
222                 .psc_name               = LUSTRE_MDT_NAME,
223                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
224                 .psc_buf                = {
225                         .bc_nbufs               = MDS_NBUFS,
226                         .bc_buf_size            = MDS_REG_BUFSIZE,
227                         .bc_req_max_size        = MDS_REG_MAXREQSIZE,
228                         .bc_rep_max_size        = MDS_REG_MAXREPSIZE,
229                         .bc_req_portal          = MDS_REQUEST_PORTAL,
230                         .bc_rep_portal          = MDC_REPLY_PORTAL,
231                 },
232                 /*
233                  * We'd like to have a mechanism to set this on a per-device
234                  * basis, but alas...
235                  */
236                 .psc_thr                = {
237                         .tc_thr_name            = LUSTRE_MDT_NAME,
238                         .tc_thr_factor          = MDS_THR_FACTOR,
239                         .tc_nthrs_init          = MDS_NTHRS_INIT,
240                         .tc_nthrs_base          = MDS_NTHRS_BASE,
241                         .tc_nthrs_max           = MDS_NTHRS_MAX,
242                         .tc_nthrs_user          = mds_num_threads,
243                         .tc_cpu_bind            = mds_cpu_bind,
244                         /* LCT_DT_THREAD is required as MDT threads may scan
245                          * all LDLM namespaces (including OFD-originated) to
246                          * cancel LDLM locks */
247                         .tc_ctx_tags            = LCT_MD_THREAD | LCT_DT_THREAD,
248                 },
249                 .psc_cpt                = {
250                         .cc_pattern             = mds_num_cpts,
251                         .cc_affinity            = true,
252                 },
253                 .psc_ops                = {
254                         .so_req_handler         = tgt_request_handle,
255                         .so_req_printer         = target_print_req,
256                         .so_hpreq_handler       = mds_hpreq_handler,
257                 },
258         };
259         m->mds_regular_service = ptlrpc_register_service(&conf, &obd->obd_kset,
260                                                          obd->obd_debugfs_entry);
261         if (IS_ERR(m->mds_regular_service)) {
262                 rc = PTR_ERR(m->mds_regular_service);
263                 CERROR("failed to start regular mdt service: %d\n", rc);
264                 m->mds_regular_service = NULL;
265
266                 RETURN(rc);
267         }
268
269         /*
270          * readpage service configuration. Parameters have to be adjusted,
271          * ideally.
272          */
273         memset(&conf, 0, sizeof(conf));
274         conf = (typeof(conf)) {
275                 .psc_name               = LUSTRE_MDT_NAME "_readpage",
276                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
277                 .psc_buf                = {
278                         .bc_nbufs               = MDS_NBUFS,
279                         .bc_buf_size            = MDS_BUFSIZE,
280                         .bc_req_max_size        = MDS_MAXREQSIZE,
281                         .bc_rep_max_size        = MDS_MAXREPSIZE,
282                         .bc_req_portal          = MDS_READPAGE_PORTAL,
283                         .bc_rep_portal          = MDC_REPLY_PORTAL,
284                 },
285                 .psc_thr                = {
286                         .tc_thr_name            = LUSTRE_MDT_NAME "_rdpg",
287                         .tc_thr_factor          = MDS_RDPG_THR_FACTOR,
288                         .tc_nthrs_init          = MDS_RDPG_NTHRS_INIT,
289                         .tc_nthrs_base          = MDS_RDPG_NTHRS_BASE,
290                         .tc_nthrs_max           = MDS_RDPG_NTHRS_MAX,
291                         .tc_nthrs_user          = mds_rdpg_num_threads,
292                         .tc_cpu_bind            = mds_rdpg_cpu_bind,
293                         .tc_ctx_tags            = LCT_MD_THREAD,
294                 },
295                 .psc_cpt                = {
296                         .cc_pattern             = mds_rdpg_num_cpts,
297                         .cc_affinity            = true,
298                 },
299                 .psc_ops                = {
300                         .so_req_handler         = tgt_request_handle,
301                         .so_req_printer         = target_print_req,
302                 },
303         };
304         m->mds_readpage_service = ptlrpc_register_service(&conf, &obd->obd_kset,
305                                                           obd->obd_debugfs_entry);
306         if (IS_ERR(m->mds_readpage_service)) {
307                 rc = PTR_ERR(m->mds_readpage_service);
308                 CERROR("failed to start readpage service: %d\n", rc);
309                 m->mds_readpage_service = NULL;
310
311                 GOTO(err_mds_svc, rc);
312         }
313
314         /* Object update service */
315         conf = (typeof(conf)) {
316                 .psc_name               = LUSTRE_MDT_NAME "_out",
317                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
318                 .psc_buf                = {
319                         .bc_nbufs               = MDS_NBUFS,
320                         .bc_buf_size            = OUT_BUFSIZE,
321                         .bc_req_max_size        = OUT_MAXREQSIZE,
322                         .bc_rep_max_size        = OUT_MAXREPSIZE,
323                         .bc_req_portal          = OUT_PORTAL,
324                         .bc_rep_portal          = OSC_REPLY_PORTAL,
325                 },
326                 /*
327                  * We'd like to have a mechanism to set this on a per-device
328                  * basis, but alas...
329                  */
330                 .psc_thr                = {
331                         .tc_thr_name            = LUSTRE_MDT_NAME "_out",
332                         .tc_thr_factor          = MDS_THR_FACTOR,
333                         .tc_nthrs_init          = MDS_NTHRS_INIT,
334                         .tc_nthrs_base          = MDS_NTHRS_BASE,
335                         .tc_nthrs_max           = MDS_NTHRS_MAX,
336                         .tc_nthrs_user          = mds_num_threads,
337                         .tc_cpu_bind            = mds_cpu_bind,
338                         .tc_ctx_tags            = LCT_MD_THREAD |
339                                                   LCT_DT_THREAD,
340                 },
341                 .psc_cpt                = {
342                         .cc_pattern             = mds_num_cpts,
343                         .cc_affinity            = true,
344                 },
345                 .psc_ops                = {
346                         .so_req_handler         = tgt_request_handle,
347                         .so_req_printer         = target_print_req,
348                         .so_hpreq_handler       = NULL,
349                 },
350         };
351         m->mds_out_service = ptlrpc_register_service(&conf, &obd->obd_kset,
352                                                      obd->obd_debugfs_entry);
353         if (IS_ERR(m->mds_out_service)) {
354                 rc = PTR_ERR(m->mds_out_service);
355                 CERROR("failed to start out service: %d\n", rc);
356                 m->mds_out_service = NULL;
357                 GOTO(err_mds_svc, rc);
358         }
359
360         /*
361          * sequence controller service configuration
362          */
363         memset(&conf, 0, sizeof(conf));
364         conf = (typeof(conf)) {
365                 .psc_name               = LUSTRE_MDT_NAME "_seqs",
366                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
367                 .psc_buf                = {
368                         .bc_nbufs               = MDS_NBUFS,
369                         .bc_buf_size            = SEQ_BUFSIZE,
370                         .bc_req_max_size        = SEQ_MAXREQSIZE,
371                         .bc_rep_max_size        = SEQ_MAXREPSIZE,
372                         .bc_req_portal          = SEQ_CONTROLLER_PORTAL,
373                         .bc_rep_portal          = MDC_REPLY_PORTAL,
374                 },
375                 .psc_thr                = {
376                         .tc_thr_name            = LUSTRE_MDT_NAME "_seqs",
377                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
378                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
379                         .tc_ctx_tags            = LCT_MD_THREAD,
380                 },
381                 .psc_ops                = {
382                         .so_req_handler         = tgt_request_handle,
383                         .so_req_printer         = target_print_req,
384                         .so_hpreq_handler       = NULL,
385                 },
386         };
387         m->mds_mdsc_service = ptlrpc_register_service(&conf, &obd->obd_kset,
388                                                       obd->obd_debugfs_entry);
389         if (IS_ERR(m->mds_mdsc_service)) {
390                 rc = PTR_ERR(m->mds_mdsc_service);
391                 CERROR("failed to start seq controller service: %d\n", rc);
392                 m->mds_mdsc_service = NULL;
393
394                 GOTO(err_mds_svc, rc);
395         }
396
397         /*
398          * metadata sequence server service configuration
399          */
400         memset(&conf, 0, sizeof(conf));
401         conf = (typeof(conf)) {
402                 .psc_name               = LUSTRE_MDT_NAME "_seqm",
403                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
404                 .psc_buf                = {
405                         .bc_nbufs               = MDS_NBUFS,
406                         .bc_buf_size            = SEQ_BUFSIZE,
407                         .bc_req_max_size        = SEQ_MAXREQSIZE,
408                         .bc_rep_max_size        = SEQ_MAXREPSIZE,
409                         .bc_req_portal          = SEQ_METADATA_PORTAL,
410                         .bc_rep_portal          = MDC_REPLY_PORTAL,
411                 },
412                 .psc_thr                = {
413                         .tc_thr_name            = LUSTRE_MDT_NAME "_seqm",
414                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
415                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
416                         .tc_ctx_tags            = LCT_MD_THREAD | LCT_DT_THREAD
417                 },
418                 .psc_ops                = {
419                         .so_req_handler         = tgt_request_handle,
420                         .so_req_printer         = target_print_req,
421                         .so_hpreq_handler       = NULL,
422                 },
423         };
424         m->mds_mdss_service = ptlrpc_register_service(&conf, &obd->obd_kset,
425                                                       obd->obd_debugfs_entry);
426         if (IS_ERR(m->mds_mdss_service)) {
427                 rc = PTR_ERR(m->mds_mdss_service);
428                 CERROR("failed to start metadata seq server service: %d\n", rc);
429                 m->mds_mdss_service = NULL;
430
431                 GOTO(err_mds_svc, rc);
432         }
433
434         /* FLD service start */
435         memset(&conf, 0, sizeof(conf));
436         conf = (typeof(conf)) {
437                 .psc_name            = LUSTRE_MDT_NAME "_fld",
438                 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
439                 .psc_buf                = {
440                         .bc_nbufs               = MDS_NBUFS,
441                         .bc_buf_size            = FLD_BUFSIZE,
442                         .bc_req_max_size        = FLD_MAXREQSIZE,
443                         .bc_rep_max_size        = FLD_MAXREPSIZE,
444                         .bc_req_portal          = FLD_REQUEST_PORTAL,
445                         .bc_rep_portal          = MDC_REPLY_PORTAL,
446                 },
447                 .psc_thr                = {
448                         .tc_thr_name            = LUSTRE_MDT_NAME "_fld",
449                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
450                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
451                         .tc_ctx_tags            = LCT_DT_THREAD | LCT_MD_THREAD,
452                 },
453                 .psc_ops                = {
454                         .so_req_handler         = tgt_request_handle,
455                         .so_req_printer         = target_print_req,
456                         .so_hpreq_handler       = NULL,
457                 },
458         };
459         m->mds_fld_service = ptlrpc_register_service(&conf, &obd->obd_kset,
460                                                      obd->obd_debugfs_entry);
461         if (IS_ERR(m->mds_fld_service)) {
462                 rc = PTR_ERR(m->mds_fld_service);
463                 CERROR("failed to start fld service: %d\n", rc);
464                 m->mds_fld_service = NULL;
465
466                 GOTO(err_mds_svc, rc);
467         }
468
469
470         mask = cfs_cpt_nodemask(cfs_cpt_tab, CFS_CPT_ANY);
471         /* event CPT feature is disabled in libcfs level by set partition
472          * number to 1, we still want to set node affinity for io service */
473         if (cfs_cpt_number(cfs_cpt_tab) == 1 && nodes_weight(*mask) > 1) {
474                 int cpt = 0;
475                 int i;
476
477                 mdt_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
478                 for_each_node_mask(i, *mask) {
479                         if (mdt_io_cptable == NULL) {
480                                 CWARN("MDS failed to create CPT table\n");
481                                 break;
482                         }
483
484                         rc = cfs_cpt_set_node(mdt_io_cptable, cpt++, i);
485                         if (!rc) {
486                                 CWARN("MDS Failed to set node %d for IO CPT table\n",
487                                       i);
488                                 cfs_cpt_table_free(mdt_io_cptable);
489                                 mdt_io_cptable = NULL;
490                                 break;
491                         }
492                 }
493         }
494
495         memset(&conf, 0, sizeof(conf));
496         conf = (typeof(conf)) {
497                 .psc_name               = LUSTRE_MDT_NAME "_io",
498                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
499                 .psc_buf                = {
500                         .bc_nbufs               = OST_NBUFS,
501                         .bc_buf_size            = OST_IO_BUFSIZE,
502                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
503                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
504                         .bc_req_portal          = MDS_IO_PORTAL,
505                         .bc_rep_portal          = MDC_REPLY_PORTAL,
506                 },
507                 .psc_thr                = {
508                         .tc_thr_name            = LUSTRE_MDT_NAME "_io",
509                         .tc_thr_factor          = OSS_THR_FACTOR,
510                         .tc_nthrs_init          = OSS_NTHRS_INIT,
511                         .tc_nthrs_base          = OSS_NTHRS_BASE,
512                         .tc_nthrs_max           = mds_max_io_threads,
513                         .tc_nthrs_user          = mds_num_threads,
514                         .tc_cpu_bind            = mds_io_cpu_bind,
515                         .tc_ctx_tags            = LCT_DT_THREAD | LCT_MD_THREAD,
516                 },
517                 .psc_cpt                = {
518                         .cc_cptable             = mdt_io_cptable,
519                         .cc_pattern             = mdt_io_cptable == NULL ?
520                                                   mds_io_num_cpts : NULL,
521                         .cc_affinity            = true,
522                 },
523                 .psc_ops                = {
524                         .so_thr_init            = tgt_io_thread_init,
525                         .so_thr_done            = tgt_io_thread_done,
526                         .so_req_handler         = tgt_request_handle,
527                         .so_req_printer         = target_print_req,
528                         .so_hpreq_handler       = tgt_hpreq_handler,
529                 },
530         };
531         m->mds_io_service = ptlrpc_register_service(&conf, &obd->obd_kset,
532                                                     obd->obd_debugfs_entry);
533         if (IS_ERR(m->mds_io_service)) {
534                 rc = PTR_ERR(m->mds_io_service);
535                 CERROR("failed to start MDT I/O service: %d\n", rc);
536                 m->mds_io_service = NULL;
537                 GOTO(err_mds_svc, rc);
538         }
539
540         EXIT;
541 err_mds_svc:
542         if (rc)
543                 mds_stop_ptlrpc_service(m);
544
545         return rc;
546 }
547
548 static inline struct mds_device *mds_dev(struct lu_device *d)
549 {
550         return container_of_safe(d, struct mds_device, mds_md_dev.md_lu_dev);
551 }
552
553 static struct lu_device *mds_device_fini(const struct lu_env *env,
554                                          struct lu_device *d)
555 {
556         struct mds_device *m = mds_dev(d);
557         struct obd_device *obd = d->ld_obd;
558         ENTRY;
559
560         mds_stop_ptlrpc_service(m);
561         lprocfs_obd_cleanup(obd);
562         RETURN(NULL);
563 }
564
565 static struct lu_device *mds_device_free(const struct lu_env *env,
566                                          struct lu_device *d)
567 {
568         struct mds_device *m = mds_dev(d);
569         ENTRY;
570
571         md_device_fini(&m->mds_md_dev);
572         OBD_FREE_PTR(m);
573         RETURN(NULL);
574 }
575
576 static struct lu_device *mds_device_alloc(const struct lu_env *env,
577                                           struct lu_device_type *t,
578                                           struct lustre_cfg *cfg)
579 {
580         struct mds_device        *m;
581         struct obd_device        *obd;
582         struct lu_device          *l;
583         int rc;
584
585         OBD_ALLOC_PTR(m);
586         if (m == NULL)
587                 return ERR_PTR(-ENOMEM);
588
589         md_device_init(&m->mds_md_dev, t);
590         l = &m->mds_md_dev.md_lu_dev;
591
592         obd = class_name2obd(lustre_cfg_string(cfg, 0));
593         LASSERT(obd != NULL);
594
595         l->ld_obd = obd;
596         /* set this lu_device to obd, because error handling need it */
597         obd->obd_lu_dev = l;
598
599         rc = lprocfs_obd_setup(obd, true);
600         if (rc != 0) {
601                 mds_device_free(env, l);
602                 l = ERR_PTR(rc);
603                 return l;
604         }
605
606         mutex_init(&m->mds_health_mutex);
607
608         rc = mds_start_ptlrpc_service(m);
609         if (rc != 0) {
610                 lprocfs_obd_cleanup(obd);
611                 mds_device_free(env, l);
612                 l = ERR_PTR(rc);
613                 return l;
614         }
615         return l;
616 }
617
/* type constructor/destructor and start/stop hooks for mdt_thread_key.
 * NOTE(review): presumably this expands to the mds_type_init, mds_type_fini,
 * mds_type_start and mds_type_stop functions referenced by
 * mds_device_type_ops — confirm against the macro definition. */
LU_TYPE_INIT_FINI(mds, &mdt_thread_key);
620
/* Device type operations of the MDS layer, referenced by mds_device_type. */
static const struct lu_device_type_operations mds_device_type_ops = {
        .ldto_init = mds_type_init,
        .ldto_fini = mds_type_fini,

        .ldto_start = mds_type_start,
        .ldto_stop  = mds_type_stop,

        .ldto_device_alloc = mds_device_alloc,
        .ldto_device_free  = mds_device_free,
        .ldto_device_fini  = mds_device_fini
};
632
/* Device type registered under LUSTRE_MDS_NAME by mds_mod_init(). */
static struct lu_device_type mds_device_type = {
        .ldt_tags     = LU_DEVICE_MD,
        .ldt_name     = LUSTRE_MDS_NAME,
        .ldt_ops      = &mds_device_type_ops,
        .ldt_ctx_tags = LCT_MD_THREAD
};
639
640 static int mds_health_check(const struct lu_env *env, struct obd_device *obd)
641 {
642         struct mds_device *mds = mds_dev(obd->obd_lu_dev);
643         int rc = 0;
644
645
646         mutex_lock(&mds->mds_health_mutex);
647         rc |= ptlrpc_service_health_check(mds->mds_regular_service);
648         rc |= ptlrpc_service_health_check(mds->mds_readpage_service);
649         rc |= ptlrpc_service_health_check(mds->mds_out_service);
650         rc |= ptlrpc_service_health_check(mds->mds_mdsc_service);
651         rc |= ptlrpc_service_health_check(mds->mds_mdss_service);
652         rc |= ptlrpc_service_health_check(mds->mds_fld_service);
653         rc |= ptlrpc_service_health_check(mds->mds_io_service);
654         mutex_unlock(&mds->mds_health_mutex);
655
656         return rc != 0 ? 1 : 0;
657 }
658
/* obd operations of the MDS type; only the health check is implemented. */
static const struct obd_ops mds_obd_device_ops = {
        .o_owner           = THIS_MODULE,
        .o_health_check    = mds_health_check,
};
663
664 int mds_mod_init(void)
665 {
666         return class_register_type(&mds_obd_device_ops, NULL, false,
667                                    LUSTRE_MDS_NAME, &mds_device_type);
668 }
669
/* Unregister the obd type that mds_mod_init() registered. */
void mds_mod_exit(void)
{
        class_unregister_type(LUSTRE_MDS_NAME);
}