Whamcloud - gitweb
LU-8066 ptlrpc: migrate ptlrpc proc files to sysfs
[fs/lustre-release.git] / lustre / mdt / mdt_mds.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2013, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/mdt/mdt_mds.c
32  *
33  * Lustre Metadata Service Layer
34  *
35  * Author: Di Wang <di.wang@whamcloud.com>
36  **/
37
38 #define DEBUG_SUBSYSTEM S_MDS
39
40 #include <linux/module.h>
41
42 #include <obd_support.h>
43 /* struct ptlrpc_request */
44 #include <lustre_net.h>
45 /* struct obd_export */
46 #include <lustre_export.h>
47 /* struct obd_device */
48 #include <obd.h>
49 /* lu2dt_dev() */
50 #include <dt_object.h>
51 #include <lustre_mds.h>
52 #include "mdt_internal.h"
53 #include <lustre_quota.h>
54 #include <lustre_acl.h>
55 #include <uapi/linux/lustre_param.h>
56
57 struct mds_device {
58         /* super-class */
59         struct md_device         mds_md_dev;
60         struct ptlrpc_service   *mds_regular_service;
61         struct ptlrpc_service   *mds_readpage_service;
62         struct ptlrpc_service   *mds_out_service;
63         struct ptlrpc_service   *mds_setattr_service;
64         struct ptlrpc_service   *mds_mdsc_service;
65         struct ptlrpc_service   *mds_mdss_service;
66         struct ptlrpc_service   *mds_fld_service;
67         struct mutex             mds_health_mutex;
68         struct kset             *mds_kset;
69 };
70
71 /*
72  *  * Initialized in mds_mod_init().
73  *   */
74 static unsigned long mds_num_threads;
75 module_param(mds_num_threads, ulong, 0444);
76 MODULE_PARM_DESC(mds_num_threads, "number of MDS service threads to start");
77
78 static char *mds_num_cpts;
79 module_param(mds_num_cpts, charp, 0444);
80 MODULE_PARM_DESC(mds_num_cpts, "CPU partitions MDS threads should run on");
81
82 static unsigned long mds_rdpg_num_threads;
83 module_param(mds_rdpg_num_threads, ulong, 0444);
84 MODULE_PARM_DESC(mds_rdpg_num_threads,
85                  "number of MDS readpage service threads to start");
86
87 static char *mds_rdpg_num_cpts;
88 module_param(mds_rdpg_num_cpts, charp, 0444);
89 MODULE_PARM_DESC(mds_rdpg_num_cpts,
90                  "CPU partitions MDS readpage threads should run on");
91
92 /* NB: these two should be removed along with setattr service in the future */
93 static unsigned long mds_attr_num_threads;
94 module_param(mds_attr_num_threads, ulong, 0444);
95 MODULE_PARM_DESC(mds_attr_num_threads,
96                  "number of MDS setattr service threads to start");
97
98 static char *mds_attr_num_cpts;
99 module_param(mds_attr_num_cpts, charp, 0444);
100 MODULE_PARM_DESC(mds_attr_num_cpts,
101                  "CPU partitions MDS setattr threads should run on");
102
103 /* device init/fini methods */
104 static void mds_stop_ptlrpc_service(struct mds_device *m)
105 {
106         ENTRY;
107
108         mutex_lock(&m->mds_health_mutex);
109         if (m->mds_regular_service != NULL) {
110                 ptlrpc_unregister_service(m->mds_regular_service);
111                 m->mds_regular_service = NULL;
112         }
113         if (m->mds_readpage_service != NULL) {
114                 ptlrpc_unregister_service(m->mds_readpage_service);
115                 m->mds_readpage_service = NULL;
116         }
117         if (m->mds_out_service != NULL) {
118                 ptlrpc_unregister_service(m->mds_out_service);
119                 m->mds_out_service = NULL;
120         }
121         if (m->mds_setattr_service != NULL) {
122                 ptlrpc_unregister_service(m->mds_setattr_service);
123                 m->mds_setattr_service = NULL;
124         }
125         if (m->mds_mdsc_service != NULL) {
126                 ptlrpc_unregister_service(m->mds_mdsc_service);
127                 m->mds_mdsc_service = NULL;
128         }
129         if (m->mds_mdss_service != NULL) {
130                 ptlrpc_unregister_service(m->mds_mdss_service);
131                 m->mds_mdss_service = NULL;
132         }
133         if (m->mds_fld_service != NULL) {
134                 ptlrpc_unregister_service(m->mds_fld_service);
135                 m->mds_fld_service = NULL;
136         }
137         mutex_unlock(&m->mds_health_mutex);
138
139         EXIT;
140 }
141
142 static int mds_start_ptlrpc_service(struct mds_device *m)
143 {
144         static struct ptlrpc_service_conf conf;
145         struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd;
146         struct proc_dir_entry *procfs_entry;
147         int rc = 0;
148         ENTRY;
149
150         procfs_entry = obd->obd_proc_entry;
151         LASSERT(procfs_entry != NULL);
152
153         conf = (typeof(conf)) {
154                 .psc_name               = LUSTRE_MDT_NAME,
155                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
156                 .psc_buf                = {
157                         .bc_nbufs               = MDS_NBUFS,
158                         .bc_buf_size            = MDS_REG_BUFSIZE,
159                         .bc_req_max_size        = MDS_REG_MAXREQSIZE,
160                         .bc_rep_max_size        = MDS_REG_MAXREPSIZE,
161                         .bc_req_portal          = MDS_REQUEST_PORTAL,
162                         .bc_rep_portal          = MDC_REPLY_PORTAL,
163                 },
164                 /*
165                  * We'd like to have a mechanism to set this on a per-device
166                  * basis, but alas...
167                  */
168                 .psc_thr                = {
169                         .tc_thr_name            = LUSTRE_MDT_NAME,
170                         .tc_thr_factor          = MDS_THR_FACTOR,
171                         .tc_nthrs_init          = MDS_NTHRS_INIT,
172                         .tc_nthrs_base          = MDS_NTHRS_BASE,
173                         .tc_nthrs_max           = MDS_NTHRS_MAX,
174                         .tc_nthrs_user          = mds_num_threads,
175                         .tc_cpu_affinity        = 1,
176                         .tc_ctx_tags            = LCT_MD_THREAD,
177                 },
178                 .psc_cpt                = {
179                         .cc_pattern             = mds_num_cpts,
180                 },
181                 .psc_ops                = {
182                         .so_req_handler         = tgt_request_handle,
183                         .so_req_printer         = target_print_req,
184                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
185                 },
186         };
187         m->mds_regular_service = ptlrpc_register_service(&conf, m->mds_kset,
188                                                          procfs_entry);
189         if (IS_ERR(m->mds_regular_service)) {
190                 rc = PTR_ERR(m->mds_regular_service);
191                 CERROR("failed to start regular mdt service: %d\n", rc);
192                 m->mds_regular_service = NULL;
193
194                 RETURN(rc);
195         }
196
197         /*
198          * readpage service configuration. Parameters have to be adjusted,
199          * ideally.
200          */
201         memset(&conf, 0, sizeof(conf));
202         conf = (typeof(conf)) {
203                 .psc_name               = LUSTRE_MDT_NAME "_readpage",
204                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
205                 .psc_buf                = {
206                         .bc_nbufs               = MDS_NBUFS,
207                         .bc_buf_size            = MDS_BUFSIZE,
208                         .bc_req_max_size        = MDS_MAXREQSIZE,
209                         .bc_rep_max_size        = MDS_MAXREPSIZE,
210                         .bc_req_portal          = MDS_READPAGE_PORTAL,
211                         .bc_rep_portal          = MDC_REPLY_PORTAL,
212                 },
213                 .psc_thr                = {
214                         .tc_thr_name            = LUSTRE_MDT_NAME "_rdpg",
215                         .tc_thr_factor          = MDS_RDPG_THR_FACTOR,
216                         .tc_nthrs_init          = MDS_RDPG_NTHRS_INIT,
217                         .tc_nthrs_base          = MDS_RDPG_NTHRS_BASE,
218                         .tc_nthrs_max           = MDS_RDPG_NTHRS_MAX,
219                         .tc_nthrs_user          = mds_rdpg_num_threads,
220                         .tc_cpu_affinity        = 1,
221                         .tc_ctx_tags            = LCT_MD_THREAD,
222                 },
223                 .psc_cpt                = {
224                         .cc_pattern             = mds_rdpg_num_cpts,
225                 },
226                 .psc_ops                = {
227                         .so_req_handler         = tgt_request_handle,
228                         .so_req_printer         = target_print_req,
229                 },
230         };
231         m->mds_readpage_service = ptlrpc_register_service(&conf, m->mds_kset,
232                                                           procfs_entry);
233         if (IS_ERR(m->mds_readpage_service)) {
234                 rc = PTR_ERR(m->mds_readpage_service);
235                 CERROR("failed to start readpage service: %d\n", rc);
236                 m->mds_readpage_service = NULL;
237
238                 GOTO(err_mds_svc, rc);
239         }
240
241         /*
242          * setattr service configuration.
243          *
244          * XXX To keep the compatibility with old client(< 2.2), we need to
245          * preserve this portal for a certain time, it should be removed
246          * eventually. LU-617.
247          */
248         memset(&conf, 0, sizeof(conf));
249         conf = (typeof(conf)) {
250                 .psc_name               = LUSTRE_MDT_NAME "_setattr",
251                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
252                 .psc_buf                = {
253                         .bc_nbufs               = MDS_NBUFS,
254                         .bc_buf_size            = MDS_BUFSIZE,
255                         .bc_req_max_size        = MDS_MAXREQSIZE,
256                         .bc_rep_max_size        = MDS_LOV_MAXREPSIZE,
257                         .bc_req_portal          = MDS_SETATTR_PORTAL,
258                         .bc_rep_portal          = MDC_REPLY_PORTAL,
259                 },
260                 .psc_thr                = {
261                         .tc_thr_name            = LUSTRE_MDT_NAME "_attr",
262                         .tc_thr_factor          = MDS_SETA_THR_FACTOR,
263                         .tc_nthrs_init          = MDS_SETA_NTHRS_INIT,
264                         .tc_nthrs_base          = MDS_SETA_NTHRS_BASE,
265                         .tc_nthrs_max           = MDS_SETA_NTHRS_MAX,
266                         .tc_nthrs_user          = mds_attr_num_threads,
267                         .tc_cpu_affinity        = 1,
268                         .tc_ctx_tags            = LCT_MD_THREAD,
269                 },
270                 .psc_cpt                = {
271                         .cc_pattern             = mds_attr_num_cpts,
272                 },
273                 .psc_ops                = {
274                         .so_req_handler         = tgt_request_handle,
275                         .so_req_printer         = target_print_req,
276                         .so_hpreq_handler       = NULL,
277                 },
278         };
279         m->mds_setattr_service = ptlrpc_register_service(&conf, m->mds_kset,
280                                                          procfs_entry);
281         if (IS_ERR(m->mds_setattr_service)) {
282                 rc = PTR_ERR(m->mds_setattr_service);
283                 CERROR("failed to start setattr service: %d\n", rc);
284                 m->mds_setattr_service = NULL;
285
286                 GOTO(err_mds_svc, rc);
287         }
288
289         /* Object update service */
290         conf = (typeof(conf)) {
291                 .psc_name               = LUSTRE_MDT_NAME "_out",
292                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
293                 .psc_buf                = {
294                         .bc_nbufs               = MDS_NBUFS,
295                         .bc_buf_size            = OUT_BUFSIZE,
296                         .bc_req_max_size        = OUT_MAXREQSIZE,
297                         .bc_rep_max_size        = OUT_MAXREPSIZE,
298                         .bc_req_portal          = OUT_PORTAL,
299                         .bc_rep_portal          = OSC_REPLY_PORTAL,
300                 },
301                 /*
302                  * We'd like to have a mechanism to set this on a per-device
303                  * basis, but alas...
304                  */
305                 .psc_thr                = {
306                         .tc_thr_name            = LUSTRE_MDT_NAME "_out",
307                         .tc_thr_factor          = MDS_THR_FACTOR,
308                         .tc_nthrs_init          = MDS_NTHRS_INIT,
309                         .tc_nthrs_base          = MDS_NTHRS_BASE,
310                         .tc_nthrs_max           = MDS_NTHRS_MAX,
311                         .tc_nthrs_user          = mds_num_threads,
312                         .tc_cpu_affinity        = 1,
313                         .tc_ctx_tags            = LCT_MD_THREAD |
314                                                   LCT_DT_THREAD,
315                 },
316                 .psc_cpt                = {
317                         .cc_pattern             = mds_num_cpts,
318                 },
319                 .psc_ops                = {
320                         .so_req_handler         = tgt_request_handle,
321                         .so_req_printer         = target_print_req,
322                         .so_hpreq_handler       = NULL,
323                 },
324         };
325         m->mds_out_service = ptlrpc_register_service(&conf, m->mds_kset,
326                                                      procfs_entry);
327         if (IS_ERR(m->mds_out_service)) {
328                 rc = PTR_ERR(m->mds_out_service);
329                 CERROR("failed to start out service: %d\n", rc);
330                 m->mds_out_service = NULL;
331                 GOTO(err_mds_svc, rc);
332         }
333
334         /*
335          * sequence controller service configuration
336          */
337         memset(&conf, 0, sizeof(conf));
338         conf = (typeof(conf)) {
339                 .psc_name               = LUSTRE_MDT_NAME "_seqs",
340                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
341                 .psc_buf                = {
342                         .bc_nbufs               = MDS_NBUFS,
343                         .bc_buf_size            = SEQ_BUFSIZE,
344                         .bc_req_max_size        = SEQ_MAXREQSIZE,
345                         .bc_rep_max_size        = SEQ_MAXREPSIZE,
346                         .bc_req_portal          = SEQ_CONTROLLER_PORTAL,
347                         .bc_rep_portal          = MDC_REPLY_PORTAL,
348                 },
349                 .psc_thr                = {
350                         .tc_thr_name            = LUSTRE_MDT_NAME "_seqs",
351                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
352                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
353                         .tc_ctx_tags            = LCT_MD_THREAD,
354                 },
355                 .psc_ops                = {
356                         .so_req_handler         = tgt_request_handle,
357                         .so_req_printer         = target_print_req,
358                         .so_hpreq_handler       = NULL,
359                 },
360         };
361         m->mds_mdsc_service = ptlrpc_register_service(&conf, m->mds_kset,
362                                                       procfs_entry);
363         if (IS_ERR(m->mds_mdsc_service)) {
364                 rc = PTR_ERR(m->mds_mdsc_service);
365                 CERROR("failed to start seq controller service: %d\n", rc);
366                 m->mds_mdsc_service = NULL;
367
368                 GOTO(err_mds_svc, rc);
369         }
370
371         /*
372          * metadata sequence server service configuration
373          */
374         memset(&conf, 0, sizeof(conf));
375         conf = (typeof(conf)) {
376                 .psc_name               = LUSTRE_MDT_NAME "_seqm",
377                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
378                 .psc_buf                = {
379                         .bc_nbufs               = MDS_NBUFS,
380                         .bc_buf_size            = SEQ_BUFSIZE,
381                         .bc_req_max_size        = SEQ_MAXREQSIZE,
382                         .bc_rep_max_size        = SEQ_MAXREPSIZE,
383                         .bc_req_portal          = SEQ_METADATA_PORTAL,
384                         .bc_rep_portal          = MDC_REPLY_PORTAL,
385                 },
386                 .psc_thr                = {
387                         .tc_thr_name            = LUSTRE_MDT_NAME "_seqm",
388                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
389                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
390                         .tc_ctx_tags            = LCT_MD_THREAD | LCT_DT_THREAD
391                 },
392                 .psc_ops                = {
393                         .so_req_handler         = tgt_request_handle,
394                         .so_req_printer         = target_print_req,
395                         .so_hpreq_handler       = NULL,
396                 },
397         };
398         m->mds_mdss_service = ptlrpc_register_service(&conf, m->mds_kset,
399                                                       procfs_entry);
400         if (IS_ERR(m->mds_mdss_service)) {
401                 rc = PTR_ERR(m->mds_mdss_service);
402                 CERROR("failed to start metadata seq server service: %d\n", rc);
403                 m->mds_mdss_service = NULL;
404
405                 GOTO(err_mds_svc, rc);
406         }
407
408         /* FLD service start */
409         memset(&conf, 0, sizeof(conf));
410         conf = (typeof(conf)) {
411                 .psc_name            = LUSTRE_MDT_NAME "_fld",
412                 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
413                 .psc_buf                = {
414                         .bc_nbufs               = MDS_NBUFS,
415                         .bc_buf_size            = FLD_BUFSIZE,
416                         .bc_req_max_size        = FLD_MAXREQSIZE,
417                         .bc_rep_max_size        = FLD_MAXREPSIZE,
418                         .bc_req_portal          = FLD_REQUEST_PORTAL,
419                         .bc_rep_portal          = MDC_REPLY_PORTAL,
420                 },
421                 .psc_thr                = {
422                         .tc_thr_name            = LUSTRE_MDT_NAME "_fld",
423                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
424                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
425                         .tc_ctx_tags            = LCT_DT_THREAD | LCT_MD_THREAD,
426                 },
427                 .psc_ops                = {
428                         .so_req_handler         = tgt_request_handle,
429                         .so_req_printer         = target_print_req,
430                         .so_hpreq_handler       = NULL,
431                 },
432         };
433         m->mds_fld_service = ptlrpc_register_service(&conf, m->mds_kset,
434                                                      procfs_entry);
435         if (IS_ERR(m->mds_fld_service)) {
436                 rc = PTR_ERR(m->mds_fld_service);
437                 CERROR("failed to start fld service: %d\n", rc);
438                 m->mds_fld_service = NULL;
439
440                 GOTO(err_mds_svc, rc);
441         }
442
443         EXIT;
444 err_mds_svc:
445         if (rc)
446                 mds_stop_ptlrpc_service(m);
447
448         return rc;
449 }
450
451 static inline struct mds_device *mds_dev(struct lu_device *d)
452 {
453         return container_of0(d, struct mds_device, mds_md_dev.md_lu_dev);
454 }
455
456 static struct lu_device *mds_device_fini(const struct lu_env *env,
457                                          struct lu_device *d)
458 {
459         struct mds_device *m = mds_dev(d);
460         struct obd_device *obd = d->ld_obd;
461         ENTRY;
462
463         mds_stop_ptlrpc_service(m);
464         lprocfs_kset_unregister(obd, m->mds_kset);
465         RETURN(NULL);
466 }
467
468 static struct lu_device *mds_device_free(const struct lu_env *env,
469                                          struct lu_device *d)
470 {
471         struct mds_device *m = mds_dev(d);
472         ENTRY;
473
474         md_device_fini(&m->mds_md_dev);
475         OBD_FREE_PTR(m);
476         RETURN(NULL);
477 }
478
479 static struct lu_device *mds_device_alloc(const struct lu_env *env,
480                                           struct lu_device_type *t,
481                                           struct lustre_cfg *cfg)
482 {
483         struct mds_device        *m;
484         struct obd_device        *obd;
485         struct lu_device          *l;
486         int rc;
487
488         OBD_ALLOC_PTR(m);
489         if (m == NULL)
490                 return ERR_PTR(-ENOMEM);
491
492         md_device_init(&m->mds_md_dev, t);
493         l = &m->mds_md_dev.md_lu_dev;
494
495         obd = class_name2obd(lustre_cfg_string(cfg, 0));
496         LASSERT(obd != NULL);
497
498         l->ld_obd = obd;
499         /* set this lu_device to obd, because error handling need it */
500         obd->obd_lu_dev = l;
501
502         rc = lprocfs_kset_register(obd, &m->mds_kset);
503         if (rc != 0) {
504                 mds_device_free(env, l);
505                 l = ERR_PTR(rc);
506                 return l;
507         }
508
509         mutex_init(&m->mds_health_mutex);
510
511         rc = mds_start_ptlrpc_service(m);
512         if (rc != 0) {
513                 lprocfs_kset_unregister(obd, m->mds_kset);
514                 mds_device_free(env, l);
515                 l = ERR_PTR(rc);
516                 return l;
517         }
518         return l;
519 }
520
521 /* type constructor/destructor: mdt_type_init, mdt_type_fini */
522 LU_TYPE_INIT_FINI(mds, &mdt_thread_key);
523
524 static struct lu_device_type_operations mds_device_type_ops = {
525         .ldto_init = mds_type_init,
526         .ldto_fini = mds_type_fini,
527
528         .ldto_start = mds_type_start,
529         .ldto_stop  = mds_type_stop,
530
531         .ldto_device_alloc = mds_device_alloc,
532         .ldto_device_free  = mds_device_free,
533         .ldto_device_fini  = mds_device_fini
534 };
535
536 static struct lu_device_type mds_device_type = {
537         .ldt_tags     = LU_DEVICE_MD,
538         .ldt_name     = LUSTRE_MDS_NAME,
539         .ldt_ops      = &mds_device_type_ops,
540         .ldt_ctx_tags = LCT_MD_THREAD
541 };
542
543 static int mds_health_check(const struct lu_env *env, struct obd_device *obd)
544 {
545         struct mds_device *mds = mds_dev(obd->obd_lu_dev);
546         int rc = 0;
547
548
549         mutex_lock(&mds->mds_health_mutex);
550         rc |= ptlrpc_service_health_check(mds->mds_regular_service);
551         rc |= ptlrpc_service_health_check(mds->mds_readpage_service);
552         rc |= ptlrpc_service_health_check(mds->mds_out_service);
553         rc |= ptlrpc_service_health_check(mds->mds_setattr_service);
554         rc |= ptlrpc_service_health_check(mds->mds_mdsc_service);
555         rc |= ptlrpc_service_health_check(mds->mds_mdss_service);
556         rc |= ptlrpc_service_health_check(mds->mds_fld_service);
557         mutex_unlock(&mds->mds_health_mutex);
558
559         return rc != 0 ? 1 : 0;
560 }
561
562 static struct obd_ops mds_obd_device_ops = {
563         .o_owner           = THIS_MODULE,
564         .o_health_check    = mds_health_check,
565 };
566
567 int mds_mod_init(void)
568 {
569         return class_register_type(&mds_obd_device_ops, NULL, true, NULL,
570                                    LUSTRE_MDS_NAME, &mds_device_type);
571 }
572
573 void mds_mod_exit(void)
574 {
575         class_unregister_type(LUSTRE_MDS_NAME);
576 }