Whamcloud - gitweb
2f68264e9bd3b7284c3f53e927ed81cf6650225f
[fs/lustre-release.git] / lustre / mdt / mdt_mds.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2013, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/mdt/mdt_mds.c
32  *
33  * Lustre Metadata Service Layer
34  *
35  * Author: Di Wang <di.wang@whamcloud.com>
36  **/
37
38 #define DEBUG_SUBSYSTEM S_MDS
39
40 #include <linux/module.h>
41
42 #include <obd_support.h>
43 /* struct ptlrpc_request */
44 #include <lustre_net.h>
45 /* struct obd_export */
46 #include <lustre_export.h>
47 /* struct obd_device */
48 #include <obd.h>
49 /* lu2dt_dev() */
50 #include <dt_object.h>
51 #include <lustre_mds.h>
52 #include "mdt_internal.h"
53 #include <lustre_quota.h>
54 #include <lustre_acl.h>
55 #include <lustre_param.h>
56
57 struct mds_device {
58         /* super-class */
59         struct md_device           mds_md_dev;
60         struct ptlrpc_service     *mds_regular_service;
61         struct ptlrpc_service     *mds_readpage_service;
62         struct ptlrpc_service     *mds_out_service;
63         struct ptlrpc_service     *mds_setattr_service;
64         struct ptlrpc_service     *mds_mdsc_service;
65         struct ptlrpc_service     *mds_mdss_service;
66         struct ptlrpc_service     *mds_fld_service;
67 };
68
/*
 * Initialized in mdt_mod_init().
 */
72 static unsigned long mdt_num_threads;
73 CFS_MODULE_PARM(mdt_num_threads, "ul", ulong, 0444,
74                 "number of MDS service threads to start "
75                 "(deprecated in favor of mds_num_threads)");
76
77 static unsigned long mds_num_threads;
78 CFS_MODULE_PARM(mds_num_threads, "ul", ulong, 0444,
79                 "number of MDS service threads to start");
80
81 static char *mds_num_cpts;
82 CFS_MODULE_PARM(mds_num_cpts, "c", charp, 0444,
83                 "CPU partitions MDS threads should run on");
84
85 static unsigned long mds_rdpg_num_threads;
86 CFS_MODULE_PARM(mds_rdpg_num_threads, "ul", ulong, 0444,
87                 "number of MDS readpage service threads to start");
88
89 static char *mds_rdpg_num_cpts;
90 CFS_MODULE_PARM(mds_rdpg_num_cpts, "c", charp, 0444,
91                 "CPU partitions MDS readpage threads should run on");
92
93 /* NB: these two should be removed along with setattr service in the future */
94 static unsigned long mds_attr_num_threads;
95 CFS_MODULE_PARM(mds_attr_num_threads, "ul", ulong, 0444,
96                 "number of MDS setattr service threads to start");
97
98 static char *mds_attr_num_cpts;
99 CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444,
100                 "CPU partitions MDS setattr threads should run on");
101
102 /* device init/fini methods */
103 static void mds_stop_ptlrpc_service(struct mds_device *m)
104 {
105         ENTRY;
106         if (m->mds_regular_service != NULL) {
107                 ptlrpc_unregister_service(m->mds_regular_service);
108                 m->mds_regular_service = NULL;
109         }
110         if (m->mds_readpage_service != NULL) {
111                 ptlrpc_unregister_service(m->mds_readpage_service);
112                 m->mds_readpage_service = NULL;
113         }
114         if (m->mds_out_service != NULL) {
115                 ptlrpc_unregister_service(m->mds_out_service);
116                 m->mds_out_service = NULL;
117         }
118         if (m->mds_setattr_service != NULL) {
119                 ptlrpc_unregister_service(m->mds_setattr_service);
120                 m->mds_setattr_service = NULL;
121         }
122         if (m->mds_mdsc_service != NULL) {
123                 ptlrpc_unregister_service(m->mds_mdsc_service);
124                 m->mds_mdsc_service = NULL;
125         }
126         if (m->mds_mdss_service != NULL) {
127                 ptlrpc_unregister_service(m->mds_mdss_service);
128                 m->mds_mdss_service = NULL;
129         }
130         if (m->mds_fld_service != NULL) {
131                 ptlrpc_unregister_service(m->mds_fld_service);
132                 m->mds_fld_service = NULL;
133         }
134         EXIT;
135 }
136
137 static int mds_start_ptlrpc_service(struct mds_device *m)
138 {
139         static struct ptlrpc_service_conf conf;
140         struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd;
141         struct proc_dir_entry *procfs_entry;
142         int rc = 0;
143         ENTRY;
144
145         procfs_entry = obd->obd_proc_entry;
146         LASSERT(procfs_entry != NULL);
147
148         conf = (typeof(conf)) {
149                 .psc_name               = LUSTRE_MDT_NAME,
150                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
151                 .psc_buf                = {
152                         .bc_nbufs               = MDS_NBUFS,
153                         .bc_buf_size            = MDS_REG_BUFSIZE,
154                         .bc_req_max_size        = MDS_REG_MAXREQSIZE,
155                         .bc_rep_max_size        = MDS_REG_MAXREPSIZE,
156                         .bc_req_portal          = MDS_REQUEST_PORTAL,
157                         .bc_rep_portal          = MDC_REPLY_PORTAL,
158                 },
159                 /*
160                  * We'd like to have a mechanism to set this on a per-device
161                  * basis, but alas...
162                  */
163                 .psc_thr                = {
164                         .tc_thr_name            = LUSTRE_MDT_NAME,
165                         .tc_thr_factor          = MDS_THR_FACTOR,
166                         .tc_nthrs_init          = MDS_NTHRS_INIT,
167                         .tc_nthrs_base          = MDS_NTHRS_BASE,
168                         .tc_nthrs_max           = MDS_NTHRS_MAX,
169                         .tc_nthrs_user          = mds_num_threads,
170                         .tc_cpu_affinity        = 1,
171                         .tc_ctx_tags            = LCT_MD_THREAD,
172                 },
173                 .psc_cpt                = {
174                         .cc_pattern             = mds_num_cpts,
175                 },
176                 .psc_ops                = {
177                         .so_req_handler         = tgt_request_handle,
178                         .so_req_printer         = target_print_req,
179                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
180                 },
181         };
182         m->mds_regular_service = ptlrpc_register_service(&conf, procfs_entry);
183         if (IS_ERR(m->mds_regular_service)) {
184                 rc = PTR_ERR(m->mds_regular_service);
185                 CERROR("failed to start regular mdt service: %d\n", rc);
186                 m->mds_regular_service = NULL;
187
188                 RETURN(rc);
189         }
190
191         /*
192          * readpage service configuration. Parameters have to be adjusted,
193          * ideally.
194          */
195         memset(&conf, 0, sizeof(conf));
196         conf = (typeof(conf)) {
197                 .psc_name               = LUSTRE_MDT_NAME "_readpage",
198                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
199                 .psc_buf                = {
200                         .bc_nbufs               = MDS_NBUFS,
201                         .bc_buf_size            = MDS_BUFSIZE,
202                         .bc_req_max_size        = MDS_MAXREQSIZE,
203                         .bc_rep_max_size        = MDS_MAXREPSIZE,
204                         .bc_req_portal          = MDS_READPAGE_PORTAL,
205                         .bc_rep_portal          = MDC_REPLY_PORTAL,
206                 },
207                 .psc_thr                = {
208                         .tc_thr_name            = LUSTRE_MDT_NAME "_rdpg",
209                         .tc_thr_factor          = MDS_RDPG_THR_FACTOR,
210                         .tc_nthrs_init          = MDS_RDPG_NTHRS_INIT,
211                         .tc_nthrs_base          = MDS_RDPG_NTHRS_BASE,
212                         .tc_nthrs_max           = MDS_RDPG_NTHRS_MAX,
213                         .tc_nthrs_user          = mds_rdpg_num_threads,
214                         .tc_cpu_affinity        = 1,
215                         .tc_ctx_tags            = LCT_MD_THREAD,
216                 },
217                 .psc_cpt                = {
218                         .cc_pattern             = mds_rdpg_num_cpts,
219                 },
220                 .psc_ops                = {
221                         .so_req_handler         = tgt_request_handle,
222                         .so_req_printer         = target_print_req,
223                 },
224         };
225         m->mds_readpage_service = ptlrpc_register_service(&conf, procfs_entry);
226         if (IS_ERR(m->mds_readpage_service)) {
227                 rc = PTR_ERR(m->mds_readpage_service);
228                 CERROR("failed to start readpage service: %d\n", rc);
229                 m->mds_readpage_service = NULL;
230
231                 GOTO(err_mds_svc, rc);
232         }
233
234         /*
235          * setattr service configuration.
236          *
237          * XXX To keep the compatibility with old client(< 2.2), we need to
238          * preserve this portal for a certain time, it should be removed
239          * eventually. LU-617.
240          */
241         memset(&conf, 0, sizeof(conf));
242         conf = (typeof(conf)) {
243                 .psc_name               = LUSTRE_MDT_NAME "_setattr",
244                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
245                 .psc_buf                = {
246                         .bc_nbufs               = MDS_NBUFS,
247                         .bc_buf_size            = MDS_BUFSIZE,
248                         .bc_req_max_size        = MDS_MAXREQSIZE,
249                         .bc_rep_max_size        = MDS_LOV_MAXREPSIZE,
250                         .bc_req_portal          = MDS_SETATTR_PORTAL,
251                         .bc_rep_portal          = MDC_REPLY_PORTAL,
252                 },
253                 .psc_thr                = {
254                         .tc_thr_name            = LUSTRE_MDT_NAME "_attr",
255                         .tc_thr_factor          = MDS_SETA_THR_FACTOR,
256                         .tc_nthrs_init          = MDS_SETA_NTHRS_INIT,
257                         .tc_nthrs_base          = MDS_SETA_NTHRS_BASE,
258                         .tc_nthrs_max           = MDS_SETA_NTHRS_MAX,
259                         .tc_nthrs_user          = mds_attr_num_threads,
260                         .tc_cpu_affinity        = 1,
261                         .tc_ctx_tags            = LCT_MD_THREAD,
262                 },
263                 .psc_cpt                = {
264                         .cc_pattern             = mds_attr_num_cpts,
265                 },
266                 .psc_ops                = {
267                         .so_req_handler         = tgt_request_handle,
268                         .so_req_printer         = target_print_req,
269                         .so_hpreq_handler       = NULL,
270                 },
271         };
272         m->mds_setattr_service = ptlrpc_register_service(&conf, procfs_entry);
273         if (IS_ERR(m->mds_setattr_service)) {
274                 rc = PTR_ERR(m->mds_setattr_service);
275                 CERROR("failed to start setattr service: %d\n", rc);
276                 m->mds_setattr_service = NULL;
277
278                 GOTO(err_mds_svc, rc);
279         }
280
281         /* Object update service */
282         conf = (typeof(conf)) {
283                 .psc_name               = LUSTRE_MDT_NAME "_out",
284                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
285                 .psc_buf                = {
286                         .bc_nbufs               = MDS_NBUFS,
287                         .bc_buf_size            = OUT_BUFSIZE,
288                         .bc_req_max_size        = OUT_MAXREQSIZE,
289                         .bc_rep_max_size        = OUT_MAXREPSIZE,
290                         .bc_req_portal          = OUT_PORTAL,
291                         .bc_rep_portal          = OSC_REPLY_PORTAL,
292                 },
293                 /*
294                  * We'd like to have a mechanism to set this on a per-device
295                  * basis, but alas...
296                  */
297                 .psc_thr                = {
298                         .tc_thr_name            = LUSTRE_MDT_NAME "_out",
299                         .tc_thr_factor          = MDS_THR_FACTOR,
300                         .tc_nthrs_init          = MDS_NTHRS_INIT,
301                         .tc_nthrs_base          = MDS_NTHRS_BASE,
302                         .tc_nthrs_max           = MDS_NTHRS_MAX,
303                         .tc_nthrs_user          = mds_num_threads,
304                         .tc_cpu_affinity        = 1,
305                         .tc_ctx_tags            = LCT_MD_THREAD |
306                                                   LCT_DT_THREAD,
307                 },
308                 .psc_cpt                = {
309                         .cc_pattern             = mds_num_cpts,
310                 },
311                 .psc_ops                = {
312                         .so_req_handler         = tgt_request_handle,
313                         .so_req_printer         = target_print_req,
314                         .so_hpreq_handler       = NULL,
315                 },
316         };
317         m->mds_out_service = ptlrpc_register_service(&conf, procfs_entry);
318         if (IS_ERR(m->mds_out_service)) {
319                 rc = PTR_ERR(m->mds_out_service);
320                 CERROR("failed to start out service: %d\n", rc);
321                 m->mds_out_service = NULL;
322                 GOTO(err_mds_svc, rc);
323         }
324
325         /*
326          * sequence controller service configuration
327          */
328         memset(&conf, 0, sizeof(conf));
329         conf = (typeof(conf)) {
330                 .psc_name               = LUSTRE_MDT_NAME "_seqs",
331                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
332                 .psc_buf                = {
333                         .bc_nbufs               = MDS_NBUFS,
334                         .bc_buf_size            = SEQ_BUFSIZE,
335                         .bc_req_max_size        = SEQ_MAXREQSIZE,
336                         .bc_rep_max_size        = SEQ_MAXREPSIZE,
337                         .bc_req_portal          = SEQ_CONTROLLER_PORTAL,
338                         .bc_rep_portal          = MDC_REPLY_PORTAL,
339                 },
340                 .psc_thr                = {
341                         .tc_thr_name            = LUSTRE_MDT_NAME "_seqs",
342                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
343                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
344                         .tc_ctx_tags            = LCT_MD_THREAD,
345                 },
346                 .psc_ops                = {
347                         .so_req_handler         = tgt_request_handle,
348                         .so_req_printer         = target_print_req,
349                         .so_hpreq_handler       = NULL,
350                 },
351         };
352         m->mds_mdsc_service = ptlrpc_register_service(&conf, procfs_entry);
353         if (IS_ERR(m->mds_mdsc_service)) {
354                 rc = PTR_ERR(m->mds_mdsc_service);
355                 CERROR("failed to start seq controller service: %d\n", rc);
356                 m->mds_mdsc_service = NULL;
357
358                 GOTO(err_mds_svc, rc);
359         }
360
361         /*
362          * metadata sequence server service configuration
363          */
364         memset(&conf, 0, sizeof(conf));
365         conf = (typeof(conf)) {
366                 .psc_name               = LUSTRE_MDT_NAME "_seqm",
367                 .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
368                 .psc_buf                = {
369                         .bc_nbufs               = MDS_NBUFS,
370                         .bc_buf_size            = SEQ_BUFSIZE,
371                         .bc_req_max_size        = SEQ_MAXREQSIZE,
372                         .bc_rep_max_size        = SEQ_MAXREPSIZE,
373                         .bc_req_portal          = SEQ_METADATA_PORTAL,
374                         .bc_rep_portal          = MDC_REPLY_PORTAL,
375                 },
376                 .psc_thr                = {
377                         .tc_thr_name            = LUSTRE_MDT_NAME "_seqm",
378                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
379                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
380                         .tc_ctx_tags            = LCT_MD_THREAD | LCT_DT_THREAD
381                 },
382                 .psc_ops                = {
383                         .so_req_handler         = tgt_request_handle,
384                         .so_req_printer         = target_print_req,
385                         .so_hpreq_handler       = NULL,
386                 },
387         };
388         m->mds_mdss_service = ptlrpc_register_service(&conf, procfs_entry);
389         if (IS_ERR(m->mds_mdss_service)) {
390                 rc = PTR_ERR(m->mds_mdss_service);
391                 CERROR("failed to start metadata seq server service: %d\n", rc);
392                 m->mds_mdss_service = NULL;
393
394                 GOTO(err_mds_svc, rc);
395         }
396
397         /* FLD service start */
398         memset(&conf, 0, sizeof(conf));
399         conf = (typeof(conf)) {
400                 .psc_name            = LUSTRE_MDT_NAME "_fld",
401                 .psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
402                 .psc_buf                = {
403                         .bc_nbufs               = MDS_NBUFS,
404                         .bc_buf_size            = FLD_BUFSIZE,
405                         .bc_req_max_size        = FLD_MAXREQSIZE,
406                         .bc_rep_max_size        = FLD_MAXREPSIZE,
407                         .bc_req_portal          = FLD_REQUEST_PORTAL,
408                         .bc_rep_portal          = MDC_REPLY_PORTAL,
409                 },
410                 .psc_thr                = {
411                         .tc_thr_name            = LUSTRE_MDT_NAME "_fld",
412                         .tc_nthrs_init          = MDS_OTHR_NTHRS_INIT,
413                         .tc_nthrs_max           = MDS_OTHR_NTHRS_MAX,
414                         .tc_ctx_tags            = LCT_DT_THREAD | LCT_MD_THREAD,
415                 },
416                 .psc_ops                = {
417                         .so_req_handler         = tgt_request_handle,
418                         .so_req_printer         = target_print_req,
419                         .so_hpreq_handler       = NULL,
420                 },
421         };
422         m->mds_fld_service = ptlrpc_register_service(&conf, procfs_entry);
423         if (IS_ERR(m->mds_fld_service)) {
424                 rc = PTR_ERR(m->mds_fld_service);
425                 CERROR("failed to start fld service: %d\n", rc);
426                 m->mds_fld_service = NULL;
427
428                 GOTO(err_mds_svc, rc);
429         }
430
431         EXIT;
432 err_mds_svc:
433         if (rc)
434                 mds_stop_ptlrpc_service(m);
435
436         return rc;
437 }
438
439 static inline struct mds_device *mds_dev(struct lu_device *d)
440 {
441         return container_of0(d, struct mds_device, mds_md_dev.md_lu_dev);
442 }
443
444 static struct lu_device *mds_device_fini(const struct lu_env *env,
445                                          struct lu_device *d)
446 {
447         struct mds_device *m = mds_dev(d);
448         struct obd_device *obd = d->ld_obd;
449         ENTRY;
450
451         mds_stop_ptlrpc_service(m);
452         lprocfs_obd_cleanup(obd);
453         RETURN(NULL);
454 }
455
456 static struct lu_device *mds_device_free(const struct lu_env *env,
457                                          struct lu_device *d)
458 {
459         struct mds_device *m = mds_dev(d);
460         ENTRY;
461
462         md_device_fini(&m->mds_md_dev);
463         OBD_FREE_PTR(m);
464         RETURN(NULL);
465 }
466
467 LPROC_SEQ_FOPS_RO_TYPE(mds, uuid);
468
469 static struct lprocfs_vars lprocfs_mds_obd_vars[] = {
470         { "uuid",       &mds_uuid_fops  },
471         { NULL }
472 };
473
474 static struct lu_device *mds_device_alloc(const struct lu_env *env,
475                                           struct lu_device_type *t,
476                                           struct lustre_cfg *cfg)
477 {
478         struct mds_device        *m;
479         struct obd_device        *obd;
480         struct lu_device          *l;
481         int rc;
482
483         OBD_ALLOC_PTR(m);
484         if (m == NULL)
485                 return ERR_PTR(-ENOMEM);
486
487         md_device_init(&m->mds_md_dev, t);
488         l = &m->mds_md_dev.md_lu_dev;
489
490         obd = class_name2obd(lustre_cfg_string(cfg, 0));
491         LASSERT(obd != NULL);
492
493         l->ld_obd = obd;
494         /* set this lu_device to obd, because error handling need it */
495         obd->obd_lu_dev = l;
496
497         obd->obd_vars = lprocfs_mds_obd_vars;
498         rc = lprocfs_obd_setup(obd);
499         if (rc != 0) {
500                 mds_device_free(env, l);
501                 l = ERR_PTR(rc);
502                 return l;
503         }
504
505         rc = mds_start_ptlrpc_service(m);
506
507         if (rc != 0) {
508                 mds_device_free(env, l);
509                 l = ERR_PTR(rc);
510                 return l;
511         }
512         return l;
513 }
514
515 /* type constructor/destructor: mdt_type_init, mdt_type_fini */
516 LU_TYPE_INIT_FINI(mds, &mdt_thread_key);
517
518 static struct lu_device_type_operations mds_device_type_ops = {
519         .ldto_init = mds_type_init,
520         .ldto_fini = mds_type_fini,
521
522         .ldto_start = mds_type_start,
523         .ldto_stop  = mds_type_stop,
524
525         .ldto_device_alloc = mds_device_alloc,
526         .ldto_device_free  = mds_device_free,
527         .ldto_device_fini  = mds_device_fini
528 };
529
530 static struct lu_device_type mds_device_type = {
531         .ldt_tags     = LU_DEVICE_MD,
532         .ldt_name     = LUSTRE_MDS_NAME,
533         .ldt_ops      = &mds_device_type_ops,
534         .ldt_ctx_tags = LCT_MD_THREAD
535 };
536
537 static struct obd_ops mds_obd_device_ops = {
538         .o_owner           = THIS_MODULE,
539 };
540
541 int mds_mod_init(void)
542 {
543         if (mdt_num_threads != 0 && mds_num_threads == 0) {
544                 LCONSOLE_INFO("mdt_num_threads module parameter is deprecated, "
545                               "use mds_num_threads instead or unset both for "
546                               "dynamic thread startup\n");
547                 mds_num_threads = mdt_num_threads;
548         }
549
550         return class_register_type(&mds_obd_device_ops, NULL, true, NULL,
551                                    LUSTRE_MDS_NAME, &mds_device_type);
552 }
553
554 void mds_mod_exit(void)
555 {
556         class_unregister_type(LUSTRE_MDS_NAME);
557 }