Whamcloud - gitweb
LU-6245 libcfs: replace CFS_MODULE_PARAM with linux kernel module api
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2015, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ost/ost_handler.c
37  *
38  * Author: Peter J. Braam <braam@clusterfs.com>
39  * Author: Phil Schwan <phil@clusterfs.com>
40  */
41
42 #define DEBUG_SUBSYSTEM S_OST
43
44 #include <linux/module.h>
45 #include <lustre_dlm.h>
46 #include <lprocfs_status.h>
47 #include <obd_class.h>
48 #include "ost_internal.h"
49
50 int oss_max_threads = 512;
51 module_param(oss_max_threads, int, 0444);
52 MODULE_PARM_DESC(oss_max_threads, "maximum number of OSS service threads");
53
54 static int oss_num_threads;
55 module_param(oss_num_threads, int, 0444);
56 MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");
57
58 static int ost_num_threads;
59 module_param(ost_num_threads, int, 0444);
60 MODULE_PARM_DESC(ost_num_threads, "number of OST service threads to start (deprecated)");
61
62 static int oss_num_create_threads;
63 module_param(oss_num_create_threads, int, 0444);
64 MODULE_PARM_DESC(oss_num_create_threads, "number of OSS create threads to start");
65
66 static char *oss_cpts;
67 module_param(oss_cpts, charp, 0444);
68 MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");
69
70 static char *oss_io_cpts;
71 module_param(oss_io_cpts, charp, 0444);
72 MODULE_PARM_DESC(oss_io_cpts, "CPU partitions OSS IO threads should run on");
73
/* obd_timeout scaled by 1000 -- presumably seconds to milliseconds; confirm */
#define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)

/*
 * Private CPT table for the ost_io service.  ost_setup() may allocate it
 * and ost_cleanup() frees it; it stays NULL when the global table is used.
 */
static struct cfs_cpt_table *ost_io_cptable;

#ifdef CONFIG_PROC_FS
LPROC_SEQ_FOPS_RO_TYPE(ost, uuid);

/* /proc entries installed for the OSS device; the { NULL } entry ends it. */
static struct lprocfs_vars lprocfs_ost_obd_vars[] = {
        {
                .name = "uuid",
                .fops = &ost_uuid_fops,
        },
        { NULL }
};
#endif /* CONFIG_PROC_FS */
87
88 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
89 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
90 {
91         static struct ptlrpc_service_conf       svc_conf;
92         struct ost_obd *ost = &obd->u.ost;
93         nodemask_t              *mask;
94         int rc;
95         ENTRY;
96
97 #ifdef CONFIG_PROC_FS
98         obd->obd_vars = lprocfs_ost_obd_vars;
99         lprocfs_obd_setup(obd);
100 #endif
101         mutex_init(&ost->ost_health_mutex);
102
103         svc_conf = (typeof(svc_conf)) {
104                 .psc_name               = LUSTRE_OSS_NAME,
105                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
106                 .psc_buf                = {
107                         .bc_nbufs               = OST_NBUFS,
108                         .bc_buf_size            = OST_BUFSIZE,
109                         .bc_req_max_size        = OST_MAXREQSIZE,
110                         .bc_rep_max_size        = OST_MAXREPSIZE,
111                         .bc_req_portal          = OST_REQUEST_PORTAL,
112                         .bc_rep_portal          = OSC_REPLY_PORTAL,
113                 },
114                 .psc_thr                = {
115                         .tc_thr_name            = "ll_ost",
116                         .tc_thr_factor          = OSS_THR_FACTOR,
117                         .tc_nthrs_init          = OSS_NTHRS_INIT,
118                         .tc_nthrs_base          = OSS_NTHRS_BASE,
119                         .tc_nthrs_max           = oss_max_threads,
120                         .tc_nthrs_user          = oss_num_threads,
121                         .tc_cpu_affinity        = 1,
122                         .tc_ctx_tags            = LCT_DT_THREAD,
123                 },
124                 .psc_cpt                = {
125                         .cc_pattern             = oss_cpts,
126                 },
127                 .psc_ops                = {
128                         .so_req_handler         = tgt_request_handle,
129                         .so_req_printer         = target_print_req,
130                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
131                 },
132         };
133         ost->ost_service = ptlrpc_register_service(&svc_conf,
134                                                    obd->obd_proc_entry);
135         if (IS_ERR(ost->ost_service)) {
136                 rc = PTR_ERR(ost->ost_service);
137                 CERROR("failed to start service: %d\n", rc);
138                 GOTO(out_lprocfs, rc);
139         }
140
141         memset(&svc_conf, 0, sizeof(svc_conf));
142         svc_conf = (typeof(svc_conf)) {
143                 .psc_name               = "ost_create",
144                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
145                 .psc_buf                = {
146                         .bc_nbufs               = OST_NBUFS,
147                         .bc_buf_size            = OST_BUFSIZE,
148                         .bc_req_max_size        = OST_MAXREQSIZE,
149                         .bc_rep_max_size        = OST_MAXREPSIZE,
150                         .bc_req_portal          = OST_CREATE_PORTAL,
151                         .bc_rep_portal          = OSC_REPLY_PORTAL,
152                 },
153                 .psc_thr                = {
154                         .tc_thr_name            = "ll_ost_create",
155                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
156                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
157                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
158                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
159                         .tc_nthrs_user          = oss_num_create_threads,
160                         .tc_cpu_affinity        = 1,
161                         .tc_ctx_tags            = LCT_DT_THREAD,
162                 },
163                 .psc_cpt                = {
164                         .cc_pattern             = oss_cpts,
165                 },
166                 .psc_ops                = {
167                         .so_req_handler         = tgt_request_handle,
168                         .so_req_printer         = target_print_req,
169                 },
170         };
171         ost->ost_create_service = ptlrpc_register_service(&svc_conf,
172                                                           obd->obd_proc_entry);
173         if (IS_ERR(ost->ost_create_service)) {
174                 rc = PTR_ERR(ost->ost_create_service);
175                 CERROR("failed to start OST create service: %d\n", rc);
176                 GOTO(out_service, rc);
177         }
178
179         mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
180         /* event CPT feature is disabled in libcfs level by set partition
181          * number to 1, we still want to set node affinity for io service */
182         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
183                 int     cpt = 0;
184                 int     i;
185
186                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
187                 for_each_node_mask(i, *mask) {
188                         if (ost_io_cptable == NULL) {
189                                 CWARN("OSS failed to create CPT table\n");
190                                 break;
191                         }
192
193                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
194                         if (!rc) {
195                                 CWARN("OSS Failed to set node %d for"
196                                       "IO CPT table\n", i);
197                                 cfs_cpt_table_free(ost_io_cptable);
198                                 ost_io_cptable = NULL;
199                                 break;
200                         }
201                 }
202         }
203
204         memset(&svc_conf, 0, sizeof(svc_conf));
205         svc_conf = (typeof(svc_conf)) {
206                 .psc_name               = "ost_io",
207                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
208                 .psc_buf                = {
209                         .bc_nbufs               = OST_NBUFS,
210                         .bc_buf_size            = OST_IO_BUFSIZE,
211                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
212                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
213                         .bc_req_portal          = OST_IO_PORTAL,
214                         .bc_rep_portal          = OSC_REPLY_PORTAL,
215                 },
216                 .psc_thr                = {
217                         .tc_thr_name            = "ll_ost_io",
218                         .tc_thr_factor          = OSS_THR_FACTOR,
219                         .tc_nthrs_init          = OSS_NTHRS_INIT,
220                         .tc_nthrs_base          = OSS_NTHRS_BASE,
221                         .tc_nthrs_max           = oss_max_threads,
222                         .tc_nthrs_user          = oss_num_threads,
223                         .tc_cpu_affinity        = 1,
224                         .tc_ctx_tags            = LCT_DT_THREAD,
225                 },
226                 .psc_cpt                = {
227                         .cc_cptable             = ost_io_cptable,
228                         .cc_pattern             = ost_io_cptable == NULL ?
229                                                   oss_io_cpts : NULL,
230                 },
231                 .psc_ops                = {
232                         .so_thr_init            = tgt_io_thread_init,
233                         .so_thr_done            = tgt_io_thread_done,
234                         .so_req_handler         = tgt_request_handle,
235                         .so_hpreq_handler       = tgt_hpreq_handler,
236                         .so_req_printer         = target_print_req,
237                 },
238         };
239         ost->ost_io_service = ptlrpc_register_service(&svc_conf,
240                                                       obd->obd_proc_entry);
241         if (IS_ERR(ost->ost_io_service)) {
242                 rc = PTR_ERR(ost->ost_io_service);
243                 CERROR("failed to start OST I/O service: %d\n", rc);
244                 ost->ost_io_service = NULL;
245                 GOTO(out_create, rc);
246         }
247
248         memset(&svc_conf, 0, sizeof(svc_conf));
249         svc_conf = (typeof(svc_conf)) {
250                 .psc_name               = "ost_seq",
251                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
252                 .psc_buf                = {
253                         .bc_nbufs               = OST_NBUFS,
254                         .bc_buf_size            = OST_BUFSIZE,
255                         .bc_req_max_size        = OST_MAXREQSIZE,
256                         .bc_rep_max_size        = OST_MAXREPSIZE,
257                         .bc_req_portal          = SEQ_DATA_PORTAL,
258                         .bc_rep_portal          = OSC_REPLY_PORTAL,
259                 },
260                 .psc_thr                = {
261                         .tc_thr_name            = "ll_ost_seq",
262                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
263                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
264                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
265                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
266                         .tc_nthrs_user          = oss_num_create_threads,
267                         .tc_cpu_affinity        = 1,
268                         .tc_ctx_tags            = LCT_DT_THREAD,
269                 },
270
271                 .psc_cpt                = {
272                         .cc_pattern          = oss_cpts,
273                 },
274                 .psc_ops                = {
275                         .so_req_handler         = tgt_request_handle,
276                         .so_req_printer         = target_print_req,
277                         .so_hpreq_handler       = NULL,
278                 },
279         };
280         ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
281                                                       obd->obd_proc_entry);
282         if (IS_ERR(ost->ost_seq_service)) {
283                 rc = PTR_ERR(ost->ost_seq_service);
284                 CERROR("failed to start OST seq service: %d\n", rc);
285                 ost->ost_seq_service = NULL;
286                 GOTO(out_io, rc);
287         }
288
289         /* Object update service */
290         memset(&svc_conf, 0, sizeof(svc_conf));
291         svc_conf = (typeof(svc_conf)) {
292                 .psc_name               = "ost_out",
293                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
294                 .psc_buf                = {
295                         .bc_nbufs               = OST_NBUFS,
296                         .bc_buf_size            = OUT_BUFSIZE,
297                         .bc_req_max_size        = OUT_MAXREQSIZE,
298                         .bc_rep_max_size        = OUT_MAXREPSIZE,
299                         .bc_req_portal          = OUT_PORTAL,
300                         .bc_rep_portal          = OSC_REPLY_PORTAL,
301                 },
302                 /*
303                  * We'd like to have a mechanism to set this on a per-device
304                  * basis, but alas...
305                  */
306                 .psc_thr                = {
307                         .tc_thr_name            = "ll_ost_out",
308                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
309                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
310                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
311                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
312                         .tc_nthrs_user          = oss_num_create_threads,
313                         .tc_cpu_affinity        = 1,
314                         .tc_ctx_tags            = LCT_MD_THREAD |
315                                                   LCT_DT_THREAD,
316                 },
317                 .psc_cpt                = {
318                         .cc_pattern             = oss_cpts,
319                 },
320                 .psc_ops                = {
321                         .so_req_handler         = tgt_request_handle,
322                         .so_req_printer         = target_print_req,
323                         .so_hpreq_handler       = NULL,
324                 },
325         };
326         ost->ost_out_service = ptlrpc_register_service(&svc_conf,
327                                                        obd->obd_proc_entry);
328         if (IS_ERR(ost->ost_out_service)) {
329                 rc = PTR_ERR(ost->ost_out_service);
330                 CERROR("failed to start out service: %d\n", rc);
331                 ost->ost_out_service = NULL;
332                 GOTO(out_seq, rc);
333         }
334
335         ping_evictor_start();
336
337         RETURN(0);
338
339 out_seq:
340         ptlrpc_unregister_service(ost->ost_seq_service);
341         ost->ost_seq_service = NULL;
342 out_io:
343         ptlrpc_unregister_service(ost->ost_io_service);
344         ost->ost_io_service = NULL;
345 out_create:
346         ptlrpc_unregister_service(ost->ost_create_service);
347         ost->ost_create_service = NULL;
348 out_service:
349         ptlrpc_unregister_service(ost->ost_service);
350         ost->ost_service = NULL;
351 out_lprocfs:
352         lprocfs_obd_cleanup(obd);
353         RETURN(rc);
354 }
355
356 static int ost_cleanup(struct obd_device *obd)
357 {
358         struct ost_obd *ost = &obd->u.ost;
359         int err = 0;
360         ENTRY;
361
362         ping_evictor_stop();
363
364         /* there is no recovery for OST OBD, all recovery is controlled by
365          * obdfilter OBD */
366         LASSERT(obd->obd_recovering == 0);
367         mutex_lock(&ost->ost_health_mutex);
368         ptlrpc_unregister_service(ost->ost_service);
369         ptlrpc_unregister_service(ost->ost_create_service);
370         ptlrpc_unregister_service(ost->ost_io_service);
371         ptlrpc_unregister_service(ost->ost_seq_service);
372         ptlrpc_unregister_service(ost->ost_out_service);
373
374         ost->ost_service = NULL;
375         ost->ost_create_service = NULL;
376         ost->ost_io_service = NULL;
377         ost->ost_seq_service = NULL;
378         ost->ost_out_service = NULL;
379
380         mutex_unlock(&ost->ost_health_mutex);
381
382         lprocfs_obd_cleanup(obd);
383
384         if (ost_io_cptable != NULL) {
385                 cfs_cpt_table_free(ost_io_cptable);
386                 ost_io_cptable = NULL;
387         }
388
389         RETURN(err);
390 }
391
392 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
393 {
394         struct ost_obd *ost = &obd->u.ost;
395         int rc = 0;
396
397         mutex_lock(&ost->ost_health_mutex);
398         rc |= ptlrpc_service_health_check(ost->ost_service);
399         rc |= ptlrpc_service_health_check(ost->ost_create_service);
400         rc |= ptlrpc_service_health_check(ost->ost_io_service);
401         rc |= ptlrpc_service_health_check(ost->ost_seq_service);
402         mutex_unlock(&ost->ost_health_mutex);
403
404         return rc != 0 ? 1 : 0;
405 }
406
407 /* use obd ops to offer management infrastructure */
408 static struct obd_ops ost_obd_ops = {
409         .o_owner        = THIS_MODULE,
410         .o_setup        = ost_setup,
411         .o_cleanup      = ost_cleanup,
412         .o_health_check = ost_health_check,
413 };
414
415
416 static int __init ost_init(void)
417 {
418         int rc;
419
420         ENTRY;
421
422         rc = class_register_type(&ost_obd_ops, NULL, true, NULL,
423                                  LUSTRE_OSS_NAME, NULL);
424
425         if (ost_num_threads != 0 && oss_num_threads == 0) {
426                 LCONSOLE_INFO("ost_num_threads module parameter is deprecated, "
427                               "use oss_num_threads instead or unset both for "
428                               "dynamic thread startup\n");
429                 oss_num_threads = ost_num_threads;
430         }
431
432         RETURN(rc);
433 }
434
435 static void __exit ost_exit(void)
436 {
437         class_unregister_type(LUSTRE_OSS_NAME);
438 }
439
440 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
441 MODULE_DESCRIPTION("Lustre Object Storage Target (OST)");
442 MODULE_VERSION(LUSTRE_VERSION_STRING);
443 MODULE_LICENSE("GPL");
444
445 module_init(ost_init);
446 module_exit(ost_exit);