Whamcloud - gitweb
LU-8066 obdclass : Add infrastructure for procfs to sysfs migration
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/ost/ost_handler.c
33  *
34  * Author: Peter J. Braam <braam@clusterfs.com>
35  * Author: Phil Schwan <phil@clusterfs.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_OST
39
40 #include <linux/module.h>
41 #include <lustre_dlm.h>
42 #include <lprocfs_status.h>
43 #include <obd_class.h>
44 #include "ost_internal.h"
45
46 int oss_max_threads = 512;
47 module_param(oss_max_threads, int, 0444);
48 MODULE_PARM_DESC(oss_max_threads, "maximum number of OSS service threads");
49
50 static int oss_num_threads;
51 module_param(oss_num_threads, int, 0444);
52 MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");
53
54 static int oss_num_create_threads;
55 module_param(oss_num_create_threads, int, 0444);
56 MODULE_PARM_DESC(oss_num_create_threads, "number of OSS create threads to start");
57
58 static char *oss_cpts;
59 module_param(oss_cpts, charp, 0444);
60 MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");
61
62 static char *oss_io_cpts;
63 module_param(oss_io_cpts, charp, 0444);
64 MODULE_PARM_DESC(oss_io_cpts, "CPU partitions OSS IO threads should run on");
65
66 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
67
68 static struct cfs_cpt_table     *ost_io_cptable;
69
70 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
71 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
72 {
73         static struct ptlrpc_service_conf       svc_conf;
74         struct ost_obd *ost = &obd->u.ost;
75         nodemask_t              *mask;
76         int rc;
77         ENTRY;
78
79         lprocfs_obd_setup(obd, true);
80
81         mutex_init(&ost->ost_health_mutex);
82
83         svc_conf = (typeof(svc_conf)) {
84                 .psc_name               = LUSTRE_OSS_NAME,
85                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
86                 .psc_buf                = {
87                         .bc_nbufs               = OST_NBUFS,
88                         .bc_buf_size            = OST_BUFSIZE,
89                         .bc_req_max_size        = OST_MAXREQSIZE,
90                         .bc_rep_max_size        = OST_MAXREPSIZE,
91                         .bc_req_portal          = OST_REQUEST_PORTAL,
92                         .bc_rep_portal          = OSC_REPLY_PORTAL,
93                 },
94                 .psc_thr                = {
95                         .tc_thr_name            = "ll_ost",
96                         .tc_thr_factor          = OSS_THR_FACTOR,
97                         .tc_nthrs_init          = OSS_NTHRS_INIT,
98                         .tc_nthrs_base          = OSS_NTHRS_BASE,
99                         .tc_nthrs_max           = oss_max_threads,
100                         .tc_nthrs_user          = oss_num_threads,
101                         .tc_cpu_affinity        = 1,
102                         .tc_ctx_tags            = LCT_DT_THREAD,
103                 },
104                 .psc_cpt                = {
105                         .cc_pattern             = oss_cpts,
106                 },
107                 .psc_ops                = {
108                         .so_req_handler         = tgt_request_handle,
109                         .so_req_printer         = target_print_req,
110                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
111                 },
112         };
113         ost->ost_service = ptlrpc_register_service(&svc_conf,
114                                                    obd->obd_proc_entry);
115         if (IS_ERR(ost->ost_service)) {
116                 rc = PTR_ERR(ost->ost_service);
117                 CERROR("failed to start service: %d\n", rc);
118                 GOTO(out_lprocfs, rc);
119         }
120
121         memset(&svc_conf, 0, sizeof(svc_conf));
122         svc_conf = (typeof(svc_conf)) {
123                 .psc_name               = "ost_create",
124                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
125                 .psc_buf                = {
126                         .bc_nbufs               = OST_NBUFS,
127                         .bc_buf_size            = OST_BUFSIZE,
128                         .bc_req_max_size        = OST_MAXREQSIZE,
129                         .bc_rep_max_size        = OST_MAXREPSIZE,
130                         .bc_req_portal          = OST_CREATE_PORTAL,
131                         .bc_rep_portal          = OSC_REPLY_PORTAL,
132                 },
133                 .psc_thr                = {
134                         .tc_thr_name            = "ll_ost_create",
135                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
136                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
137                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
138                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
139                         .tc_nthrs_user          = oss_num_create_threads,
140                         .tc_cpu_affinity        = 1,
141                         .tc_ctx_tags            = LCT_DT_THREAD,
142                 },
143                 .psc_cpt                = {
144                         .cc_pattern             = oss_cpts,
145                 },
146                 .psc_ops                = {
147                         .so_req_handler         = tgt_request_handle,
148                         .so_req_printer         = target_print_req,
149                 },
150         };
151         ost->ost_create_service = ptlrpc_register_service(&svc_conf,
152                                                           obd->obd_proc_entry);
153         if (IS_ERR(ost->ost_create_service)) {
154                 rc = PTR_ERR(ost->ost_create_service);
155                 CERROR("failed to start OST create service: %d\n", rc);
156                 GOTO(out_service, rc);
157         }
158
159         mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
160         /* event CPT feature is disabled in libcfs level by set partition
161          * number to 1, we still want to set node affinity for io service */
162         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
163                 int     cpt = 0;
164                 int     i;
165
166                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
167                 for_each_node_mask(i, *mask) {
168                         if (ost_io_cptable == NULL) {
169                                 CWARN("OSS failed to create CPT table\n");
170                                 break;
171                         }
172
173                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
174                         if (!rc) {
175                                 CWARN("OSS Failed to set node %d for"
176                                       "IO CPT table\n", i);
177                                 cfs_cpt_table_free(ost_io_cptable);
178                                 ost_io_cptable = NULL;
179                                 break;
180                         }
181                 }
182         }
183
184         memset(&svc_conf, 0, sizeof(svc_conf));
185         svc_conf = (typeof(svc_conf)) {
186                 .psc_name               = "ost_io",
187                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
188                 .psc_buf                = {
189                         .bc_nbufs               = OST_NBUFS,
190                         .bc_buf_size            = OST_IO_BUFSIZE,
191                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
192                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
193                         .bc_req_portal          = OST_IO_PORTAL,
194                         .bc_rep_portal          = OSC_REPLY_PORTAL,
195                 },
196                 .psc_thr                = {
197                         .tc_thr_name            = "ll_ost_io",
198                         .tc_thr_factor          = OSS_THR_FACTOR,
199                         .tc_nthrs_init          = OSS_NTHRS_INIT,
200                         .tc_nthrs_base          = OSS_NTHRS_BASE,
201                         .tc_nthrs_max           = oss_max_threads,
202                         .tc_nthrs_user          = oss_num_threads,
203                         .tc_cpu_affinity        = 1,
204                         .tc_ctx_tags            = LCT_DT_THREAD,
205                 },
206                 .psc_cpt                = {
207                         .cc_cptable             = ost_io_cptable,
208                         .cc_pattern             = ost_io_cptable == NULL ?
209                                                   oss_io_cpts : NULL,
210                 },
211                 .psc_ops                = {
212                         .so_thr_init            = tgt_io_thread_init,
213                         .so_thr_done            = tgt_io_thread_done,
214                         .so_req_handler         = tgt_request_handle,
215                         .so_hpreq_handler       = tgt_hpreq_handler,
216                         .so_req_printer         = target_print_req,
217                 },
218         };
219         ost->ost_io_service = ptlrpc_register_service(&svc_conf,
220                                                       obd->obd_proc_entry);
221         if (IS_ERR(ost->ost_io_service)) {
222                 rc = PTR_ERR(ost->ost_io_service);
223                 CERROR("failed to start OST I/O service: %d\n", rc);
224                 ost->ost_io_service = NULL;
225                 GOTO(out_create, rc);
226         }
227
228         memset(&svc_conf, 0, sizeof(svc_conf));
229         svc_conf = (typeof(svc_conf)) {
230                 .psc_name               = "ost_seq",
231                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
232                 .psc_buf                = {
233                         .bc_nbufs               = OST_NBUFS,
234                         .bc_buf_size            = OST_BUFSIZE,
235                         .bc_req_max_size        = OST_MAXREQSIZE,
236                         .bc_rep_max_size        = OST_MAXREPSIZE,
237                         .bc_req_portal          = SEQ_DATA_PORTAL,
238                         .bc_rep_portal          = OSC_REPLY_PORTAL,
239                 },
240                 .psc_thr                = {
241                         .tc_thr_name            = "ll_ost_seq",
242                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
243                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
244                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
245                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
246                         .tc_nthrs_user          = oss_num_create_threads,
247                         .tc_cpu_affinity        = 1,
248                         .tc_ctx_tags            = LCT_DT_THREAD,
249                 },
250
251                 .psc_cpt                = {
252                         .cc_pattern          = oss_cpts,
253                 },
254                 .psc_ops                = {
255                         .so_req_handler         = tgt_request_handle,
256                         .so_req_printer         = target_print_req,
257                         .so_hpreq_handler       = NULL,
258                 },
259         };
260         ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
261                                                       obd->obd_proc_entry);
262         if (IS_ERR(ost->ost_seq_service)) {
263                 rc = PTR_ERR(ost->ost_seq_service);
264                 CERROR("failed to start OST seq service: %d\n", rc);
265                 ost->ost_seq_service = NULL;
266                 GOTO(out_io, rc);
267         }
268
269         /* Object update service */
270         memset(&svc_conf, 0, sizeof(svc_conf));
271         svc_conf = (typeof(svc_conf)) {
272                 .psc_name               = "ost_out",
273                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
274                 .psc_buf                = {
275                         .bc_nbufs               = OST_NBUFS,
276                         .bc_buf_size            = OUT_BUFSIZE,
277                         .bc_req_max_size        = OUT_MAXREQSIZE,
278                         .bc_rep_max_size        = OUT_MAXREPSIZE,
279                         .bc_req_portal          = OUT_PORTAL,
280                         .bc_rep_portal          = OSC_REPLY_PORTAL,
281                 },
282                 /*
283                  * We'd like to have a mechanism to set this on a per-device
284                  * basis, but alas...
285                  */
286                 .psc_thr                = {
287                         .tc_thr_name            = "ll_ost_out",
288                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
289                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
290                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
291                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
292                         .tc_nthrs_user          = oss_num_create_threads,
293                         .tc_cpu_affinity        = 1,
294                         .tc_ctx_tags            = LCT_MD_THREAD |
295                                                   LCT_DT_THREAD,
296                 },
297                 .psc_cpt                = {
298                         .cc_pattern             = oss_cpts,
299                 },
300                 .psc_ops                = {
301                         .so_req_handler         = tgt_request_handle,
302                         .so_req_printer         = target_print_req,
303                         .so_hpreq_handler       = NULL,
304                 },
305         };
306         ost->ost_out_service = ptlrpc_register_service(&svc_conf,
307                                                        obd->obd_proc_entry);
308         if (IS_ERR(ost->ost_out_service)) {
309                 rc = PTR_ERR(ost->ost_out_service);
310                 CERROR("failed to start out service: %d\n", rc);
311                 ost->ost_out_service = NULL;
312                 GOTO(out_seq, rc);
313         }
314
315         ping_evictor_start();
316
317         RETURN(0);
318
319 out_seq:
320         ptlrpc_unregister_service(ost->ost_seq_service);
321         ost->ost_seq_service = NULL;
322 out_io:
323         ptlrpc_unregister_service(ost->ost_io_service);
324         ost->ost_io_service = NULL;
325 out_create:
326         ptlrpc_unregister_service(ost->ost_create_service);
327         ost->ost_create_service = NULL;
328 out_service:
329         ptlrpc_unregister_service(ost->ost_service);
330         ost->ost_service = NULL;
331 out_lprocfs:
332         lprocfs_obd_cleanup(obd);
333         RETURN(rc);
334 }
335
336 static int ost_cleanup(struct obd_device *obd)
337 {
338         struct ost_obd *ost = &obd->u.ost;
339         int err = 0;
340         ENTRY;
341
342         ping_evictor_stop();
343
344         /* there is no recovery for OST OBD, all recovery is controlled by
345          * obdfilter OBD */
346         LASSERT(obd->obd_recovering == 0);
347         mutex_lock(&ost->ost_health_mutex);
348         ptlrpc_unregister_service(ost->ost_service);
349         ptlrpc_unregister_service(ost->ost_create_service);
350         ptlrpc_unregister_service(ost->ost_io_service);
351         ptlrpc_unregister_service(ost->ost_seq_service);
352         ptlrpc_unregister_service(ost->ost_out_service);
353
354         ost->ost_service = NULL;
355         ost->ost_create_service = NULL;
356         ost->ost_io_service = NULL;
357         ost->ost_seq_service = NULL;
358         ost->ost_out_service = NULL;
359
360         mutex_unlock(&ost->ost_health_mutex);
361
362         lprocfs_obd_cleanup(obd);
363
364         if (ost_io_cptable != NULL) {
365                 cfs_cpt_table_free(ost_io_cptable);
366                 ost_io_cptable = NULL;
367         }
368
369         RETURN(err);
370 }
371
372 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
373 {
374         struct ost_obd *ost = &obd->u.ost;
375         int rc = 0;
376
377         mutex_lock(&ost->ost_health_mutex);
378         rc |= ptlrpc_service_health_check(ost->ost_service);
379         rc |= ptlrpc_service_health_check(ost->ost_create_service);
380         rc |= ptlrpc_service_health_check(ost->ost_io_service);
381         rc |= ptlrpc_service_health_check(ost->ost_seq_service);
382         mutex_unlock(&ost->ost_health_mutex);
383
384         return rc != 0 ? 1 : 0;
385 }
386
387 /* use obd ops to offer management infrastructure */
388 static struct obd_ops ost_obd_ops = {
389         .o_owner        = THIS_MODULE,
390         .o_setup        = ost_setup,
391         .o_cleanup      = ost_cleanup,
392         .o_health_check = ost_health_check,
393 };
394
395
396 static int __init ost_init(void)
397 {
398         int rc;
399
400         ENTRY;
401
402         rc = class_register_type(&ost_obd_ops, NULL, true, NULL,
403                                  LUSTRE_OSS_NAME, NULL);
404
405         RETURN(rc);
406 }
407
408 static void __exit ost_exit(void)
409 {
410         class_unregister_type(LUSTRE_OSS_NAME);
411 }
412
413 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
414 MODULE_DESCRIPTION("Lustre Object Storage Target (OST)");
415 MODULE_VERSION(LUSTRE_VERSION_STRING);
416 MODULE_LICENSE("GPL");
417
418 module_init(ost_init);
419 module_exit(ost_exit);