Whamcloud - gitweb
LU-8066 ptlrpc: migrate ptlrpc proc files to sysfs
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/ost/ost_handler.c
33  *
34  * Author: Peter J. Braam <braam@clusterfs.com>
35  * Author: Phil Schwan <phil@clusterfs.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_OST
39
40 #include <linux/module.h>
41 #include <lustre_dlm.h>
42 #include <lprocfs_status.h>
43 #include <obd_class.h>
44 #include "ost_internal.h"
45
46 int oss_max_threads = 512;
47 module_param(oss_max_threads, int, 0444);
48 MODULE_PARM_DESC(oss_max_threads, "maximum number of OSS service threads");
49
50 static int oss_num_threads;
51 module_param(oss_num_threads, int, 0444);
52 MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");
53
54 static int oss_num_create_threads;
55 module_param(oss_num_create_threads, int, 0444);
56 MODULE_PARM_DESC(oss_num_create_threads, "number of OSS create threads to start");
57
58 static char *oss_cpts;
59 module_param(oss_cpts, charp, 0444);
60 MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");
61
62 static char *oss_io_cpts;
63 module_param(oss_io_cpts, charp, 0444);
64 MODULE_PARM_DESC(oss_io_cpts, "CPU partitions OSS IO threads should run on");
65
66 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
67
68 static struct cfs_cpt_table     *ost_io_cptable;
69
70 static struct kset *oss_kset;
71
72 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
73 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
74 {
75         static struct ptlrpc_service_conf       svc_conf;
76         struct ost_obd *ost = &obd->u.ost;
77         nodemask_t              *mask;
78         int rc;
79         ENTRY;
80
81         rc = lprocfs_kset_register(obd, &oss_kset);
82         if (rc)
83                 return rc;
84
85         mutex_init(&ost->ost_health_mutex);
86
87         svc_conf = (typeof(svc_conf)) {
88                 .psc_name               = LUSTRE_OSS_NAME,
89                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
90                 .psc_buf                = {
91                         .bc_nbufs               = OST_NBUFS,
92                         .bc_buf_size            = OST_BUFSIZE,
93                         .bc_req_max_size        = OST_MAXREQSIZE,
94                         .bc_rep_max_size        = OST_MAXREPSIZE,
95                         .bc_req_portal          = OST_REQUEST_PORTAL,
96                         .bc_rep_portal          = OSC_REPLY_PORTAL,
97                 },
98                 .psc_thr                = {
99                         .tc_thr_name            = "ll_ost",
100                         .tc_thr_factor          = OSS_THR_FACTOR,
101                         .tc_nthrs_init          = OSS_NTHRS_INIT,
102                         .tc_nthrs_base          = OSS_NTHRS_BASE,
103                         .tc_nthrs_max           = oss_max_threads,
104                         .tc_nthrs_user          = oss_num_threads,
105                         .tc_cpu_affinity        = 1,
106                         .tc_ctx_tags            = LCT_DT_THREAD,
107                 },
108                 .psc_cpt                = {
109                         .cc_pattern             = oss_cpts,
110                 },
111                 .psc_ops                = {
112                         .so_req_handler         = tgt_request_handle,
113                         .so_req_printer         = target_print_req,
114                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
115                 },
116         };
117         ost->ost_service = ptlrpc_register_service(&svc_conf, oss_kset,
118                                                    obd->obd_proc_entry);
119         if (IS_ERR(ost->ost_service)) {
120                 rc = PTR_ERR(ost->ost_service);
121                 CERROR("failed to start service: %d\n", rc);
122                 GOTO(out_lprocfs, rc);
123         }
124
125         memset(&svc_conf, 0, sizeof(svc_conf));
126         svc_conf = (typeof(svc_conf)) {
127                 .psc_name               = "ost_create",
128                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
129                 .psc_buf                = {
130                         .bc_nbufs               = OST_NBUFS,
131                         .bc_buf_size            = OST_BUFSIZE,
132                         .bc_req_max_size        = OST_MAXREQSIZE,
133                         .bc_rep_max_size        = OST_MAXREPSIZE,
134                         .bc_req_portal          = OST_CREATE_PORTAL,
135                         .bc_rep_portal          = OSC_REPLY_PORTAL,
136                 },
137                 .psc_thr                = {
138                         .tc_thr_name            = "ll_ost_create",
139                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
140                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
141                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
142                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
143                         .tc_nthrs_user          = oss_num_create_threads,
144                         .tc_cpu_affinity        = 1,
145                         .tc_ctx_tags            = LCT_DT_THREAD,
146                 },
147                 .psc_cpt                = {
148                         .cc_pattern             = oss_cpts,
149                 },
150                 .psc_ops                = {
151                         .so_req_handler         = tgt_request_handle,
152                         .so_req_printer         = target_print_req,
153                 },
154         };
155         ost->ost_create_service = ptlrpc_register_service(&svc_conf, oss_kset,
156                                                           obd->obd_proc_entry);
157         if (IS_ERR(ost->ost_create_service)) {
158                 rc = PTR_ERR(ost->ost_create_service);
159                 CERROR("failed to start OST create service: %d\n", rc);
160                 GOTO(out_service, rc);
161         }
162
163         mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
164         /* event CPT feature is disabled in libcfs level by set partition
165          * number to 1, we still want to set node affinity for io service */
166         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
167                 int     cpt = 0;
168                 int     i;
169
170                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
171                 for_each_node_mask(i, *mask) {
172                         if (ost_io_cptable == NULL) {
173                                 CWARN("OSS failed to create CPT table\n");
174                                 break;
175                         }
176
177                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
178                         if (!rc) {
179                                 CWARN("OSS Failed to set node %d for"
180                                       "IO CPT table\n", i);
181                                 cfs_cpt_table_free(ost_io_cptable);
182                                 ost_io_cptable = NULL;
183                                 break;
184                         }
185                 }
186         }
187
188         memset(&svc_conf, 0, sizeof(svc_conf));
189         svc_conf = (typeof(svc_conf)) {
190                 .psc_name               = "ost_io",
191                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
192                 .psc_buf                = {
193                         .bc_nbufs               = OST_NBUFS,
194                         .bc_buf_size            = OST_IO_BUFSIZE,
195                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
196                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
197                         .bc_req_portal          = OST_IO_PORTAL,
198                         .bc_rep_portal          = OSC_REPLY_PORTAL,
199                 },
200                 .psc_thr                = {
201                         .tc_thr_name            = "ll_ost_io",
202                         .tc_thr_factor          = OSS_THR_FACTOR,
203                         .tc_nthrs_init          = OSS_NTHRS_INIT,
204                         .tc_nthrs_base          = OSS_NTHRS_BASE,
205                         .tc_nthrs_max           = oss_max_threads,
206                         .tc_nthrs_user          = oss_num_threads,
207                         .tc_cpu_affinity        = 1,
208                         .tc_ctx_tags            = LCT_DT_THREAD,
209                 },
210                 .psc_cpt                = {
211                         .cc_cptable             = ost_io_cptable,
212                         .cc_pattern             = ost_io_cptable == NULL ?
213                                                   oss_io_cpts : NULL,
214                 },
215                 .psc_ops                = {
216                         .so_thr_init            = tgt_io_thread_init,
217                         .so_thr_done            = tgt_io_thread_done,
218                         .so_req_handler         = tgt_request_handle,
219                         .so_hpreq_handler       = tgt_hpreq_handler,
220                         .so_req_printer         = target_print_req,
221                 },
222         };
223         ost->ost_io_service = ptlrpc_register_service(&svc_conf, oss_kset,
224                                                       obd->obd_proc_entry);
225         if (IS_ERR(ost->ost_io_service)) {
226                 rc = PTR_ERR(ost->ost_io_service);
227                 CERROR("failed to start OST I/O service: %d\n", rc);
228                 ost->ost_io_service = NULL;
229                 GOTO(out_create, rc);
230         }
231
232         memset(&svc_conf, 0, sizeof(svc_conf));
233         svc_conf = (typeof(svc_conf)) {
234                 .psc_name               = "ost_seq",
235                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
236                 .psc_buf                = {
237                         .bc_nbufs               = OST_NBUFS,
238                         .bc_buf_size            = OST_BUFSIZE,
239                         .bc_req_max_size        = OST_MAXREQSIZE,
240                         .bc_rep_max_size        = OST_MAXREPSIZE,
241                         .bc_req_portal          = SEQ_DATA_PORTAL,
242                         .bc_rep_portal          = OSC_REPLY_PORTAL,
243                 },
244                 .psc_thr                = {
245                         .tc_thr_name            = "ll_ost_seq",
246                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
247                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
248                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
249                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
250                         .tc_nthrs_user          = oss_num_create_threads,
251                         .tc_cpu_affinity        = 1,
252                         .tc_ctx_tags            = LCT_DT_THREAD,
253                 },
254
255                 .psc_cpt                = {
256                         .cc_pattern          = oss_cpts,
257                 },
258                 .psc_ops                = {
259                         .so_req_handler         = tgt_request_handle,
260                         .so_req_printer         = target_print_req,
261                         .so_hpreq_handler       = NULL,
262                 },
263         };
264         ost->ost_seq_service = ptlrpc_register_service(&svc_conf, oss_kset,
265                                                       obd->obd_proc_entry);
266         if (IS_ERR(ost->ost_seq_service)) {
267                 rc = PTR_ERR(ost->ost_seq_service);
268                 CERROR("failed to start OST seq service: %d\n", rc);
269                 ost->ost_seq_service = NULL;
270                 GOTO(out_io, rc);
271         }
272
273         /* Object update service */
274         memset(&svc_conf, 0, sizeof(svc_conf));
275         svc_conf = (typeof(svc_conf)) {
276                 .psc_name               = "ost_out",
277                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
278                 .psc_buf                = {
279                         .bc_nbufs               = OST_NBUFS,
280                         .bc_buf_size            = OUT_BUFSIZE,
281                         .bc_req_max_size        = OUT_MAXREQSIZE,
282                         .bc_rep_max_size        = OUT_MAXREPSIZE,
283                         .bc_req_portal          = OUT_PORTAL,
284                         .bc_rep_portal          = OSC_REPLY_PORTAL,
285                 },
286                 /*
287                  * We'd like to have a mechanism to set this on a per-device
288                  * basis, but alas...
289                  */
290                 .psc_thr                = {
291                         .tc_thr_name            = "ll_ost_out",
292                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
293                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
294                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
295                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
296                         .tc_nthrs_user          = oss_num_create_threads,
297                         .tc_cpu_affinity        = 1,
298                         .tc_ctx_tags            = LCT_MD_THREAD |
299                                                   LCT_DT_THREAD,
300                 },
301                 .psc_cpt                = {
302                         .cc_pattern             = oss_cpts,
303                 },
304                 .psc_ops                = {
305                         .so_req_handler         = tgt_request_handle,
306                         .so_req_printer         = target_print_req,
307                         .so_hpreq_handler       = NULL,
308                 },
309         };
310         ost->ost_out_service = ptlrpc_register_service(&svc_conf, oss_kset,
311                                                        obd->obd_proc_entry);
312         if (IS_ERR(ost->ost_out_service)) {
313                 rc = PTR_ERR(ost->ost_out_service);
314                 CERROR("failed to start out service: %d\n", rc);
315                 ost->ost_out_service = NULL;
316                 GOTO(out_seq, rc);
317         }
318
319         ping_evictor_start();
320
321         RETURN(0);
322
323 out_seq:
324         ptlrpc_unregister_service(ost->ost_seq_service);
325         ost->ost_seq_service = NULL;
326 out_io:
327         ptlrpc_unregister_service(ost->ost_io_service);
328         ost->ost_io_service = NULL;
329 out_create:
330         ptlrpc_unregister_service(ost->ost_create_service);
331         ost->ost_create_service = NULL;
332 out_service:
333         ptlrpc_unregister_service(ost->ost_service);
334         ost->ost_service = NULL;
335 out_lprocfs:
336         lprocfs_kset_unregister(obd, oss_kset);
337         RETURN(rc);
338 }
339
340 static int ost_cleanup(struct obd_device *obd)
341 {
342         struct ost_obd *ost = &obd->u.ost;
343         int err = 0;
344         ENTRY;
345
346         ping_evictor_stop();
347
348         /* there is no recovery for OST OBD, all recovery is controlled by
349          * obdfilter OBD */
350         LASSERT(obd->obd_recovering == 0);
351         mutex_lock(&ost->ost_health_mutex);
352         ptlrpc_unregister_service(ost->ost_service);
353         ptlrpc_unregister_service(ost->ost_create_service);
354         ptlrpc_unregister_service(ost->ost_io_service);
355         ptlrpc_unregister_service(ost->ost_seq_service);
356         ptlrpc_unregister_service(ost->ost_out_service);
357
358         ost->ost_service = NULL;
359         ost->ost_create_service = NULL;
360         ost->ost_io_service = NULL;
361         ost->ost_seq_service = NULL;
362         ost->ost_out_service = NULL;
363
364         mutex_unlock(&ost->ost_health_mutex);
365
366         lprocfs_kset_unregister(obd, oss_kset);
367
368         if (ost_io_cptable != NULL) {
369                 cfs_cpt_table_free(ost_io_cptable);
370                 ost_io_cptable = NULL;
371         }
372
373         RETURN(err);
374 }
375
376 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
377 {
378         struct ost_obd *ost = &obd->u.ost;
379         int rc = 0;
380
381         mutex_lock(&ost->ost_health_mutex);
382         rc |= ptlrpc_service_health_check(ost->ost_service);
383         rc |= ptlrpc_service_health_check(ost->ost_create_service);
384         rc |= ptlrpc_service_health_check(ost->ost_io_service);
385         rc |= ptlrpc_service_health_check(ost->ost_seq_service);
386         mutex_unlock(&ost->ost_health_mutex);
387
388         return rc != 0 ? 1 : 0;
389 }
390
391 /* use obd ops to offer management infrastructure */
392 static struct obd_ops ost_obd_ops = {
393         .o_owner        = THIS_MODULE,
394         .o_setup        = ost_setup,
395         .o_cleanup      = ost_cleanup,
396         .o_health_check = ost_health_check,
397 };
398
399
400 static int __init ost_init(void)
401 {
402         int rc;
403
404         ENTRY;
405
406         rc = class_register_type(&ost_obd_ops, NULL, true, NULL,
407                                  LUSTRE_OSS_NAME, NULL);
408
409         RETURN(rc);
410 }
411
412 static void __exit ost_exit(void)
413 {
414         class_unregister_type(LUSTRE_OSS_NAME);
415 }
416
417 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
418 MODULE_DESCRIPTION("Lustre Object Storage Target (OST)");
419 MODULE_VERSION(LUSTRE_VERSION_STRING);
420 MODULE_LICENSE("GPL");
421
422 module_init(ost_init);
423 module_exit(ost_exit);