Whamcloud - gitweb
LU-14475 log: Rewrite some log messages
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/ost/ost_handler.c
32  *
33  * Author: Peter J. Braam <braam@clusterfs.com>
34  * Author: Phil Schwan <phil@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_OST
38
39 #include <linux/module.h>
40 #include <lustre_dlm.h>
41 #include <lprocfs_status.h>
42 #include <obd_class.h>
43 #include "ost_internal.h"
44
45 int oss_max_threads = 512;
46 module_param(oss_max_threads, int, 0444);
47 MODULE_PARM_DESC(oss_max_threads, "maximum number of OSS service threads");
48
49 static int oss_num_threads;
50 module_param(oss_num_threads, int, 0444);
51 MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");
52
53 static unsigned int oss_cpu_bind = 1;
54 module_param(oss_cpu_bind, uint, 0444);
55 MODULE_PARM_DESC(oss_cpu_bind,
56                  "bind OSS service threads to particular CPU partitions");
57
58 static int oss_num_create_threads;
59 module_param(oss_num_create_threads, int, 0444);
60 MODULE_PARM_DESC(oss_num_create_threads,
61                  "number of OSS create threads to start");
62
63 static unsigned int oss_create_cpu_bind = 1;
64 module_param(oss_create_cpu_bind, uint, 0444);
65 MODULE_PARM_DESC(oss_create_cpu_bind,
66                  "bind OSS create threads to particular CPU partitions");
67
68 static char *oss_cpts;
69 module_param(oss_cpts, charp, 0444);
70 MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");
71
72 static char *oss_io_cpts;
73 module_param(oss_io_cpts, charp, 0444);
74 MODULE_PARM_DESC(oss_io_cpts, "CPU partitions OSS IO threads should run on");
75
76 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
77
78 static struct cfs_cpt_table *ost_io_cptable;
79
80 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
81 static int ost_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
82 {
83         static struct ptlrpc_service_conf svc_conf;
84         struct ost_obd *ost = &obd->u.ost;
85         nodemask_t *mask;
86         int rc;
87
88         ENTRY;
89
90         rc = lprocfs_obd_setup(obd, true);
91         if (rc)
92                 return rc;
93
94         mutex_init(&ost->ost_health_mutex);
95
96         svc_conf = (typeof(svc_conf)) {
97                 .psc_name               = LUSTRE_OSS_NAME,
98                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
99                 .psc_buf                = {
100                         .bc_nbufs               = OST_NBUFS,
101                         .bc_buf_size            = OST_BUFSIZE,
102                         .bc_req_max_size        = OST_MAXREQSIZE,
103                         .bc_rep_max_size        = OST_MAXREPSIZE,
104                         .bc_req_portal          = OST_REQUEST_PORTAL,
105                         .bc_rep_portal          = OSC_REPLY_PORTAL,
106                 },
107                 .psc_thr                = {
108                         .tc_thr_name            = "ll_ost",
109                         .tc_thr_factor          = OSS_THR_FACTOR,
110                         .tc_nthrs_init          = OSS_NTHRS_INIT,
111                         .tc_nthrs_base          = OSS_NTHRS_BASE,
112                         .tc_nthrs_max           = oss_max_threads,
113                         .tc_nthrs_user          = oss_num_threads,
114                         .tc_cpu_bind            = oss_cpu_bind,
115                         .tc_ctx_tags            = LCT_DT_THREAD,
116                 },
117                 .psc_cpt                = {
118                         .cc_pattern             = oss_cpts,
119                         .cc_affinity            = true,
120                 },
121                 .psc_ops                = {
122                         .so_req_handler         = tgt_request_handle,
123                         .so_req_printer         = target_print_req,
124                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
125                 },
126         };
127         ost->ost_service = ptlrpc_register_service(&svc_conf,
128                                                    &obd->obd_kset,
129                                                    obd->obd_debugfs_entry);
130         if (IS_ERR(ost->ost_service)) {
131                 rc = PTR_ERR(ost->ost_service);
132                 CERROR("failed to start service: %d\n", rc);
133                 GOTO(out_lprocfs, rc);
134         }
135
136         memset(&svc_conf, 0, sizeof(svc_conf));
137         svc_conf = (typeof(svc_conf)) {
138                 .psc_name               = "ost_create",
139                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
140                 .psc_buf                = {
141                         .bc_nbufs               = OST_NBUFS,
142                         .bc_buf_size            = OST_BUFSIZE,
143                         .bc_req_max_size        = OST_MAXREQSIZE,
144                         .bc_rep_max_size        = OST_MAXREPSIZE,
145                         .bc_req_portal          = OST_CREATE_PORTAL,
146                         .bc_rep_portal          = OSC_REPLY_PORTAL,
147                 },
148                 .psc_thr                = {
149                         .tc_thr_name            = "ll_ost_create",
150                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
151                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
152                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
153                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
154                         .tc_nthrs_user          = oss_num_create_threads,
155                         .tc_cpu_bind            = oss_create_cpu_bind,
156                         .tc_ctx_tags            = LCT_DT_THREAD,
157                 },
158                 .psc_cpt                = {
159                         .cc_pattern             = oss_cpts,
160                         .cc_affinity            = true,
161                 },
162                 .psc_ops                = {
163                         .so_req_handler         = tgt_request_handle,
164                         .so_req_printer         = target_print_req,
165                 },
166         };
167         ost->ost_create_service = ptlrpc_register_service(&svc_conf,
168                                                           &obd->obd_kset,
169                                                           obd->obd_debugfs_entry
170                                                           );
171         if (IS_ERR(ost->ost_create_service)) {
172                 rc = PTR_ERR(ost->ost_create_service);
173                 CERROR("failed to start OST create service: %d\n", rc);
174                 GOTO(out_service, rc);
175         }
176
177         mask = cfs_cpt_nodemask(cfs_cpt_tab, CFS_CPT_ANY);
178         /* event CPT feature is disabled in libcfs level by set partition
179          * number to 1, we still want to set node affinity for io service
180          */
181         if (cfs_cpt_number(cfs_cpt_tab) == 1 && nodes_weight(*mask) > 1) {
182                 int     cpt = 0;
183                 int     i;
184
185                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
186                 for_each_node_mask(i, *mask) {
187                         if (!ost_io_cptable) {
188                                 CWARN("OSS failed to create CPT table\n");
189                                 break;
190                         }
191
192                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
193                         if (!rc) {
194                                 CWARN("OSS Failed to set node %d for IO CPT table\n",
195                                       i);
196                                 cfs_cpt_table_free(ost_io_cptable);
197                                 ost_io_cptable = NULL;
198                                 break;
199                         }
200                 }
201         }
202
203         memset(&svc_conf, 0, sizeof(svc_conf));
204         svc_conf = (typeof(svc_conf)) {
205                 .psc_name               = "ost_io",
206                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
207                 .psc_buf                = {
208                         .bc_nbufs               = OST_NBUFS,
209                         .bc_buf_size            = OST_IO_BUFSIZE,
210                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
211                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
212                         .bc_req_portal          = OST_IO_PORTAL,
213                         .bc_rep_portal          = OSC_REPLY_PORTAL,
214                 },
215                 .psc_thr                = {
216                         .tc_thr_name            = "ll_ost_io",
217                         .tc_thr_factor          = OSS_THR_FACTOR,
218                         .tc_nthrs_init          = OSS_NTHRS_INIT,
219                         .tc_nthrs_base          = OSS_NTHRS_BASE,
220                         .tc_nthrs_max           = oss_max_threads,
221                         .tc_nthrs_user          = oss_num_threads,
222                         .tc_cpu_bind            = oss_cpu_bind,
223                         .tc_ctx_tags            = LCT_DT_THREAD,
224                 },
225                 .psc_cpt                = {
226                         .cc_cptable             = ost_io_cptable,
227                         .cc_pattern             = ost_io_cptable == NULL ?
228                                                   oss_io_cpts : NULL,
229                         .cc_affinity            = true,
230                 },
231                 .psc_ops                = {
232                         .so_thr_init            = tgt_io_thread_init,
233                         .so_thr_done            = tgt_io_thread_done,
234                         .so_req_handler         = tgt_request_handle,
235                         .so_hpreq_handler       = tgt_hpreq_handler,
236                         .so_req_printer         = target_print_req,
237                 },
238         };
239         ost->ost_io_service = ptlrpc_register_service(&svc_conf,
240                                                       &obd->obd_kset,
241                                                       obd->obd_debugfs_entry);
242         if (IS_ERR(ost->ost_io_service)) {
243                 rc = PTR_ERR(ost->ost_io_service);
244                 CERROR("failed to start OST I/O service: %d\n", rc);
245                 ost->ost_io_service = NULL;
246                 GOTO(out_create, rc);
247         }
248
249         memset(&svc_conf, 0, sizeof(svc_conf));
250         svc_conf = (typeof(svc_conf)) {
251                 .psc_name               = "ost_seq",
252                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
253                 .psc_buf                = {
254                         .bc_nbufs               = OST_NBUFS,
255                         .bc_buf_size            = OST_BUFSIZE,
256                         .bc_req_max_size        = OST_MAXREQSIZE,
257                         .bc_rep_max_size        = OST_MAXREPSIZE,
258                         .bc_req_portal          = SEQ_DATA_PORTAL,
259                         .bc_rep_portal          = OSC_REPLY_PORTAL,
260                 },
261                 .psc_thr                = {
262                         .tc_thr_name            = "ll_ost_seq",
263                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
264                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
265                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
266                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
267                         .tc_nthrs_user          = oss_num_create_threads,
268                         .tc_cpu_bind            = oss_create_cpu_bind,
269                         .tc_ctx_tags            = LCT_DT_THREAD,
270                 },
271
272                 .psc_cpt                = {
273                         .cc_pattern             = oss_cpts,
274                         .cc_affinity            = true,
275                 },
276                 .psc_ops                = {
277                         .so_req_handler         = tgt_request_handle,
278                         .so_req_printer         = target_print_req,
279                         .so_hpreq_handler       = NULL,
280                 },
281         };
282         ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
283                                                        &obd->obd_kset,
284                                                        obd->obd_debugfs_entry);
285         if (IS_ERR(ost->ost_seq_service)) {
286                 rc = PTR_ERR(ost->ost_seq_service);
287                 CERROR("failed to start OST seq service: %d\n", rc);
288                 ost->ost_seq_service = NULL;
289                 GOTO(out_io, rc);
290         }
291
292         /* Object update service */
293         memset(&svc_conf, 0, sizeof(svc_conf));
294         svc_conf = (typeof(svc_conf)) {
295                 .psc_name               = "ost_out",
296                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
297                 .psc_buf                = {
298                         .bc_nbufs               = OST_NBUFS,
299                         .bc_buf_size            = OUT_BUFSIZE,
300                         .bc_req_max_size        = OUT_MAXREQSIZE,
301                         .bc_rep_max_size        = OUT_MAXREPSIZE,
302                         .bc_req_portal          = OUT_PORTAL,
303                         .bc_rep_portal          = OSC_REPLY_PORTAL,
304                 },
305                 /*
306                  * We'd like to have a mechanism to set this on a per-device
307                  * basis, but alas...
308                  */
309                 .psc_thr                = {
310                         .tc_thr_name            = "ll_ost_out",
311                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
312                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
313                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
314                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
315                         .tc_nthrs_user          = oss_num_create_threads,
316                         .tc_cpu_bind            = oss_create_cpu_bind,
317                         .tc_ctx_tags            = LCT_MD_THREAD |
318                                                   LCT_DT_THREAD,
319                 },
320                 .psc_cpt                = {
321                         .cc_pattern             = oss_cpts,
322                         .cc_affinity            = true,
323                 },
324                 .psc_ops                = {
325                         .so_req_handler         = tgt_request_handle,
326                         .so_req_printer         = target_print_req,
327                         .so_hpreq_handler       = NULL,
328                 },
329         };
330         ost->ost_out_service = ptlrpc_register_service(&svc_conf,
331                                                        &obd->obd_kset,
332                                                        obd->obd_debugfs_entry);
333         if (IS_ERR(ost->ost_out_service)) {
334                 rc = PTR_ERR(ost->ost_out_service);
335                 CERROR("failed to start out service: %d\n", rc);
336                 ost->ost_out_service = NULL;
337                 GOTO(out_seq, rc);
338         }
339
340         ping_evictor_start();
341
342         RETURN(0);
343
344 out_seq:
345         ptlrpc_unregister_service(ost->ost_seq_service);
346         ost->ost_seq_service = NULL;
347 out_io:
348         ptlrpc_unregister_service(ost->ost_io_service);
349         ost->ost_io_service = NULL;
350 out_create:
351         ptlrpc_unregister_service(ost->ost_create_service);
352         ost->ost_create_service = NULL;
353 out_service:
354         ptlrpc_unregister_service(ost->ost_service);
355         ost->ost_service = NULL;
356 out_lprocfs:
357         lprocfs_obd_cleanup(obd);
358         RETURN(rc);
359 }
360
361 static int ost_cleanup(struct obd_device *obd)
362 {
363         struct ost_obd *ost = &obd->u.ost;
364         int err = 0;
365
366         ENTRY;
367
368         ping_evictor_stop();
369
370         /* there is no recovery for OST OBD, all recovery is controlled by
371          * obdfilter OBD
372          */
373         LASSERT(obd->obd_recovering == 0);
374         mutex_lock(&ost->ost_health_mutex);
375         ptlrpc_unregister_service(ost->ost_service);
376         ptlrpc_unregister_service(ost->ost_create_service);
377         ptlrpc_unregister_service(ost->ost_io_service);
378         ptlrpc_unregister_service(ost->ost_seq_service);
379         ptlrpc_unregister_service(ost->ost_out_service);
380
381         ost->ost_service = NULL;
382         ost->ost_create_service = NULL;
383         ost->ost_io_service = NULL;
384         ost->ost_seq_service = NULL;
385         ost->ost_out_service = NULL;
386
387         mutex_unlock(&ost->ost_health_mutex);
388
389         lprocfs_obd_cleanup(obd);
390
391         if (ost_io_cptable) {
392                 cfs_cpt_table_free(ost_io_cptable);
393                 ost_io_cptable = NULL;
394         }
395
396         RETURN(err);
397 }
398
399 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
400 {
401         struct ost_obd *ost = &obd->u.ost;
402         int rc = 0;
403
404         mutex_lock(&ost->ost_health_mutex);
405         rc |= ptlrpc_service_health_check(ost->ost_service);
406         rc |= ptlrpc_service_health_check(ost->ost_create_service);
407         rc |= ptlrpc_service_health_check(ost->ost_io_service);
408         rc |= ptlrpc_service_health_check(ost->ost_seq_service);
409         mutex_unlock(&ost->ost_health_mutex);
410
411         return rc != 0 ? 1 : 0;
412 }
413
414 /* use obd ops to offer management infrastructure */
415 static const struct obd_ops ost_obd_ops = {
416         .o_owner        = THIS_MODULE,
417         .o_setup        = ost_setup,
418         .o_cleanup      = ost_cleanup,
419         .o_health_check = ost_health_check,
420 };
421
422 static int __init ost_init(void)
423 {
424         int rc;
425
426         ENTRY;
427
428         rc = class_register_type(&ost_obd_ops, NULL, false,
429                                  LUSTRE_OSS_NAME, NULL);
430
431         RETURN(rc);
432 }
433
434 static void __exit ost_exit(void)
435 {
436         class_unregister_type(LUSTRE_OSS_NAME);
437 }
438
439 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
440 MODULE_DESCRIPTION("Lustre Object Storage Target (OST)");
441 MODULE_VERSION(LUSTRE_VERSION_STRING);
442 MODULE_LICENSE("GPL");
443
444 module_init(ost_init);
445 module_exit(ost_exit);