Whamcloud - gitweb
LU-6175 ha: add health_check routine to the MDS, MGS and OSD
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2014, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ost/ost_handler.c
37  *
38  * Author: Peter J. Braam <braam@clusterfs.com>
39  * Author: Phil Schwan <phil@clusterfs.com>
40  */
41
42 #define DEBUG_SUBSYSTEM S_OST
43
44 #include <linux/module.h>
45 #include <lustre_dlm.h>
46 #include <lprocfs_status.h>
47 #include <obd_class.h>
48 #include "ost_internal.h"
49
50 static int oss_num_threads;
51 CFS_MODULE_PARM(oss_num_threads, "i", int, 0444,
52                 "number of OSS service threads to start");
53
54 static int ost_num_threads;
55 CFS_MODULE_PARM(ost_num_threads, "i", int, 0444,
56                 "number of OST service threads to start (deprecated)");
57
58 static int oss_num_create_threads;
59 CFS_MODULE_PARM(oss_num_create_threads, "i", int, 0444,
60                 "number of OSS create threads to start");
61
62 static char *oss_cpts;
63 CFS_MODULE_PARM(oss_cpts, "s", charp, 0444,
64                 "CPU partitions OSS threads should run on");
65
66 static char *oss_io_cpts;
67 CFS_MODULE_PARM(oss_io_cpts, "s", charp, 0444,
68                 "CPU partitions OSS IO threads should run on");
69
70 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
71
72 static struct cfs_cpt_table     *ost_io_cptable;
73
#ifdef CONFIG_PROC_FS
/* Generates the read-only seq_file operations referenced below as ost_uuid_fops. */
LPROC_SEQ_FOPS_RO_TYPE(ost, uuid);

/*
 * procfs entries published for the OSS obd device; installed by ost_setup()
 * through obd->obd_vars.  The table ends with a NULL sentinel entry.
 */
static struct lprocfs_vars lprocfs_ost_obd_vars[] = {
	{
		.name	= "uuid",
		.fops	= &ost_uuid_fops,
	},
	{ NULL }
};
#endif /* CONFIG_PROC_FS */
83
84 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
85 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
86 {
87         static struct ptlrpc_service_conf       svc_conf;
88         struct ost_obd *ost = &obd->u.ost;
89         nodemask_t              *mask;
90         int rc;
91         ENTRY;
92
93 #ifdef CONFIG_PROC_FS
94         obd->obd_vars = lprocfs_ost_obd_vars;
95         lprocfs_obd_setup(obd);
96 #endif
97         mutex_init(&ost->ost_health_mutex);
98
99         svc_conf = (typeof(svc_conf)) {
100                 .psc_name               = LUSTRE_OSS_NAME,
101                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
102                 .psc_buf                = {
103                         .bc_nbufs               = OST_NBUFS,
104                         .bc_buf_size            = OST_BUFSIZE,
105                         .bc_req_max_size        = OST_MAXREQSIZE,
106                         .bc_rep_max_size        = OST_MAXREPSIZE,
107                         .bc_req_portal          = OST_REQUEST_PORTAL,
108                         .bc_rep_portal          = OSC_REPLY_PORTAL,
109                 },
110                 .psc_thr                = {
111                         .tc_thr_name            = "ll_ost",
112                         .tc_thr_factor          = OSS_THR_FACTOR,
113                         .tc_nthrs_init          = OSS_NTHRS_INIT,
114                         .tc_nthrs_base          = OSS_NTHRS_BASE,
115                         .tc_nthrs_max           = OSS_NTHRS_MAX,
116                         .tc_nthrs_user          = oss_num_threads,
117                         .tc_cpu_affinity        = 1,
118                         .tc_ctx_tags            = LCT_DT_THREAD,
119                 },
120                 .psc_cpt                = {
121                         .cc_pattern             = oss_cpts,
122                 },
123                 .psc_ops                = {
124                         .so_req_handler         = tgt_request_handle,
125                         .so_req_printer         = target_print_req,
126                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
127                 },
128         };
129         ost->ost_service = ptlrpc_register_service(&svc_conf,
130                                                    obd->obd_proc_entry);
131         if (IS_ERR(ost->ost_service)) {
132                 rc = PTR_ERR(ost->ost_service);
133                 CERROR("failed to start service: %d\n", rc);
134                 GOTO(out_lprocfs, rc);
135         }
136
137         memset(&svc_conf, 0, sizeof(svc_conf));
138         svc_conf = (typeof(svc_conf)) {
139                 .psc_name               = "ost_create",
140                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
141                 .psc_buf                = {
142                         .bc_nbufs               = OST_NBUFS,
143                         .bc_buf_size            = OST_BUFSIZE,
144                         .bc_req_max_size        = OST_MAXREQSIZE,
145                         .bc_rep_max_size        = OST_MAXREPSIZE,
146                         .bc_req_portal          = OST_CREATE_PORTAL,
147                         .bc_rep_portal          = OSC_REPLY_PORTAL,
148                 },
149                 .psc_thr                = {
150                         .tc_thr_name            = "ll_ost_create",
151                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
152                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
153                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
154                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
155                         .tc_nthrs_user          = oss_num_create_threads,
156                         .tc_cpu_affinity        = 1,
157                         .tc_ctx_tags            = LCT_DT_THREAD,
158                 },
159                 .psc_cpt                = {
160                         .cc_pattern             = oss_cpts,
161                 },
162                 .psc_ops                = {
163                         .so_req_handler         = tgt_request_handle,
164                         .so_req_printer         = target_print_req,
165                 },
166         };
167         ost->ost_create_service = ptlrpc_register_service(&svc_conf,
168                                                           obd->obd_proc_entry);
169         if (IS_ERR(ost->ost_create_service)) {
170                 rc = PTR_ERR(ost->ost_create_service);
171                 CERROR("failed to start OST create service: %d\n", rc);
172                 GOTO(out_service, rc);
173         }
174
175         mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
176         /* event CPT feature is disabled in libcfs level by set partition
177          * number to 1, we still want to set node affinity for io service */
178         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
179                 int     cpt = 0;
180                 int     i;
181
182                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
183                 for_each_node_mask(i, *mask) {
184                         if (ost_io_cptable == NULL) {
185                                 CWARN("OSS failed to create CPT table\n");
186                                 break;
187                         }
188
189                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
190                         if (!rc) {
191                                 CWARN("OSS Failed to set node %d for"
192                                       "IO CPT table\n", i);
193                                 cfs_cpt_table_free(ost_io_cptable);
194                                 ost_io_cptable = NULL;
195                                 break;
196                         }
197                 }
198         }
199
200         memset(&svc_conf, 0, sizeof(svc_conf));
201         svc_conf = (typeof(svc_conf)) {
202                 .psc_name               = "ost_io",
203                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
204                 .psc_buf                = {
205                         .bc_nbufs               = OST_NBUFS,
206                         .bc_buf_size            = OST_IO_BUFSIZE,
207                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
208                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
209                         .bc_req_portal          = OST_IO_PORTAL,
210                         .bc_rep_portal          = OSC_REPLY_PORTAL,
211                 },
212                 .psc_thr                = {
213                         .tc_thr_name            = "ll_ost_io",
214                         .tc_thr_factor          = OSS_THR_FACTOR,
215                         .tc_nthrs_init          = OSS_NTHRS_INIT,
216                         .tc_nthrs_base          = OSS_NTHRS_BASE,
217                         .tc_nthrs_max           = OSS_NTHRS_MAX,
218                         .tc_nthrs_user          = oss_num_threads,
219                         .tc_cpu_affinity        = 1,
220                         .tc_ctx_tags            = LCT_DT_THREAD,
221                 },
222                 .psc_cpt                = {
223                         .cc_cptable             = ost_io_cptable,
224                         .cc_pattern             = ost_io_cptable == NULL ?
225                                                   oss_io_cpts : NULL,
226                 },
227                 .psc_ops                = {
228                         .so_thr_init            = tgt_io_thread_init,
229                         .so_thr_done            = tgt_io_thread_done,
230                         .so_req_handler         = tgt_request_handle,
231                         .so_hpreq_handler       = tgt_hpreq_handler,
232                         .so_req_printer         = target_print_req,
233                 },
234         };
235         ost->ost_io_service = ptlrpc_register_service(&svc_conf,
236                                                       obd->obd_proc_entry);
237         if (IS_ERR(ost->ost_io_service)) {
238                 rc = PTR_ERR(ost->ost_io_service);
239                 CERROR("failed to start OST I/O service: %d\n", rc);
240                 ost->ost_io_service = NULL;
241                 GOTO(out_create, rc);
242         }
243
244         memset(&svc_conf, 0, sizeof(svc_conf));
245         svc_conf = (typeof(svc_conf)) {
246                 .psc_name               = "ost_seq",
247                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
248                 .psc_buf                = {
249                         .bc_nbufs               = OST_NBUFS,
250                         .bc_buf_size            = OST_BUFSIZE,
251                         .bc_req_max_size        = OST_MAXREQSIZE,
252                         .bc_rep_max_size        = OST_MAXREPSIZE,
253                         .bc_req_portal          = SEQ_DATA_PORTAL,
254                         .bc_rep_portal          = OSC_REPLY_PORTAL,
255                 },
256                 .psc_thr                = {
257                         .tc_thr_name            = "ll_ost_seq",
258                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
259                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
260                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
261                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
262                         .tc_nthrs_user          = oss_num_create_threads,
263                         .tc_cpu_affinity        = 1,
264                         .tc_ctx_tags            = LCT_DT_THREAD,
265                 },
266
267                 .psc_cpt                = {
268                         .cc_pattern          = oss_cpts,
269                 },
270                 .psc_ops                = {
271                         .so_req_handler         = tgt_request_handle,
272                         .so_req_printer         = target_print_req,
273                         .so_hpreq_handler       = NULL,
274                 },
275         };
276         ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
277                                                       obd->obd_proc_entry);
278         if (IS_ERR(ost->ost_seq_service)) {
279                 rc = PTR_ERR(ost->ost_seq_service);
280                 CERROR("failed to start OST seq service: %d\n", rc);
281                 ost->ost_seq_service = NULL;
282                 GOTO(out_io, rc);
283         }
284
285         /* Object update service */
286         memset(&svc_conf, 0, sizeof(svc_conf));
287         svc_conf = (typeof(svc_conf)) {
288                 .psc_name               = "ost_out",
289                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
290                 .psc_buf                = {
291                         .bc_nbufs               = OST_NBUFS,
292                         .bc_buf_size            = OUT_BUFSIZE,
293                         .bc_req_max_size        = OUT_MAXREQSIZE,
294                         .bc_rep_max_size        = OUT_MAXREPSIZE,
295                         .bc_req_portal          = OUT_PORTAL,
296                         .bc_rep_portal          = OSC_REPLY_PORTAL,
297                 },
298                 /*
299                  * We'd like to have a mechanism to set this on a per-device
300                  * basis, but alas...
301                  */
302                 .psc_thr                = {
303                         .tc_thr_name            = "ll_ost_out",
304                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
305                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
306                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
307                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
308                         .tc_nthrs_user          = oss_num_create_threads,
309                         .tc_cpu_affinity        = 1,
310                         .tc_ctx_tags            = LCT_MD_THREAD |
311                                                   LCT_DT_THREAD,
312                 },
313                 .psc_cpt                = {
314                         .cc_pattern             = oss_cpts,
315                 },
316                 .psc_ops                = {
317                         .so_req_handler         = tgt_request_handle,
318                         .so_req_printer         = target_print_req,
319                         .so_hpreq_handler       = NULL,
320                 },
321         };
322         ost->ost_out_service = ptlrpc_register_service(&svc_conf,
323                                                        obd->obd_proc_entry);
324         if (IS_ERR(ost->ost_out_service)) {
325                 rc = PTR_ERR(ost->ost_out_service);
326                 CERROR("failed to start out service: %d\n", rc);
327                 ost->ost_out_service = NULL;
328                 GOTO(out_seq, rc);
329         }
330
331         ping_evictor_start();
332
333         RETURN(0);
334
335 out_seq:
336         ptlrpc_unregister_service(ost->ost_seq_service);
337         ost->ost_seq_service = NULL;
338 out_io:
339         ptlrpc_unregister_service(ost->ost_io_service);
340         ost->ost_io_service = NULL;
341 out_create:
342         ptlrpc_unregister_service(ost->ost_create_service);
343         ost->ost_create_service = NULL;
344 out_service:
345         ptlrpc_unregister_service(ost->ost_service);
346         ost->ost_service = NULL;
347 out_lprocfs:
348         lprocfs_obd_cleanup(obd);
349         RETURN(rc);
350 }
351
352 static int ost_cleanup(struct obd_device *obd)
353 {
354         struct ost_obd *ost = &obd->u.ost;
355         int err = 0;
356         ENTRY;
357
358         ping_evictor_stop();
359
360         /* there is no recovery for OST OBD, all recovery is controlled by
361          * obdfilter OBD */
362         LASSERT(obd->obd_recovering == 0);
363         mutex_lock(&ost->ost_health_mutex);
364         ptlrpc_unregister_service(ost->ost_service);
365         ptlrpc_unregister_service(ost->ost_create_service);
366         ptlrpc_unregister_service(ost->ost_io_service);
367         ptlrpc_unregister_service(ost->ost_seq_service);
368         ptlrpc_unregister_service(ost->ost_out_service);
369
370         ost->ost_service = NULL;
371         ost->ost_create_service = NULL;
372         ost->ost_io_service = NULL;
373         ost->ost_seq_service = NULL;
374         ost->ost_out_service = NULL;
375
376         mutex_unlock(&ost->ost_health_mutex);
377
378         lprocfs_obd_cleanup(obd);
379
380         if (ost_io_cptable != NULL) {
381                 cfs_cpt_table_free(ost_io_cptable);
382                 ost_io_cptable = NULL;
383         }
384
385         RETURN(err);
386 }
387
388 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
389 {
390         struct ost_obd *ost = &obd->u.ost;
391         int rc = 0;
392
393         mutex_lock(&ost->ost_health_mutex);
394         rc |= ptlrpc_service_health_check(ost->ost_service);
395         rc |= ptlrpc_service_health_check(ost->ost_create_service);
396         rc |= ptlrpc_service_health_check(ost->ost_io_service);
397         rc |= ptlrpc_service_health_check(ost->ost_seq_service);
398         mutex_unlock(&ost->ost_health_mutex);
399
400         return rc != 0 ? 1 : 0;
401 }
402
403 /* use obd ops to offer management infrastructure */
404 static struct obd_ops ost_obd_ops = {
405         .o_owner        = THIS_MODULE,
406         .o_setup        = ost_setup,
407         .o_cleanup      = ost_cleanup,
408         .o_health_check = ost_health_check,
409 };
410
411
412 static int __init ost_init(void)
413 {
414         int rc;
415
416         ENTRY;
417
418         rc = class_register_type(&ost_obd_ops, NULL, true, NULL,
419                                  LUSTRE_OSS_NAME, NULL);
420
421         if (ost_num_threads != 0 && oss_num_threads == 0) {
422                 LCONSOLE_INFO("ost_num_threads module parameter is deprecated, "
423                               "use oss_num_threads instead or unset both for "
424                               "dynamic thread startup\n");
425                 oss_num_threads = ost_num_threads;
426         }
427
428         RETURN(rc);
429 }
430
431 static void /*__exit*/ ost_exit(void)
432 {
433         class_unregister_type(LUSTRE_OSS_NAME);
434 }
435
436 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
437 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
438 MODULE_LICENSE("GPL");
439
440 module_init(ost_init);
441 module_exit(ost_exit);