Whamcloud - gitweb
LU-3467 target: generic hpreq handler in target
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ost/ost_handler.c
37  *
38  * Author: Peter J. Braam <braam@clusterfs.com>
39  * Author: Phil Schwan <phil@clusterfs.com>
40  */
41
42 #define DEBUG_SUBSYSTEM S_OST
43
44 #include <linux/module.h>
45 #include <obd_ost.h>
46 #include <lustre_dlm.h>
47 #include <lprocfs_status.h>
48 #include "ost_internal.h"
49
50 static int oss_num_threads;
51 CFS_MODULE_PARM(oss_num_threads, "i", int, 0444,
52                 "number of OSS service threads to start");
53
54 static int ost_num_threads;
55 CFS_MODULE_PARM(ost_num_threads, "i", int, 0444,
56                 "number of OST service threads to start (deprecated)");
57
58 static int oss_num_create_threads;
59 CFS_MODULE_PARM(oss_num_create_threads, "i", int, 0444,
60                 "number of OSS create threads to start");
61
62 static char *oss_cpts;
63 CFS_MODULE_PARM(oss_cpts, "s", charp, 0444,
64                 "CPU partitions OSS threads should run on");
65
66 static char *oss_io_cpts;
67 CFS_MODULE_PARM(oss_io_cpts, "s", charp, 0444,
68                 "CPU partitions OSS IO threads should run on");
69
70 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
71
72 static struct cfs_cpt_table     *ost_io_cptable;
73
#ifdef LPROCFS
/* Generates the read-only seq-file operations referenced as ost_uuid_fops. */
LPROC_SEQ_FOPS_RO_TYPE(ost, uuid);

/*
 * proc entries published for the OSS obd device; installed by ost_setup()
 * through obd->obd_vars.  The zeroed entry terminates the table.
 */
static struct lprocfs_seq_vars lprocfs_ost_obd_vars[] = {
        { "uuid", &ost_uuid_fops },
        { 0 }
};
#endif /* LPROCFS */
82
83 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
84 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
85 {
86         static struct ptlrpc_service_conf       svc_conf;
87         struct ost_obd *ost = &obd->u.ost;
88         nodemask_t              *mask;
89         int rc;
90         ENTRY;
91
92         rc = cfs_cleanup_group_info();
93         if (rc)
94                 RETURN(rc);
95
96 #ifdef LPROCFS
97         obd->obd_vars = lprocfs_ost_obd_vars;
98         lprocfs_seq_obd_setup(obd);
99 #endif
100         mutex_init(&ost->ost_health_mutex);
101
102         svc_conf = (typeof(svc_conf)) {
103                 .psc_name               = LUSTRE_OSS_NAME,
104                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
105                 .psc_buf                = {
106                         .bc_nbufs               = OST_NBUFS,
107                         .bc_buf_size            = OST_BUFSIZE,
108                         .bc_req_max_size        = OST_MAXREQSIZE,
109                         .bc_rep_max_size        = OST_MAXREPSIZE,
110                         .bc_req_portal          = OST_REQUEST_PORTAL,
111                         .bc_rep_portal          = OSC_REPLY_PORTAL,
112                 },
113                 .psc_thr                = {
114                         .tc_thr_name            = "ll_ost",
115                         .tc_thr_factor          = OSS_THR_FACTOR,
116                         .tc_nthrs_init          = OSS_NTHRS_INIT,
117                         .tc_nthrs_base          = OSS_NTHRS_BASE,
118                         .tc_nthrs_max           = OSS_NTHRS_MAX,
119                         .tc_nthrs_user          = oss_num_threads,
120                         .tc_cpu_affinity        = 1,
121                         .tc_ctx_tags            = LCT_DT_THREAD,
122                 },
123                 .psc_cpt                = {
124                         .cc_pattern             = oss_cpts,
125                 },
126                 .psc_ops                = {
127                         .so_req_handler         = tgt_request_handle,
128                         .so_req_printer         = target_print_req,
129                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
130                 },
131         };
132         ost->ost_service = ptlrpc_register_service(&svc_conf,
133                                                    obd->obd_proc_entry);
134         if (IS_ERR(ost->ost_service)) {
135                 rc = PTR_ERR(ost->ost_service);
136                 CERROR("failed to start service: %d\n", rc);
137                 GOTO(out_lprocfs, rc);
138         }
139
140         memset(&svc_conf, 0, sizeof(svc_conf));
141         svc_conf = (typeof(svc_conf)) {
142                 .psc_name               = "ost_create",
143                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
144                 .psc_buf                = {
145                         .bc_nbufs               = OST_NBUFS,
146                         .bc_buf_size            = OST_BUFSIZE,
147                         .bc_req_max_size        = OST_MAXREQSIZE,
148                         .bc_rep_max_size        = OST_MAXREPSIZE,
149                         .bc_req_portal          = OST_CREATE_PORTAL,
150                         .bc_rep_portal          = OSC_REPLY_PORTAL,
151                 },
152                 .psc_thr                = {
153                         .tc_thr_name            = "ll_ost_create",
154                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
155                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
156                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
157                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
158                         .tc_nthrs_user          = oss_num_create_threads,
159                         .tc_cpu_affinity        = 1,
160                         .tc_ctx_tags            = LCT_DT_THREAD,
161                 },
162                 .psc_cpt                = {
163                         .cc_pattern             = oss_cpts,
164                 },
165                 .psc_ops                = {
166                         .so_req_handler         = tgt_request_handle,
167                         .so_req_printer         = target_print_req,
168                 },
169         };
170         ost->ost_create_service = ptlrpc_register_service(&svc_conf,
171                                                           obd->obd_proc_entry);
172         if (IS_ERR(ost->ost_create_service)) {
173                 rc = PTR_ERR(ost->ost_create_service);
174                 CERROR("failed to start OST create service: %d\n", rc);
175                 GOTO(out_service, rc);
176         }
177
178         mask = cfs_cpt_table->ctb_nodemask;
179         /* event CPT feature is disabled in libcfs level by set partition
180          * number to 1, we still want to set node affinity for io service */
181         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
182                 int     cpt = 0;
183                 int     i;
184
185                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
186                 for_each_node_mask(i, *mask) {
187                         if (ost_io_cptable == NULL) {
188                                 CWARN("OSS failed to create CPT table\n");
189                                 break;
190                         }
191
192                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
193                         if (!rc) {
194                                 CWARN("OSS Failed to set node %d for"
195                                       "IO CPT table\n", i);
196                                 cfs_cpt_table_free(ost_io_cptable);
197                                 ost_io_cptable = NULL;
198                                 break;
199                         }
200                 }
201         }
202
203         memset(&svc_conf, 0, sizeof(svc_conf));
204         svc_conf = (typeof(svc_conf)) {
205                 .psc_name               = "ost_io",
206                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
207                 .psc_buf                = {
208                         .bc_nbufs               = OST_NBUFS,
209                         .bc_buf_size            = OST_IO_BUFSIZE,
210                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
211                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
212                         .bc_req_portal          = OST_IO_PORTAL,
213                         .bc_rep_portal          = OSC_REPLY_PORTAL,
214                 },
215                 .psc_thr                = {
216                         .tc_thr_name            = "ll_ost_io",
217                         .tc_thr_factor          = OSS_THR_FACTOR,
218                         .tc_nthrs_init          = OSS_NTHRS_INIT,
219                         .tc_nthrs_base          = OSS_NTHRS_BASE,
220                         .tc_nthrs_max           = OSS_NTHRS_MAX,
221                         .tc_nthrs_user          = oss_num_threads,
222                         .tc_cpu_affinity        = 1,
223                         .tc_ctx_tags            = LCT_DT_THREAD,
224                 },
225                 .psc_cpt                = {
226                         .cc_cptable             = ost_io_cptable,
227                         .cc_pattern             = ost_io_cptable == NULL ?
228                                                   oss_io_cpts : NULL,
229                 },
230                 .psc_ops                = {
231                         .so_thr_init            = tgt_io_thread_init,
232                         .so_thr_done            = tgt_io_thread_done,
233                         .so_req_handler         = tgt_request_handle,
234                         .so_hpreq_handler       = tgt_hpreq_handler,
235                         .so_req_printer         = target_print_req,
236                 },
237         };
238         ost->ost_io_service = ptlrpc_register_service(&svc_conf,
239                                                       obd->obd_proc_entry);
240         if (IS_ERR(ost->ost_io_service)) {
241                 rc = PTR_ERR(ost->ost_io_service);
242                 CERROR("failed to start OST I/O service: %d\n", rc);
243                 ost->ost_io_service = NULL;
244                 GOTO(out_create, rc);
245         }
246
247         memset(&svc_conf, 0, sizeof(svc_conf));
248         svc_conf = (typeof(svc_conf)) {
249                 .psc_name               = "ost_seq",
250                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
251                 .psc_buf                = {
252                         .bc_nbufs               = OST_NBUFS,
253                         .bc_buf_size            = OST_BUFSIZE,
254                         .bc_req_max_size        = OST_MAXREQSIZE,
255                         .bc_rep_max_size        = OST_MAXREPSIZE,
256                         .bc_req_portal          = SEQ_DATA_PORTAL,
257                         .bc_rep_portal          = OSC_REPLY_PORTAL,
258                 },
259                 .psc_thr                = {
260                         .tc_thr_name            = "ll_ost_seq",
261                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
262                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
263                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
264                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
265                         .tc_nthrs_user          = oss_num_create_threads,
266                         .tc_cpu_affinity        = 1,
267                         .tc_ctx_tags            = LCT_DT_THREAD,
268                 },
269
270                 .psc_cpt                = {
271                         .cc_pattern          = oss_cpts,
272                 },
273                 .psc_ops                = {
274                         .so_req_handler         = tgt_request_handle,
275                         .so_req_printer         = target_print_req,
276                         .so_hpreq_handler       = NULL,
277                 },
278         };
279         ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
280                                                       obd->obd_proc_entry);
281         if (IS_ERR(ost->ost_seq_service)) {
282                 rc = PTR_ERR(ost->ost_seq_service);
283                 CERROR("failed to start OST seq service: %d\n", rc);
284                 ost->ost_seq_service = NULL;
285                 GOTO(out_io, rc);
286         }
287
288         /* Object update service */
289         memset(&svc_conf, 0, sizeof(svc_conf));
290         svc_conf = (typeof(svc_conf)) {
291                 .psc_name               = "ost_out",
292                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
293                 .psc_buf                = {
294                         .bc_nbufs               = OST_NBUFS,
295                         .bc_buf_size            = OUT_BUFSIZE,
296                         .bc_req_max_size        = OUT_MAXREQSIZE,
297                         .bc_rep_max_size        = OUT_MAXREPSIZE,
298                         .bc_req_portal          = OUT_PORTAL,
299                         .bc_rep_portal          = OSC_REPLY_PORTAL,
300                 },
301                 /*
302                  * We'd like to have a mechanism to set this on a per-device
303                  * basis, but alas...
304                  */
305                 .psc_thr                = {
306                         .tc_thr_name            = "ll_ost_out",
307                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
308                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
309                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
310                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
311                         .tc_nthrs_user          = oss_num_create_threads,
312                         .tc_cpu_affinity        = 1,
313                         .tc_ctx_tags            = LCT_MD_THREAD |
314                                                   LCT_DT_THREAD,
315                 },
316                 .psc_cpt                = {
317                         .cc_pattern             = oss_cpts,
318                 },
319                 .psc_ops                = {
320                         .so_req_handler         = tgt_request_handle,
321                         .so_req_printer         = target_print_req,
322                         .so_hpreq_handler       = NULL,
323                 },
324         };
325         ost->ost_out_service = ptlrpc_register_service(&svc_conf,
326                                                        obd->obd_proc_entry);
327         if (IS_ERR(ost->ost_out_service)) {
328                 rc = PTR_ERR(ost->ost_out_service);
329                 CERROR("failed to start out service: %d\n", rc);
330                 ost->ost_out_service = NULL;
331                 GOTO(out_seq, rc);
332         }
333
334         ping_evictor_start();
335
336         RETURN(0);
337 out_seq:
338         ptlrpc_unregister_service(ost->ost_seq_service);
339         ost->ost_seq_service = NULL;
340 out_io:
341         ptlrpc_unregister_service(ost->ost_io_service);
342         ost->ost_io_service = NULL;
343 out_create:
344         ptlrpc_unregister_service(ost->ost_create_service);
345         ost->ost_create_service = NULL;
346 out_service:
347         ptlrpc_unregister_service(ost->ost_service);
348         ost->ost_service = NULL;
349 out_lprocfs:
350         lprocfs_obd_cleanup(obd);
351         RETURN(rc);
352 }
353
354 static int ost_cleanup(struct obd_device *obd)
355 {
356         struct ost_obd *ost = &obd->u.ost;
357         int err = 0;
358         ENTRY;
359
360         ping_evictor_stop();
361
362         /* there is no recovery for OST OBD, all recovery is controlled by
363          * obdfilter OBD */
364         LASSERT(obd->obd_recovering == 0);
365         mutex_lock(&ost->ost_health_mutex);
366         ptlrpc_unregister_service(ost->ost_service);
367         ptlrpc_unregister_service(ost->ost_create_service);
368         ptlrpc_unregister_service(ost->ost_io_service);
369         ptlrpc_unregister_service(ost->ost_seq_service);
370         ptlrpc_unregister_service(ost->ost_out_service);
371
372         ost->ost_service = NULL;
373         ost->ost_create_service = NULL;
374         ost->ost_io_service = NULL;
375         ost->ost_seq_service = NULL;
376         ost->ost_out_service = NULL;
377
378         mutex_unlock(&ost->ost_health_mutex);
379
380         lprocfs_obd_cleanup(obd);
381
382         if (ost_io_cptable != NULL) {
383                 cfs_cpt_table_free(ost_io_cptable);
384                 ost_io_cptable = NULL;
385         }
386
387         RETURN(err);
388 }
389
390 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
391 {
392         struct ost_obd *ost = &obd->u.ost;
393         int rc = 0;
394
395         mutex_lock(&ost->ost_health_mutex);
396         rc |= ptlrpc_service_health_check(ost->ost_service);
397         rc |= ptlrpc_service_health_check(ost->ost_create_service);
398         rc |= ptlrpc_service_health_check(ost->ost_io_service);
399         mutex_unlock(&ost->ost_health_mutex);
400
401         /*
402          * health_check to return 0 on healthy
403          * and 1 on unhealthy.
404          */
405         if( rc != 0)
406                 rc = 1;
407
408         return rc;
409 }
410
411 /* use obd ops to offer management infrastructure */
412 static struct obd_ops ost_obd_ops = {
413         .o_owner        = THIS_MODULE,
414         .o_setup        = ost_setup,
415         .o_cleanup      = ost_cleanup,
416         .o_health_check = ost_health_check,
417 };
418
419
420 static int __init ost_init(void)
421 {
422         int rc;
423
424         ENTRY;
425
426         rc = class_register_type(&ost_obd_ops, NULL, NULL,
427 #ifndef HAVE_ONLY_PROCFS_SEQ
428                                 NULL,
429 #endif
430                                 LUSTRE_OSS_NAME, NULL);
431
432         if (ost_num_threads != 0 && oss_num_threads == 0) {
433                 LCONSOLE_INFO("ost_num_threads module parameter is deprecated, "
434                               "use oss_num_threads instead or unset both for "
435                               "dynamic thread startup\n");
436                 oss_num_threads = ost_num_threads;
437         }
438
439         RETURN(rc);
440 }
441
442 static void /*__exit*/ ost_exit(void)
443 {
444         class_unregister_type(LUSTRE_OSS_NAME);
445 }
446
447 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
448 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
449 MODULE_LICENSE("GPL");
450
451 module_init(ost_init);
452 module_exit(ost_exit);