Whamcloud - gitweb
4592fe4c554afbeb4f463be5195ba92a7d76f2d0
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/ost/ost_handler.c
33  *
34  * Author: Peter J. Braam <braam@clusterfs.com>
35  * Author: Phil Schwan <phil@clusterfs.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_OST
39
40 #include <linux/module.h>
41 #include <lustre_dlm.h>
42 #include <lprocfs_status.h>
43 #include <obd_class.h>
44 #include "ost_internal.h"
45
46 int oss_max_threads = 512;
47 module_param(oss_max_threads, int, 0444);
48 MODULE_PARM_DESC(oss_max_threads, "maximum number of OSS service threads");
49
50 static int oss_num_threads;
51 module_param(oss_num_threads, int, 0444);
52 MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");
53
54 static int oss_num_create_threads;
55 module_param(oss_num_create_threads, int, 0444);
56 MODULE_PARM_DESC(oss_num_create_threads, "number of OSS create threads to start");
57
58 static char *oss_cpts;
59 module_param(oss_cpts, charp, 0444);
60 MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");
61
62 static char *oss_io_cpts;
63 module_param(oss_io_cpts, charp, 0444);
64 MODULE_PARM_DESC(oss_io_cpts, "CPU partitions OSS IO threads should run on");
65
66 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
67
68 static struct cfs_cpt_table     *ost_io_cptable;
69
#ifdef CONFIG_PROC_FS
/* Generates ost_uuid_fops, the read-only seq_file operations for "uuid". */
LPROC_SEQ_FOPS_RO_TYPE(ost, uuid);

/* procfs entries published for the OSS obd device; NULL-terminated. */
static struct lprocfs_vars lprocfs_ost_obd_vars[] = {
	{
		.name	= "uuid",
		.fops	= &ost_uuid_fops,
	},
	{ NULL }
};
#endif /* CONFIG_PROC_FS */
79
80 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
81 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
82 {
83         static struct ptlrpc_service_conf       svc_conf;
84         struct ost_obd *ost = &obd->u.ost;
85         nodemask_t              *mask;
86         int rc;
87         ENTRY;
88
89 #ifdef CONFIG_PROC_FS
90         obd->obd_vars = lprocfs_ost_obd_vars;
91         lprocfs_obd_setup(obd);
92 #endif
93         mutex_init(&ost->ost_health_mutex);
94
95         svc_conf = (typeof(svc_conf)) {
96                 .psc_name               = LUSTRE_OSS_NAME,
97                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
98                 .psc_buf                = {
99                         .bc_nbufs               = OST_NBUFS,
100                         .bc_buf_size            = OST_BUFSIZE,
101                         .bc_req_max_size        = OST_MAXREQSIZE,
102                         .bc_rep_max_size        = OST_MAXREPSIZE,
103                         .bc_req_portal          = OST_REQUEST_PORTAL,
104                         .bc_rep_portal          = OSC_REPLY_PORTAL,
105                 },
106                 .psc_thr                = {
107                         .tc_thr_name            = "ll_ost",
108                         .tc_thr_factor          = OSS_THR_FACTOR,
109                         .tc_nthrs_init          = OSS_NTHRS_INIT,
110                         .tc_nthrs_base          = OSS_NTHRS_BASE,
111                         .tc_nthrs_max           = oss_max_threads,
112                         .tc_nthrs_user          = oss_num_threads,
113                         .tc_cpu_affinity        = 1,
114                         .tc_ctx_tags            = LCT_DT_THREAD,
115                 },
116                 .psc_cpt                = {
117                         .cc_pattern             = oss_cpts,
118                 },
119                 .psc_ops                = {
120                         .so_req_handler         = tgt_request_handle,
121                         .so_req_printer         = target_print_req,
122                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
123                 },
124         };
125         ost->ost_service = ptlrpc_register_service(&svc_conf,
126                                                    obd->obd_proc_entry);
127         if (IS_ERR(ost->ost_service)) {
128                 rc = PTR_ERR(ost->ost_service);
129                 CERROR("failed to start service: %d\n", rc);
130                 GOTO(out_lprocfs, rc);
131         }
132
133         memset(&svc_conf, 0, sizeof(svc_conf));
134         svc_conf = (typeof(svc_conf)) {
135                 .psc_name               = "ost_create",
136                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
137                 .psc_buf                = {
138                         .bc_nbufs               = OST_NBUFS,
139                         .bc_buf_size            = OST_BUFSIZE,
140                         .bc_req_max_size        = OST_MAXREQSIZE,
141                         .bc_rep_max_size        = OST_MAXREPSIZE,
142                         .bc_req_portal          = OST_CREATE_PORTAL,
143                         .bc_rep_portal          = OSC_REPLY_PORTAL,
144                 },
145                 .psc_thr                = {
146                         .tc_thr_name            = "ll_ost_create",
147                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
148                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
149                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
150                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
151                         .tc_nthrs_user          = oss_num_create_threads,
152                         .tc_cpu_affinity        = 1,
153                         .tc_ctx_tags            = LCT_DT_THREAD,
154                 },
155                 .psc_cpt                = {
156                         .cc_pattern             = oss_cpts,
157                 },
158                 .psc_ops                = {
159                         .so_req_handler         = tgt_request_handle,
160                         .so_req_printer         = target_print_req,
161                 },
162         };
163         ost->ost_create_service = ptlrpc_register_service(&svc_conf,
164                                                           obd->obd_proc_entry);
165         if (IS_ERR(ost->ost_create_service)) {
166                 rc = PTR_ERR(ost->ost_create_service);
167                 CERROR("failed to start OST create service: %d\n", rc);
168                 GOTO(out_service, rc);
169         }
170
171         mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
172         /* event CPT feature is disabled in libcfs level by set partition
173          * number to 1, we still want to set node affinity for io service */
174         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
175                 int     cpt = 0;
176                 int     i;
177
178                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
179                 for_each_node_mask(i, *mask) {
180                         if (ost_io_cptable == NULL) {
181                                 CWARN("OSS failed to create CPT table\n");
182                                 break;
183                         }
184
185                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
186                         if (!rc) {
187                                 CWARN("OSS Failed to set node %d for"
188                                       "IO CPT table\n", i);
189                                 cfs_cpt_table_free(ost_io_cptable);
190                                 ost_io_cptable = NULL;
191                                 break;
192                         }
193                 }
194         }
195
196         memset(&svc_conf, 0, sizeof(svc_conf));
197         svc_conf = (typeof(svc_conf)) {
198                 .psc_name               = "ost_io",
199                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
200                 .psc_buf                = {
201                         .bc_nbufs               = OST_NBUFS,
202                         .bc_buf_size            = OST_IO_BUFSIZE,
203                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
204                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
205                         .bc_req_portal          = OST_IO_PORTAL,
206                         .bc_rep_portal          = OSC_REPLY_PORTAL,
207                 },
208                 .psc_thr                = {
209                         .tc_thr_name            = "ll_ost_io",
210                         .tc_thr_factor          = OSS_THR_FACTOR,
211                         .tc_nthrs_init          = OSS_NTHRS_INIT,
212                         .tc_nthrs_base          = OSS_NTHRS_BASE,
213                         .tc_nthrs_max           = oss_max_threads,
214                         .tc_nthrs_user          = oss_num_threads,
215                         .tc_cpu_affinity        = 1,
216                         .tc_ctx_tags            = LCT_DT_THREAD,
217                 },
218                 .psc_cpt                = {
219                         .cc_cptable             = ost_io_cptable,
220                         .cc_pattern             = ost_io_cptable == NULL ?
221                                                   oss_io_cpts : NULL,
222                 },
223                 .psc_ops                = {
224                         .so_thr_init            = tgt_io_thread_init,
225                         .so_thr_done            = tgt_io_thread_done,
226                         .so_req_handler         = tgt_request_handle,
227                         .so_hpreq_handler       = tgt_hpreq_handler,
228                         .so_req_printer         = target_print_req,
229                 },
230         };
231         ost->ost_io_service = ptlrpc_register_service(&svc_conf,
232                                                       obd->obd_proc_entry);
233         if (IS_ERR(ost->ost_io_service)) {
234                 rc = PTR_ERR(ost->ost_io_service);
235                 CERROR("failed to start OST I/O service: %d\n", rc);
236                 ost->ost_io_service = NULL;
237                 GOTO(out_create, rc);
238         }
239
240         memset(&svc_conf, 0, sizeof(svc_conf));
241         svc_conf = (typeof(svc_conf)) {
242                 .psc_name               = "ost_seq",
243                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
244                 .psc_buf                = {
245                         .bc_nbufs               = OST_NBUFS,
246                         .bc_buf_size            = OST_BUFSIZE,
247                         .bc_req_max_size        = OST_MAXREQSIZE,
248                         .bc_rep_max_size        = OST_MAXREPSIZE,
249                         .bc_req_portal          = SEQ_DATA_PORTAL,
250                         .bc_rep_portal          = OSC_REPLY_PORTAL,
251                 },
252                 .psc_thr                = {
253                         .tc_thr_name            = "ll_ost_seq",
254                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
255                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
256                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
257                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
258                         .tc_nthrs_user          = oss_num_create_threads,
259                         .tc_cpu_affinity        = 1,
260                         .tc_ctx_tags            = LCT_DT_THREAD,
261                 },
262
263                 .psc_cpt                = {
264                         .cc_pattern          = oss_cpts,
265                 },
266                 .psc_ops                = {
267                         .so_req_handler         = tgt_request_handle,
268                         .so_req_printer         = target_print_req,
269                         .so_hpreq_handler       = NULL,
270                 },
271         };
272         ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
273                                                       obd->obd_proc_entry);
274         if (IS_ERR(ost->ost_seq_service)) {
275                 rc = PTR_ERR(ost->ost_seq_service);
276                 CERROR("failed to start OST seq service: %d\n", rc);
277                 ost->ost_seq_service = NULL;
278                 GOTO(out_io, rc);
279         }
280
281         /* Object update service */
282         memset(&svc_conf, 0, sizeof(svc_conf));
283         svc_conf = (typeof(svc_conf)) {
284                 .psc_name               = "ost_out",
285                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
286                 .psc_buf                = {
287                         .bc_nbufs               = OST_NBUFS,
288                         .bc_buf_size            = OUT_BUFSIZE,
289                         .bc_req_max_size        = OUT_MAXREQSIZE,
290                         .bc_rep_max_size        = OUT_MAXREPSIZE,
291                         .bc_req_portal          = OUT_PORTAL,
292                         .bc_rep_portal          = OSC_REPLY_PORTAL,
293                 },
294                 /*
295                  * We'd like to have a mechanism to set this on a per-device
296                  * basis, but alas...
297                  */
298                 .psc_thr                = {
299                         .tc_thr_name            = "ll_ost_out",
300                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
301                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
302                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
303                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
304                         .tc_nthrs_user          = oss_num_create_threads,
305                         .tc_cpu_affinity        = 1,
306                         .tc_ctx_tags            = LCT_MD_THREAD |
307                                                   LCT_DT_THREAD,
308                 },
309                 .psc_cpt                = {
310                         .cc_pattern             = oss_cpts,
311                 },
312                 .psc_ops                = {
313                         .so_req_handler         = tgt_request_handle,
314                         .so_req_printer         = target_print_req,
315                         .so_hpreq_handler       = NULL,
316                 },
317         };
318         ost->ost_out_service = ptlrpc_register_service(&svc_conf,
319                                                        obd->obd_proc_entry);
320         if (IS_ERR(ost->ost_out_service)) {
321                 rc = PTR_ERR(ost->ost_out_service);
322                 CERROR("failed to start out service: %d\n", rc);
323                 ost->ost_out_service = NULL;
324                 GOTO(out_seq, rc);
325         }
326
327         ping_evictor_start();
328
329         RETURN(0);
330
331 out_seq:
332         ptlrpc_unregister_service(ost->ost_seq_service);
333         ost->ost_seq_service = NULL;
334 out_io:
335         ptlrpc_unregister_service(ost->ost_io_service);
336         ost->ost_io_service = NULL;
337 out_create:
338         ptlrpc_unregister_service(ost->ost_create_service);
339         ost->ost_create_service = NULL;
340 out_service:
341         ptlrpc_unregister_service(ost->ost_service);
342         ost->ost_service = NULL;
343 out_lprocfs:
344         lprocfs_obd_cleanup(obd);
345         RETURN(rc);
346 }
347
348 static int ost_cleanup(struct obd_device *obd)
349 {
350         struct ost_obd *ost = &obd->u.ost;
351         int err = 0;
352         ENTRY;
353
354         ping_evictor_stop();
355
356         /* there is no recovery for OST OBD, all recovery is controlled by
357          * obdfilter OBD */
358         LASSERT(obd->obd_recovering == 0);
359         mutex_lock(&ost->ost_health_mutex);
360         ptlrpc_unregister_service(ost->ost_service);
361         ptlrpc_unregister_service(ost->ost_create_service);
362         ptlrpc_unregister_service(ost->ost_io_service);
363         ptlrpc_unregister_service(ost->ost_seq_service);
364         ptlrpc_unregister_service(ost->ost_out_service);
365
366         ost->ost_service = NULL;
367         ost->ost_create_service = NULL;
368         ost->ost_io_service = NULL;
369         ost->ost_seq_service = NULL;
370         ost->ost_out_service = NULL;
371
372         mutex_unlock(&ost->ost_health_mutex);
373
374         lprocfs_obd_cleanup(obd);
375
376         if (ost_io_cptable != NULL) {
377                 cfs_cpt_table_free(ost_io_cptable);
378                 ost_io_cptable = NULL;
379         }
380
381         RETURN(err);
382 }
383
384 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
385 {
386         struct ost_obd *ost = &obd->u.ost;
387         int rc = 0;
388
389         mutex_lock(&ost->ost_health_mutex);
390         rc |= ptlrpc_service_health_check(ost->ost_service);
391         rc |= ptlrpc_service_health_check(ost->ost_create_service);
392         rc |= ptlrpc_service_health_check(ost->ost_io_service);
393         rc |= ptlrpc_service_health_check(ost->ost_seq_service);
394         mutex_unlock(&ost->ost_health_mutex);
395
396         return rc != 0 ? 1 : 0;
397 }
398
399 /* use obd ops to offer management infrastructure */
400 static struct obd_ops ost_obd_ops = {
401         .o_owner        = THIS_MODULE,
402         .o_setup        = ost_setup,
403         .o_cleanup      = ost_cleanup,
404         .o_health_check = ost_health_check,
405 };
406
407
408 static int __init ost_init(void)
409 {
410         int rc;
411
412         ENTRY;
413
414         rc = class_register_type(&ost_obd_ops, NULL, true, NULL,
415                                  LUSTRE_OSS_NAME, NULL);
416
417         RETURN(rc);
418 }
419
420 static void __exit ost_exit(void)
421 {
422         class_unregister_type(LUSTRE_OSS_NAME);
423 }
424
425 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
426 MODULE_DESCRIPTION("Lustre Object Storage Target (OST)");
427 MODULE_VERSION(LUSTRE_VERSION_STRING);
428 MODULE_LICENSE("GPL");
429
430 module_init(ost_init);
431 module_exit(ost_exit);