Whamcloud - gitweb
0c95d7ad1d1550a3394b30ad98b3b2e0d54fcf17
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/ost/ost_handler.c
33  *
34  * Author: Peter J. Braam <braam@clusterfs.com>
35  * Author: Phil Schwan <phil@clusterfs.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_OST
39
40 #include <linux/module.h>
41 #include <lustre_dlm.h>
42 #include <lprocfs_status.h>
43 #include <obd_class.h>
44 #include "ost_internal.h"
45
46 int oss_max_threads = 512;
47 module_param(oss_max_threads, int, 0444);
48 MODULE_PARM_DESC(oss_max_threads, "maximum number of OSS service threads");
49
50 static int oss_num_threads;
51 module_param(oss_num_threads, int, 0444);
52 MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");
53
54 static int oss_num_create_threads;
55 module_param(oss_num_create_threads, int, 0444);
56 MODULE_PARM_DESC(oss_num_create_threads, "number of OSS create threads to start");
57
58 static char *oss_cpts;
59 module_param(oss_cpts, charp, 0444);
60 MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");
61
62 static char *oss_io_cpts;
63 module_param(oss_io_cpts, charp, 0444);
64 MODULE_PARM_DESC(oss_io_cpts, "CPU partitions OSS IO threads should run on");
65
66 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
67
68 static struct cfs_cpt_table     *ost_io_cptable;
69
70 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
71 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
72 {
73         static struct ptlrpc_service_conf       svc_conf;
74         struct ost_obd *ost = &obd->u.ost;
75         nodemask_t              *mask;
76         int rc;
77         ENTRY;
78
79         rc = lprocfs_obd_setup(obd, true);
80         if (rc)
81                 return rc;
82
83         mutex_init(&ost->ost_health_mutex);
84
85         svc_conf = (typeof(svc_conf)) {
86                 .psc_name               = LUSTRE_OSS_NAME,
87                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
88                 .psc_buf                = {
89                         .bc_nbufs               = OST_NBUFS,
90                         .bc_buf_size            = OST_BUFSIZE,
91                         .bc_req_max_size        = OST_MAXREQSIZE,
92                         .bc_rep_max_size        = OST_MAXREPSIZE,
93                         .bc_req_portal          = OST_REQUEST_PORTAL,
94                         .bc_rep_portal          = OSC_REPLY_PORTAL,
95                 },
96                 .psc_thr                = {
97                         .tc_thr_name            = "ll_ost",
98                         .tc_thr_factor          = OSS_THR_FACTOR,
99                         .tc_nthrs_init          = OSS_NTHRS_INIT,
100                         .tc_nthrs_base          = OSS_NTHRS_BASE,
101                         .tc_nthrs_max           = oss_max_threads,
102                         .tc_nthrs_user          = oss_num_threads,
103                         .tc_cpu_affinity        = 1,
104                         .tc_ctx_tags            = LCT_DT_THREAD,
105                 },
106                 .psc_cpt                = {
107                         .cc_pattern             = oss_cpts,
108                 },
109                 .psc_ops                = {
110                         .so_req_handler         = tgt_request_handle,
111                         .so_req_printer         = target_print_req,
112                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
113                 },
114         };
115         ost->ost_service = ptlrpc_register_service(&svc_conf,
116                                                    &obd->obd_kset,
117                                                    obd->obd_proc_entry);
118         if (IS_ERR(ost->ost_service)) {
119                 rc = PTR_ERR(ost->ost_service);
120                 CERROR("failed to start service: %d\n", rc);
121                 GOTO(out_lprocfs, rc);
122         }
123
124         memset(&svc_conf, 0, sizeof(svc_conf));
125         svc_conf = (typeof(svc_conf)) {
126                 .psc_name               = "ost_create",
127                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
128                 .psc_buf                = {
129                         .bc_nbufs               = OST_NBUFS,
130                         .bc_buf_size            = OST_BUFSIZE,
131                         .bc_req_max_size        = OST_MAXREQSIZE,
132                         .bc_rep_max_size        = OST_MAXREPSIZE,
133                         .bc_req_portal          = OST_CREATE_PORTAL,
134                         .bc_rep_portal          = OSC_REPLY_PORTAL,
135                 },
136                 .psc_thr                = {
137                         .tc_thr_name            = "ll_ost_create",
138                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
139                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
140                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
141                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
142                         .tc_nthrs_user          = oss_num_create_threads,
143                         .tc_cpu_affinity        = 1,
144                         .tc_ctx_tags            = LCT_DT_THREAD,
145                 },
146                 .psc_cpt                = {
147                         .cc_pattern             = oss_cpts,
148                 },
149                 .psc_ops                = {
150                         .so_req_handler         = tgt_request_handle,
151                         .so_req_printer         = target_print_req,
152                 },
153         };
154         ost->ost_create_service = ptlrpc_register_service(&svc_conf,
155                                                           &obd->obd_kset,
156                                                           obd->obd_proc_entry);
157         if (IS_ERR(ost->ost_create_service)) {
158                 rc = PTR_ERR(ost->ost_create_service);
159                 CERROR("failed to start OST create service: %d\n", rc);
160                 GOTO(out_service, rc);
161         }
162
163         mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
164         /* event CPT feature is disabled in libcfs level by set partition
165          * number to 1, we still want to set node affinity for io service */
166         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
167                 int     cpt = 0;
168                 int     i;
169
170                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
171                 for_each_node_mask(i, *mask) {
172                         if (ost_io_cptable == NULL) {
173                                 CWARN("OSS failed to create CPT table\n");
174                                 break;
175                         }
176
177                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
178                         if (!rc) {
179                                 CWARN("OSS Failed to set node %d for"
180                                       "IO CPT table\n", i);
181                                 cfs_cpt_table_free(ost_io_cptable);
182                                 ost_io_cptable = NULL;
183                                 break;
184                         }
185                 }
186         }
187
188         memset(&svc_conf, 0, sizeof(svc_conf));
189         svc_conf = (typeof(svc_conf)) {
190                 .psc_name               = "ost_io",
191                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
192                 .psc_buf                = {
193                         .bc_nbufs               = OST_NBUFS,
194                         .bc_buf_size            = OST_IO_BUFSIZE,
195                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
196                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
197                         .bc_req_portal          = OST_IO_PORTAL,
198                         .bc_rep_portal          = OSC_REPLY_PORTAL,
199                 },
200                 .psc_thr                = {
201                         .tc_thr_name            = "ll_ost_io",
202                         .tc_thr_factor          = OSS_THR_FACTOR,
203                         .tc_nthrs_init          = OSS_NTHRS_INIT,
204                         .tc_nthrs_base          = OSS_NTHRS_BASE,
205                         .tc_nthrs_max           = oss_max_threads,
206                         .tc_nthrs_user          = oss_num_threads,
207                         .tc_cpu_affinity        = 1,
208                         .tc_ctx_tags            = LCT_DT_THREAD,
209                 },
210                 .psc_cpt                = {
211                         .cc_cptable             = ost_io_cptable,
212                         .cc_pattern             = ost_io_cptable == NULL ?
213                                                   oss_io_cpts : NULL,
214                 },
215                 .psc_ops                = {
216                         .so_thr_init            = tgt_io_thread_init,
217                         .so_thr_done            = tgt_io_thread_done,
218                         .so_req_handler         = tgt_request_handle,
219                         .so_hpreq_handler       = tgt_hpreq_handler,
220                         .so_req_printer         = target_print_req,
221                 },
222         };
223         ost->ost_io_service = ptlrpc_register_service(&svc_conf,
224                                                       &obd->obd_kset,
225                                                       obd->obd_proc_entry);
226         if (IS_ERR(ost->ost_io_service)) {
227                 rc = PTR_ERR(ost->ost_io_service);
228                 CERROR("failed to start OST I/O service: %d\n", rc);
229                 ost->ost_io_service = NULL;
230                 GOTO(out_create, rc);
231         }
232
233         memset(&svc_conf, 0, sizeof(svc_conf));
234         svc_conf = (typeof(svc_conf)) {
235                 .psc_name               = "ost_seq",
236                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
237                 .psc_buf                = {
238                         .bc_nbufs               = OST_NBUFS,
239                         .bc_buf_size            = OST_BUFSIZE,
240                         .bc_req_max_size        = OST_MAXREQSIZE,
241                         .bc_rep_max_size        = OST_MAXREPSIZE,
242                         .bc_req_portal          = SEQ_DATA_PORTAL,
243                         .bc_rep_portal          = OSC_REPLY_PORTAL,
244                 },
245                 .psc_thr                = {
246                         .tc_thr_name            = "ll_ost_seq",
247                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
248                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
249                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
250                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
251                         .tc_nthrs_user          = oss_num_create_threads,
252                         .tc_cpu_affinity        = 1,
253                         .tc_ctx_tags            = LCT_DT_THREAD,
254                 },
255
256                 .psc_cpt                = {
257                         .cc_pattern          = oss_cpts,
258                 },
259                 .psc_ops                = {
260                         .so_req_handler         = tgt_request_handle,
261                         .so_req_printer         = target_print_req,
262                         .so_hpreq_handler       = NULL,
263                 },
264         };
265         ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
266                                                        &obd->obd_kset,
267                                                        obd->obd_proc_entry);
268         if (IS_ERR(ost->ost_seq_service)) {
269                 rc = PTR_ERR(ost->ost_seq_service);
270                 CERROR("failed to start OST seq service: %d\n", rc);
271                 ost->ost_seq_service = NULL;
272                 GOTO(out_io, rc);
273         }
274
275         /* Object update service */
276         memset(&svc_conf, 0, sizeof(svc_conf));
277         svc_conf = (typeof(svc_conf)) {
278                 .psc_name               = "ost_out",
279                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
280                 .psc_buf                = {
281                         .bc_nbufs               = OST_NBUFS,
282                         .bc_buf_size            = OUT_BUFSIZE,
283                         .bc_req_max_size        = OUT_MAXREQSIZE,
284                         .bc_rep_max_size        = OUT_MAXREPSIZE,
285                         .bc_req_portal          = OUT_PORTAL,
286                         .bc_rep_portal          = OSC_REPLY_PORTAL,
287                 },
288                 /*
289                  * We'd like to have a mechanism to set this on a per-device
290                  * basis, but alas...
291                  */
292                 .psc_thr                = {
293                         .tc_thr_name            = "ll_ost_out",
294                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
295                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
296                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
297                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
298                         .tc_nthrs_user          = oss_num_create_threads,
299                         .tc_cpu_affinity        = 1,
300                         .tc_ctx_tags            = LCT_MD_THREAD |
301                                                   LCT_DT_THREAD,
302                 },
303                 .psc_cpt                = {
304                         .cc_pattern             = oss_cpts,
305                 },
306                 .psc_ops                = {
307                         .so_req_handler         = tgt_request_handle,
308                         .so_req_printer         = target_print_req,
309                         .so_hpreq_handler       = NULL,
310                 },
311         };
312         ost->ost_out_service = ptlrpc_register_service(&svc_conf,
313                                                        &obd->obd_kset,
314                                                        obd->obd_proc_entry);
315         if (IS_ERR(ost->ost_out_service)) {
316                 rc = PTR_ERR(ost->ost_out_service);
317                 CERROR("failed to start out service: %d\n", rc);
318                 ost->ost_out_service = NULL;
319                 GOTO(out_seq, rc);
320         }
321
322         ping_evictor_start();
323
324         RETURN(0);
325
326 out_seq:
327         ptlrpc_unregister_service(ost->ost_seq_service);
328         ost->ost_seq_service = NULL;
329 out_io:
330         ptlrpc_unregister_service(ost->ost_io_service);
331         ost->ost_io_service = NULL;
332 out_create:
333         ptlrpc_unregister_service(ost->ost_create_service);
334         ost->ost_create_service = NULL;
335 out_service:
336         ptlrpc_unregister_service(ost->ost_service);
337         ost->ost_service = NULL;
338 out_lprocfs:
339         lprocfs_obd_cleanup(obd);
340         RETURN(rc);
341 }
342
343 static int ost_cleanup(struct obd_device *obd)
344 {
345         struct ost_obd *ost = &obd->u.ost;
346         int err = 0;
347         ENTRY;
348
349         ping_evictor_stop();
350
351         /* there is no recovery for OST OBD, all recovery is controlled by
352          * obdfilter OBD */
353         LASSERT(obd->obd_recovering == 0);
354         mutex_lock(&ost->ost_health_mutex);
355         ptlrpc_unregister_service(ost->ost_service);
356         ptlrpc_unregister_service(ost->ost_create_service);
357         ptlrpc_unregister_service(ost->ost_io_service);
358         ptlrpc_unregister_service(ost->ost_seq_service);
359         ptlrpc_unregister_service(ost->ost_out_service);
360
361         ost->ost_service = NULL;
362         ost->ost_create_service = NULL;
363         ost->ost_io_service = NULL;
364         ost->ost_seq_service = NULL;
365         ost->ost_out_service = NULL;
366
367         mutex_unlock(&ost->ost_health_mutex);
368
369         lprocfs_obd_cleanup(obd);
370
371         if (ost_io_cptable != NULL) {
372                 cfs_cpt_table_free(ost_io_cptable);
373                 ost_io_cptable = NULL;
374         }
375
376         RETURN(err);
377 }
378
379 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
380 {
381         struct ost_obd *ost = &obd->u.ost;
382         int rc = 0;
383
384         mutex_lock(&ost->ost_health_mutex);
385         rc |= ptlrpc_service_health_check(ost->ost_service);
386         rc |= ptlrpc_service_health_check(ost->ost_create_service);
387         rc |= ptlrpc_service_health_check(ost->ost_io_service);
388         rc |= ptlrpc_service_health_check(ost->ost_seq_service);
389         mutex_unlock(&ost->ost_health_mutex);
390
391         return rc != 0 ? 1 : 0;
392 }
393
394 /* use obd ops to offer management infrastructure */
395 static struct obd_ops ost_obd_ops = {
396         .o_owner        = THIS_MODULE,
397         .o_setup        = ost_setup,
398         .o_cleanup      = ost_cleanup,
399         .o_health_check = ost_health_check,
400 };
401
402
403 static int __init ost_init(void)
404 {
405         int rc;
406
407         ENTRY;
408
409         rc = class_register_type(&ost_obd_ops, NULL, true, NULL,
410                                  LUSTRE_OSS_NAME, NULL);
411
412         RETURN(rc);
413 }
414
415 static void __exit ost_exit(void)
416 {
417         class_unregister_type(LUSTRE_OSS_NAME);
418 }
419
420 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
421 MODULE_DESCRIPTION("Lustre Object Storage Target (OST)");
422 MODULE_VERSION(LUSTRE_VERSION_STRING);
423 MODULE_LICENSE("GPL");
424
425 module_init(ost_init);
426 module_exit(ost_exit);