4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/ost/ost_handler.c
34 * Author: Peter J. Braam <braam@clusterfs.com>
35 * Author: Phil Schwan <phil@clusterfs.com>
38 #define DEBUG_SUBSYSTEM S_OST
40 #include <linux/module.h>
41 #include <lustre_dlm.h>
42 #include <lprocfs_status.h>
43 #include <obd_class.h>
44 #include "ost_internal.h"
/*
 * Module parameters tuning the OSS thread pools. All are 0444 (read-only
 * via sysfs once the module is loaded).
 */

/* Hard upper bound on OSS service threads (shared by ll_ost and ll_ost_io). */
46 int oss_max_threads = 512;
47 module_param(oss_max_threads, int, 0444);
48 MODULE_PARM_DESC(oss_max_threads, "maximum number of OSS service threads");

/* Explicit thread count requested by the admin; 0 means auto-scale. */
50 static int oss_num_threads;
51 module_param(oss_num_threads, int, 0444);
52 MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");

/* Non-zero: bind ll_ost / ll_ost_io threads to their CPU partition. */
54 static unsigned int oss_cpu_bind = 1;
55 module_param(oss_cpu_bind, uint, 0444);
56 MODULE_PARM_DESC(oss_cpu_bind,
57 "bind OSS service threads to particular CPU partitions");

/* Explicit create-thread count; 0 means auto-scale (see ost_create below). */
59 static int oss_num_create_threads;
60 module_param(oss_num_create_threads, int, 0444);
61 MODULE_PARM_DESC(oss_num_create_threads,
62 "number of OSS create threads to start");

/* Non-zero: bind ll_ost_create / ll_ost_seq threads to their CPU partition. */
64 static unsigned int oss_create_cpu_bind = 1;
65 module_param(oss_create_cpu_bind, uint, 0444);
66 MODULE_PARM_DESC(oss_create_cpu_bind,
67 "bind OSS create threads to particular CPU partitions");

/* CPU-partition pattern string for the general OSS services (cc_pattern). */
69 static char *oss_cpts;
70 module_param(oss_cpts, charp, 0444);
71 MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");

/* CPU-partition pattern string specifically for the ost_io service. */
73 static char *oss_io_cpts;
74 module_param(oss_io_cpts, charp, 0444);
75 MODULE_PARM_DESC(oss_io_cpts, "CPU partitions OSS IO threads should run on");

/* Watchdog timeout in milliseconds (obd_timeout is in seconds). */
77 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)

/* Private CPT table built in ost_setup() for NUMA-affine IO threads;
 * freed in ost_cleanup(). NULL when the default table is used. */
79 static struct cfs_cpt_table *ost_io_cptable;
81 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
81 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
/*
 * ost_setup() - obd o_setup handler for the OSS device.
 *
 * Registers the OSS ptlrpc services in order: the general "ost" service,
 * "ost_create", "ost_io" (optionally on a private NUMA-affine CPT table),
 * "ost_seq", and "ost_out" (object update), then starts the ping evictor.
 * On failure each later stage unwinds the earlier registrations via the
 * GOTO()/label cleanup chain at the bottom.
 *
 * NOTE(review): this listing is lossy — the embedded original line numbers
 * jump (e.g. 85->91, 134->137), so local declarations (rc, mask, i, cpt),
 * opening/closing braces, the psc_buf/psc_thr/psc_cpt/psc_ops sub-struct
 * initializer names, and the out_* error labels are not visible here.
 * Compare against the upstream lustre/ost/ost_handler.c before editing.
 */
82 static int ost_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
84 static struct ptlrpc_service_conf svc_conf;
85 struct ost_obd *ost = &obd->u.ost;
/* debugfs/lprocfs entries must exist before services register under them. */
91 rc = lprocfs_obd_setup(obd, true);
/* Serializes service pointer updates against ost_health_check(). */
95 mutex_init(&ost->ost_health_mutex);
/* --- 1. general OSS request service ("ost", portal OST_REQUEST_PORTAL) --- */
97 svc_conf = (typeof(svc_conf)) {
98 .psc_name = LUSTRE_OSS_NAME,
99 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
101 .bc_nbufs = OST_NBUFS,
102 .bc_buf_size = OST_BUFSIZE,
103 .bc_req_max_size = OST_MAXREQSIZE,
104 .bc_rep_max_size = OST_MAXREPSIZE,
105 .bc_req_portal = OST_REQUEST_PORTAL,
106 .bc_rep_portal = OSC_REPLY_PORTAL,
109 .tc_thr_name = "ll_ost",
110 .tc_thr_factor = OSS_THR_FACTOR,
111 .tc_nthrs_init = OSS_NTHRS_INIT,
112 .tc_nthrs_base = OSS_NTHRS_BASE,
113 .tc_nthrs_max = oss_max_threads,
114 .tc_nthrs_user = oss_num_threads,
115 .tc_cpu_bind = oss_cpu_bind,
116 .tc_ctx_tags = LCT_DT_THREAD,
119 .cc_pattern = oss_cpts,
123 .so_req_handler = tgt_request_handle,
124 .so_req_printer = target_print_req,
125 .so_hpreq_handler = ptlrpc_hpreq_handler,
128 ost->ost_service = ptlrpc_register_service(&svc_conf,
130 obd->obd_debugfs_entry);
131 if (IS_ERR(ost->ost_service)) {
132 rc = PTR_ERR(ost->ost_service);
133 CERROR("failed to start service: %d\n", rc);
/* Unwinds lprocfs setup only; no services registered yet. */
134 GOTO(out_lprocfs, rc);
/* --- 2. object create/destroy service ("ost_create") --- */
137 memset(&svc_conf, 0, sizeof(svc_conf));
138 svc_conf = (typeof(svc_conf)) {
139 .psc_name = "ost_create",
140 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
142 .bc_nbufs = OST_NBUFS,
143 .bc_buf_size = OST_BUFSIZE,
144 .bc_req_max_size = OST_MAXREQSIZE,
145 .bc_rep_max_size = OST_MAXREPSIZE,
146 .bc_req_portal = OST_CREATE_PORTAL,
147 .bc_rep_portal = OSC_REPLY_PORTAL,
150 .tc_thr_name = "ll_ost_create",
151 .tc_thr_factor = OSS_CR_THR_FACTOR,
152 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
153 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
154 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
155 .tc_nthrs_user = oss_num_create_threads,
156 .tc_cpu_bind = oss_create_cpu_bind,
157 .tc_ctx_tags = LCT_DT_THREAD,
160 .cc_pattern = oss_cpts,
164 .so_req_handler = tgt_request_handle,
165 .so_req_printer = target_print_req,
168 ost->ost_create_service = ptlrpc_register_service(&svc_conf,
170 obd->obd_debugfs_entry
172 if (IS_ERR(ost->ost_create_service)) {
173 rc = PTR_ERR(ost->ost_create_service);
174 CERROR("failed to start OST create service: %d\n", rc);
175 GOTO(out_service, rc);
/* --- NUMA affinity for IO threads ------------------------------------- */
178 mask = cfs_cpt_nodemask(cfs_cpt_tab, CFS_CPT_ANY);
179 /* even when the CPT feature is disabled at the libcfs level (partition
180 * number set to 1), we still want per-NUMA-node affinity for the io
 * service threads, so build a private CPT table with one partition per
 * online NUMA node. */
182 if (cfs_cpt_number(cfs_cpt_tab) == 1 && nodes_weight(*mask) > 1) {
186 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
187 for_each_node_mask(i, *mask) {
/* Allocation failure is non-fatal: fall back to the default table. */
188 if (!ost_io_cptable) {
189 CWARN("OSS failed to create CPT table\n");
193 rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
195 CWARN("OSS Failed to set node %d for IO CPT table\n",
/* Partial table is useless: free it and fall back to the default. */
197 cfs_cpt_table_free(ost_io_cptable);
198 ost_io_cptable = NULL;
/* --- 3. bulk IO service ("ost_io", the read/write fast path) --- */
204 memset(&svc_conf, 0, sizeof(svc_conf));
205 svc_conf = (typeof(svc_conf)) {
206 .psc_name = "ost_io",
207 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
209 .bc_nbufs = OST_NBUFS,
210 .bc_buf_size = OST_IO_BUFSIZE,
211 .bc_req_max_size = OST_IO_MAXREQSIZE,
212 .bc_rep_max_size = OST_IO_MAXREPSIZE,
213 .bc_req_portal = OST_IO_PORTAL,
214 .bc_rep_portal = OSC_REPLY_PORTAL,
217 .tc_thr_name = "ll_ost_io",
218 .tc_thr_factor = OSS_THR_FACTOR,
219 .tc_nthrs_init = OSS_NTHRS_INIT,
220 .tc_nthrs_base = OSS_NTHRS_BASE,
221 .tc_nthrs_max = oss_max_threads,
222 .tc_nthrs_user = oss_num_threads,
223 .tc_cpu_bind = oss_cpu_bind,
224 .tc_ctx_tags = LCT_DT_THREAD,
/* Use the private NUMA table when built; otherwise a pattern string. */
227 .cc_cptable = ost_io_cptable,
228 .cc_pattern = ost_io_cptable == NULL ?
233 .so_thr_init = tgt_io_thread_init,
234 .so_thr_done = tgt_io_thread_done,
235 .so_req_handler = tgt_request_handle,
236 .so_hpreq_handler = tgt_hpreq_handler,
237 .so_req_printer = target_print_req,
240 ost->ost_io_service = ptlrpc_register_service(&svc_conf,
242 obd->obd_debugfs_entry);
243 if (IS_ERR(ost->ost_io_service)) {
244 rc = PTR_ERR(ost->ost_io_service);
245 CERROR("failed to start OST I/O service: %d\n", rc);
/* Clear the IS_ERR() value so cleanup paths see NULL, not a bad pointer. */
246 ost->ost_io_service = NULL;
247 GOTO(out_create, rc);
/* --- 4. FID sequence service ("ost_seq", portal SEQ_DATA_PORTAL) --- */
250 memset(&svc_conf, 0, sizeof(svc_conf));
251 svc_conf = (typeof(svc_conf)) {
252 .psc_name = "ost_seq",
253 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
255 .bc_nbufs = OST_NBUFS,
256 .bc_buf_size = OST_BUFSIZE,
257 .bc_req_max_size = OST_MAXREQSIZE,
258 .bc_rep_max_size = OST_MAXREPSIZE,
259 .bc_req_portal = SEQ_DATA_PORTAL,
260 .bc_rep_portal = OSC_REPLY_PORTAL,
263 .tc_thr_name = "ll_ost_seq",
264 .tc_thr_factor = OSS_CR_THR_FACTOR,
265 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
266 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
267 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
268 .tc_nthrs_user = oss_num_create_threads,
269 .tc_cpu_bind = oss_create_cpu_bind,
270 .tc_ctx_tags = LCT_DT_THREAD,
274 .cc_pattern = oss_cpts,
278 .so_req_handler = tgt_request_handle,
279 .so_req_printer = target_print_req,
/* seq requests are never high-priority. */
280 .so_hpreq_handler = NULL,
283 ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
285 obd->obd_debugfs_entry);
286 if (IS_ERR(ost->ost_seq_service)) {
287 rc = PTR_ERR(ost->ost_seq_service);
288 CERROR("failed to start OST seq service: %d\n", rc);
289 ost->ost_seq_service = NULL;
293 /* Object update service */
294 memset(&svc_conf, 0, sizeof(svc_conf));
295 svc_conf = (typeof(svc_conf)) {
296 .psc_name = "ost_out",
297 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
299 .bc_nbufs = OST_NBUFS,
300 .bc_buf_size = OUT_BUFSIZE,
301 .bc_req_max_size = OUT_MAXREQSIZE,
302 .bc_rep_max_size = OUT_MAXREPSIZE,
303 .bc_req_portal = OUT_PORTAL,
304 .bc_rep_portal = OSC_REPLY_PORTAL,
307 * We'd like to have a mechanism to set this on a per-device
311 .tc_thr_name = "ll_ost_out",
312 .tc_thr_factor = OSS_CR_THR_FACTOR,
313 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
314 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
315 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
316 .tc_nthrs_user = oss_num_create_threads,
317 .tc_cpu_bind = oss_create_cpu_bind,
/* Updates may touch metadata contexts, hence LCT_MD_THREAD here
 * (continuation of this initializer is missing from the listing). */
318 .tc_ctx_tags = LCT_MD_THREAD |
322 .cc_pattern = oss_cpts,
326 .so_req_handler = tgt_request_handle,
327 .so_req_printer = target_print_req,
328 .so_hpreq_handler = NULL,
331 ost->ost_out_service = ptlrpc_register_service(&svc_conf,
333 obd->obd_debugfs_entry);
334 if (IS_ERR(ost->ost_out_service)) {
335 rc = PTR_ERR(ost->ost_out_service);
336 CERROR("failed to start out service: %d\n", rc);
337 ost->ost_out_service = NULL;
/* All services up: start evicting clients that stop pinging. */
341 ping_evictor_start();
/* Error unwind chain — each label tears down one more service
 * (label lines themselves are missing from this listing). */
346 ptlrpc_unregister_service(ost->ost_seq_service);
347 ost->ost_seq_service = NULL;
349 ptlrpc_unregister_service(ost->ost_io_service);
350 ost->ost_io_service = NULL;
352 ptlrpc_unregister_service(ost->ost_create_service);
353 ost->ost_create_service = NULL;
355 ptlrpc_unregister_service(ost->ost_service);
356 ost->ost_service = NULL;
358 lprocfs_obd_cleanup(obd);
/*
 * ost_cleanup() - obd o_cleanup handler; mirror of ost_setup().
 *
 * Unregisters all five OSS ptlrpc services and NULLs the pointers under
 * ost_health_mutex (so a concurrent ost_health_check() never sees a
 * half-torn-down service), then releases lprocfs entries and the private
 * IO CPT table, if one was built.
 *
 * NOTE(review): the embedded line numbers jump (362->364, 394->400), so
 * ENTRY/RETURN and closing braces are not visible in this listing.
 */
362 static int ost_cleanup(struct obd_device *obd)
364 struct ost_obd *ost = &obd->u.ost;
371 /* there is no recovery for OST OBD, all recovery is controlled by
 * the underlying target device (comment truncated in this listing —
 * presumably "by obdfilter/OFD recovery"; confirm against upstream). */
374 LASSERT(obd->obd_recovering == 0);
375 mutex_lock(&ost->ost_health_mutex);
/* ptlrpc_unregister_service() blocks until the service threads exit. */
376 ptlrpc_unregister_service(ost->ost_service);
377 ptlrpc_unregister_service(ost->ost_create_service);
378 ptlrpc_unregister_service(ost->ost_io_service);
379 ptlrpc_unregister_service(ost->ost_seq_service);
380 ptlrpc_unregister_service(ost->ost_out_service);
382 ost->ost_service = NULL;
383 ost->ost_create_service = NULL;
384 ost->ost_io_service = NULL;
385 ost->ost_seq_service = NULL;
386 ost->ost_out_service = NULL;
388 mutex_unlock(&ost->ost_health_mutex);
390 lprocfs_obd_cleanup(obd);
/* Free the NUMA-affine CPT table built in ost_setup(), if any. */
392 if (ost_io_cptable) {
393 cfs_cpt_table_free(ost_io_cptable);
394 ost_io_cptable = NULL;
/*
 * ost_health_check() - obd o_health_check handler.
 *
 * ORs together the health status of the main, create, io and seq services
 * and collapses the result to 0 (healthy) / 1 (unhealthy). Holding
 * ost_health_mutex guarantees the service pointers are stable while
 * ost_cleanup() may be running.
 *
 * NOTE(review): rc's declaration/initializer line (original line ~403) is
 * missing from this listing — upstream initializes it to 0. Also note
 * ost_out_service is NOT checked here; confirm upstream whether that is
 * intentional.
 */
400 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
402 struct ost_obd *ost = &obd->u.ost;
405 mutex_lock(&ost->ost_health_mutex);
406 rc |= ptlrpc_service_health_check(ost->ost_service);
407 rc |= ptlrpc_service_health_check(ost->ost_create_service);
408 rc |= ptlrpc_service_health_check(ost->ost_io_service);
409 rc |= ptlrpc_service_health_check(ost->ost_seq_service);
410 mutex_unlock(&ost->ost_health_mutex);
/* Normalize: any non-zero health status reports the device as unhealthy. */
412 return rc != 0 ? 1 : 0;
415 /* use obd ops to offer management infrastructure */
416 static const struct obd_ops ost_obd_ops = {
417 .o_owner = THIS_MODULE,
418 .o_setup = ost_setup,
419 .o_cleanup = ost_cleanup,
420 .o_health_check = ost_health_check,
/*
 * ost_init() - module entry point: registers the LUSTRE_OSS_NAME obd type
 * with the class layer so ost_setup() can be invoked per device.
 * (Function braces / rc declaration / return are missing from this listing.)
 */
423 static int __init ost_init(void)
429 rc = class_register_type(&ost_obd_ops, NULL, false, NULL,
430 LUSTRE_OSS_NAME, NULL);
/* ost_exit() - module exit point: undoes class_register_type() from
 * ost_init(). (Braces are missing from this listing.) */
435 static void __exit ost_exit(void)
437 class_unregister_type(LUSTRE_OSS_NAME);
/* Standard kernel module metadata and entry/exit registration. */
440 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
441 MODULE_DESCRIPTION("Lustre Object Storage Target (OST)");
442 MODULE_VERSION(LUSTRE_VERSION_STRING);
443 MODULE_LICENSE("GPL");
445 module_init(ost_init);
446 module_exit(ost_exit);