4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/ost/ost_handler.c
34 * Author: Peter J. Braam <braam@clusterfs.com>
35 * Author: Phil Schwan <phil@clusterfs.com>
38 #define DEBUG_SUBSYSTEM S_OST
40 #include <linux/module.h>
41 #include <lustre_dlm.h>
42 #include <lprocfs_status.h>
43 #include <obd_class.h>
44 #include "ost_internal.h"
/*
 * Module parameters (all registered with mode 0444) and file-scope state
 * shared by ost_setup() and ost_cleanup().
 */

/* Upper bound on service threads; used as tc_nthrs_max for the "ll_ost"
 * and "ll_ost_io" thread pools in ost_setup(). */
46 int oss_max_threads = 512;
47 module_param(oss_max_threads, int, 0444);
48 MODULE_PARM_DESC(oss_max_threads, "maximum number of OSS service threads");
/* User-requested thread count; used as tc_nthrs_user for the "ll_ost" and
 * "ll_ost_io" thread pools. */
50 static int oss_num_threads;
51 module_param(oss_num_threads, int, 0444);
52 MODULE_PARM_DESC(oss_num_threads, "number of OSS service threads to start");
/* User-requested thread count; used as tc_nthrs_user for the
 * "ll_ost_create", "ll_ost_seq" and "ll_ost_out" thread pools. */
54 static int oss_num_create_threads;
55 module_param(oss_num_create_threads, int, 0444);
56 MODULE_PARM_DESC(oss_num_create_threads, "number of OSS create threads to start");
/* CPT pattern string; passed as cc_pattern for every service except the
 * I/O service. */
58 static char *oss_cpts;
59 module_param(oss_cpts, charp, 0444);
60 MODULE_PARM_DESC(oss_cpts, "CPU partitions OSS threads should run on");
/* CPT pattern string for the I/O service.
 * NOTE(review): the line that consumes this value is not visible in this
 * view of ost_setup() - confirm against the full file. */
62 static char *oss_io_cpts;
63 module_param(oss_io_cpts, charp, 0444);
64 MODULE_PARM_DESC(oss_io_cpts, "CPU partitions OSS IO threads should run on");
/* obd_timeout scaled by 1000 - presumably seconds to milliseconds; TODO
 * confirm. Not referenced by any line visible in this view. */
66 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
/* CPT table for the I/O service; may be allocated in ost_setup() and is
 * freed in ost_cleanup(). NULL when no dedicated table is in use. */
68 static struct cfs_cpt_table *ost_io_cptable;
/* kset registered in ost_setup() via lprocfs_kset_register(), passed to
 * every ptlrpc_register_service() call and unregistered in ost_cleanup(). */
70 static struct kset *oss_kset;
72 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
/*
 * Set up the OSS device.
 *
 * Registers the kset, initialises ost_health_mutex, then registers five
 * ptlrpc services in this order, storing each handle in obd->u.ost:
 *   - LUSTRE_OSS_NAME on OST_REQUEST_PORTAL  -> ost_service
 *   - "ost_create"    on OST_CREATE_PORTAL   -> ost_create_service
 *   - "ost_io"        on OST_IO_PORTAL       -> ost_io_service
 *   - "ost_seq"       on SEQ_DATA_PORTAL     -> ost_seq_service
 *   - "ost_out"       on OUT_PORTAL          -> ost_out_service
 * and finally starts the ping evictor.
 *
 * \param obd   device being set up
 * \param lcfg  configuration record; not referenced by the visible lines
 *
 * On a registration failure rc is set to the PTR_ERR() of the failed
 * handle, the handle is reset to NULL, and the services registered so far
 * are unregistered in reverse order.
 *
 * NOTE(review): several lines of this function (local declarations, goto
 * labels, return statements, nested initializer braces) are not visible
 * in this view - confirm details against the full file.
 * NOTE(review): none of the visible unwind lines frees ost_io_cptable;
 * only ost_cleanup() does. Confirm whether a failure after the table was
 * allocated leaks it.
 */
73 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
/* svc_conf is function-static and is refilled before each registration */
75 static struct ptlrpc_service_conf svc_conf;
76 struct ost_obd *ost = &obd->u.ost;
81 rc = lprocfs_kset_register(obd, &oss_kset);
85 mutex_init(&ost->ost_health_mutex);
/* General OSS request service */
87 svc_conf = (typeof(svc_conf)) {
88 .psc_name = LUSTRE_OSS_NAME,
89 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
91 .bc_nbufs = OST_NBUFS,
92 .bc_buf_size = OST_BUFSIZE,
93 .bc_req_max_size = OST_MAXREQSIZE,
94 .bc_rep_max_size = OST_MAXREPSIZE,
95 .bc_req_portal = OST_REQUEST_PORTAL,
96 .bc_rep_portal = OSC_REPLY_PORTAL,
99 .tc_thr_name = "ll_ost",
100 .tc_thr_factor = OSS_THR_FACTOR,
101 .tc_nthrs_init = OSS_NTHRS_INIT,
102 .tc_nthrs_base = OSS_NTHRS_BASE,
103 .tc_nthrs_max = oss_max_threads,
104 .tc_nthrs_user = oss_num_threads,
105 .tc_cpu_affinity = 1,
106 .tc_ctx_tags = LCT_DT_THREAD,
109 .cc_pattern = oss_cpts,
112 .so_req_handler = tgt_request_handle,
113 .so_req_printer = target_print_req,
114 .so_hpreq_handler = ptlrpc_hpreq_handler,
117 ost->ost_service = ptlrpc_register_service(&svc_conf, oss_kset,
118 obd->obd_proc_entry);
119 if (IS_ERR(ost->ost_service)) {
120 rc = PTR_ERR(ost->ost_service);
121 CERROR("failed to start service: %d\n", rc);
/* do not leave an ERR_PTR behind; matches the io/seq/out failure paths */
ost->ost_service = NULL;
122 GOTO(out_lprocfs, rc);
/* Object create service */
125 memset(&svc_conf, 0, sizeof(svc_conf));
126 svc_conf = (typeof(svc_conf)) {
127 .psc_name = "ost_create",
128 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
130 .bc_nbufs = OST_NBUFS,
131 .bc_buf_size = OST_BUFSIZE,
132 .bc_req_max_size = OST_MAXREQSIZE,
133 .bc_rep_max_size = OST_MAXREPSIZE,
134 .bc_req_portal = OST_CREATE_PORTAL,
135 .bc_rep_portal = OSC_REPLY_PORTAL,
138 .tc_thr_name = "ll_ost_create",
139 .tc_thr_factor = OSS_CR_THR_FACTOR,
140 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
141 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
142 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
143 .tc_nthrs_user = oss_num_create_threads,
144 .tc_cpu_affinity = 1,
145 .tc_ctx_tags = LCT_DT_THREAD,
148 .cc_pattern = oss_cpts,
151 .so_req_handler = tgt_request_handle,
152 .so_req_printer = target_print_req,
155 ost->ost_create_service = ptlrpc_register_service(&svc_conf, oss_kset,
156 obd->obd_proc_entry);
157 if (IS_ERR(ost->ost_create_service)) {
158 rc = PTR_ERR(ost->ost_create_service);
159 CERROR("failed to start OST create service: %d\n", rc);
/* do not leave an ERR_PTR behind; matches the io/seq/out failure paths */
ost->ost_create_service = NULL;
160 GOTO(out_service, rc);
163 mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
164 /* even if the CPT feature is disabled at the libcfs level by setting the
165 * partition number to 1, we still want node affinity for the io service */
166 if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
/* one partition per node in the mask; a failure here only warns and
 * leaves ost_io_cptable NULL */
170 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
171 for_each_node_mask(i, *mask) {
172 if (ost_io_cptable == NULL) {
173 CWARN("OSS failed to create CPT table\n");
177 rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
179 CWARN("OSS Failed to set node %d for"
180 " IO CPT table\n", i);
181 cfs_cpt_table_free(ost_io_cptable);
182 ost_io_cptable = NULL;
/* Bulk I/O service; uses the dedicated CPT table when one was built */
188 memset(&svc_conf, 0, sizeof(svc_conf));
189 svc_conf = (typeof(svc_conf)) {
190 .psc_name = "ost_io",
191 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
193 .bc_nbufs = OST_NBUFS,
194 .bc_buf_size = OST_IO_BUFSIZE,
195 .bc_req_max_size = OST_IO_MAXREQSIZE,
196 .bc_rep_max_size = OST_IO_MAXREPSIZE,
197 .bc_req_portal = OST_IO_PORTAL,
198 .bc_rep_portal = OSC_REPLY_PORTAL,
201 .tc_thr_name = "ll_ost_io",
202 .tc_thr_factor = OSS_THR_FACTOR,
203 .tc_nthrs_init = OSS_NTHRS_INIT,
204 .tc_nthrs_base = OSS_NTHRS_BASE,
205 .tc_nthrs_max = oss_max_threads,
206 .tc_nthrs_user = oss_num_threads,
207 .tc_cpu_affinity = 1,
208 .tc_ctx_tags = LCT_DT_THREAD,
211 .cc_cptable = ost_io_cptable,
212 .cc_pattern = ost_io_cptable == NULL ?
216 .so_thr_init = tgt_io_thread_init,
217 .so_thr_done = tgt_io_thread_done,
218 .so_req_handler = tgt_request_handle,
219 .so_hpreq_handler = tgt_hpreq_handler,
220 .so_req_printer = target_print_req,
223 ost->ost_io_service = ptlrpc_register_service(&svc_conf, oss_kset,
224 obd->obd_proc_entry);
225 if (IS_ERR(ost->ost_io_service)) {
226 rc = PTR_ERR(ost->ost_io_service);
227 CERROR("failed to start OST I/O service: %d\n", rc);
228 ost->ost_io_service = NULL;
229 GOTO(out_create, rc);
/* Sequence service */
232 memset(&svc_conf, 0, sizeof(svc_conf));
233 svc_conf = (typeof(svc_conf)) {
234 .psc_name = "ost_seq",
235 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
237 .bc_nbufs = OST_NBUFS,
238 .bc_buf_size = OST_BUFSIZE,
239 .bc_req_max_size = OST_MAXREQSIZE,
240 .bc_rep_max_size = OST_MAXREPSIZE,
241 .bc_req_portal = SEQ_DATA_PORTAL,
242 .bc_rep_portal = OSC_REPLY_PORTAL,
245 .tc_thr_name = "ll_ost_seq",
246 .tc_thr_factor = OSS_CR_THR_FACTOR,
247 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
248 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
249 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
250 .tc_nthrs_user = oss_num_create_threads,
251 .tc_cpu_affinity = 1,
252 .tc_ctx_tags = LCT_DT_THREAD,
256 .cc_pattern = oss_cpts,
259 .so_req_handler = tgt_request_handle,
260 .so_req_printer = target_print_req,
261 .so_hpreq_handler = NULL,
264 ost->ost_seq_service = ptlrpc_register_service(&svc_conf, oss_kset,
265 obd->obd_proc_entry);
266 if (IS_ERR(ost->ost_seq_service)) {
267 rc = PTR_ERR(ost->ost_seq_service);
268 CERROR("failed to start OST seq service: %d\n", rc);
269 ost->ost_seq_service = NULL;
273 /* Object update service */
274 memset(&svc_conf, 0, sizeof(svc_conf));
275 svc_conf = (typeof(svc_conf)) {
276 .psc_name = "ost_out",
277 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
279 .bc_nbufs = OST_NBUFS,
280 .bc_buf_size = OUT_BUFSIZE,
281 .bc_req_max_size = OUT_MAXREQSIZE,
282 .bc_rep_max_size = OUT_MAXREPSIZE,
283 .bc_req_portal = OUT_PORTAL,
284 .bc_rep_portal = OSC_REPLY_PORTAL,
287 * We'd like to have a mechanism to set this on a per-device
291 .tc_thr_name = "ll_ost_out",
292 .tc_thr_factor = OSS_CR_THR_FACTOR,
293 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
294 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
295 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
296 .tc_nthrs_user = oss_num_create_threads,
297 .tc_cpu_affinity = 1,
298 .tc_ctx_tags = LCT_MD_THREAD |
302 .cc_pattern = oss_cpts,
305 .so_req_handler = tgt_request_handle,
306 .so_req_printer = target_print_req,
307 .so_hpreq_handler = NULL,
310 ost->ost_out_service = ptlrpc_register_service(&svc_conf, oss_kset,
311 obd->obd_proc_entry);
312 if (IS_ERR(ost->ost_out_service)) {
313 rc = PTR_ERR(ost->ost_out_service);
314 CERROR("failed to start out service: %d\n", rc);
315 ost->ost_out_service = NULL;
319 ping_evictor_start();
/* Error unwinding (the goto labels are not visible in this view): services
 * are unregistered in reverse order of registration, then the kset. */
324 ptlrpc_unregister_service(ost->ost_seq_service);
325 ost->ost_seq_service = NULL;
327 ptlrpc_unregister_service(ost->ost_io_service);
328 ost->ost_io_service = NULL;
330 ptlrpc_unregister_service(ost->ost_create_service);
331 ost->ost_create_service = NULL;
333 ptlrpc_unregister_service(ost->ost_service);
334 ost->ost_service = NULL;
336 lprocfs_kset_unregister(obd, oss_kset);
/*
 * Tear down the OSS device.
 *
 * Asserts that obd->obd_recovering is 0, then, while holding
 * ost_health_mutex, unregisters the five services (ost, create, io, seq,
 * out) and clears their pointers in obd->u.ost. After dropping the mutex
 * it unregisters the kset and frees ost_io_cptable if one was allocated.
 *
 * NOTE(review): some lines of this function, including its return
 * statement and the end of the comment that starts with "there is no
 * recovery for OST OBD", are not visible in this view - confirm against
 * the full file.
 */
340 static int ost_cleanup(struct obd_device *obd)
342 struct ost_obd *ost = &obd->u.ost;
348 /* there is no recovery for OST OBD, all recovery is controlled by
350 LASSERT(obd->obd_recovering == 0);
351 mutex_lock(&ost->ost_health_mutex);
352 ptlrpc_unregister_service(ost->ost_service);
353 ptlrpc_unregister_service(ost->ost_create_service);
354 ptlrpc_unregister_service(ost->ost_io_service);
355 ptlrpc_unregister_service(ost->ost_seq_service);
356 ptlrpc_unregister_service(ost->ost_out_service);
358 ost->ost_service = NULL;
359 ost->ost_create_service = NULL;
360 ost->ost_io_service = NULL;
361 ost->ost_seq_service = NULL;
362 ost->ost_out_service = NULL;
364 mutex_unlock(&ost->ost_health_mutex);
366 lprocfs_kset_unregister(obd, oss_kset);
368 if (ost_io_cptable != NULL) {
369 cfs_cpt_table_free(ost_io_cptable);
370 ost_io_cptable = NULL;
/*
 * Report the combined health of the OSS services.
 *
 * While holding ost_health_mutex, ORs together the results of
 * ptlrpc_service_health_check() for the ost, create, io and seq services.
 *
 * \param env  not referenced by the visible lines
 * \param obd  OSS device whose services are checked
 *
 * \retval 0  every checked service returned 0
 * \retval 1  at least one checked service returned non-zero
 *
 * NOTE(review): ost_out_service is registered in ost_setup() and
 * unregistered in ost_cleanup() but is not checked here - confirm whether
 * that omission is intentional.
 */
376 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
378 struct ost_obd *ost = &obd->u.ost;
/* same mutex that ost_cleanup() holds while unregistering the services */
381 mutex_lock(&ost->ost_health_mutex);
382 rc |= ptlrpc_service_health_check(ost->ost_service);
383 rc |= ptlrpc_service_health_check(ost->ost_create_service);
384 rc |= ptlrpc_service_health_check(ost->ost_io_service);
385 rc |= ptlrpc_service_health_check(ost->ost_seq_service);
386 mutex_unlock(&ost->ost_health_mutex);
/* collapse the OR-ed result to a strict 0/1 */
388 return rc != 0 ? 1 : 0;
391 /* use obd ops to offer management infrastructure */
/* Method table passed to class_register_type() in ost_init(); it wires up
 * the setup, cleanup and health-check entry points defined in this file. */
392 static struct obd_ops ost_obd_ops = {
393 .o_owner = THIS_MODULE,
394 .o_setup = ost_setup,
395 .o_cleanup = ost_cleanup,
396 .o_health_check = ost_health_check,
/*
 * Module init hook: registers the OSS obd type under LUSTRE_OSS_NAME with
 * ost_obd_ops as its method table.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably rc from class_register_type() is returned - confirm.
 */
400 static int __init ost_init(void)
406 rc = class_register_type(&ost_obd_ops, NULL, true, NULL,
407 LUSTRE_OSS_NAME, NULL);
/*
 * Module exit hook: unregisters the obd type that ost_init() registered
 * under LUSTRE_OSS_NAME.
 */
412 static void __exit ost_exit(void)
414 class_unregister_type(LUSTRE_OSS_NAME);
/* Module metadata, followed by the init/exit hook registration. */
417 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
418 MODULE_DESCRIPTION("Lustre Object Storage Target (OST)");
419 MODULE_VERSION(LUSTRE_VERSION_STRING);
420 MODULE_LICENSE("GPL");
422 module_init(ost_init);
423 module_exit(ost_exit);