4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ost/ost_handler.c
38 * Author: Peter J. Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
42 #define DEBUG_SUBSYSTEM S_OST
44 #include <linux/module.h>
46 #include <lustre_dlm.h>
47 #include <lprocfs_status.h>
48 #include "ost_internal.h"
/*
 * Module parameters and file-scope state for the OSS.  Each
 * CFS_MODULE_PARM line pairs the static variable declared just above it
 * with a type, a mode (0444) and a description string.
 */

/* Passed as tc_nthrs_user for the main and I/O services in ost_setup(). */
50 static int oss_num_threads;
51 CFS_MODULE_PARM(oss_num_threads, "i", int, 0444,
52 "number of OSS service threads to start");
/*
 * Deprecated spelling of oss_num_threads; ost_init() copies it into
 * oss_num_threads when this one is set and oss_num_threads is 0.
 */
54 static int ost_num_threads;
55 CFS_MODULE_PARM(ost_num_threads, "i", int, 0444,
56 "number of OST service threads to start (deprecated)");
/*
 * Passed as tc_nthrs_user for the create, seq, out and index-read
 * services in ost_setup().
 */
58 static int oss_num_create_threads;
59 CFS_MODULE_PARM(oss_num_create_threads, "i", int, 0444,
60 "number of OSS create threads to start");
/* Used as cc_pattern for every service except the I/O service. */
62 static char *oss_cpts;
63 CFS_MODULE_PARM(oss_cpts, "s", charp, 0444,
64 "CPU partitions OSS threads should run on");
/*
 * NOTE(review): no visible line reads this variable; presumably it is one
 * of the alternatives for the I/O service's cc_pattern - confirm.
 */
66 static char *oss_io_cpts;
67 CFS_MODULE_PARM(oss_io_cpts, "s", charp, 0444,
68 "CPU partitions OSS IO threads should run on");
/*
 * obd_timeout scaled by 1000.
 * NOTE(review): presumably a seconds-to-milliseconds conversion - confirm.
 */
70 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
/*
 * CPT table private to the I/O service.  Allocated conditionally in
 * ost_setup() and freed (then reset to NULL) in ost_cleanup(); stays NULL
 * when no private table is built.
 */
72 static struct cfs_cpt_table *ost_io_cptable;
/*
 * procfs variable table for an OST obd device; ost_setup() stores it in
 * obd->obd_vars.  Only the "uuid" entry, bound to ost_uuid_fops, is
 * visible here.
 * NOTE(review): ost_uuid_fops is presumably generated by the
 * LPROC_SEQ_FOPS_RO_TYPE(ost, uuid) line - confirm against the macro.
 */
75 LPROC_SEQ_FOPS_RO_TYPE(ost, uuid);
77 static struct lprocfs_seq_vars lprocfs_ost_obd_vars[] = {
78 { "uuid", &ost_uuid_fops },
83 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
/*
 * ost_setup() - register the OSS request services for an OST obd device.
 *
 * @obd:  device being set up; the service handles are stored in its
 *        embedded struct ost_obd (obd->u.ost).
 * @lcfg: configuration record; not referenced by any visible line.
 *
 * Six ptlrpc services are registered in this order, all replying on
 * OSC_REPLY_PORTAL and all using tgt_request_handle as request handler:
 *
 *   service name      thread name       request portal
 *   LUSTRE_OSS_NAME   "ll_ost"          OST_REQUEST_PORTAL
 *   "ost_create"      "ll_ost_create"   OST_CREATE_PORTAL
 *   "ost_io"          "ll_ost_io"       OST_IO_PORTAL
 *   "ost_seq"         "ll_ost_seq"      SEQ_DATA_PORTAL
 *   "ost_out"         "ll_ost_out"      OUT_PORTAL
 *   "ost_idx_read"    "ll_ost_idx"      OST_IDX_PORTAL
 *
 * A single static svc_conf is refilled before each registration.  When a
 * registration fails, the PTR_ERR() value is logged with CERROR; the
 * visible failure branches then jump to the unwind code at the bottom.
 *
 * Return: NOTE(review): the return statements are not visible in this
 * listing; presumably 0 on success and the logged error code on failure.
 */
84 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
/* Shared by all six registrations below. */
86 static struct ptlrpc_service_conf svc_conf;
87 struct ost_obd *ost = &obd->u.ost;
92 rc = cfs_cleanup_group_info();
/* Install the procfs variable table, then run lprocfs setup for obd. */
97 obd->obd_vars = lprocfs_ost_obd_vars;
98 lprocfs_seq_obd_setup(obd);
/* Also taken by ost_health_check() and ost_cleanup() around the service pointers. */
100 mutex_init(&ost->ost_health_mutex);
/* 1. Main OSS service on OST_REQUEST_PORTAL. */
102 svc_conf = (typeof(svc_conf)) {
103 .psc_name = LUSTRE_OSS_NAME,
104 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
106 .bc_nbufs = OST_NBUFS,
107 .bc_buf_size = OST_BUFSIZE,
108 .bc_req_max_size = OST_MAXREQSIZE,
109 .bc_rep_max_size = OST_MAXREPSIZE,
110 .bc_req_portal = OST_REQUEST_PORTAL,
111 .bc_rep_portal = OSC_REPLY_PORTAL,
114 .tc_thr_name = "ll_ost",
115 .tc_thr_factor = OSS_THR_FACTOR,
116 .tc_nthrs_init = OSS_NTHRS_INIT,
117 .tc_nthrs_base = OSS_NTHRS_BASE,
118 .tc_nthrs_max = OSS_NTHRS_MAX,
119 .tc_nthrs_user = oss_num_threads,
120 .tc_cpu_affinity = 1,
121 .tc_ctx_tags = LCT_DT_THREAD,
124 .cc_pattern = oss_cpts,
127 .so_req_handler = tgt_request_handle,
128 .so_req_printer = target_print_req,
129 .so_hpreq_handler = ptlrpc_hpreq_handler,
132 ost->ost_service = ptlrpc_register_service(&svc_conf,
133 obd->obd_proc_entry);
/* Nothing else is registered yet, so only procfs state needs undoing. */
134 if (IS_ERR(ost->ost_service)) {
135 rc = PTR_ERR(ost->ost_service);
136 CERROR("failed to start service: %d\n", rc);
137 GOTO(out_lprocfs, rc);
/* 2. Object-create service on OST_CREATE_PORTAL. */
140 memset(&svc_conf, 0, sizeof(svc_conf));
141 svc_conf = (typeof(svc_conf)) {
142 .psc_name = "ost_create",
143 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
145 .bc_nbufs = OST_NBUFS,
146 .bc_buf_size = OST_BUFSIZE,
147 .bc_req_max_size = OST_MAXREQSIZE,
148 .bc_rep_max_size = OST_MAXREPSIZE,
149 .bc_req_portal = OST_CREATE_PORTAL,
150 .bc_rep_portal = OSC_REPLY_PORTAL,
153 .tc_thr_name = "ll_ost_create",
154 .tc_thr_factor = OSS_CR_THR_FACTOR,
155 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
156 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
157 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
158 .tc_nthrs_user = oss_num_create_threads,
159 .tc_cpu_affinity = 1,
160 .tc_ctx_tags = LCT_DT_THREAD,
163 .cc_pattern = oss_cpts,
166 .so_req_handler = tgt_request_handle,
167 .so_req_printer = target_print_req,
170 ost->ost_create_service = ptlrpc_register_service(&svc_conf,
171 obd->obd_proc_entry);
172 if (IS_ERR(ost->ost_create_service)) {
173 rc = PTR_ERR(ost->ost_create_service);
174 CERROR("failed to start OST create service: %d\n", rc);
175 GOTO(out_service, rc);
178 mask = cfs_cpt_table->ctb_nodemask;
179 /* event CPT feature is disabled in libcfs level by set partition
180 * number to 1, we still want to set node affinity for io service */
181 if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
/*
 * Private table with one partition per node in the mask; the loop assigns
 * node i to partition cpt.  The allocation result is checked inside the
 * loop.
 */
185 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
186 for_each_node_mask(i, *mask) {
187 if (ost_io_cptable == NULL) {
188 CWARN("OSS failed to create CPT table\n");
192 rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
/*
 * The two literals are concatenated by the compiler; the trailing space
 * after "for" keeps the message from reading "forIO CPT table".
 */
194 CWARN("OSS Failed to set node %d for "
195 "IO CPT table\n", i);
196 cfs_cpt_table_free(ost_io_cptable);
197 ost_io_cptable = NULL;
/* 3. I/O service on OST_IO_PORTAL, with its own buffer and message sizes. */
203 memset(&svc_conf, 0, sizeof(svc_conf));
204 svc_conf = (typeof(svc_conf)) {
205 .psc_name = "ost_io",
206 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
208 .bc_nbufs = OST_NBUFS,
209 .bc_buf_size = OST_IO_BUFSIZE,
210 .bc_req_max_size = OST_IO_MAXREQSIZE,
211 .bc_rep_max_size = OST_IO_MAXREPSIZE,
212 .bc_req_portal = OST_IO_PORTAL,
213 .bc_rep_portal = OSC_REPLY_PORTAL,
216 .tc_thr_name = "ll_ost_io",
217 .tc_thr_factor = OSS_THR_FACTOR,
218 .tc_nthrs_init = OSS_NTHRS_INIT,
219 .tc_nthrs_base = OSS_NTHRS_BASE,
220 .tc_nthrs_max = OSS_NTHRS_MAX,
221 .tc_nthrs_user = oss_num_threads,
222 .tc_cpu_affinity = 1,
223 .tc_ctx_tags = LCT_DT_THREAD,
/*
 * Hands over the private table built above (NULL if none was built).
 * cc_pattern is chosen on whether that table is NULL; the two
 * alternatives are not visible in this listing.
 */
226 .cc_cptable = ost_io_cptable,
227 .cc_pattern = ost_io_cptable == NULL ?
/* Only this service installs per-thread init/done hooks. */
231 .so_thr_init = tgt_io_thread_init,
232 .so_thr_done = tgt_io_thread_done,
233 .so_req_handler = tgt_request_handle,
234 .so_hpreq_handler = tgt_hpreq_handler,
235 .so_req_printer = target_print_req,
238 ost->ost_io_service = ptlrpc_register_service(&svc_conf,
239 obd->obd_proc_entry);
240 if (IS_ERR(ost->ost_io_service)) {
241 rc = PTR_ERR(ost->ost_io_service);
242 CERROR("failed to start OST I/O service: %d\n", rc);
243 ost->ost_io_service = NULL;
244 GOTO(out_create, rc);
/* 4. Sequence service on SEQ_DATA_PORTAL. */
247 memset(&svc_conf, 0, sizeof(svc_conf));
248 svc_conf = (typeof(svc_conf)) {
249 .psc_name = "ost_seq",
250 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
252 .bc_nbufs = OST_NBUFS,
253 .bc_buf_size = OST_BUFSIZE,
254 .bc_req_max_size = OST_MAXREQSIZE,
255 .bc_rep_max_size = OST_MAXREPSIZE,
256 .bc_req_portal = SEQ_DATA_PORTAL,
257 .bc_rep_portal = OSC_REPLY_PORTAL,
260 .tc_thr_name = "ll_ost_seq",
261 .tc_thr_factor = OSS_CR_THR_FACTOR,
262 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
263 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
264 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
265 .tc_nthrs_user = oss_num_create_threads,
266 .tc_cpu_affinity = 1,
267 .tc_ctx_tags = LCT_DT_THREAD,
271 .cc_pattern = oss_cpts,
274 .so_req_handler = tgt_request_handle,
275 .so_req_printer = target_print_req,
276 .so_hpreq_handler = NULL,
279 ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
280 obd->obd_proc_entry);
281 if (IS_ERR(ost->ost_seq_service)) {
282 rc = PTR_ERR(ost->ost_seq_service);
283 CERROR("failed to start OST seq service: %d\n", rc);
284 ost->ost_seq_service = NULL;
/* 5. Service on OUT_PORTAL, sized with the OUT_* constants. */
288 /* Object update service */
289 memset(&svc_conf, 0, sizeof(svc_conf));
290 svc_conf = (typeof(svc_conf)) {
291 .psc_name = "ost_out",
292 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
294 .bc_nbufs = OST_NBUFS,
295 .bc_buf_size = OUT_BUFSIZE,
296 .bc_req_max_size = OUT_MAXREQSIZE,
297 .bc_rep_max_size = OUT_MAXREPSIZE,
298 .bc_req_portal = OUT_PORTAL,
299 .bc_rep_portal = OSC_REPLY_PORTAL,
302 * We'd like to have a mechanism to set this on a per-device
306 .tc_thr_name = "ll_ost_out",
307 .tc_thr_factor = OSS_CR_THR_FACTOR,
308 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
309 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
310 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
311 .tc_nthrs_user = oss_num_create_threads,
312 .tc_cpu_affinity = 1,
313 .tc_ctx_tags = LCT_MD_THREAD |
317 .cc_pattern = oss_cpts,
320 .so_req_handler = tgt_request_handle,
321 .so_req_printer = target_print_req,
322 .so_hpreq_handler = NULL,
325 ost->ost_out_service = ptlrpc_register_service(&svc_conf,
326 obd->obd_proc_entry);
327 if (IS_ERR(ost->ost_out_service)) {
328 rc = PTR_ERR(ost->ost_out_service);
329 CERROR("failed to start out service: %d\n", rc);
330 ost->ost_out_service = NULL;
/* 6. Service on OST_IDX_PORTAL. */
334 /* Index read service */
335 memset(&svc_conf, 0, sizeof(svc_conf));
336 svc_conf = (typeof(svc_conf)) {
337 .psc_name = "ost_idx_read",
338 .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
340 .bc_nbufs = OST_NBUFS,
341 .bc_buf_size = OST_BUFSIZE,
342 .bc_req_max_size = OST_MAXREQSIZE,
343 .bc_rep_max_size = OST_MAXREPSIZE,
344 .bc_req_portal = OST_IDX_PORTAL,
345 .bc_rep_portal = OSC_REPLY_PORTAL,
348 .tc_thr_name = "ll_ost_idx",
349 .tc_thr_factor = OSS_CR_THR_FACTOR,
350 .tc_nthrs_init = OSS_CR_NTHRS_INIT,
351 .tc_nthrs_base = OSS_CR_NTHRS_BASE,
352 .tc_nthrs_max = OSS_CR_NTHRS_MAX,
353 .tc_nthrs_user = oss_num_create_threads,
354 .tc_cpu_affinity = 1,
355 .tc_ctx_tags = LCT_DT_THREAD,
358 .cc_pattern = oss_cpts,
361 .so_req_handler = tgt_request_handle,
362 .so_req_printer = target_print_req,
365 ost->ost_idx_service = ptlrpc_register_service(&svc_conf,
366 obd->obd_proc_entry);
367 if (IS_ERR(ost->ost_idx_service)) {
368 rc = PTR_ERR(ost->ost_idx_service);
369 CERROR("failed to start OST index read service: rc = %d\n", rc);
370 ost->ost_idx_service = NULL;
/* Runs after the last registration. */
374 ping_evictor_start();
/*
 * Unwind code: services are unregistered newest first (out, seq, io,
 * create, main) and each pointer is cleared, then procfs state is removed.
 * NOTE(review): the goto labels that separate these steps are not visible
 * in this listing.
 */
379 ptlrpc_unregister_service(ost->ost_out_service);
380 ost->ost_out_service = NULL;
382 ptlrpc_unregister_service(ost->ost_seq_service);
383 ost->ost_seq_service = NULL;
385 ptlrpc_unregister_service(ost->ost_io_service);
386 ost->ost_io_service = NULL;
388 ptlrpc_unregister_service(ost->ost_create_service);
389 ost->ost_create_service = NULL;
391 ptlrpc_unregister_service(ost->ost_service);
392 ost->ost_service = NULL;
394 lprocfs_obd_cleanup(obd);
/*
 * ost_cleanup() - release what ost_setup() created for @obd.
 *
 * @obd: device being torn down; its struct ost_obd is obd->u.ost.
 *
 * Asserts that the device is not recovering, then, while holding
 * ost_health_mutex, unregisters all six services (main, create, io, seq,
 * out, idx) and clears their pointers.  ost_health_check() takes the same
 * mutex around its reads of those pointers.  After dropping the mutex,
 * procfs state is removed with lprocfs_obd_cleanup() and the private I/O
 * CPT table is freed and reset to NULL if one was allocated.
 *
 * Return: NOTE(review): the return statement is not visible in this
 * listing.
 *
 * NOTE(review): the comment that begins "there is no recovery for OST OBD"
 * has lost its closing line in this listing.
 */
398 static int ost_cleanup(struct obd_device *obd)
400 struct ost_obd *ost = &obd->u.ost;
406 /* there is no recovery for OST OBD, all recovery is controlled by
408 LASSERT(obd->obd_recovering == 0);
409 mutex_lock(&ost->ost_health_mutex);
410 ptlrpc_unregister_service(ost->ost_service);
411 ptlrpc_unregister_service(ost->ost_create_service);
412 ptlrpc_unregister_service(ost->ost_io_service);
413 ptlrpc_unregister_service(ost->ost_seq_service);
414 ptlrpc_unregister_service(ost->ost_out_service);
415 ptlrpc_unregister_service(ost->ost_idx_service);
417 ost->ost_service = NULL;
418 ost->ost_create_service = NULL;
419 ost->ost_io_service = NULL;
420 ost->ost_seq_service = NULL;
421 ost->ost_out_service = NULL;
422 ost->ost_idx_service = NULL;
424 mutex_unlock(&ost->ost_health_mutex);
426 lprocfs_obd_cleanup(obd);
428 if (ost_io_cptable != NULL) {
429 cfs_cpt_table_free(ost_io_cptable);
430 ost_io_cptable = NULL;
/*
 * ost_health_check() - combine the health of the core OSS services.
 *
 * @env: not referenced by any visible line.
 * @obd: device whose services are checked (obd->u.ost).
 *
 * ORs together the results of ptlrpc_service_health_check() for the main,
 * create and I/O services while holding ost_health_mutex.  The seq, out
 * and index-read services are not consulted.
 *
 * Return: the surviving comment fragment below says callers expect 0 for
 * healthy and 1 for unhealthy.
 * NOTE(review): the code that produces the return value is not visible in
 * this listing.
 */
436 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
438 struct ost_obd *ost = &obd->u.ost;
/* The same mutex is held by ost_cleanup() while it clears these pointers. */
441 mutex_lock(&ost->ost_health_mutex);
442 rc |= ptlrpc_service_health_check(ost->ost_service);
443 rc |= ptlrpc_service_health_check(ost->ost_create_service);
444 rc |= ptlrpc_service_health_check(ost->ost_io_service);
445 mutex_unlock(&ost->ost_health_mutex);
448 * health_check to return 0 on healthy
449 * and 1 on unhealthy.
457 /* use obd ops to offer management infrastructure */
/*
 * Operations table passed to class_register_type() in ost_init(): it
 * binds setup, cleanup and health-check to the functions in this file.
 */
458 static struct obd_ops ost_obd_ops = {
459 .o_owner = THIS_MODULE,
460 .o_setup = ost_setup,
461 .o_cleanup = ost_cleanup,
462 .o_health_check = ost_health_check,
/*
 * ost_init() - module entry point (registered with module_init()).
 *
 * Registers the obd type named LUSTRE_OSS_NAME with ost_obd_ops.  After
 * that, if the deprecated ost_num_threads parameter is set and
 * oss_num_threads is still 0, it prints a console notice and copies the
 * deprecated value into oss_num_threads.
 *
 * Return: NOTE(review): the return statement is not visible in this
 * listing; presumably the result of class_register_type().
 */
466 static int __init ost_init(void)
472 rc = class_register_type(&ost_obd_ops, NULL, NULL,
473 #ifndef HAVE_ONLY_PROCFS_SEQ
476 LUSTRE_OSS_NAME, NULL);
/* An explicitly set oss_num_threads takes precedence over the deprecated name. */
478 if (ost_num_threads != 0 && oss_num_threads == 0) {
479 LCONSOLE_INFO("ost_num_threads module parameter is deprecated, "
480 "use oss_num_threads instead or unset both for "
481 "dynamic thread startup\n");
482 oss_num_threads = ost_num_threads;
/*
 * ost_exit() - module exit point (registered with module_exit()).
 *
 * Unregisters the obd type that ost_init() registered under
 * LUSTRE_OSS_NAME.  The __exit annotation is commented out in the
 * signature.
 */
488 static void /*__exit*/ ost_exit(void)
490 class_unregister_type(LUSTRE_OSS_NAME);
/* Module metadata, followed by registration of the entry and exit points. */
493 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
494 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
495 MODULE_LICENSE("GPL");
497 module_init(ost_init);
498 module_exit(ost_exit);