Whamcloud - gitweb
baab8be4676df515fed40f342152ab00750a14b7
[fs/lustre-release.git] / lustre / ost / ost_handler.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ost/ost_handler.c
37  *
38  * Author: Peter J. Braam <braam@clusterfs.com>
39  * Author: Phil Schwan <phil@clusterfs.com>
40  */
41
42 #define DEBUG_SUBSYSTEM S_OST
43
44 #include <linux/module.h>
45 #include <obd_ost.h>
46 #include <lustre_dlm.h>
47 #include <lprocfs_status.h>
48 #include "ost_internal.h"
49
50 static int oss_num_threads;
51 CFS_MODULE_PARM(oss_num_threads, "i", int, 0444,
52                 "number of OSS service threads to start");
53
54 static int ost_num_threads;
55 CFS_MODULE_PARM(ost_num_threads, "i", int, 0444,
56                 "number of OST service threads to start (deprecated)");
57
58 static int oss_num_create_threads;
59 CFS_MODULE_PARM(oss_num_create_threads, "i", int, 0444,
60                 "number of OSS create threads to start");
61
62 static char *oss_cpts;
63 CFS_MODULE_PARM(oss_cpts, "s", charp, 0444,
64                 "CPU partitions OSS threads should run on");
65
66 static char *oss_io_cpts;
67 CFS_MODULE_PARM(oss_io_cpts, "s", charp, 0444,
68                 "CPU partitions OSS IO threads should run on");
69
70 #define OST_WATCHDOG_TIMEOUT (obd_timeout * 1000)
71
72 static struct cfs_cpt_table     *ost_io_cptable;
73
#ifdef LPROCFS
/* NOTE(review): presumably this macro emits ost_uuid_fops -- confirm. */
LPROC_SEQ_FOPS_RO_TYPE(ost, uuid);

/* Zero-terminated table of proc entries published for the OSS device. */
static struct lprocfs_seq_vars lprocfs_ost_obd_vars[] = {
        {
                .name = "uuid",
                .fops = &ost_uuid_fops,
        },
        { 0 }
};
#endif /* LPROCFS */
83
84 /* Sigh - really, this is an OSS, the _server_, not the _target_ */
85 static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
86 {
87         static struct ptlrpc_service_conf       svc_conf;
88         struct ost_obd *ost = &obd->u.ost;
89         nodemask_t              *mask;
90         int rc;
91         ENTRY;
92
93         rc = cfs_cleanup_group_info();
94         if (rc)
95                 RETURN(rc);
96
97 #ifdef LPROCFS
98         obd->obd_vars = lprocfs_ost_obd_vars;
99         lprocfs_seq_obd_setup(obd);
100 #endif
101         mutex_init(&ost->ost_health_mutex);
102
103         svc_conf = (typeof(svc_conf)) {
104                 .psc_name               = LUSTRE_OSS_NAME,
105                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
106                 .psc_buf                = {
107                         .bc_nbufs               = OST_NBUFS,
108                         .bc_buf_size            = OST_BUFSIZE,
109                         .bc_req_max_size        = OST_MAXREQSIZE,
110                         .bc_rep_max_size        = OST_MAXREPSIZE,
111                         .bc_req_portal          = OST_REQUEST_PORTAL,
112                         .bc_rep_portal          = OSC_REPLY_PORTAL,
113                 },
114                 .psc_thr                = {
115                         .tc_thr_name            = "ll_ost",
116                         .tc_thr_factor          = OSS_THR_FACTOR,
117                         .tc_nthrs_init          = OSS_NTHRS_INIT,
118                         .tc_nthrs_base          = OSS_NTHRS_BASE,
119                         .tc_nthrs_max           = OSS_NTHRS_MAX,
120                         .tc_nthrs_user          = oss_num_threads,
121                         .tc_cpu_affinity        = 1,
122                         .tc_ctx_tags            = LCT_DT_THREAD,
123                 },
124                 .psc_cpt                = {
125                         .cc_pattern             = oss_cpts,
126                 },
127                 .psc_ops                = {
128                         .so_req_handler         = tgt_request_handle,
129                         .so_req_printer         = target_print_req,
130                         .so_hpreq_handler       = ptlrpc_hpreq_handler,
131                 },
132         };
133         ost->ost_service = ptlrpc_register_service(&svc_conf,
134                                                    obd->obd_proc_entry);
135         if (IS_ERR(ost->ost_service)) {
136                 rc = PTR_ERR(ost->ost_service);
137                 CERROR("failed to start service: %d\n", rc);
138                 GOTO(out_lprocfs, rc);
139         }
140
141         memset(&svc_conf, 0, sizeof(svc_conf));
142         svc_conf = (typeof(svc_conf)) {
143                 .psc_name               = "ost_create",
144                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
145                 .psc_buf                = {
146                         .bc_nbufs               = OST_NBUFS,
147                         .bc_buf_size            = OST_BUFSIZE,
148                         .bc_req_max_size        = OST_MAXREQSIZE,
149                         .bc_rep_max_size        = OST_MAXREPSIZE,
150                         .bc_req_portal          = OST_CREATE_PORTAL,
151                         .bc_rep_portal          = OSC_REPLY_PORTAL,
152                 },
153                 .psc_thr                = {
154                         .tc_thr_name            = "ll_ost_create",
155                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
156                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
157                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
158                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
159                         .tc_nthrs_user          = oss_num_create_threads,
160                         .tc_cpu_affinity        = 1,
161                         .tc_ctx_tags            = LCT_DT_THREAD,
162                 },
163                 .psc_cpt                = {
164                         .cc_pattern             = oss_cpts,
165                 },
166                 .psc_ops                = {
167                         .so_req_handler         = tgt_request_handle,
168                         .so_req_printer         = target_print_req,
169                 },
170         };
171         ost->ost_create_service = ptlrpc_register_service(&svc_conf,
172                                                           obd->obd_proc_entry);
173         if (IS_ERR(ost->ost_create_service)) {
174                 rc = PTR_ERR(ost->ost_create_service);
175                 CERROR("failed to start OST create service: %d\n", rc);
176                 GOTO(out_service, rc);
177         }
178
179         mask = cfs_cpt_nodemask(cfs_cpt_table, CFS_CPT_ANY);
180         /* event CPT feature is disabled in libcfs level by set partition
181          * number to 1, we still want to set node affinity for io service */
182         if (cfs_cpt_number(cfs_cpt_table) == 1 && nodes_weight(*mask) > 1) {
183                 int     cpt = 0;
184                 int     i;
185
186                 ost_io_cptable = cfs_cpt_table_alloc(nodes_weight(*mask));
187                 for_each_node_mask(i, *mask) {
188                         if (ost_io_cptable == NULL) {
189                                 CWARN("OSS failed to create CPT table\n");
190                                 break;
191                         }
192
193                         rc = cfs_cpt_set_node(ost_io_cptable, cpt++, i);
194                         if (!rc) {
195                                 CWARN("OSS Failed to set node %d for"
196                                       "IO CPT table\n", i);
197                                 cfs_cpt_table_free(ost_io_cptable);
198                                 ost_io_cptable = NULL;
199                                 break;
200                         }
201                 }
202         }
203
204         memset(&svc_conf, 0, sizeof(svc_conf));
205         svc_conf = (typeof(svc_conf)) {
206                 .psc_name               = "ost_io",
207                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
208                 .psc_buf                = {
209                         .bc_nbufs               = OST_NBUFS,
210                         .bc_buf_size            = OST_IO_BUFSIZE,
211                         .bc_req_max_size        = OST_IO_MAXREQSIZE,
212                         .bc_rep_max_size        = OST_IO_MAXREPSIZE,
213                         .bc_req_portal          = OST_IO_PORTAL,
214                         .bc_rep_portal          = OSC_REPLY_PORTAL,
215                 },
216                 .psc_thr                = {
217                         .tc_thr_name            = "ll_ost_io",
218                         .tc_thr_factor          = OSS_THR_FACTOR,
219                         .tc_nthrs_init          = OSS_NTHRS_INIT,
220                         .tc_nthrs_base          = OSS_NTHRS_BASE,
221                         .tc_nthrs_max           = OSS_NTHRS_MAX,
222                         .tc_nthrs_user          = oss_num_threads,
223                         .tc_cpu_affinity        = 1,
224                         .tc_ctx_tags            = LCT_DT_THREAD,
225                 },
226                 .psc_cpt                = {
227                         .cc_cptable             = ost_io_cptable,
228                         .cc_pattern             = ost_io_cptable == NULL ?
229                                                   oss_io_cpts : NULL,
230                 },
231                 .psc_ops                = {
232                         .so_thr_init            = tgt_io_thread_init,
233                         .so_thr_done            = tgt_io_thread_done,
234                         .so_req_handler         = tgt_request_handle,
235                         .so_hpreq_handler       = tgt_hpreq_handler,
236                         .so_req_printer         = target_print_req,
237                 },
238         };
239         ost->ost_io_service = ptlrpc_register_service(&svc_conf,
240                                                       obd->obd_proc_entry);
241         if (IS_ERR(ost->ost_io_service)) {
242                 rc = PTR_ERR(ost->ost_io_service);
243                 CERROR("failed to start OST I/O service: %d\n", rc);
244                 ost->ost_io_service = NULL;
245                 GOTO(out_create, rc);
246         }
247
248         memset(&svc_conf, 0, sizeof(svc_conf));
249         svc_conf = (typeof(svc_conf)) {
250                 .psc_name               = "ost_seq",
251                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
252                 .psc_buf                = {
253                         .bc_nbufs               = OST_NBUFS,
254                         .bc_buf_size            = OST_BUFSIZE,
255                         .bc_req_max_size        = OST_MAXREQSIZE,
256                         .bc_rep_max_size        = OST_MAXREPSIZE,
257                         .bc_req_portal          = SEQ_DATA_PORTAL,
258                         .bc_rep_portal          = OSC_REPLY_PORTAL,
259                 },
260                 .psc_thr                = {
261                         .tc_thr_name            = "ll_ost_seq",
262                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
263                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
264                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
265                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
266                         .tc_nthrs_user          = oss_num_create_threads,
267                         .tc_cpu_affinity        = 1,
268                         .tc_ctx_tags            = LCT_DT_THREAD,
269                 },
270
271                 .psc_cpt                = {
272                         .cc_pattern          = oss_cpts,
273                 },
274                 .psc_ops                = {
275                         .so_req_handler         = tgt_request_handle,
276                         .so_req_printer         = target_print_req,
277                         .so_hpreq_handler       = NULL,
278                 },
279         };
280         ost->ost_seq_service = ptlrpc_register_service(&svc_conf,
281                                                       obd->obd_proc_entry);
282         if (IS_ERR(ost->ost_seq_service)) {
283                 rc = PTR_ERR(ost->ost_seq_service);
284                 CERROR("failed to start OST seq service: %d\n", rc);
285                 ost->ost_seq_service = NULL;
286                 GOTO(out_io, rc);
287         }
288
289         /* Object update service */
290         memset(&svc_conf, 0, sizeof(svc_conf));
291         svc_conf = (typeof(svc_conf)) {
292                 .psc_name               = "ost_out",
293                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
294                 .psc_buf                = {
295                         .bc_nbufs               = OST_NBUFS,
296                         .bc_buf_size            = OUT_BUFSIZE,
297                         .bc_req_max_size        = OUT_MAXREQSIZE,
298                         .bc_rep_max_size        = OUT_MAXREPSIZE,
299                         .bc_req_portal          = OUT_PORTAL,
300                         .bc_rep_portal          = OSC_REPLY_PORTAL,
301                 },
302                 /*
303                  * We'd like to have a mechanism to set this on a per-device
304                  * basis, but alas...
305                  */
306                 .psc_thr                = {
307                         .tc_thr_name            = "ll_ost_out",
308                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
309                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
310                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
311                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
312                         .tc_nthrs_user          = oss_num_create_threads,
313                         .tc_cpu_affinity        = 1,
314                         .tc_ctx_tags            = LCT_MD_THREAD |
315                                                   LCT_DT_THREAD,
316                 },
317                 .psc_cpt                = {
318                         .cc_pattern             = oss_cpts,
319                 },
320                 .psc_ops                = {
321                         .so_req_handler         = tgt_request_handle,
322                         .so_req_printer         = target_print_req,
323                         .so_hpreq_handler       = NULL,
324                 },
325         };
326         ost->ost_out_service = ptlrpc_register_service(&svc_conf,
327                                                        obd->obd_proc_entry);
328         if (IS_ERR(ost->ost_out_service)) {
329                 rc = PTR_ERR(ost->ost_out_service);
330                 CERROR("failed to start out service: %d\n", rc);
331                 ost->ost_out_service = NULL;
332                 GOTO(out_seq, rc);
333         }
334
335         /* Index read service */
336         memset(&svc_conf, 0, sizeof(svc_conf));
337         svc_conf = (typeof(svc_conf)) {
338                 .psc_name               = "ost_idx_read",
339                 .psc_watchdog_factor    = OSS_SERVICE_WATCHDOG_FACTOR,
340                 .psc_buf                = {
341                         .bc_nbufs               = OST_NBUFS,
342                         .bc_buf_size            = OST_BUFSIZE,
343                         .bc_req_max_size        = OST_MAXREQSIZE,
344                         .bc_rep_max_size        = OST_MAXREPSIZE,
345                         .bc_req_portal          = OST_IDX_PORTAL,
346                         .bc_rep_portal          = OSC_REPLY_PORTAL,
347                 },
348                 .psc_thr                = {
349                         .tc_thr_name            = "ll_ost_idx",
350                         .tc_thr_factor          = OSS_CR_THR_FACTOR,
351                         .tc_nthrs_init          = OSS_CR_NTHRS_INIT,
352                         .tc_nthrs_base          = OSS_CR_NTHRS_BASE,
353                         .tc_nthrs_max           = OSS_CR_NTHRS_MAX,
354                         .tc_nthrs_user          = oss_num_create_threads,
355                         .tc_cpu_affinity        = 1,
356                         .tc_ctx_tags            = LCT_DT_THREAD,
357                 },
358                 .psc_cpt                = {
359                         .cc_pattern             = oss_cpts,
360                 },
361                 .psc_ops                = {
362                         .so_req_handler         = tgt_request_handle,
363                         .so_req_printer         = target_print_req,
364                 },
365         };
366         ost->ost_idx_service = ptlrpc_register_service(&svc_conf,
367                                                        obd->obd_proc_entry);
368         if (IS_ERR(ost->ost_idx_service)) {
369                 rc = PTR_ERR(ost->ost_idx_service);
370                 CERROR("failed to start OST index read service: rc = %d\n", rc);
371                 ost->ost_idx_service = NULL;
372                 GOTO(out_out, rc);
373         }
374
375         ping_evictor_start();
376
377         RETURN(0);
378
379 out_out:
380         ptlrpc_unregister_service(ost->ost_out_service);
381         ost->ost_out_service = NULL;
382 out_seq:
383         ptlrpc_unregister_service(ost->ost_seq_service);
384         ost->ost_seq_service = NULL;
385 out_io:
386         ptlrpc_unregister_service(ost->ost_io_service);
387         ost->ost_io_service = NULL;
388 out_create:
389         ptlrpc_unregister_service(ost->ost_create_service);
390         ost->ost_create_service = NULL;
391 out_service:
392         ptlrpc_unregister_service(ost->ost_service);
393         ost->ost_service = NULL;
394 out_lprocfs:
395         lprocfs_obd_cleanup(obd);
396         RETURN(rc);
397 }
398
399 static int ost_cleanup(struct obd_device *obd)
400 {
401         struct ost_obd *ost = &obd->u.ost;
402         int err = 0;
403         ENTRY;
404
405         ping_evictor_stop();
406
407         /* there is no recovery for OST OBD, all recovery is controlled by
408          * obdfilter OBD */
409         LASSERT(obd->obd_recovering == 0);
410         mutex_lock(&ost->ost_health_mutex);
411         ptlrpc_unregister_service(ost->ost_service);
412         ptlrpc_unregister_service(ost->ost_create_service);
413         ptlrpc_unregister_service(ost->ost_io_service);
414         ptlrpc_unregister_service(ost->ost_seq_service);
415         ptlrpc_unregister_service(ost->ost_out_service);
416         ptlrpc_unregister_service(ost->ost_idx_service);
417
418         ost->ost_service = NULL;
419         ost->ost_create_service = NULL;
420         ost->ost_io_service = NULL;
421         ost->ost_seq_service = NULL;
422         ost->ost_out_service = NULL;
423         ost->ost_idx_service = NULL;
424
425         mutex_unlock(&ost->ost_health_mutex);
426
427         lprocfs_obd_cleanup(obd);
428
429         if (ost_io_cptable != NULL) {
430                 cfs_cpt_table_free(ost_io_cptable);
431                 ost_io_cptable = NULL;
432         }
433
434         RETURN(err);
435 }
436
437 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
438 {
439         struct ost_obd *ost = &obd->u.ost;
440         int rc = 0;
441
442         mutex_lock(&ost->ost_health_mutex);
443         rc |= ptlrpc_service_health_check(ost->ost_service);
444         rc |= ptlrpc_service_health_check(ost->ost_create_service);
445         rc |= ptlrpc_service_health_check(ost->ost_io_service);
446         mutex_unlock(&ost->ost_health_mutex);
447
448         /*
449          * health_check to return 0 on healthy
450          * and 1 on unhealthy.
451          */
452         if( rc != 0)
453                 rc = 1;
454
455         return rc;
456 }
457
458 /* use obd ops to offer management infrastructure */
459 static struct obd_ops ost_obd_ops = {
460         .o_owner        = THIS_MODULE,
461         .o_setup        = ost_setup,
462         .o_cleanup      = ost_cleanup,
463         .o_health_check = ost_health_check,
464 };
465
466
467 static int __init ost_init(void)
468 {
469         int rc;
470
471         ENTRY;
472
473         rc = class_register_type(&ost_obd_ops, NULL, NULL,
474 #ifndef HAVE_ONLY_PROCFS_SEQ
475                                 NULL,
476 #endif
477                                 LUSTRE_OSS_NAME, NULL);
478
479         if (ost_num_threads != 0 && oss_num_threads == 0) {
480                 LCONSOLE_INFO("ost_num_threads module parameter is deprecated, "
481                               "use oss_num_threads instead or unset both for "
482                               "dynamic thread startup\n");
483                 oss_num_threads = ost_num_threads;
484         }
485
486         RETURN(rc);
487 }
488
489 static void /*__exit*/ ost_exit(void)
490 {
491         class_unregister_type(LUSTRE_OSS_NAME);
492 }
493
494 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
495 MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01");
496 MODULE_LICENSE("GPL");
497
498 module_init(ost_init);
499 module_exit(ost_exit);