4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
39 * Author: Mike Pershin <tappro@whamcloud.com>
40 * Author: Johann Lombardi <johann@whamcloud.com>
43 #define DEBUG_SUBSYSTEM S_FILTER
45 #include <obd_class.h>
46 #include <lustre_param.h>
48 #include "ofd_internal.h"
50 /* Slab for OFD object allocation */
51 static cfs_mem_cache_t *ofd_object_kmem;
/*
 * Descriptor table for the slab caches owned by this module.  The single
 * visible entry describes ofd_object_kmem: cache name "ofd_obj", element
 * size sizeof(struct ofd_object).  The table is passed to lu_kmem_init()
 * in ofd_init() and to lu_kmem_fini() in ofd_exit().
 */
53 static struct lu_kmem_descr ofd_caches[] = {
55 .ckd_cache = &ofd_object_kmem,
56 .ckd_name = "ofd_obj",
57 .ckd_size = sizeof(struct ofd_object)
/*
 * Connect the OFD device m to the lower device named by next.
 *
 * The lower obd is looked up by name with class_name2obd(); if it cannot
 * be located an error is logged and rc is set to -ENOTCONN.  The connection
 * is made with obd_connect(), advertising OBD_CONNECT_VERSION together with
 * LUSTRE_VERSION_CODE, and the resulting export is stored through exp.  A
 * failing obd_connect() is logged with its return code.
 *
 * After connecting, the lu_site of the lower device becomes the site of the
 * OFD lu_device, m->ofd_osd is pointed at the lower dt_device, and the OFD
 * lu_device is installed as the site's top device (ls_top_dev).
 *
 * NOTE(review): the site setup reads m->ofd_osd_exp rather than *exp, so it
 * relies on the caller passing &m->ofd_osd_exp (as ofd_stack_init() does).
 * NOTE(review): rc = -ENOMEM presumably covers a failed allocation of the
 * connect data -- confirm against the full source.
 */
64 static int ofd_connect_to_next(const struct lu_env *env, struct ofd_device *m,
65 const char *next, struct obd_export **exp)
67 struct obd_connect_data *data = NULL;
68 struct obd_device *obd;
74 GOTO(out, rc = -ENOMEM);
76 obd = class_name2obd(next);
78 CERROR("%s: can't locate next device: %s\n",
79 m->ofd_dt_dev.dd_lu_dev.ld_obd->obd_name, next);
80 GOTO(out, rc = -ENOTCONN);
83 data->ocd_connect_flags = OBD_CONNECT_VERSION;
84 data->ocd_version = LUSTRE_VERSION_CODE;
86 rc = obd_connect(NULL, exp, obd, &obd->obd_uuid, data, NULL);
88 CERROR("%s: cannot connect to next dev %s: rc = %d\n",
89 m->ofd_dt_dev.dd_lu_dev.ld_obd->obd_name, next, rc);
93 m->ofd_dt_dev.dd_lu_dev.ld_site =
94 m->ofd_osd_exp->exp_obd->obd_lu_dev->ld_site;
95 LASSERT(m->ofd_dt_dev.dd_lu_dev.ld_site);
96 m->ofd_osd = lu2dt_dev(m->ofd_osd_exp->exp_obd->obd_lu_dev);
97 m->ofd_dt_dev.dd_lu_dev.ld_site->ls_top_dev = &m->ofd_dt_dev.dd_lu_dev;
/*
 * Set up the device stack underneath the OFD.
 *
 * The target name is taken from config string 0 of cfg.  Mount information
 * for that name is obtained with server_get_mount(); a failure is logged.
 * The bottom OSD is named "<target>-osd": the name is built in a temporary
 * buffer of MTI_NAME_MAXLEN bytes, used to connect through
 * ofd_connect_to_next() (which fills m->ofd_osd_exp), and the buffer is
 * freed right after.  m->ofd_osd is then set from the lu_device behind the
 * new export, and the same "<target>-osd" name is formatted into the
 * per-thread scratch buffer info->fti_u.name.
 */
105 static int ofd_stack_init(const struct lu_env *env,
106 struct ofd_device *m, struct lustre_cfg *cfg)
108 const char *dev = lustre_cfg_string(cfg, 0);
110 struct ofd_thread_info *info = ofd_info(env);
111 struct lustre_mount_info *lmi;
117 lmi = server_get_mount(dev);
119 CERROR("Cannot get mount info for %s!\n", dev);
123 /* find bottom osd */
124 OBD_ALLOC(osdname, MTI_NAME_MAXLEN);
128 snprintf(osdname, MTI_NAME_MAXLEN, "%s-osd", dev);
129 rc = ofd_connect_to_next(env, m, osdname, &m->ofd_osd_exp);
130 OBD_FREE(osdname, MTI_NAME_MAXLEN);
134 d = m->ofd_osd_exp->exp_obd->obd_lu_dev;
136 m->ofd_osd = lu2dt_dev(d);
138 snprintf(info->fti_u.name, sizeof(info->fti_u.name),
139 "%s-osd", lustre_cfg_string(cfg, 0));
/*
 * Tear down the device stack underneath the OFD.
 *
 * Sequence visible here: purge the lu_site of top, build an LCFG_CLEANUP
 * configuration record (buffer 0 is the obd name, buffer 1 a flags string),
 * deliver it to top via its ldo_process_config method, free the record,
 * purge the site a second time, and finally disconnect the OSD export
 * (which is asserted to be set).  A failed lustre_cfg_new() is logged.
 */
144 static void ofd_stack_fini(const struct lu_env *env, struct ofd_device *m,
145 struct lu_device *top)
147 struct obd_device *obd = ofd_obd(m);
148 struct lustre_cfg_bufs bufs;
149 struct lustre_cfg *lcfg;
154 lu_site_purge(env, top->ld_site, ~0);
156 /* process cleanup, pass ofd obd name to get obd umount flags */
157 lustre_cfg_bufs_reset(&bufs, obd->obd_name);
162 lustre_cfg_bufs_set_string(&bufs, 1, flags);
163 lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs);
165 CERROR("Cannot alloc lcfg!\n");
170 top->ld_ops->ldo_process_config(env, top, lcfg);
171 lustre_cfg_free(lcfg);
173 lu_site_purge(env, top->ld_site, ~0);
175 LASSERT(m->ofd_osd_exp);
176 obd_disconnect(m->ofd_osd_exp);
182 /* For interoperability, see mdt_interop_param[]. */
/*
 * Table searched by class_find_old_param() in ofd_process_config().
 * "ost.quota_type" is listed with a NULL second field; ofd_process_config()
 * treats a NULL new_param as "obsolete, skip with a warning".
 */
183 static struct cfg_interop_param ofd_interop_param[] = {
184 { "ost.quota_type", NULL },
188 /* used by MGS to process specific configurations */
/*
 * Configuration handler of the OFD lu_device (installed as
 * ldo_process_config in ofd_lu_ops); dispatches on cfg->lcfg_command.
 *
 * In the branch that handles a parameter string (config string 1):
 *  - an empty param is reported with CERROR;
 *  - the name is looked up in ofd_interop_param; a match without a
 *    replacement is warned about as obsolete, a match with a replacement
 *    is renamed through lustre_cfg_rename();
 *  - the record is offered to class_process_proc_param() with the OFD
 *    lprocfs variables under the PARAM_OST prefix; when that returns a
 *    positive value or -ENOSYS the record is forwarded to the next (lower)
 *    device's ldo_process_config.
 *
 * LCFG_SPTLRPC_CONF has a dedicated case.  Remaining commands are passed
 * down to the next device unchanged.
 */
189 static int ofd_process_config(const struct lu_env *env, struct lu_device *d,
190 struct lustre_cfg *cfg)
192 struct ofd_device *m = ofd_dev(d);
193 struct dt_device *dt_next = m->ofd_osd;
194 struct lu_device *next = &dt_next->dd_lu_dev;
199 switch (cfg->lcfg_command) {
201 struct lprocfs_static_vars lvars;
203 /* For interoperability */
204 struct cfg_interop_param *ptr = NULL;
205 struct lustre_cfg *old_cfg = NULL;
208 param = lustre_cfg_string(cfg, 1);
210 CERROR("param is empty\n");
215 ptr = class_find_old_param(param, ofd_interop_param);
217 if (ptr->new_param == NULL) {
219 CWARN("For interoperability, skip this %s."
220 " It is obsolete.\n", ptr->old_param);
224 CWARN("Found old param %s, changed it to %s.\n",
225 ptr->old_param, ptr->new_param);
228 cfg = lustre_cfg_rename(old_cfg, ptr->new_param);
235 lprocfs_ofd_init_vars(&lvars);
236 rc = class_process_proc_param(PARAM_OST, lvars.obd_vars, cfg,
238 if (rc > 0 || rc == -ENOSYS)
239 /* we don't understand; pass it on */
240 rc = next->ld_ops->ldo_process_config(env, next, cfg);
243 case LCFG_SPTLRPC_CONF: {
248 /* others are passed further */
249 rc = next->ld_ops->ldo_process_config(env, next, cfg);
/*
 * loo_object_init method of OFD objects.
 *
 * Logs the object's FID at D_INFO, asks the underlying OSD lu_device to
 * allocate its object for the same header (ldo_object_alloc) and attaches
 * the result to o with lu_object_add().
 */
255 static int ofd_object_init(const struct lu_env *env, struct lu_object *o,
256 const struct lu_object_conf *conf)
258 struct ofd_device *d = ofd_dev(o->lo_dev);
259 struct lu_device *under;
260 struct lu_object *below;
265 CDEBUG(D_INFO, "object init, fid = "DFID"\n",
266 PFID(lu_object_fid(o)));
268 under = &d->ofd_osd->dd_lu_dev;
269 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
271 lu_object_add(o, below);
/*
 * loo_object_free method of OFD objects.
 *
 * Logs the object's FID at D_INFO, finalizes the object header with
 * lu_object_header_fini() and returns the ofd_object to the
 * ofd_object_kmem slab.
 */
278 static void ofd_object_free(const struct lu_env *env, struct lu_object *o)
280 struct ofd_object *of = ofd_obj(o);
281 struct lu_object_header *h;
286 CDEBUG(D_INFO, "object free, fid = "DFID"\n",
287 PFID(lu_object_fid(o)));
290 lu_object_header_fini(h);
291 OBD_SLAB_FREE_PTR(of, ofd_object_kmem);
/*
 * loo_object_print method of OFD objects: emits
 * LUSTRE_OST_NAME "-object@<address of o>" through the printer callback p
 * and returns whatever the callback returns.
 */
295 static int ofd_object_print(const struct lu_env *env, void *cookie,
296 lu_printer_t p, const struct lu_object *o)
298 return (*p)(env, cookie, LUSTRE_OST_NAME"-object@%p", o);
/*
 * lu_object method table for OFD objects; assigned to lo_ops of every
 * object created by ofd_object_alloc().
 */
301 struct lu_object_operations ofd_obj_ops = {
302 .loo_object_init = ofd_object_init,
303 .loo_object_free = ofd_object_free,
304 .loo_object_print = ofd_object_print
/*
 * ldo_object_alloc method of the OFD device (see ofd_lu_ops).
 *
 * Allocates a struct ofd_object from the ofd_object_kmem slab with
 * CFS_ALLOC_IO.  For the allocated object it initializes the object header,
 * initializes the embedded lu_object (of->ofo_obj.do_lu) against device d,
 * adds it as the top layer of the header and installs ofd_obj_ops.
 */
307 static struct lu_object *ofd_object_alloc(const struct lu_env *env,
308 const struct lu_object_header *hdr,
311 struct ofd_object *of;
315 OBD_SLAB_ALLOC_PTR_GFP(of, ofd_object_kmem, CFS_ALLOC_IO);
318 struct lu_object_header *h;
320 o = &of->ofo_obj.do_lu;
322 lu_object_header_init(h);
323 lu_object_init(o, h, d);
324 lu_object_add_top(h, o);
325 o->lo_ops = &ofd_obj_ops;
/* Request handler defined outside this file; handed to
 * target_recovery_init() in ofd_prepare(). */
332 extern int ost_handle(struct ptlrpc_request *req);
/*
 * ldo_prepare method of the OFD device (see ofd_lu_ops).
 *
 * Refills the lu_env (a failure is logged), initializes the per-thread
 * ofd_thread_info, calls ldo_prepare on the lower device, sets up target
 * recovery with ost_handle as the request handler and, when the obd is not
 * in recovery (obd_recovering == 0), runs ofd_postrecov() immediately.
 */
334 static int ofd_prepare(const struct lu_env *env, struct lu_device *pdev,
335 struct lu_device *dev)
337 struct ofd_thread_info *info;
338 struct ofd_device *ofd = ofd_dev(dev);
339 struct obd_device *obd = ofd_obd(ofd);
340 struct lu_device *next = &ofd->ofd_osd->dd_lu_dev;
345 rc = lu_env_refill((struct lu_env *)env);
347 CERROR("Failure to refill session: '%d'\n", rc);
351 info = ofd_info_init(env, NULL);
355 /* initialize lower device */
356 rc = next->ld_ops->ldo_prepare(env, dev, next);
358 target_recovery_init(&ofd->ofd_lut, ost_handle);
360 if (obd->obd_recovering == 0)
361 ofd_postrecov(env, ofd);
/*
 * ldo_recovery_complete method of the OFD device (see ofd_lu_ops).
 *
 * Grants OST_MAX_PRECREATE * ddp_inodespace to the obd's self export via
 * ofd_grant_connect(), then forwards the recovery-complete notification to
 * the lower device.
 */
366 static int ofd_recovery_complete(const struct lu_env *env,
367 struct lu_device *dev)
369 struct ofd_device *ofd = ofd_dev(dev);
370 struct lu_device *next = &ofd->ofd_osd->dd_lu_dev;
375 /* Grant space for object precreation on the self export.
376 * This initial reserved space (i.e. 20MB for zfs and 560KB for ldiskfs)
377 * is enough to create 20k objects. It is then adapted based on the
378 * precreate request size (see ofd_grant_create()
380 ofd_grant_connect(env, dev->ld_obd->obd_self_export,
381 OST_MAX_PRECREATE * ofd->ofd_dt_conf.ddp_inodespace);
382 rc = next->ld_ops->ldo_recovery_complete(env, next);
/*
 * lu_device method table of the OFD; installed into
 * m->ofd_dt_dev.dd_lu_dev.ld_ops by ofd_init0().
 */
386 static struct lu_device_operations ofd_lu_ops = {
387 .ldo_object_alloc = ofd_object_alloc,
388 .ldo_process_config = ofd_process_config,
389 .ldo_recovery_complete = ofd_recovery_complete,
390 .ldo_prepare = ofd_prepare,
/*
 * Create the lprocfs entries of an OFD device.
 *
 * Steps, in order:
 *  1. lprocfs_obd_setup() with the OFD obd variables;
 *  2. lprocfs_alloc_obd_stats() sized by LPROC_OFD_LAST, followed by the
 *     "read_bytes" and "write_bytes" counters (LPROCFS_CNTR_AVGMINMAX);
 *  3. lproc_ofd_attach_seqstat();
 *  4. the "exports" directory, remembered in obd->obd_proc_exports_entry;
 *  5. the "clear" entry inside "exports", backed by
 *     lprocfs_nid_stats_clear_read/_write;
 *  6. lprocfs_job_stats_init() with ofd_stats_counter_init.
 *
 * Failures are logged and unwound through the remove_entry_clear,
 * free_obd_stats and obd_cleanup goto targets, which undo the steps in
 * reverse order (the cleanup calls are the last three statements).
 */
393 static int ofd_procfs_init(struct ofd_device *ofd)
395 struct lprocfs_static_vars lvars;
396 struct obd_device *obd = ofd_obd(ofd);
397 cfs_proc_dir_entry_t *entry;
402 /* lprocfs must be setup before the ofd so state can be safely added
403 * to /proc incrementally as the ofd is setup */
404 lprocfs_ofd_init_vars(&lvars);
405 rc = lprocfs_obd_setup(obd, lvars.obd_vars);
407 CERROR("%s: lprocfs_obd_setup failed: %d.\n",
412 rc = lprocfs_alloc_obd_stats(obd, LPROC_OFD_LAST);
414 CERROR("%s: lprocfs_alloc_obd_stats failed: %d.\n",
416 GOTO(obd_cleanup, rc);
419 /* Init OFD private stats here */
420 lprocfs_counter_init(obd->obd_stats, LPROC_OFD_READ_BYTES,
421 LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes");
422 lprocfs_counter_init(obd->obd_stats, LPROC_OFD_WRITE_BYTES,
423 LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes");
425 rc = lproc_ofd_attach_seqstat(obd);
427 CERROR("%s: create seqstat failed: %d.\n", obd->obd_name, rc);
428 GOTO(free_obd_stats, rc);
431 entry = lprocfs_register("exports", obd->obd_proc_entry, NULL, NULL);
434 CERROR("%s: error %d setting up lprocfs for %s\n",
435 obd->obd_name, rc, "exports");
436 GOTO(free_obd_stats, rc);
438 obd->obd_proc_exports_entry = entry;
440 entry = lprocfs_add_simple(obd->obd_proc_exports_entry, "clear",
441 lprocfs_nid_stats_clear_read,
442 lprocfs_nid_stats_clear_write, obd, NULL);
445 CERROR("%s: add proc entry 'clear' failed: %d.\n",
447 GOTO(free_obd_stats, rc);
450 rc = lprocfs_job_stats_init(obd, LPROC_OFD_STATS_LAST,
451 ofd_stats_counter_init);
453 GOTO(remove_entry_clear, rc);
456 lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
458 lprocfs_free_obd_stats(obd);
460 lprocfs_obd_cleanup(obd);
/*
 * Remove the lprocfs state of an OFD device: job stats, the "clear" entry
 * under the exports directory, per-client stats, obd stats and finally the
 * obd lprocfs entries themselves.
 */
464 static int ofd_procfs_fini(struct ofd_device *ofd)
466 struct obd_device *obd = ofd_obd(ofd);
468 lprocfs_job_stats_fini(obd);
469 lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
470 lprocfs_free_per_client_stats(obd);
471 lprocfs_free_obd_stats(obd);
472 lprocfs_obd_cleanup(obd);
/* NOTE(review): same declaration as the one preceding ofd_prepare();
 * one of the two is redundant. */
476 extern int ost_handle(struct ptlrpc_request *req);
/*
 * Initialize a freshly allocated OFD device m from configuration cfg.
 *
 * Visible stages:
 *  - locate the obd by name (config string 0) and refill the lu_env;
 *  - set defaults: FMD limits, flag/osfs/grant spinlocks and counters,
 *    sptlrpc lock and rule set, capability key list and hash;
 *  - wire the lu_device: ld_ops = ofd_lu_ops, ld_obd = obd, and
 *    obd->obd_lu_dev back to the device;
 *  - create lprocfs entries (ofd_procfs_init);
 *  - block connections (obd_no_conn = 1) and enable replay, unless config
 *    buffer 4 is present and contains the character 'n', which disables
 *    recovery;
 *  - initialize thread info and the lower stack (ofd_stack_init);
 *  - fetch statfs data; the block size must be a power of two (-EPROTO
 *    otherwise) and yields ofd_blockbits = fls(bsize) - 1;
 *  - create the server LDLM namespace named "filter-<m>" (-ENOMEM on
 *    failure), register the intent policy and LVB operations, and
 *    initialize the LDLM callback client;
 *  - read the dt device configuration, then run tgt_init() and
 *    ofd_fs_setup().
 *
 * Errors unwind through the err_fini_lut, err_free_ns, err_fini_stack and
 * err_fini_proc goto targets; the matching cleanup calls are the trailing
 * statements of the function.
 */
478 static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
479 struct lu_device_type *ldt, struct lustre_cfg *cfg)
481 const char *dev = lustre_cfg_string(cfg, 0);
482 struct ofd_thread_info *info = NULL;
483 struct obd_device *obd;
484 struct obd_statfs *osfs;
489 obd = class_name2obd(dev);
491 CERROR("Cannot find obd with name %s\n", dev);
495 rc = lu_env_refill((struct lu_env *)env);
499 obd->u.obt.obt_magic = OBT_MAGIC;
501 m->ofd_fmd_max_num = OFD_FMD_MAX_NUM_DEFAULT;
502 m->ofd_fmd_max_age = OFD_FMD_MAX_AGE_DEFAULT;
504 spin_lock_init(&m->ofd_flags_lock);
505 m->ofd_raid_degraded = 0;
506 m->ofd_syncjournal = 0;
508 m->ofd_grant_compat_disable = 0;
511 spin_lock_init(&m->ofd_osfs_lock);
512 m->ofd_osfs_age = cfs_time_shift_64(-1000);
513 m->ofd_osfs_unstable = 0;
514 m->ofd_statfs_inflight = 0;
515 m->ofd_osfs_inflight = 0;
518 spin_lock_init(&m->ofd_grant_lock);
519 m->ofd_tot_dirty = 0;
520 m->ofd_tot_granted = 0;
521 m->ofd_tot_pending = 0;
522 m->ofd_max_group = 0;
524 rwlock_init(&obd->u.filter.fo_sptlrpc_lock);
525 sptlrpc_rule_set_init(&obd->u.filter.fo_sptlrpc_rset);
527 obd->u.filter.fo_fl_oss_capa = 0;
528 CFS_INIT_LIST_HEAD(&obd->u.filter.fo_capa_keys);
529 obd->u.filter.fo_capa_hash = init_capa_hash();
530 if (obd->u.filter.fo_capa_hash == NULL)
533 m->ofd_dt_dev.dd_lu_dev.ld_ops = &ofd_lu_ops;
534 m->ofd_dt_dev.dd_lu_dev.ld_obd = obd;
535 /* set this lu_device to obd, because error handling needs it */
536 obd->obd_lu_dev = &m->ofd_dt_dev.dd_lu_dev;
538 rc = ofd_procfs_init(m);
540 CERROR("Can't init ofd lprocfs, rc %d\n", rc);
544 /* No connections are accepted until configuration has finished */
545 obd->obd_no_conn = 1;
546 obd->obd_replayable = 1;
547 if (cfg->lcfg_bufcount > 4 && LUSTRE_CFG_BUFLEN(cfg, 4) > 0) {
548 char *str = lustre_cfg_string(cfg, 4);
550 if (strchr(str, 'n')) {
551 CWARN("%s: recovery disabled\n", obd->obd_name);
552 obd->obd_replayable = 0;
556 info = ofd_info_init(env, NULL);
560 rc = ofd_stack_init(env, m, cfg);
562 CERROR("Can't init device stack, rc %d\n", rc);
563 GOTO(err_fini_proc, rc);
566 /* populate cached statfs data */
567 osfs = &ofd_info(env)->fti_u.osfs;
568 rc = ofd_statfs_internal(env, m, osfs, 0, NULL);
570 CERROR("%s: can't get statfs data, rc %d\n", obd->obd_name, rc);
571 GOTO(err_fini_stack, rc);
573 if (!IS_PO2(osfs->os_bsize)) {
574 CERROR("%s: blocksize (%d) is not a power of 2\n",
575 obd->obd_name, osfs->os_bsize);
576 GOTO(err_fini_stack, rc = -EPROTO);
578 m->ofd_blockbits = fls(osfs->os_bsize) - 1;
580 snprintf(info->fti_u.name, sizeof(info->fti_u.name), "filter-%p", m);
581 m->ofd_namespace = ldlm_namespace_new(obd, info->fti_u.name,
582 LDLM_NAMESPACE_SERVER,
583 LDLM_NAMESPACE_GREEDY,
585 if (m->ofd_namespace == NULL)
586 GOTO(err_fini_stack, rc = -ENOMEM);
587 /* set obd_namespace for compatibility with old code */
588 obd->obd_namespace = m->ofd_namespace;
589 ldlm_register_intent(m->ofd_namespace, ofd_intent_policy);
590 m->ofd_namespace->ns_lvbo = &ofd_lvbo;
591 m->ofd_namespace->ns_lvbp = m;
593 ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
594 "filter_ldlm_cb_client", &obd->obd_ldlm_client);
596 dt_conf_get(env, m->ofd_osd, &m->ofd_dt_conf);
598 /* Allow at most ddp_grant_reserved% of the available filesystem space
599 * to be granted to clients, so that any errors in the grant overhead
600 * calculations do not allow granting more space to clients than can be
601 * written. Assumes that in aggregate the grant overhead calculations do
602 * not have more than ddp_grant_reserved% estimation error in them. */
604 ofd_grant_ratio_conv(m->ofd_dt_conf.ddp_grant_reserved);
606 rc = tgt_init(env, &m->ofd_lut, obd, m->ofd_osd);
608 GOTO(err_free_ns, rc);
610 rc = ofd_fs_setup(env, m, obd);
612 GOTO(err_fini_lut, rc);
616 tgt_fini(env, &m->ofd_lut);
/* NOTE(review): second argument is 0 here but NULL in ofd_fini() */
618 ldlm_namespace_free(m->ofd_namespace, 0, obd->obd_force);
619 obd->obd_namespace = m->ofd_namespace = NULL;
621 ofd_stack_fini(env, m, &m->ofd_osd->dd_lu_dev);
/*
 * Shut down an OFD device.
 *
 * Order of operations: stop target recovery, wait on the export and zombie
 * barriers, finalize the target (tgt_fini) and the on-disk setup
 * (ofd_fs_cleanup), release capability keys and their hash, free the LDLM
 * namespace if one exists (clearing both m->ofd_namespace and
 * obd_namespace), tear down the lower stack, assert that no references to
 * the lu_device remain, and drop the server mount reference.
 */
627 static void ofd_fini(const struct lu_env *env, struct ofd_device *m)
629 struct obd_device *obd = ofd_obd(m);
630 struct lu_device *d = &m->ofd_dt_dev.dd_lu_dev;
632 target_recovery_fini(obd);
633 obd_exports_barrier(obd);
634 obd_zombie_barrier();
636 tgt_fini(env, &m->ofd_lut);
637 ofd_fs_cleanup(env, m);
639 ofd_free_capa_keys(m);
640 cleanup_capa_hash(obd->u.filter.fo_capa_hash);
642 if (m->ofd_namespace != NULL) {
643 ldlm_namespace_free(m->ofd_namespace, NULL,
644 d->ld_obd->obd_force);
645 d->ld_obd->obd_namespace = m->ofd_namespace = NULL;
648 ofd_stack_fini(env, m, &m->ofd_dt_dev.dd_lu_dev);
650 LASSERT(cfs_atomic_read(&d->ld_ref) == 0);
651 server_put_mount(obd->obd_name, NULL);
/*
 * ldto_device_fini method (see ofd_device_type_ops): runs ofd_fini() on the
 * OFD device that embeds lu_device d.
 */
655 static struct lu_device *ofd_device_fini(const struct lu_env *env,
659 ofd_fini(env, ofd_dev(d));
/*
 * ldto_device_free method (see ofd_device_type_ops): finalizes the embedded
 * dt_device of the OFD device with dt_device_fini().
 */
663 static struct lu_device *ofd_device_free(const struct lu_env *env,
666 struct ofd_device *m = ofd_dev(d);
668 dt_device_fini(&m->ofd_dt_dev);
/*
 * ldto_device_alloc method (see ofd_device_type_ops).
 *
 * Initializes the embedded dt_device for type t and runs ofd_init0() with
 * cfg.  ofd_device_free() is invoked on the lu_device on the init failure
 * path.  ERR_PTR(-ENOMEM) is returned on an early error path -- presumably
 * when the ofd_device itself cannot be allocated (TODO confirm).
 */
673 static struct lu_device *ofd_device_alloc(const struct lu_env *env,
674 struct lu_device_type *t,
675 struct lustre_cfg *cfg)
677 struct ofd_device *m;
683 return ERR_PTR(-ENOMEM);
685 l = &m->ofd_dt_dev.dd_lu_dev;
686 dt_device_init(&m->ofd_dt_dev, t);
687 rc = ofd_init0(env, m, t, cfg);
689 ofd_device_free(env, l);
696 /* thread context key constructor/destructor */
/* Presumably expands to ofd_key_init()/ofd_key_fini() for
 * struct ofd_thread_info; both names are referenced by ofd_thread_key. */
697 LU_KEY_INIT_FINI(ofd, struct ofd_thread_info);
/*
 * lct_exit callback of ofd_thread_key: resets the per-thread
 * ofd_thread_info passed in data (env and export pointers, transaction
 * number, pre-version, object pointer, transaction flags) and zeroes
 * fti_attr.
 */
699 static void ofd_key_exit(const struct lu_context *ctx,
700 struct lu_context_key *key, void *data)
702 struct ofd_thread_info *info = data;
704 info->fti_env = NULL;
705 info->fti_exp = NULL;
708 info->fti_transno = 0;
709 info->fti_pre_version = 0;
710 info->fti_obj = NULL;
711 info->fti_has_trans = 0;
712 info->fti_mult_trans = 0;
715 memset(&info->fti_attr, 0, sizeof info->fti_attr);
/*
 * Context key for the OFD per-thread info, tagged LCT_DT_THREAD, with
 * init/fini/exit callbacks ofd_key_init, ofd_key_fini and ofd_key_exit.
 */
718 struct lu_context_key ofd_thread_key = {
719 .lct_tags = LCT_DT_THREAD,
720 .lct_init = ofd_key_init,
721 .lct_fini = ofd_key_fini,
722 .lct_exit = ofd_key_exit
725 /* type constructor/destructor: ofd_type_init, ofd_type_fini */
726 LU_TYPE_INIT_FINI(ofd, &ofd_thread_key);
/*
 * Device-type method table of the OFD, referenced from ofd_device_type:
 * type init/fini and start/stop hooks plus the device alloc/free/fini
 * functions defined in this file.
 */
728 static struct lu_device_type_operations ofd_device_type_ops = {
729 .ldto_init = ofd_type_init,
730 .ldto_fini = ofd_type_fini,
732 .ldto_start = ofd_type_start,
733 .ldto_stop = ofd_type_stop,
735 .ldto_device_alloc = ofd_device_alloc,
736 .ldto_device_free = ofd_device_free,
737 .ldto_device_fini = ofd_device_fini
/*
 * lu_device_type descriptor of the OFD: a LU_DEVICE_DT device named
 * LUSTRE_OST_NAME whose contexts carry the LCT_DT_THREAD tag.  Registered
 * in ofd_init() through class_register_type().
 */
740 static struct lu_device_type ofd_device_type = {
741 .ldt_tags = LU_DEVICE_DT,
742 .ldt_name = LUSTRE_OST_NAME,
743 .ldt_ops = &ofd_device_type_ops,
744 .ldt_ctx_tags = LCT_DT_THREAD
/*
 * Module entry point.
 *
 * Creates the slab caches described by ofd_caches, then registers the obd
 * type LUSTRE_OST_NAME with ofd_obd_ops, the module-level lprocfs variables
 * and ofd_device_type.  The caches are released again with lu_kmem_fini()
 * on an intermediate failure path.
 */
747 int __init ofd_init(void)
749 struct lprocfs_static_vars lvars;
752 rc = lu_kmem_init(ofd_caches);
758 lu_kmem_fini(ofd_caches);
762 lprocfs_ofd_init_vars(&lvars);
764 rc = class_register_type(&ofd_obd_ops, NULL, lvars.module_vars,
765 LUSTRE_OST_NAME, &ofd_device_type);
/*
 * Module exit point: releases the slab caches described by ofd_caches and
 * unregisters the LUSTRE_OST_NAME obd type.
 */
769 void __exit ofd_exit(void)
772 lu_kmem_fini(ofd_caches);
773 class_unregister_type(LUSTRE_OST_NAME);
/* Kernel module metadata and entry/exit registration. */
776 MODULE_AUTHOR("Whamcloud, Inc. <http://www.whamcloud.com/>");
777 MODULE_DESCRIPTION("Lustre Object Filtering Device");
778 MODULE_LICENSE("GPL");
780 module_init(ofd_init);
781 module_exit(ofd_exit);