4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/cmm/cmm_device.c
38 * Lustre Cluster Metadata Manager (cmm)
40 * Author: Mike Pershin <tappro@clusterfs.com>
47 #define DEBUG_SUBSYSTEM S_MDS
49 #include <linux/module.h>
52 #include <obd_class.h>
53 #include <lprocfs_status.h>
54 #include <lustre_ver.h>
55 #include "cmm_internal.h"
56 #include "mdc_internal.h"
/*
 * OBD operations vector for the CMM device type.  Only the module owner
 * is set here; NOTE(review): the closing "};" is not visible in this
 * elided listing.
 */
58 struct obd_ops cmm_obd_device_ops = {
59 .o_owner = THIS_MODULE
/* Forward declaration so lu_device_is_cmm() below can compare ops tables. */
62 static const struct lu_device_operations cmm_lu_ops;
/*
 * Return true iff @d is a CMM lu_device, identified by its ops vector.
 * ergo(a, b) means "a implies b": a NULL device or NULL ops yields true
 * vacuously here, matching the usual Lustre ergo() idiom.
 */
64 static inline int lu_device_is_cmm(struct lu_device *d)
66 return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &cmm_lu_ops);
/*
 * cmm_root_get(): fetch the FID of the filesystem root object by
 * delegating to the child md_device -- but only on the master MDS
 * (cmm_local_num == 0).  NOTE(review): the non-master return path and
 * the closing brace are not visible in this elided listing.
 */
69 int cmm_root_get(const struct lu_env *env, struct md_device *md,
72 struct cmm_device *cmm_dev = md2cmm_dev(md);
73 /* valid only on master MDS */
74 if (cmm_dev->cmm_local_num == 0)
75 return cmm_child_ops(cmm_dev)->mdo_root_get(env,
76 cmm_dev->cmm_child, fid);
/*
 * cmm_statfs(): pass-through of the statfs request to the child
 * md_device; @sfs is filled in by the lower layer.
 */
81 static int cmm_statfs(const struct lu_env *env, struct md_device *md,
82 struct obd_statfs *sfs)
84 struct cmm_device *cmm_dev = md2cmm_dev(md);
88 rc = cmm_child_ops(cmm_dev)->mdo_statfs(env,
89 cmm_dev->cmm_child, sfs);
/*
 * cmm_maxsize_get(): ask the child layer for the maximum MD (EA) size
 * and llog cookie size; results are returned through the out-pointers.
 */
93 static int cmm_maxsize_get(const struct lu_env *env, struct md_device *md,
94 int *md_size, int *cookie_size)
96 struct cmm_device *cmm_dev = md2cmm_dev(md);
99 rc = cmm_child_ops(cmm_dev)->mdo_maxsize_get(env, cmm_dev->cmm_child,
100 md_size, cookie_size);
/*
 * cmm_init_capa_ctxt(): initialize the capability context in the child
 * device.  The LASSERT documents that every supported child is expected
 * to implement mdo_init_capa_ctxt.
 */
104 static int cmm_init_capa_ctxt(const struct lu_env *env, struct md_device *md,
105 int mode , unsigned long timeout, __u32 alg,
106 struct lustre_capa_key *keys)
108 struct cmm_device *cmm_dev = md2cmm_dev(md);
111 LASSERT(cmm_child_ops(cmm_dev)->mdo_init_capa_ctxt);
112 rc = cmm_child_ops(cmm_dev)->mdo_init_capa_ctxt(env, cmm_dev->cmm_child,
/*
 * cmm_update_capa_key(): forward a capability key update to the child
 * md_device.
 */
118 static int cmm_update_capa_key(const struct lu_env *env,
119 struct md_device *md,
120 struct lustre_capa_key *key)
122 struct cmm_device *cmm_dev = md2cmm_dev(md);
125 rc = cmm_child_ops(cmm_dev)->mdo_update_capa_key(env,
/*
 * cmm_llog_ctxt_get(): obtain an llog context from the child device
 * (pure pass-through; remaining arguments elided in this listing).
 */
131 static int cmm_llog_ctxt_get(const struct lu_env *env, struct md_device *m,
134 struct cmm_device *cmm_dev = md2cmm_dev(m);
138 rc = cmm_child_ops(cmm_dev)->mdo_llog_ctxt_get(env, cmm_dev->cmm_child,
/*
 * cmm_iocontrol(): forward an ioctl-style command straight to the next
 * (child) md_device; CMM adds no handling of its own here.
 */
143 int cmm_iocontrol(const struct lu_env *env, struct md_device *m,
144 unsigned int cmd, int len, void *data)
146 struct md_device *next = md2cmm_dev(m)->cmm_child;
150 rc = next->md_ops->mdo_iocontrol(env, next, cmd, len, data);
/*
 * md_device operations for CMM.  Every entry is a thin wrapper that
 * delegates to the child md_device (see functions above).
 */
155 static const struct md_device_operations cmm_md_ops = {
156 .mdo_statfs = cmm_statfs,
157 .mdo_root_get = cmm_root_get,
158 .mdo_maxsize_get = cmm_maxsize_get,
159 .mdo_init_capa_ctxt = cmm_init_capa_ctxt,
160 .mdo_update_capa_key = cmm_update_capa_key,
161 .mdo_llog_ctxt_get = cmm_llog_ctxt_get,
162 .mdo_iocontrol = cmm_iocontrol,
/* Defined in mdc_device.c; used by cmm_add_mdc() and __cmm_type_* below. */
165 extern struct lu_device_type mdc_device_type;
/*
 * cmm_post_init_mdc(): query the lower layer for the current maximum MD
 * and cookie sizes, then propagate them to every attached MDC target
 * under cmm_tgt_guard so EA buffers are sized consistently.
 */
169 static int cmm_post_init_mdc(const struct lu_env *env,
170 struct cmm_device *cmm)
172 int max_mdsize, max_cookiesize, rc;
173 struct mdc_device *mc, *tmp;
175 /* get the max mdsize and cookiesize from lower layer */
176 rc = cmm_maxsize_get(env, &cmm->cmm_md_dev, &max_mdsize,
/* Walk the target list under the spinlock; _safe variant although the
 * visible body does not delete entries. */
181 cfs_spin_lock(&cmm->cmm_tgt_guard);
182 cfs_list_for_each_entry_safe(mc, tmp, &cmm->cmm_targets,
184 cmm_mdc_init_ea_size(env, mc, max_mdsize, max_cookiesize);
186 cfs_spin_unlock(&cmm->cmm_tgt_guard);
190 /* --- cmm_lu_operations --- */
191 /* add new MDC to the CMM, create MDC lu_device and connect it to mdc_obd */
/*
 * cmm_add_mdc(): create and wire up a new MDC child device for a remote
 * MDS.  Steps visible in this listing:
 *   1. parse the target index from cfg string slot 2;
 *   2. bail out if an MDC with that index already exists (checked twice,
 *      before and after the unlocked alloc/init window, to close the race);
 *   3. alloc + init the mdc lu_device and feed it the config;
 *   4. link it into cmm_targets, take a ref on the CMM lu_device, and
 *      register the target with the FLD client;
 *   5. for mdt0, publish its export as the server-FLD control export;
 *   6. refresh max MD/cookie sizes via cmm_post_init_mdc().
 * NOTE(review): error-return lines between steps are elided here.
 */
192 static int cmm_add_mdc(const struct lu_env *env,
193 struct cmm_device *cm, struct lustre_cfg *cfg)
195 struct lu_device_type *ldt = &mdc_device_type;
196 char *p, *num = lustre_cfg_string(cfg, 2);
197 struct mdc_device *mc, *tmp;
198 struct lu_fld_target target;
199 struct lu_device *ld;
200 struct lu_device *cmm_lu = cmm2lu_dev(cm);
202 struct lu_site *site = cmm2lu_dev(cm)->ld_site;
206 /* find out that there is no such mdc */
208 mdc_num = simple_strtol(num, &p, 10);
/* NOTE(review): "lustre_cgf" in this message is a typo for "lustre_cfg"
 * (string untouched here; fixing it changes runtime output). */
210 CERROR("Invalid index in lustre_cgf, offset 2\n");
/* First duplicate check, before allocating anything. */
214 cfs_spin_lock(&cm->cmm_tgt_guard);
215 cfs_list_for_each_entry_safe(mc, tmp, &cm->cmm_targets,
217 if (mc->mc_num == mdc_num) {
218 cfs_spin_unlock(&cm->cmm_tgt_guard);
222 cfs_spin_unlock(&cm->cmm_tgt_guard);
223 ld = ldt->ldt_ops->ldto_device_alloc(env, ldt, cfg);
229 rc = ldt->ldt_ops->ldto_device_init(env, ld, NULL, NULL);
231 ldt->ldt_ops->ldto_device_free(env, ld);
234 /* pass config to the just created MDC */
235 rc = ld->ld_ops->ldo_process_config(env, ld, cfg);
237 ldt->ldt_ops->ldto_device_fini(env, ld);
238 ldt->ldt_ops->ldto_device_free(env, ld);
/* Recheck for a racing insertion; if found, tear down our new device. */
242 cfs_spin_lock(&cm->cmm_tgt_guard);
243 cfs_list_for_each_entry_safe(mc, tmp, &cm->cmm_targets,
245 if (mc->mc_num == mdc_num) {
246 cfs_spin_unlock(&cm->cmm_tgt_guard);
247 ldt->ldt_ops->ldto_device_fini(env, ld);
248 ldt->ldt_ops->ldto_device_free(env, ld);
253 cfs_list_add_tail(&mc->mc_linkage, &cm->cmm_targets);
255 cfs_spin_unlock(&cm->cmm_tgt_guard);
/* The child holds a reference on the parent CMM device. */
257 lu_device_get(cmm_lu);
258 lu_ref_add(&cmm_lu->ld_reference, "mdc-child", ld);
/* Register this MDS as a FLD lookup target. */
260 target.ft_srv = NULL;
261 target.ft_idx = mc->mc_num;
262 target.ft_exp = mc->mc_desc.cl_exp;
263 fld_client_add_target(cm->cmm_fld, &target);
265 if (mc->mc_num == 0) {
266 /* this is mdt0 -> mc export, fld lookup need this export
267 to forward fld lookup request. */
268 LASSERT(!lu_site2md(site)->ms_server_fld->lsf_control_exp);
269 lu_site2md(site)->ms_server_fld->lsf_control_exp =
272 /* Set max md size for the mdc. */
273 rc = cmm_post_init_mdc(env, cm);
/*
 * cmm_device_shutdown(): teardown counterpart of the setup path.
 * Removes the local target from the FLD client, forwards the shutdown
 * config to every MDC child (also dropping each from FLD), then
 * finalizes the upcall machinery.  Children are freed later, in
 * cmm_device_fini(), not here.
 */
277 static void cmm_device_shutdown(const struct lu_env *env,
278 struct cmm_device *cm,
279 struct lustre_cfg *cfg)
281 struct mdc_device *mc, *tmp;
284 /* Remove local target from FLD. */
285 fld_client_del_target(cm->cmm_fld, cm->cmm_local_num);
287 /* Finish all mdc devices. */
288 cfs_spin_lock(&cm->cmm_tgt_guard);
289 cfs_list_for_each_entry_safe(mc, tmp, &cm->cmm_targets, mc_linkage) {
290 struct lu_device *ld_m = mdc2lu_dev(mc);
291 fld_client_del_target(cm->cmm_fld, mc->mc_num);
292 ld_m->ld_ops->ldo_process_config(env, ld_m, cfg);
294 cfs_spin_unlock(&cm->cmm_tgt_guard);
296 /* remove upcall device*/
297 md_upcall_fini(&cm->cmm_md_dev);
/*
 * cmm_device_mount(): parse this MDS's own index from config string
 * slot 2 and store it as cmm_local_num (0 means master MDS, see
 * cmm_root_get()).  NOTE(review): "lustre_cgf" in the error string is a
 * typo for "lustre_cfg" (left untouched: it is runtime output).
 */
302 static int cmm_device_mount(const struct lu_env *env,
303 struct cmm_device *m, struct lustre_cfg *cfg)
305 const char *index = lustre_cfg_string(cfg, 2);
308 LASSERT(index != NULL);
310 m->cmm_local_num = simple_strtol(index, &p, 10);
312 CERROR("Invalid index in lustre_cgf\n");
/*
 * cmm_process_config(): dispatch lustre_cfg commands for the CMM layer.
 * Visible cases in this elided listing:
 *  - ADD_MDC: on the first one, also register the local server FLD as a
 *    target (ft_exp == NULL, ft_srv set), then cmm_add_mdc(); the first
 *    successful ADD_MDC marks the device CMM_INITIALIZED;
 *  - SETUP(mount): forward to the lower layer first, then parse the
 *    local index via cmm_device_mount();
 *  - CLEANUP(shutdown): unlink from the site and run
 *    cmm_device_shutdown();
 *  - default: pass the config straight down to the next device.
 */
319 static int cmm_process_config(const struct lu_env *env,
320 struct lu_device *d, struct lustre_cfg *cfg)
322 struct cmm_device *m = lu2cmm_dev(d);
323 struct lu_device *next = md2lu_dev(m->cmm_child);
327 switch(cfg->lcfg_command) {
329 /* On first ADD_MDC add also local target. */
330 if (!(m->cmm_flags & CMM_INITIALIZED)) {
331 struct lu_site *ls = cmm2lu_dev(m)->ld_site;
332 struct lu_fld_target target;
334 target.ft_srv = lu_site2md(ls)->ms_server_fld;
335 target.ft_idx = m->cmm_local_num;
336 target.ft_exp = NULL;
338 fld_client_add_target(m->cmm_fld, &target);
340 err = cmm_add_mdc(env, m, cfg);
342 /* The first ADD_MDC can be counted as setup is finished. */
343 if (!(m->cmm_flags & CMM_INITIALIZED))
344 m->cmm_flags |= CMM_INITIALIZED;
349 /* lower layers should be set up at first */
350 err = next->ld_ops->ldo_process_config(env, next, cfg);
352 err = cmm_device_mount(env, m, cfg);
357 lu_dev_del_linkage(d->ld_site, d);
358 cmm_device_shutdown(env, m, cfg);
361 err = next->ld_ops->ldo_process_config(env, next, cfg);
/*
 * cmm_recovery_complete(): CMM has no recovery state of its own; simply
 * notify the next (child) device that recovery is finished.
 */
366 static int cmm_recovery_complete(const struct lu_env *env,
369 struct cmm_device *m = lu2cmm_dev(d);
370 struct lu_device *next = md2lu_dev(m->cmm_child);
373 rc = next->ld_ops->ldo_recovery_complete(env, next);
/*
 * cmm_prepare(): forward the prepare step to the child device, passing
 * ourselves (@dev) as the parent.  @pdev is unused in the visible body.
 */
377 static int cmm_prepare(const struct lu_env *env,
378 struct lu_device *pdev,
379 struct lu_device *dev)
381 struct cmm_device *cmm = lu2cmm_dev(dev);
382 struct lu_device *next = md2lu_dev(cmm->cmm_child);
386 rc = next->ld_ops->ldo_prepare(env, dev, next);
/*
 * lu_device operations for CMM; the address of this table also serves
 * as the device-type tag checked by lu_device_is_cmm().
 */
390 static const struct lu_device_operations cmm_lu_ops = {
391 .ldo_object_alloc = cmm_object_alloc,
392 .ldo_process_config = cmm_process_config,
393 .ldo_recovery_complete = cmm_recovery_complete,
394 .ldo_prepare = cmm_prepare,
397 /* --- lu_device_type operations --- */
/*
 * cmm_upcall(): upcall handler installed via md_upcall_init().  For the
 * event handled in the elided switch it refreshes MD sizes through
 * cmm_post_init_mdc() (logging on failure), then propagates the event
 * upward with md_do_upcall().
 */
398 int cmm_upcall(const struct lu_env *env, struct md_device *md,
399 enum md_upcall_event ev, void *data)
406 rc = cmm_post_init_mdc(env, md2cmm_dev(md));
408 CERROR("can not init md size %d\n", rc);
411 rc = md_do_upcall(env, md, ev, data);
/*
 * cmm_device_free(): release the CMM device allocated by
 * cmm_device_alloc().  Asserts all MDC targets are already gone (they
 * are freed in cmm_device_fini()), frees the FLD client structure, and
 * finalizes the md_device.  Presumably returns the next device in the
 * stack -- the return line is elided in this listing.
 */
416 static struct lu_device *cmm_device_free(const struct lu_env *env,
419 struct cmm_device *m = lu2cmm_dev(d);
420 struct lu_device *next = md2lu_dev(m->cmm_child);
423 LASSERT(m->cmm_tgt_count == 0);
424 LASSERT(cfs_list_empty(&m->cmm_targets));
425 if (m->cmm_fld != NULL) {
426 OBD_FREE_PTR(m->cmm_fld);
429 md_device_fini(&m->cmm_md_dev);
/*
 * cmm_device_alloc(): allocate and minimally initialize a cmm_device:
 * md_device init, ops tables, the cmm_upcall handler, and the embedded
 * FLD client structure.  On allocation failure the partially built
 * device is torn down via cmm_device_free() and ERR_PTR(-ENOMEM) is
 * returned.  NOTE(review): the OBD_ALLOC of @m itself and the final
 * return are elided in this listing.
 */
434 static struct lu_device *cmm_device_alloc(const struct lu_env *env,
435 struct lu_device_type *t,
436 struct lustre_cfg *cfg)
439 struct cmm_device *m;
444 l = ERR_PTR(-ENOMEM);
446 md_device_init(&m->cmm_md_dev, t);
447 m->cmm_md_dev.md_ops = &cmm_md_ops;
448 md_upcall_init(&m->cmm_md_dev, cmm_upcall);
450 l->ld_ops = &cmm_lu_ops;
452 OBD_ALLOC_PTR(m->cmm_fld);
454 cmm_device_free(env, l);
455 l = ERR_PTR(-ENOMEM);
461 /* context key constructor/destructor: cmm_key_init, cmm_key_fini */
462 LU_KEY_INIT_FINI(cmm, struct cmm_thread_info);
464 /* context key: cmm_thread_key */
465 LU_CONTEXT_KEY_DEFINE(cmm, LCT_MD_THREAD);
/*
 * cmm_env_info(): fetch this thread's cmm_thread_info from the lu_env
 * context; the key is registered above, so a NULL result would be a
 * programming error (hence the LASSERT).
 */
467 struct cmm_thread_info *cmm_env_info(const struct lu_env *env)
469 struct cmm_thread_info *info;
471 info = lu_context_key_get(&env->le_ctx, &cmm_thread_key);
472 LASSERT(info != NULL);
476 /* type constructor/destructor: cmm_type_init/cmm_type_fini */
477 LU_TYPE_INIT_FINI(cmm, &cmm_thread_key);
480 * Kludge code : it should be moved mdc_device.c if mdc_(mds)_device
/*
 * __cmm_type_init(): initialize the embedded mdc device type first,
 * then CMM's own type; on CMM failure the mdc type is rolled back so
 * init is all-or-nothing.
 */
483 static int __cmm_type_init(struct lu_device_type *t)
486 rc = lu_device_type_init(&mdc_device_type);
488 rc = cmm_type_init(t);
490 lu_device_type_fini(&mdc_device_type);
/* Mirror of __cmm_type_init(): tear down the mdc type alongside CMM's. */
495 static void __cmm_type_fini(struct lu_device_type *t)
497 lu_device_type_fini(&mdc_device_type);
/* Start/stop are forwarded to the mdc type (CMM's own start/stop, if any,
 * is elided in this listing). */
501 static void __cmm_type_start(struct lu_device_type *t)
503 mdc_device_type.ldt_ops->ldto_start(&mdc_device_type);
507 static void __cmm_type_stop(struct lu_device_type *t)
509 mdc_device_type.ldt_ops->ldto_stop(&mdc_device_type);
/*
 * cmm_device_init(): second-stage init once the device is stacked:
 * set up the target list and its guard lock, record the child device,
 * start the FLD client (DHT hash) using this device's @name, publish it
 * on the site as ms_client_fld, and create the procfs entries.
 */
513 static int cmm_device_init(const struct lu_env *env, struct lu_device *d,
514 const char *name, struct lu_device *next)
516 struct cmm_device *m = lu2cmm_dev(d);
521 cfs_spin_lock_init(&m->cmm_tgt_guard);
522 CFS_INIT_LIST_HEAD(&m->cmm_targets);
523 m->cmm_tgt_count = 0;
524 m->cmm_child = lu2md_dev(next);
526 err = fld_client_init(m->cmm_fld, name,
527 LUSTRE_CLI_FLD_HASH_DHT);
529 CERROR("Can't init FLD, err %d\n", err);
533 /* Assign site's fld client ref, needed for asserts in osd. */
534 ls = cmm2lu_dev(m)->ld_site;
535 lu_site2md(ls)->ms_client_fld = m->cmm_fld;
536 err = cmm_procfs_init(m, name);
/*
 * cmm_device_fini(): inverse of cmm_device_init().  Unlinks, drops the
 * parent reference for, and fini+frees every remaining MDC child under
 * cmm_tgt_guard; then shuts down the FLD client (proc entries first),
 * clears the site's ms_client_fld pointer, and returns the child device
 * so the caller can continue unwinding the stack.
 * NOTE(review): finishing child devices while holding a spinlock looks
 * suspect (ldto_device_fini may sleep) -- elided lines may drop the lock
 * around those calls; verify against the full source.
 */
541 static struct lu_device *cmm_device_fini(const struct lu_env *env,
542 struct lu_device *ld)
544 struct cmm_device *cm = lu2cmm_dev(ld);
545 struct mdc_device *mc, *tmp;
549 /* Finish all mdc devices */
550 cfs_spin_lock(&cm->cmm_tgt_guard);
551 cfs_list_for_each_entry_safe(mc, tmp, &cm->cmm_targets, mc_linkage) {
552 struct lu_device *ld_m = mdc2lu_dev(mc);
553 struct lu_device *ld_c = cmm2lu_dev(cm);
555 cfs_list_del_init(&mc->mc_linkage);
556 lu_ref_del(&ld_c->ld_reference, "mdc-child", ld_m);
558 ld_m->ld_type->ldt_ops->ldto_device_fini(env, ld_m);
559 ld_m->ld_type->ldt_ops->ldto_device_free(env, ld_m);
562 cfs_spin_unlock(&cm->cmm_tgt_guard);
564 fld_client_proc_fini(cm->cmm_fld);
565 fld_client_fini(cm->cmm_fld);
566 ls = cmm2lu_dev(cm)->ld_site;
567 lu_site2md(ls)->ms_client_fld = NULL;
570 RETURN (md2lu_dev(cm->cmm_child));
/*
 * Device-type operations: lifecycle entry points wired to the functions
 * above (the __cmm_* wrappers additionally manage the embedded mdc type).
 */
573 static struct lu_device_type_operations cmm_device_type_ops = {
574 .ldto_init = __cmm_type_init,
575 .ldto_fini = __cmm_type_fini,
577 .ldto_start = __cmm_type_start,
578 .ldto_stop = __cmm_type_stop,
580 .ldto_device_alloc = cmm_device_alloc,
581 .ldto_device_free = cmm_device_free,
583 .ldto_device_init = cmm_device_init,
584 .ldto_device_fini = cmm_device_fini
/*
 * The CMM device type, registered with the class subsystem in
 * cmm_mod_init().  Threads of this type carry both MD and DT context
 * tags.
 */
587 static struct lu_device_type cmm_device_type = {
588 .ldt_tags = LU_DEVICE_MD,
589 .ldt_name = LUSTRE_CMM_NAME,
590 .ldt_ops = &cmm_device_type_ops,
591 .ldt_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD
/* procfs variable tables; entries (if any) are elided in this listing. */
594 struct lprocfs_vars lprocfs_cmm_obd_vars[] = {
598 struct lprocfs_vars lprocfs_cmm_module_vars[] = {
/* Fill @lvars with the module- and obd-level procfs tables above. */
602 static void lprocfs_cmm_init_vars(struct lprocfs_static_vars *lvars)
604 lvars->module_vars = lprocfs_cmm_module_vars;
605 lvars->obd_vars = lprocfs_cmm_obd_vars;
/*
 * Module entry point: set up procfs variable pointers and register the
 * CMM obd/device type with the class subsystem.
 */
609 static int __init cmm_mod_init(void)
611 struct lprocfs_static_vars lvars;
613 lprocfs_cmm_init_vars(&lvars);
614 return class_register_type(&cmm_obd_device_ops, NULL, lvars.module_vars,
615 LUSTRE_CMM_NAME, &cmm_device_type);
/* Module exit point: unregister the type registered above. */
618 static void __exit cmm_mod_exit(void)
620 class_unregister_type(LUSTRE_CMM_NAME);
623 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
624 MODULE_DESCRIPTION("Lustre Clustered Metadata Manager ("LUSTRE_CMM_NAME")");
625 MODULE_LICENSE("GPL");
627 cfs_module(cmm, "0.1.0", cmm_mod_init, cmm_mod_exit);