1 /* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
5 * Lustre Metadata Server (mdd) routines
7 * Copyright (C) 2006 Cluster File Systems, Inc.
8 * Author: Wang Di <wangdi@clusterfs.com>
10 * This file is part of the Lustre file system, http://www.lustre.org
11 * Lustre is a trademark of Cluster File Systems, Inc.
13 * You may have signed or agreed to another license before downloading
14 * this software. If so, you are bound by the terms and conditions
15 * of that agreement, and the following does not apply to you. See the
16 * LICENSE file included with this distribution for more information.
18 * If you did not agree to a different license, then this copy of Lustre
19 * is open source software; you can redistribute it and/or modify it
20 * under the terms of version 2 of the GNU General Public License as
21 * published by the Free Software Foundation.
23 * In either case, Lustre is distributed in the hope that it will be
24 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
25 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * license text for more details.
29 # define EXPORT_SYMTAB
31 #define DEBUG_SUBSYSTEM S_MDS
33 #include <linux/module.h>
34 #include <linux/jbd.h>
36 #include <obd_class.h>
37 #include <lustre_ver.h>
38 #include <obd_support.h>
39 #include <lprocfs_status.h>
41 #include <linux/ldiskfs_fs.h>
42 #include <lustre_mds.h>
43 #include <lustre/lustre_idl.h>
45 #include "mdd_internal.h"
48 static struct thandle* mdd_trans_start(const struct lu_env *env,
50 static void mdd_trans_stop(const struct lu_env *env,
51 struct mdd_device *mdd, int rc,
52 struct thandle *handle);
53 static struct dt_object* mdd_object_child(struct mdd_object *o);
54 static void __mdd_ref_add(const struct lu_env *env, struct mdd_object *obj,
55 struct thandle *handle);
56 static void __mdd_ref_del(const struct lu_env *env, struct mdd_object *obj,
57 struct thandle *handle);
58 static int __mdd_lookup(const struct lu_env *env,
59 struct md_object *pobj,
60 const char *name, const struct lu_fid* fid,
62 static int __mdd_lookup_locked(const struct lu_env *env,
63 struct md_object *pobj,
64 const char *name, const struct lu_fid* fid,
66 static int mdd_exec_permission_lite(const struct lu_env *env,
67 struct mdd_object *obj);
68 static int __mdd_permission_internal(const struct lu_env *env,
69 struct mdd_object *obj,
70 int mask, int getattr);
72 static struct md_object_operations mdd_obj_ops;
73 static struct md_dir_operations mdd_dir_ops;
74 static struct lu_object_operations mdd_lu_obj_ops;
76 static struct lu_context_key mdd_thread_key;
78 static const char *mdd_root_dir_name = "root";
79 static const char dot[] = ".";
80 static const char dotdot[] = "..";
83 MDD_TXN_OBJECT_DESTROY_OP,
84 MDD_TXN_OBJECT_CREATE_OP,
87 MDD_TXN_INDEX_INSERT_OP,
88 MDD_TXN_INDEX_DELETE_OP,
92 MDD_TXN_RENAME_TGT_OP,
93 MDD_TXN_CREATE_DATA_OP,
97 struct mdd_txn_op_descr {
98 enum mdd_txn_op mod_op;
99 unsigned int mod_credits;
103 MDD_TXN_OBJECT_DESTROY_CREDITS = 0,
104 MDD_TXN_OBJECT_CREATE_CREDITS = 0,
105 MDD_TXN_ATTR_SET_CREDITS = 0,
106 MDD_TXN_XATTR_SET_CREDITS = 0,
107 MDD_TXN_INDEX_INSERT_CREDITS = 0,
108 MDD_TXN_INDEX_DELETE_CREDITS = 0,
109 MDD_TXN_LINK_CREDITS = 0,
110 MDD_TXN_UNLINK_CREDITS = 0,
111 MDD_TXN_RENAME_CREDITS = 0,
112 MDD_TXN_RENAME_TGT_CREDITS = 0,
113 MDD_TXN_CREATE_DATA_CREDITS = 0,
114 MDD_TXN_MKDIR_CREDITS = 0
116 #define DEFINE_MDD_TXN_OP_ARRAY(opname, base) \
117 [opname ## _OP - base ## _OP]= { \
118 .mod_op = opname ## _OP, \
119 .mod_credits = opname ## _CREDITS, \
123 * number of blocks to reserve for particular operations. Should be function
124 * of ... something. Stub for now.
127 #define DEFINE_MDD_TXN_OP_DESC(opname) \
128 DEFINE_MDD_TXN_OP_ARRAY(opname, MDD_TXN_OBJECT_DESTROY)
130 static struct mdd_txn_op_descr mdd_txn_descrs[] = {
131 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_OBJECT_DESTROY),
132 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_OBJECT_CREATE),
133 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_ATTR_SET),
134 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_XATTR_SET),
135 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_INDEX_INSERT),
136 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_INDEX_DELETE),
137 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_LINK),
138 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_UNLINK),
139 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_RENAME),
140 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_RENAME_TGT),
141 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_CREATE_DATA),
142 DEFINE_MDD_TXN_OP_DESC(MDD_TXN_MKDIR)
144 struct rw_semaphore mdd_txn_sem;
/*
 * Look up the journal-credit count registered for transaction opcode 'op'
 * in mdd_txn_descrs[] (under the mdd_txn_sem read lock) and store it in
 * the per-thread txn_param later consumed by mdd_trans_start().
 * Falls through to a CERROR if no descriptor matches 'op'.
 *
 * NOTE(review): this listing is a sampled extraction with original line
 * numbers embedded; lines 147-148, 151, 153, 161-163 and 166+ (braces,
 * locals, the early return after up_read) are missing — code left as-is.
 */
146 static void mdd_txn_param_build(const struct lu_env *env, int op)
149 /* init credits for each ops */
150 num_entries = sizeof (mdd_txn_descrs) / sizeof(struct mdd_txn_op_descr);
152 LASSERT(num_entries > 0);
154 down_read(&mdd_txn_sem);
155 for (i =0; i < num_entries; i++) {
156 if (mdd_txn_descrs[i].mod_op == op) {
/* credits must have been initialized by mdd_init_txn_credits() first */
157 LASSERT(mdd_txn_descrs[i].mod_credits > 0);
158 mdd_env_info(env)->mti_param.tp_credits =
159 mdd_txn_descrs[i].mod_credits;
160 up_read(&mdd_txn_sem);
/* no matching descriptor found: drop the lock and complain */
164 up_read(&mdd_txn_sem);
165 CERROR("Wrong operation %d \n", op);
/*
 * Ask the underlying dt (osd) device how many journal credits an operation
 * costs via dt_credit_get(); asserts the child reported a positive count.
 * NOTE(review): remaining signature arguments, braces and the return
 * statement are missing from this sampled listing — code left as-is.
 */
169 static int mdd_credit_get(const struct lu_env *env, struct mdd_device *mdd,
173 credits = mdd_child_ops(mdd)->dt_credit_get(env, mdd->mdd_child,
175 LASSERT(credits > 0);
179 /* FIXME: we should calculate it by lsm count,
/*
 * Populate mdd_txn_descrs[].mod_credits for every MDD transaction opcode,
 * under the mdd_txn_sem write lock.  Base costs (IAM insert, llog record,
 * attr set, xattr set, object create) are queried from the osd layer via
 * mdd_credit_get(); compound operations are sums/multiples of those.
 * Unlink/rename scale the llog cost by the OST count since one unlink
 * log record may be written per stripe target.
 *
 * NOTE(review): sampled listing — 'switch (opcode) {', the per-case
 * 'break;' statements, default-case tail and the final return are among
 * the missing lines; code left byte-identical.
 */
181 int mdd_init_txn_credits(const struct lu_env *env, struct mdd_device *mdd)
183 struct mds_obd *mds = &mdd->mdd_obd_dev->u.mds;
184 int ost_count = mds->mds_lov_desc.ld_tgt_count;
185 int iam_credits, xattr_credits, log_credits, create_credits;
186 int num_entries, i, attr_credits;
188 /* init credits for each ops */
189 num_entries = sizeof (mdd_txn_descrs) / sizeof(struct mdd_txn_op_descr);
190 LASSERT(num_entries > 0);
192 /* init the basic credits from osd layer */
193 iam_credits = mdd_credit_get(env, mdd, INSERT_IAM);
194 log_credits = mdd_credit_get(env, mdd, LOG_REC);
195 attr_credits = mdd_credit_get(env, mdd, ATTR_SET);
196 xattr_credits = mdd_credit_get(env, mdd, XATTR_SET);
197 create_credits = mdd_credit_get(env, mdd, CREATE_OBJECT);
198 /* calculate the mdd credits */
199 down_write(&mdd_txn_sem);
200 for (i =0; i < num_entries; i++) {
201 int opcode = mdd_txn_descrs[i].mod_op;
203 case MDD_TXN_OBJECT_DESTROY_OP:
/* NOTE(review): destroy cost is a hard-coded magic 20 — TODO derive */
204 mdd_txn_descrs[i].mod_credits = 20;
206 case MDD_TXN_OBJECT_CREATE_OP:
207 /* OI_INSERT + CREATE OBJECT */
208 mdd_txn_descrs[i].mod_credits =
209 iam_credits + create_credits;
211 case MDD_TXN_ATTR_SET_OP:
212 /* ATTR set + XATTR(lsm, lmv) set */
213 mdd_txn_descrs[i].mod_credits =
214 attr_credits + xattr_credits;
216 case MDD_TXN_XATTR_SET_OP:
217 mdd_txn_descrs[i].mod_credits = xattr_credits;
219 case MDD_TXN_INDEX_INSERT_OP:
220 mdd_txn_descrs[i].mod_credits = iam_credits;
222 case MDD_TXN_INDEX_DELETE_OP:
223 mdd_txn_descrs[i].mod_credits = iam_credits;
225 case MDD_TXN_LINK_OP:
226 mdd_txn_descrs[i].mod_credits = iam_credits;
228 case MDD_TXN_UNLINK_OP:
229 /* delete IAM + Unlink log */
230 mdd_txn_descrs[i].mod_credits =
231 iam_credits + log_credits * ost_count;
233 case MDD_TXN_RENAME_OP:
234 /* 2 delete IAM + 1 insert + Unlink log */
235 mdd_txn_descrs[i].mod_credits =
236 3 * iam_credits + log_credits * ost_count;
238 case MDD_TXN_RENAME_TGT_OP:
239 /* iam insert + iam delete */
240 mdd_txn_descrs[i].mod_credits = 2 * iam_credits;
242 case MDD_TXN_CREATE_DATA_OP:
243 /* same as set xattr(lsm) */
244 mdd_txn_descrs[i].mod_credits = xattr_credits;
246 case MDD_TXN_MKDIR_OP:
247 /* IAM_INSERT + OI_INSERT + CREATE_OBJECT_CREDITS
248 * SET_MD CREDITS is already counted in
249 * CREATE_OBJECT CREDITS
251 mdd_txn_descrs[i].mod_credits =
252 2 * iam_credits + create_credits;
/* unknown opcode: report it and drop the write lock before bailing */
255 CERROR("invalid op %d init its credit\n", opcode);
256 up_write(&mdd_txn_sem);
260 up_write(&mdd_txn_sem);
264 struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len)
268 buf = &mdd_env_info(env)->mti_buf;
274 const struct lu_buf *mdd_buf_get_const(const struct lu_env *env,
275 const void *area, ssize_t len)
279 buf = &mdd_env_info(env)->mti_buf;
280 buf->lb_buf = (void *)area;
285 #define mdd_get_group_info(group_info) do { \
286 atomic_inc(&(group_info)->usage); \
289 #define mdd_put_group_info(group_info) do { \
290 if (atomic_dec_and_test(&(group_info)->usage)) \
291 groups_free(group_info); \
294 #define MDD_NGROUPS_PER_BLOCK ((int)(CFS_PAGE_SIZE / sizeof(gid_t)))
296 #define MDD_GROUP_AT(gi, i) \
297 ((gi)->blocks[(i) / MDD_NGROUPS_PER_BLOCK][(i) % MDD_NGROUPS_PER_BLOCK])
299 /* groups_search() is copied from linux kernel! */
300 /* a simple bsearch */
/*
 * Binary-search 'grp' in the sorted supplementary-group array held by
 * group_info (accessed through the MDD_GROUP_AT block/offset macro).
 * NOTE(review): sampled listing — the left/right initialisation, the
 * comparison branches and the return statements are missing; code left
 * as extracted.
 */
301 static int mdd_groups_search(struct group_info *group_info, gid_t grp)
309 right = group_info->ngroups;
310 while (left < right) {
311 int mid = (left + right) / 2;
/* NOTE(review): gid_t subtraction can wrap for large unsigned gids —
 * same caveat as the kernel original this was copied from */
312 int cmp = grp - MDD_GROUP_AT(group_info, mid);
/*
 * Test whether gid 'grp' is one of the caller's groups: fast path on the
 * fs gid, then the two cached mu_suppgids, then a binary search of the
 * full group_info taken from either mu_ginfo or the identity cache.
 * NOTE(review): sampled listing — braces, the early-return lines, the
 * NULL-group_info fallback and the final return are missing; code left
 * byte-identical.
 */
324 static int mdd_in_group_p(struct md_ucred *uc, gid_t grp)
328 if (grp != uc->mu_fsgid) {
329 struct group_info *group_info = NULL;
/* old-style ucreds carry at most two supplementary gids inline */
331 if (uc->mu_ginfo || (uc->mu_valid == UCRED_OLD))
332 if ((grp == uc->mu_suppgids[0]) ||
333 (grp == uc->mu_suppgids[1]))
337 group_info = uc->mu_ginfo;
338 else if (uc->mu_identity)
339 group_info = uc->mu_identity->mi_ginfo;
/* pin the group_info across the search (refcounted) */
344 mdd_get_group_info(group_info);
345 rc = mdd_groups_search(group_info, grp);
346 mdd_put_group_info(group_info);
/**
 * Permission check against 'obj' for 'mask' (MAY_* bits), fetching the
 * object attributes as part of the check (getattr flag = 1).
 * Thin wrapper over __mdd_permission_internal().
 *
 * Reconstructed: the extraction dropped the surrounding braces.
 */
static inline int mdd_permission_internal(const struct lu_env *env,
                                          struct mdd_object *obj, int mask)
{
        return __mdd_permission_internal(env, obj, mask, 1);
}
357 struct mdd_thread_info *mdd_env_info(const struct lu_env *env)
359 struct mdd_thread_info *info;
361 info = lu_context_key_get(&env->le_ctx, &mdd_thread_key);
362 LASSERT(info != NULL);
366 static struct lu_object *mdd_object_alloc(const struct lu_env *env,
367 const struct lu_object_header *hdr,
370 struct mdd_object *mdd_obj;
372 OBD_ALLOC_PTR(mdd_obj);
373 if (mdd_obj != NULL) {
376 o = mdd2lu_obj(mdd_obj);
377 lu_object_init(o, NULL, d);
378 mdd_obj->mod_obj.mo_ops = &mdd_obj_ops;
379 mdd_obj->mod_obj.mo_dir_ops = &mdd_dir_ops;
380 mdd_obj->mod_count = 0;
381 o->lo_ops = &mdd_lu_obj_ops;
388 static int mdd_object_init(const struct lu_env *env, struct lu_object *o)
390 struct mdd_device *d = lu2mdd_dev(o->lo_dev);
391 struct lu_object *below;
392 struct lu_device *under;
395 under = &d->mdd_child->dd_lu_dev;
396 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
401 lu_object_add(o, below);
405 static int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj);
/**
 * lu_object start hook: once the object is known to exist on disk, load
 * its flags (immutable/append) into mod_flags via mdd_get_flags().
 * Non-existent objects need no flag load and succeed trivially.
 *
 * Reconstructed: braces and the else/return-0 tail were dropped by the
 * extraction.
 */
static int mdd_object_start(const struct lu_env *env, struct lu_object *o)
{
        if (lu_object_exists(o))
                return mdd_get_flags(env, lu2mdd_obj(o));
        else
                return 0;
}
/**
 * lu_object free hook: tear down the generic lu_object part, then release
 * the mdd_object allocated in mdd_object_alloc().
 *
 * Reconstructed: the extraction dropped the body lines after the local
 * declaration (lu_object_fini / OBD_FREE_PTR / braces) — TODO confirm
 * against the original tree.
 */
static void mdd_object_free(const struct lu_env *env, struct lu_object *o)
{
        struct mdd_object *mdd = lu2mdd_obj(o);

        lu_object_fini(o);
        OBD_FREE_PTR(mdd);
}
423 static int mdd_object_print(const struct lu_env *env, void *cookie,
424 lu_printer_t p, const struct lu_object *o)
426 return (*p)(env, cookie, LUSTRE_MDD_NAME"-object@%p", o);
429 /* orphan handling is here */
430 static void mdd_object_delete(const struct lu_env *env,
433 struct mdd_object *mdd_obj = lu2mdd_obj(o);
434 struct thandle *handle = NULL;
437 if (lu2mdd_dev(o->lo_dev)->mdd_orphans == NULL)
440 if (test_bit(LU_OBJECT_ORPHAN, &o->lo_header->loh_flags)) {
441 mdd_txn_param_build(env, MDD_TXN_MKDIR_OP);
442 handle = mdd_trans_start(env, lu2mdd_dev(o->lo_dev));
444 CERROR("Cannot get thandle\n");
446 mdd_write_lock(env, mdd_obj);
447 /* let's remove obj from the orphan list */
448 __mdd_orphan_del(env, mdd_obj, handle);
449 mdd_write_unlock(env, mdd_obj);
450 mdd_trans_stop(env, lu2mdd_dev(o->lo_dev),
456 static struct lu_object_operations mdd_lu_obj_ops = {
457 .loo_object_init = mdd_object_init,
458 .loo_object_start = mdd_object_start,
459 .loo_object_free = mdd_object_free,
460 .loo_object_print = mdd_object_print,
461 .loo_object_delete = mdd_object_delete
464 struct mdd_object *mdd_object_find(const struct lu_env *env,
465 struct mdd_device *d,
466 const struct lu_fid *f)
468 struct lu_object *o, *lo;
469 struct mdd_object *m;
472 o = lu_object_find(env, mdd2lu_dev(d)->ld_site, f, BYPASS_CAPA);
474 m = (struct mdd_object *)o;
476 lo = lu_object_locate(o->lo_header, mdd2lu_dev(d)->ld_type);
477 /* remote object can't be located and should be put then */
479 lu_object_put(env, o);
485 static inline int mdd_is_immutable(struct mdd_object *obj)
487 return obj->mod_flags & IMMUTE_OBJ;
490 static inline int mdd_is_append(struct mdd_object *obj)
492 return obj->mod_flags & APPEND_OBJ;
495 static inline void mdd_set_dead_obj(struct mdd_object *obj)
498 obj->mod_flags |= DEAD_OBJ;
501 static inline int mdd_is_dead_obj(struct mdd_object *obj)
503 return obj && obj->mod_flags & DEAD_OBJ;
506 /*Check whether it may create the cobj under the pobj*/
/*
 * Deny creation when the child already exists or the parent is being
 * unlinked; otherwise require WRITE|EXEC permission on the parent.
 * NOTE(review): sampled listing — the signature tail (need_check arg?),
 * braces, the early RETURN lines and the final return are missing; code
 * left byte-identical.
 */
507 static int mdd_may_create(const struct lu_env *env,
508 struct mdd_object *pobj, struct mdd_object *cobj,
514 if (cobj && lu_object_exists(&cobj->mod_obj.mo_lu))
517 if (mdd_is_dead_obj(pobj))
520 /*check pobj may create or not*/
522 rc = mdd_permission_internal(env, pobj,
523 MAY_WRITE | MAY_EXEC);
528 static inline int __mdd_la_get(const struct lu_env *env,
529 struct mdd_object *obj, struct lu_attr *la)
531 struct dt_object *next = mdd_object_child(obj);
532 LASSERT(lu_object_exists(mdd2lu_obj(obj)));
533 return next->do_ops->do_attr_get(env, next, la);
536 static void mdd_flags_xlate(struct mdd_object *obj, __u32 flags)
538 obj->mod_flags &= ~(APPEND_OBJ|IMMUTE_OBJ);
540 if (flags & LUSTRE_APPEND_FL)
541 obj->mod_flags |= APPEND_OBJ;
543 if (flags & LUSTRE_IMMUTABLE_FL)
544 obj->mod_flags |= IMMUTE_OBJ;
547 static int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj)
549 struct lu_attr *la = &mdd_env_info(env)->mti_la;
553 mdd_read_lock(env, obj);
554 rc = __mdd_la_get(env, obj, la);
555 mdd_read_unlock(env, obj);
557 mdd_flags_xlate(obj, la->la_flags);
561 #define mdd_cap_t(x) (x)
563 #define MDD_CAP_TO_MASK(x) (1 << (x))
565 #define mdd_cap_raised(c, flag) (mdd_cap_t(c) & MDD_CAP_TO_MASK(flag))
567 /* capable() is copied from linux kernel! */
568 static inline int mdd_capable(struct md_ucred *uc, int cap)
570 if (mdd_cap_raised(uc->mu_cap, cap))
576 * It's inline, so penalty for filesystems that don't use sticky bit is
579 static inline int mdd_is_sticky(const struct lu_env *env,
580 struct mdd_object *pobj,
581 struct mdd_object *cobj)
583 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
584 struct md_ucred *uc = md_ucred(env);
587 rc = __mdd_la_get(env, cobj, tmp_la);
590 } else if (tmp_la->la_uid == uc->mu_fsuid) {
593 rc = __mdd_la_get(env, pobj, tmp_la);
596 else if (!(tmp_la->la_mode & S_ISVTX))
598 else if (tmp_la->la_uid == uc->mu_fsuid)
601 return !mdd_capable(uc, CAP_FOWNER);
605 /* Check whether it may delete the cobj under the pobj. */
606 static int mdd_may_delete(const struct lu_env *env,
607 struct mdd_object *pobj,
608 struct mdd_object *cobj,
609 int is_dir, int need_check)
611 struct mdd_device *mdd = mdo2mdd(&pobj->mod_obj);
617 if (!lu_object_exists(&cobj->mod_obj.mo_lu))
620 if (mdd_is_immutable(cobj) || mdd_is_append(cobj))
624 if (!S_ISDIR(mdd_object_type(cobj)))
627 if (lu_fid_eq(mdo2fid(cobj), &mdd->mdd_root_fid))
630 } else if (S_ISDIR(mdd_object_type(cobj))) {
635 if (mdd_is_dead_obj(pobj))
638 if (mdd_is_sticky(env, pobj, cobj))
642 rc = mdd_permission_internal(env, pobj,
643 MAY_WRITE | MAY_EXEC);
648 /* get only inode attributes */
649 static int __mdd_iattr_get(const struct lu_env *env,
650 struct mdd_object *mdd_obj, struct md_attr *ma)
655 rc = __mdd_la_get(env, mdd_obj, &ma->ma_attr);
657 ma->ma_valid = MA_INODE;
661 /* get lov EA only */
662 static int __mdd_lmm_get(const struct lu_env *env,
663 struct mdd_object *mdd_obj, struct md_attr *ma)
668 LASSERT(ma->ma_lmm != NULL && ma->ma_lmm_size > 0);
669 rc = mdd_get_md(env, mdd_obj, ma->ma_lmm, &ma->ma_lmm_size,
672 ma->ma_valid |= MA_LOV;
679 static int __mdd_lmv_get(const struct lu_env *env,
680 struct mdd_object *mdd_obj, struct md_attr *ma)
684 rc = mdd_get_md(env, mdd_obj, ma->ma_lmv, &ma->ma_lmv_size,
687 ma->ma_valid |= MA_LMV;
693 static int mdd_attr_get_internal(const struct lu_env *env,
694 struct mdd_object *mdd_obj,
700 if (ma->ma_need & MA_INODE)
701 rc = __mdd_iattr_get(env, mdd_obj, ma);
703 if (rc == 0 && ma->ma_need & MA_LOV) {
704 if (S_ISREG(mdd_object_type(mdd_obj)) ||
705 S_ISDIR(mdd_object_type(mdd_obj)))
706 rc = __mdd_lmm_get(env, mdd_obj, ma);
708 if (rc == 0 && ma->ma_need & MA_LMV) {
709 if (S_ISDIR(mdd_object_type(mdd_obj)))
710 rc = __mdd_lmv_get(env, mdd_obj, ma);
712 CDEBUG(D_INODE, "after getattr rc = %d, ma_valid = "LPX64"\n",
717 static inline int mdd_attr_get_internal_locked(const struct lu_env *env,
718 struct mdd_object *mdd_obj,
722 mdd_read_lock(env, mdd_obj);
723 rc = mdd_attr_get_internal(env, mdd_obj, ma);
724 mdd_read_unlock(env, mdd_obj);
729 * No permission check is needed.
731 static int mdd_attr_get(const struct lu_env *env, struct md_object *obj,
734 struct mdd_object *mdd_obj = md2mdd_obj(obj);
738 rc = mdd_attr_get_internal_locked(env, mdd_obj, ma);
743 * No permission check is needed.
745 static int mdd_xattr_get(const struct lu_env *env,
746 struct md_object *obj, struct lu_buf *buf,
749 struct mdd_object *mdd_obj = md2mdd_obj(obj);
750 struct dt_object *next;
755 LASSERT(lu_object_exists(&obj->mo_lu));
757 next = mdd_object_child(mdd_obj);
758 mdd_read_lock(env, mdd_obj);
759 rc = next->do_ops->do_xattr_get(env, next, buf, name);
760 mdd_read_unlock(env, mdd_obj);
766 * Permission check is done when open,
767 * no need check again.
769 static int mdd_readlink(const struct lu_env *env, struct md_object *obj,
772 struct mdd_object *mdd_obj = md2mdd_obj(obj);
773 struct dt_object *next;
778 LASSERT(lu_object_exists(&obj->mo_lu));
780 next = mdd_object_child(mdd_obj);
781 mdd_read_lock(env, mdd_obj);
782 rc = next->do_body_ops->dbo_read(env, next, buf, &pos);
783 mdd_read_unlock(env, mdd_obj);
787 static int mdd_xattr_list(const struct lu_env *env, struct md_object *obj,
790 struct mdd_object *mdd_obj = md2mdd_obj(obj);
791 struct dt_object *next;
796 LASSERT(lu_object_exists(&obj->mo_lu));
798 next = mdd_object_child(mdd_obj);
799 mdd_read_lock(env, mdd_obj);
800 rc = next->do_ops->do_xattr_list(env, next, buf);
801 mdd_read_unlock(env, mdd_obj);
806 static int mdd_txn_start_cb(const struct lu_env *env,
807 struct txn_param *param, void *cookie)
812 static int mdd_txn_stop_cb(const struct lu_env *env,
813 struct thandle *txn, void *cookie)
815 struct mdd_device *mdd = cookie;
816 struct obd_device *obd = mdd2obd_dev(mdd);
819 return mds_lov_write_objids(obd);
822 static int mdd_txn_commit_cb(const struct lu_env *env,
823 struct thandle *txn, void *cookie)
828 static int mdd_device_init(const struct lu_env *env,
829 struct lu_device *d, struct lu_device *next)
831 struct mdd_device *mdd = lu2mdd_dev(d);
832 struct dt_device *dt;
836 mdd->mdd_child = lu2dt_dev(next);
839 /* prepare transactions callbacks */
840 mdd->mdd_txn_cb.dtc_txn_start = mdd_txn_start_cb;
841 mdd->mdd_txn_cb.dtc_txn_stop = mdd_txn_stop_cb;
842 mdd->mdd_txn_cb.dtc_txn_commit = mdd_txn_commit_cb;
843 mdd->mdd_txn_cb.dtc_cookie = mdd;
845 /* init txn credits */
846 init_rwsem(&mdd_txn_sem);
850 static struct lu_device *mdd_device_fini(const struct lu_env *env,
853 struct mdd_device *mdd = lu2mdd_dev(d);
854 struct lu_device *next = &mdd->mdd_child->dd_lu_dev;
859 static int mdd_mount(const struct lu_env *env, struct mdd_device *mdd)
862 struct dt_object *root;
865 dt_txn_callback_add(mdd->mdd_child, &mdd->mdd_txn_cb);
866 root = dt_store_open(env, mdd->mdd_child, mdd_root_dir_name,
869 LASSERT(root != NULL);
870 lu_object_put(env, &root->do_lu);
871 rc = orph_index_init(env, mdd);
878 static void mdd_device_shutdown(const struct lu_env *env,
879 struct mdd_device *m)
881 dt_txn_callback_del(m->mdd_child, &m->mdd_txn_cb);
883 mdd_fini_obd(env, m);
884 orph_index_fini(env, m);
887 static int mdd_process_config(const struct lu_env *env,
888 struct lu_device *d, struct lustre_cfg *cfg)
890 struct mdd_device *m = lu2mdd_dev(d);
891 struct dt_device *dt = m->mdd_child;
892 struct lu_device *next = &dt->dd_lu_dev;
896 switch (cfg->lcfg_command) {
898 rc = next->ld_ops->ldo_process_config(env, next, cfg);
901 dt->dd_ops->dt_conf_get(env, dt, &m->mdd_dt_conf);
903 rc = mdd_init_obd(env, m, cfg);
905 CERROR("lov init error %d \n", rc);
908 rc = mdd_mount(env, m);
913 mdd_device_shutdown(env, m);
915 rc = next->ld_ops->ldo_process_config(env, next, cfg);
922 static int mdd_recovery_complete(const struct lu_env *env,
925 struct mdd_device *mdd = lu2mdd_dev(d);
926 struct lu_device *next = &mdd->mdd_child->dd_lu_dev;
927 struct obd_device *obd = mdd2obd_dev(mdd);
931 rc = mdd_lov_set_nextid(env, mdd);
933 CERROR("%s: mdd_lov_set_nextid failed %d\n",
937 rc = mdd_cleanup_unlink_llog(env, mdd);
939 obd_notify(obd->u.mds.mds_osc_obd, NULL,
940 obd->obd_async_recov ? OBD_NOTIFY_SYNC_NONBLOCK :
941 OBD_NOTIFY_SYNC, NULL);
946 obd->obd_recovering = 0;
947 obd->obd_type->typ_dt_ops->o_postrecov(obd);
948 /* TODO: orphans handling */
949 __mdd_orphan_cleanup(env, mdd);
950 rc = next->ld_ops->ldo_recovery_complete(env, next);
955 struct lu_device_operations mdd_lu_ops = {
956 .ldo_object_alloc = mdd_object_alloc,
957 .ldo_process_config = mdd_process_config,
958 .ldo_recovery_complete = mdd_recovery_complete
961 void mdd_write_lock(const struct lu_env *env, struct mdd_object *obj)
963 struct dt_object *next = mdd_object_child(obj);
965 next->do_ops->do_write_lock(env, next);
968 void mdd_read_lock(const struct lu_env *env, struct mdd_object *obj)
970 struct dt_object *next = mdd_object_child(obj);
972 next->do_ops->do_read_lock(env, next);
975 void mdd_write_unlock(const struct lu_env *env, struct mdd_object *obj)
977 struct dt_object *next = mdd_object_child(obj);
979 next->do_ops->do_write_unlock(env, next);
982 void mdd_read_unlock(const struct lu_env *env, struct mdd_object *obj)
984 struct dt_object *next = mdd_object_child(obj);
986 next->do_ops->do_read_unlock(env, next);
/* Write-lock two objects, o0 before o1; pair with mdd_unlock2(), which
 * releases in the opposite order.  Callers are responsible for passing
 * the objects in a globally consistent order to avoid deadlock.
 * Reconstructed: braces dropped by the extraction. */
static void mdd_lock2(const struct lu_env *env,
                      struct mdd_object *o0, struct mdd_object *o1)
{
        mdd_write_lock(env, o0);
        mdd_write_lock(env, o1);
}
/* Unlock the pair taken by mdd_lock2(), in reverse acquisition order.
 * Reconstructed: braces dropped by the extraction. */
static void mdd_unlock2(const struct lu_env *env,
                        struct mdd_object *o0, struct mdd_object *o1)
{
        mdd_write_unlock(env, o1);
        mdd_write_unlock(env, o0);
}
1003 static struct thandle* mdd_trans_start(const struct lu_env *env,
1004 struct mdd_device *mdd)
1006 struct txn_param *p = &mdd_env_info(env)->mti_param;
1008 return mdd_child_ops(mdd)->dt_trans_start(env, mdd->mdd_child, p);
1011 static void mdd_trans_stop(const struct lu_env *env,
1012 struct mdd_device *mdd, int result,
1013 struct thandle *handle)
1015 handle->th_result = result;
1016 mdd_child_ops(mdd)->dt_trans_stop(env, handle);
1019 static int __mdd_object_create(const struct lu_env *env,
1020 struct mdd_object *obj, struct md_attr *ma,
1021 struct thandle *handle)
1023 struct dt_object *next;
1024 struct lu_attr *attr = &ma->ma_attr;
1028 if (!lu_object_exists(mdd2lu_obj(obj))) {
1029 next = mdd_object_child(obj);
1030 rc = next->do_ops->do_create(env, next, attr, handle);
1034 LASSERT(ergo(rc == 0, lu_object_exists(mdd2lu_obj(obj))));
1039 int mdd_attr_set_internal(const struct lu_env *env, struct mdd_object *o,
1040 const struct lu_attr *attr, struct thandle *handle)
1042 struct dt_object *next;
1044 LASSERT(lu_object_exists(mdd2lu_obj(o)));
1045 next = mdd_object_child(o);
1046 return next->do_ops->do_attr_set(env, next, attr, handle);
/**
 * mdd_attr_set_internal() under the object's write lock.
 *
 * Reconstructed: braces, the rc declaration and the return were dropped
 * by the extraction.
 */
int mdd_attr_set_internal_locked(const struct lu_env *env,
                                 struct mdd_object *o,
                                 const struct lu_attr *attr,
                                 struct thandle *handle)
{
        int rc;

        mdd_write_lock(env, o);
        rc = mdd_attr_set_internal(env, o, attr, handle);
        mdd_write_unlock(env, o);
        return rc;
}
1061 static int __mdd_xattr_set(const struct lu_env *env, struct mdd_object *o,
1062 const struct lu_buf *buf, const char *name,
1063 int fl, struct thandle *handle)
1065 struct dt_object *next;
1069 LASSERT(lu_object_exists(mdd2lu_obj(o)));
1070 next = mdd_object_child(o);
1071 if (buf->lb_buf && buf->lb_len > 0) {
1072 rc = next->do_ops->do_xattr_set(env, next, buf, name,
1074 } else if (buf->lb_buf == NULL && buf->lb_len == 0) {
1075 rc = next->do_ops->do_xattr_del(env, next, name, handle);
1080 /* this gives the same functionality as the code between
1081 * sys_chmod and inode_setattr
1082 * chown_common and inode_setattr
1083 * utimes and inode_setattr
1084 * This API is ported from mds_fix_attr but remove some unnecessary stuff.
/*
 * Validate and normalise an incoming setattr request 'la' against the
 * object's current attributes and the caller's credentials: permission
 * checks for chmod/chown/chgrp/utimes/truncate, SUID/SGID stripping, and
 * Size-on-MDS time merging.  On success 'la' holds only the attributes
 * that should actually be applied.
 *
 * NOTE(review): sampled listing — braces, RETURN lines between checks,
 * 'switch'-free straight-line structure closers and several assignments
 * are missing; code left byte-identical, comments only.
 */
1087 int mdd_fix_attr(const struct lu_env *env, struct mdd_object *obj,
1090 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1091 struct md_ucred *uc = md_ucred(env);
1092 time_t now = CURRENT_SECONDS;
1099 /* Do not permit change file type */
1100 if (la->la_valid & LA_TYPE)
1103 /* They should not be processed by setattr */
1104 if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE))
1107 rc = __mdd_la_get(env, obj, tmp_la);
1111 if (mdd_is_immutable(obj) || mdd_is_append(obj)) {
1114 * If only change flags of the object, we should
1115 * let it pass, but also need capability check
1116 * here if (!capable(CAP_LINUX_IMMUTABLE)),
1117 * fix it, when implement capable in mds
1119 if (la->la_valid & ~LA_FLAGS)
1122 if (!mdd_capable(uc, CAP_LINUX_IMMUTABLE))
1125 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1126 !mdd_capable(uc, CAP_FOWNER))
1130 * According to Ext3 implementation on this, the
1131 * Ctime will be changed, but not clear why?
1134 la->la_valid |= LA_CTIME;
1138 /* Check for setting the obj time. */
1139 if ((la->la_valid & (LA_MTIME | LA_ATIME | LA_CTIME)) &&
1140 !(la->la_valid & ~(LA_MTIME | LA_ATIME | LA_CTIME))) {
1141 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1142 !mdd_capable(uc, CAP_FOWNER))
1146 /* Make sure a caller can chmod. */
1147 if (la->la_valid & LA_MODE) {
1149 * Bypass la_valid == LA_MODE,
1150 * this is for changing file with SUID or SGID.
1152 if ((la->la_valid & ~LA_MODE) &&
1153 (uc->mu_fsuid != tmp_la->la_uid) &&
1154 !mdd_capable(uc, CAP_FOWNER))
1157 if (la->la_mode == (umode_t) -1)
1158 la->la_mode = tmp_la->la_mode;
1160 la->la_mode = (la->la_mode & S_IALLUGO) |
1161 (tmp_la->la_mode & ~S_IALLUGO);
1163 /* Also check the setgid bit! */
1164 if (!mdd_in_group_p(uc, (la->la_valid & LA_GID) ? la->la_gid :
1165 tmp_la->la_gid) && !mdd_capable(uc, CAP_FSETID))
1166 la->la_mode &= ~S_ISGID;
1168 la->la_mode = tmp_la->la_mode;
1171 /* Make sure a caller can chown. */
1172 if (la->la_valid & LA_UID) {
1173 if (la->la_uid == (uid_t) -1)
1174 la->la_uid = tmp_la->la_uid;
1175 if (((uc->mu_fsuid != tmp_la->la_uid) ||
1176 (la->la_uid != tmp_la->la_uid)) &&
1177 !mdd_capable(uc, CAP_CHOWN))
1181 * If the user or group of a non-directory has been
1182 * changed by a non-root user, remove the setuid bit.
1183 * 19981026 David C Niemi <niemi@tux.org>
1185 * Changed this to apply to all users, including root,
1186 * to avoid some races. This is the behavior we had in
1187 * 2.0. The check for non-root was definitely wrong
1188 * for 2.2 anyway, as it should have been using
1189 * CAP_FSETID rather than fsuid -- 19990830 SD.
1191 if (((tmp_la->la_mode & S_ISUID) == S_ISUID) &&
1192 !S_ISDIR(tmp_la->la_mode)) {
1193 la->la_mode &= ~S_ISUID;
1194 la->la_valid |= LA_MODE;
1198 /* Make sure caller can chgrp. */
1199 if (la->la_valid & LA_GID) {
1200 if (la->la_gid == (gid_t) -1)
1201 la->la_gid = tmp_la->la_gid;
1202 if (((uc->mu_fsuid != tmp_la->la_uid) ||
1203 ((la->la_gid != tmp_la->la_gid) &&
1204 !mdd_in_group_p(uc, la->la_gid))) &&
1205 !mdd_capable(uc, CAP_CHOWN))
1209 * Likewise, if the user or group of a non-directory
1210 * has been changed by a non-root user, remove the
1211 * setgid bit UNLESS there is no group execute bit
1212 * (this would be a file marked for mandatory
1213 * locking). 19981026 David C Niemi <niemi@tux.org>
1215 * Removed the fsuid check (see the comment above) --
1218 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
1219 (S_ISGID | S_IXGRP)) && !S_ISDIR(tmp_la->la_mode)) {
1220 la->la_mode &= ~S_ISGID;
1221 la->la_valid |= LA_MODE;
1225 /* For truncate (or setsize), we should have MAY_WRITE perm */
1226 if (la->la_valid & (LA_SIZE | LA_BLOCKS)) {
1227 rc = mdd_permission_internal(env, obj, MAY_WRITE);
1232 * For the "Size-on-MDS" setattr update, merge coming
1233 * attributes with the set in the inode. BUG 10641
1235 if ((la->la_valid & LA_ATIME) &&
1236 (la->la_atime < tmp_la->la_atime))
1237 la->la_valid &= ~LA_ATIME;
1239 if ((la->la_valid & LA_CTIME) &&
1240 (la->la_ctime < tmp_la->la_ctime))
1241 la->la_valid &= ~(LA_MTIME | LA_CTIME);
1243 if (!(la->la_valid & LA_MTIME) && (now > tmp_la->la_mtime)) {
1245 la->la_valid |= LA_MTIME;
1249 /* For last, ctime must be fixed */
1250 if (!(la->la_valid & LA_CTIME) && (now > tmp_la->la_ctime)) {
1252 la->la_valid |= LA_CTIME;
1258 /* set attr and LOV EA at once, return updated attr */
/*
 * md_object setattr entry point: build txn credits, open a transaction,
 * snapshot the LOV EA when a chown/chgrp on a regular file may need to be
 * journalled, run mdd_fix_attr() on a copy of the requested attributes,
 * apply them (plus an optional LOV EA from 'ma'), stop the transaction and
 * finally push the attribute change to the OSTs asynchronously.
 * NOTE(review): sampled listing — braces, several RETURN/GOTO lines, the
 * 'cleanup:' label and some argument tails are missing; code left
 * byte-identical.
 */
1259 static int mdd_attr_set(const struct lu_env *env, struct md_object *obj,
1260 const struct md_attr *ma)
1262 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1263 struct mdd_device *mdd = mdo2mdd(obj);
1264 struct thandle *handle;
1265 struct lov_mds_md *lmm = NULL;
1266 int rc = 0, lmm_size = 0, max_size = 0;
1267 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
1270 mdd_txn_param_build(env, MDD_TXN_ATTR_SET_OP);
1271 handle = mdd_trans_start(env, mdd);
1273 RETURN(PTR_ERR(handle));
1274 /*TODO: add lock here*/
1275 /* start a log journal handle if needed */
1276 if (S_ISREG(mdd_object_type(mdd_obj)) &&
1277 ma->ma_attr.la_valid & (LA_UID | LA_GID)) {
1278 max_size = mdd_lov_mdsize(env, mdd);
1279 OBD_ALLOC(lmm, max_size);
1281 GOTO(cleanup, rc = -ENOMEM);
1283 rc = mdd_get_md_locked(env, mdd_obj, lmm, &lmm_size,
/* NOTE(review): mixes ATTR_* with la_valid LA_* bits here — the missing
 * context may justify it, but looks suspicious; verify against original */
1290 if (ma->ma_attr.la_valid & (ATTR_MTIME | ATTR_CTIME))
1291 CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n",
1292 ma->ma_attr.la_mtime, ma->ma_attr.la_ctime);
/* work on a copy so the caller's md_attr stays const */
1294 *la_copy = ma->ma_attr;
1295 mdd_write_lock(env, mdd_obj);
1296 rc = mdd_fix_attr(env, mdd_obj, la_copy);
1297 mdd_write_unlock(env, mdd_obj);
1301 if (la_copy->la_valid & LA_FLAGS) {
1302 rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy,
/* keep the cached mod_flags in sync with the new on-disk flags */
1305 mdd_flags_xlate(mdd_obj, la_copy->la_flags);
1306 } else if (la_copy->la_valid) { /* setattr */
1307 rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy,
1309 /* journal chown/chgrp in llog, just like unlink */
1310 if (rc == 0 && lmm_size){
1311 /*TODO set_attr llog */
1315 if (rc == 0 && ma->ma_valid & MA_LOV) {
1318 mode = mdd_object_type(mdd_obj);
1319 if (S_ISREG(mode) || S_ISDIR(mode)) {
1320 /*TODO check permission*/
1321 rc = mdd_lov_set_md(env, NULL, mdd_obj, ma->ma_lmm,
1322 ma->ma_lmm_size, handle, 1);
1327 mdd_trans_stop(env, mdd, rc, handle);
1328 if (rc == 0 && lmm_size) {
1329 /*set obd attr, if needed*/
1330 rc = mdd_lov_setattr_async(env, mdd_obj, lmm, lmm_size);
1333 OBD_FREE(lmm, max_size);
1339 int mdd_xattr_set_txn(const struct lu_env *env, struct mdd_object *obj,
1340 const struct lu_buf *buf, const char *name, int fl,
1341 struct thandle *handle)
1346 mdd_write_lock(env, obj);
1347 rc = __mdd_xattr_set(env, obj, buf, name, fl, handle);
1348 mdd_write_unlock(env, obj);
1353 static int mdd_xattr_sanity_check(const struct lu_env *env,
1354 struct mdd_object *obj)
1356 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1357 struct md_ucred *uc = md_ucred(env);
1361 if (mdd_is_immutable(obj) || mdd_is_append(obj))
1364 mdd_read_lock(env, obj);
1365 rc = __mdd_la_get(env, obj, tmp_la);
1366 mdd_read_unlock(env, obj);
1370 if ((uc->mu_fsuid != tmp_la->la_uid) && !mdd_capable(uc, CAP_FOWNER))
/*
 * md_object xattr-set entry point: permission gate, build credits, open a
 * transaction, do the locked set via mdd_xattr_set_txn(), then stop the
 * transaction.  Under HAVE_SPLIT_SUPPORT a successful LMV set is turned
 * into -ERESTART so the client replays with transno 0 (see comment below).
 * NOTE(review): sampled listing — braces, 'int rc;', early RETURNs, the
 * trailing argument of the mdd_xattr_set_txn() call and the final RETURN
 * are missing; code left byte-identical.
 */
1376 static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj,
1377 const struct lu_buf *buf, const char *name, int fl)
1379 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1380 struct mdd_device *mdd = mdo2mdd(obj);
1381 struct thandle *handle;
1385 rc = mdd_xattr_sanity_check(env, mdd_obj);
1389 mdd_txn_param_build(env, MDD_TXN_XATTR_SET_OP);
1390 handle = mdd_trans_start(env, mdd);
1392 RETURN(PTR_ERR(handle));
1394 rc = mdd_xattr_set_txn(env, md2mdd_obj(obj), buf, name,
1396 #ifdef HAVE_SPLIT_SUPPORT
1398 /* very ugly hack, if setting lmv, it means splitting
1399 * success, we should return -ERESTART to notify the
1400 * client, so transno for this splitting should be
1401 * zero according to the replay rules. so return -ERESTART
1402 * here let mdt trans stop callback know this.
1404 if (strncmp(name, MDS_LMV_MD_NAME, strlen(name)) == 0)
1408 mdd_trans_stop(env, mdd, rc, handle);
/* Delete xattr @name from @obj by delegating to the underlying
 * dt_object's do_xattr_del.  Object must exist (asserted).  @mdd is
 * unused in the visible body. */
1413 static int __mdd_xattr_del(const struct lu_env *env,struct mdd_device *mdd,
1414 struct mdd_object *obj,
1415 const char *name, struct thandle *handle)
1417 struct dt_object *next;
1419 LASSERT(lu_object_exists(mdd2lu_obj(obj)));
1420 next = mdd_object_child(obj);
1421 return next->do_ops->do_xattr_del(env, next, name, handle);
/* md_object_operations entry: remove an xattr.  Same shape as
 * mdd_xattr_set(): sanity check, start txn, do the delete under the
 * object write lock, stop txn.  Note it reuses MDD_TXN_XATTR_SET_OP
 * transaction credits for the delete. */
1424 int mdd_xattr_del(const struct lu_env *env, struct md_object *obj,
1427 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1428 struct mdd_device *mdd = mdo2mdd(obj);
1429 struct thandle *handle;
1433 rc = mdd_xattr_sanity_check(env, mdd_obj);
1437 mdd_txn_param_build(env, MDD_TXN_XATTR_SET_OP);
1438 handle = mdd_trans_start(env, mdd);
1440 RETURN(PTR_ERR(handle));
1442 mdd_write_lock(env, mdd_obj);
1443 rc = __mdd_xattr_del(env, mdd, md2mdd_obj(obj), name, handle);
1444 mdd_write_unlock(env, mdd_obj);
1446 mdd_trans_stop(env, mdd, rc, handle);
/* Insert (name -> fid) into directory @pobj's index without touching
 * nlink or timestamps.  dt_try_as_dir() loads the index ops first. */
1451 static int __mdd_index_insert_only(const struct lu_env *env,
1452 struct mdd_object *pobj,
1453 const struct lu_fid *lf,
1454 const char *name, struct thandle *th)
1457 struct dt_object *next = mdd_object_child(pobj);
1460 if (dt_try_as_dir(env, next))
1461 rc = next->do_index_ops->dio_insert(env, next,
1462 (struct dt_rec *)lf,
1463 (struct dt_key *)name, th);
1469 /* insert new index, add reference if isdir, update times */
1470 static int __mdd_index_insert(const struct lu_env *env,
1471 struct mdd_object *pobj, const struct lu_fid *lf,
1472 const char *name, int isdir, struct thandle *th)
1475 struct dt_object *next = mdd_object_child(pobj);
1479 struct lu_attr *la = &mdd_env_info(env)->mti_la;
1482 if (dt_try_as_dir(env, next))
1483 rc = next->do_index_ops->dio_insert(env, next,
1484 (struct dt_rec *)lf,
1485 (struct dt_key *)name,
/* new subdirectory: bump the parent's nlink for its ".." entry */
1492 __mdd_ref_add(env, pobj, th);
/* NOTE(review): la_valid claims LA_MTIME|LA_CTIME but la_mtime is never
 * assigned below while la_atime (not in the mask) is -- looks like
 * la_atime should be la_mtime; elided lines prevent a confident fix. */
1494 la->la_valid = LA_MTIME|LA_CTIME;
1495 la->la_atime = ma->ma_attr.la_atime;
1496 la->la_ctime = ma->ma_attr.la_ctime;
1497 rc = mdd_attr_set_internal(env, mdd_obj, la, handle);
/* Remove @name from directory @pobj's index; if the removed entry was a
 * directory, drop the parent's nlink taken for its ".." entry. */
1503 static int __mdd_index_delete(const struct lu_env *env,
1504 struct mdd_object *pobj, const char *name,
1505 int is_dir, struct thandle *handle)
1508 struct dt_object *next = mdd_object_child(pobj);
1511 if (dt_try_as_dir(env, next)) {
1512 rc = next->do_index_ops->dio_delete(env, next,
1513 (struct dt_key *)name,
1515 if (rc == 0 && is_dir)
1516 __mdd_ref_del(env, pobj, handle);
/* May @src_obj be hard-linked under @tgt_obj?  Rejects directories and
 * immutable/append-only sources; target dir must allow creation. */
1522 static int mdd_link_sanity_check(const struct lu_env *env,
1523 struct mdd_object *tgt_obj,
1524 struct mdd_object *src_obj)
1529 rc = mdd_may_create(env, tgt_obj, NULL, 1);
/* hard links to directories are not allowed */
1533 if (S_ISDIR(mdd_object_type(src_obj)))
1536 if (mdd_is_immutable(src_obj) || mdd_is_append(src_obj))
/* Create hard link @name in @tgt_obj pointing at @src_obj: insert the
 * index entry, bump the source's nlink, then update ctime on the source
 * and ctime/mtime on the target directory.  Both objects are locked via
 * mdd_lock2() in fid order. */
1542 static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj,
1543 struct md_object *src_obj, const char *name,
1546 struct mdd_object *mdd_tobj = md2mdd_obj(tgt_obj);
1547 struct mdd_object *mdd_sobj = md2mdd_obj(src_obj);
1548 struct mdd_device *mdd = mdo2mdd(src_obj);
1549 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
1550 struct thandle *handle;
1554 mdd_txn_param_build(env, MDD_TXN_LINK_OP);
1555 handle = mdd_trans_start(env, mdd);
1557 RETURN(PTR_ERR(handle));
1559 mdd_lock2(env, mdd_tobj, mdd_sobj);
1561 rc = mdd_link_sanity_check(env, mdd_tobj, mdd_sobj);
1565 rc = __mdd_index_insert_only(env, mdd_tobj, mdo2fid(mdd_sobj),
1568 __mdd_ref_add(env, mdd_sobj, handle);
1570 *la_copy = ma->ma_attr;
1571 la_copy->la_valid = LA_CTIME;
1572 rc = mdd_attr_set_internal(env, mdd_sobj, la_copy, handle);
1576 la_copy->la_valid = LA_CTIME | LA_MTIME;
1577 rc = mdd_attr_set_internal(env, mdd_tobj, la_copy, handle);
1580 mdd_unlock2(env, mdd_tobj, mdd_sobj);
1581 mdd_trans_stop(env, mdd, rc, handle);
1586 * Check that @dir contains no entries except (possibly) dot and dotdot.
/* Returns 0 when empty, -ENOTEMPTY when a third entry exists. */
1591 * -ENOTEMPTY not empty
1595 static int mdd_dir_is_empty(const struct lu_env *env,
1596 struct mdd_object *dir)
1599 struct dt_object *obj;
1600 struct dt_it_ops *iops;
1603 obj = mdd_object_child(dir);
1604 iops = &obj->do_index_ops->dio_it;
1605 it = iops->init(env, obj, 0);
1607 result = iops->get(env, it, (const void *)"");
/* walk at most 3 entries: ".", "..", and anything else means non-empty */
1610 for (result = 0, i = 0; result == 0 && i < 3; ++i)
1611 result = iops->next(env, it);
1613 result = -ENOTEMPTY;
1614 else if (result == +1)
1616 } else if (result == 0)
1618 * Huh? Index contains no zero key?
1623 iops->fini(env, it);
1629 /* return md_attr back,
1630 * if it is last unlink then return lov ea + llog cookie*/
/* Mark @obj dead; for regular files also fetch the LOV EA into @ma and
 * record the unlink in the llog so OST objects can be destroyed. */
1631 int __mdd_object_kill(const struct lu_env *env,
1632 struct mdd_object *obj,
1638 mdd_set_dead_obj(obj);
1639 if (S_ISREG(mdd_object_type(obj))) {
1640 /* Return LOV & COOKIES unconditionally here. We clean evth up.
1641 * Caller must be ready for that. */
1642 rc = __mdd_lmm_get(env, obj, ma);
1643 if ((ma->ma_valid & MA_LOV))
1644 rc = mdd_unlink_log(env, mdo2mdd(&obj->mod_obj),
1650 /* caller should take a lock before calling */
/* Post-unlink bookkeeping: refresh attrs; when nlink hit zero, move the
 * object to the orphan list (deleted at last close) and, if nobody has
 * it open, destroy it immediately via __mdd_object_kill(). */
1651 static int __mdd_finish_unlink(const struct lu_env *env,
1652 struct mdd_object *obj, struct md_attr *ma,
1658 rc = __mdd_iattr_get(env, obj, ma);
1659 if (rc == 0 && ma->ma_attr.la_nlink == 0) {
1660 /* add new orphan and the object
1661 * will be deleted during the object_put() */
1662 if (__mdd_orphan_add(env, obj, th) == 0)
1663 set_bit(LU_OBJECT_ORPHAN,
1664 &mdd2lu_obj(obj)->lo_header->loh_flags);
1666 if (obj->mod_count == 0)
1667 rc = __mdd_object_kill(env, obj, ma);
/* May @cobj be unlinked from @pobj?  Checks delete permission and, for
 * directories, that the victim is empty. */
1672 static int mdd_unlink_sanity_check(const struct lu_env *env,
1673 struct mdd_object *pobj,
1674 struct mdd_object *cobj,
1677 struct dt_object *dt_cobj = mdd_object_child(cobj);
1681 rc = mdd_may_delete(env, pobj, cobj,
1682 S_ISDIR(ma->ma_attr.la_mode), 1);
1686 if (S_ISDIR(mdd_object_type(cobj))) {
1687 if (dt_try_as_dir(env, dt_cobj))
1688 rc = mdd_dir_is_empty(env, cobj);
/* Unlink @name/@cobj from @pobj: delete the index entry, drop nlink
 * (twice for directories -- entry plus "."), update ctime on the child
 * and ctime/mtime on the parent, then finish via __mdd_finish_unlink().
 * The "unlinked" set_info notifies the OSC layer of the removal. */
1696 static int mdd_unlink(const struct lu_env *env,
1697 struct md_object *pobj, struct md_object *cobj,
1698 const char *name, struct md_attr *ma)
1700 struct mdd_device *mdd = mdo2mdd(pobj);
1701 struct mdd_object *mdd_pobj = md2mdd_obj(pobj);
1702 struct mdd_object *mdd_cobj = md2mdd_obj(cobj);
1703 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
1704 struct thandle *handle;
1708 mdd_txn_param_build(env, MDD_TXN_UNLINK_OP);
1709 handle = mdd_trans_start(env, mdd);
1711 RETURN(PTR_ERR(handle));
1713 mdd_lock2(env, mdd_pobj, mdd_cobj);
1715 rc = mdd_unlink_sanity_check(env, mdd_pobj, mdd_cobj, ma);
1719 is_dir = S_ISDIR(lu_object_attr(&cobj->mo_lu));
1720 rc = __mdd_index_delete(env, mdd_pobj, name, is_dir, handle);
1724 __mdd_ref_del(env, mdd_cobj, handle);
1725 *la_copy = ma->ma_attr;
/* second ref drop: a directory's own "." entry (elided condition) */
1728 __mdd_ref_del(env, mdd_cobj, handle);
1730 la_copy->la_valid = LA_CTIME;
1731 rc = mdd_attr_set_internal(env, mdd_cobj, la_copy, handle);
1736 la_copy->la_valid = LA_CTIME | LA_MTIME;
1737 rc = mdd_attr_set_internal(env, mdd_pobj, la_copy, handle);
1741 rc = __mdd_finish_unlink(env, mdd_cobj, ma, handle);
1744 obd_set_info_async(mdd2obd_dev(mdd)->u.mds.mds_osc_exp,
1745 strlen("unlinked"), "unlinked", 0,
1749 mdd_unlock2(env, mdd_pobj, mdd_cobj);
1750 mdd_trans_stop(env, mdd, rc, handle);
1754 /* partial unlink */
/* Drop one reference from @obj (two for directories) without touching
 * any parent index -- used when the name removal happens elsewhere.
 * NOTE(review): no IS_ERR(handle) check is visible after trans_start;
 * it is presumably on an elided line -- confirm in the full source. */
1755 static int mdd_ref_del(const struct lu_env *env, struct md_object *obj,
1758 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1759 struct mdd_device *mdd = mdo2mdd(obj);
1760 struct thandle *handle;
1764 mdd_txn_param_build(env, MDD_TXN_XATTR_SET_OP);
1765 handle = mdd_trans_start(env, mdd);
1769 mdd_write_lock(env, mdd_obj);
1771 rc = mdd_unlink_sanity_check(env, NULL, mdd_obj, ma);
1775 __mdd_ref_del(env, mdd_obj, handle);
1777 if (S_ISDIR(lu_object_attr(&obj->mo_lu))) {
/* a directory also holds a self-reference for "." */
1779 __mdd_ref_del(env, mdd_obj, handle);
1782 rc = __mdd_finish_unlink(env, mdd_obj, ma, handle);
1786 mdd_write_unlock(env, mdd_obj);
1787 mdd_trans_stop(env, mdd, rc, handle);
/* Fetch the fid of @obj's parent by looking up ".." (elided args). */
1791 static int mdd_parent_fid(const struct lu_env *env,
1792 struct mdd_object *obj,
1795 return __mdd_lookup_locked(env, &obj->mod_obj,
1800 * return 1: if lf is the fid of the ancestor of p1;
1803 * return -EREMOTE: if remote object is found, in this
1804 * case fid of remote object is saved to @pf;
1806 * otherwise: values < 0, errors.
/* Walk ".." links from @p1 toward the root, comparing each ancestor fid
 * against @lf.  NOTE(review): the body GOTOs rc = EREMOTE (positive) for
 * a cross-ref parent while the header says -EREMOTE -- the caller
 * (mdd_is_subdir) documents positive EREMOTE, so the header text may be
 * the stale part; confirm against the full source. */
1808 static int mdd_is_parent(const struct lu_env *env,
1809 struct mdd_device *mdd,
1810 struct mdd_object *p1,
1811 const struct lu_fid *lf,
1814 struct mdd_object *parent = NULL;
1815 struct lu_fid *pfid;
1819 LASSERT(!lu_fid_eq(mdo2fid(p1), lf));
1820 pfid = &mdd_env_info(env)->mti_fid;
1822 /* Do not lookup ".." in root, they do not exist there. */
1823 if (lu_fid_eq(mdo2fid(p1), &mdd->mdd_root_fid))
1827 rc = mdd_parent_fid(env, p1, pfid);
1830 if (lu_fid_eq(pfid, &mdd->mdd_root_fid))
1832 if (lu_fid_eq(pfid, lf))
1835 mdd_object_put(env, parent);
1836 parent = mdd_object_find(env, mdd, pfid);
1838 /* cross-ref parent */
1839 if (parent == NULL) {
1842 GOTO(out, rc = EREMOTE);
1843 } else if (IS_ERR(parent))
1844 GOTO(out, rc = PTR_ERR(parent));
1849 if (parent && !IS_ERR(parent))
1850 mdd_object_put(env, parent);
/* Lock the two rename parents in a deadlock-free order: same object ->
 * single lock; root first; otherwise ancestor before descendant as
 * determined by mdd_is_parent(). */
1854 static int mdd_rename_lock(const struct lu_env *env,
1855 struct mdd_device *mdd,
1856 struct mdd_object *src_pobj,
1857 struct mdd_object *tgt_pobj)
1862 if (src_pobj == tgt_pobj) {
1863 mdd_write_lock(env, src_pobj);
1867 /* compared the parent child relationship of src_p & tgt_p */
1868 if (lu_fid_eq(&mdd->mdd_root_fid, mdo2fid(src_pobj))){
1869 mdd_lock2(env, src_pobj, tgt_pobj);
1871 } else if (lu_fid_eq(&mdd->mdd_root_fid, mdo2fid(tgt_pobj))) {
1872 mdd_lock2(env, tgt_pobj, src_pobj);
/* neither is root: lock the ancestor first */
1876 rc = mdd_is_parent(env, mdd, src_pobj, mdo2fid(tgt_pobj), NULL);
1881 mdd_lock2(env, tgt_pobj, src_pobj);
1885 mdd_lock2(env, src_pobj, tgt_pobj);
/* Undo mdd_rename_lock(): always unlock src; unlock tgt only when it is
 * a distinct object. */
1890 static void mdd_rename_unlock(const struct lu_env *env,
1891 struct mdd_object *src_pobj,
1892 struct mdd_object *tgt_pobj)
1894 mdd_write_unlock(env, src_pobj);
1895 if (src_pobj != tgt_pobj)
1896 mdd_write_unlock(env, tgt_pobj);
/* Full-rename permission checks: may delete source from src_pobj; may
 * create (tobj == NULL) or replace (tobj != NULL, must be empty if a
 * dir) in tgt_pobj; and the source must not be an ancestor of the
 * target directory (loop prevention). */
1899 static int mdd_rename_sanity_check(const struct lu_env *env,
1900 struct mdd_object *src_pobj,
1901 struct mdd_object *tgt_pobj,
1902 const struct lu_fid *sfid,
1904 struct mdd_object *sobj,
1905 struct mdd_object *tobj)
1907 struct mdd_device *mdd = mdo2mdd(&src_pobj->mod_obj);
1908 int rc = 0, need_check = 1;
1911 mdd_read_lock(env, src_pobj);
1912 rc = mdd_may_delete(env, src_pobj, sobj, src_is_dir, need_check);
1913 mdd_read_unlock(env, src_pobj);
/* same parent: the check above already covered the target side */
1917 if (src_pobj == tgt_pobj)
1921 mdd_read_lock(env, tgt_pobj);
1922 rc = mdd_may_create(env, tgt_pobj, NULL, need_check);
1923 mdd_read_unlock(env, tgt_pobj);
1925 mdd_read_lock(env, tgt_pobj);
1926 rc = mdd_may_delete(env, tgt_pobj, tobj, src_is_dir,
1928 mdd_read_unlock(env, tgt_pobj);
1929 if (!rc && S_ISDIR(mdd_object_type(tobj)) &&
1930 mdd_dir_is_empty(env, tobj))
1934 /* source should not be ancestor of target dir */
1935 if (!rc && src_is_dir && mdd_is_parent(env, mdd, tgt_pobj, sfid, NULL))
1940 /* src object can be remote that is why we use only fid and type of object */
/* Rename @sname in @src_pobj to @tname in @tgt_pobj.  Sequence: sanity
 * check, start txn, order-lock the parents, delete the source entry,
 * unconditionally delete any target entry (-ENOENT tolerated since tobj
 * may be remote), insert the new entry, then update ctime/mtime on the
 * affected objects and finish-unlink a replaced target. */
1941 static int mdd_rename(const struct lu_env *env,
1942 struct md_object *src_pobj, struct md_object *tgt_pobj,
1943 const struct lu_fid *lf, const char *sname,
1944 struct md_object *tobj, const char *tname,
1947 struct mdd_device *mdd = mdo2mdd(src_pobj);
1948 struct mdd_object *mdd_spobj = md2mdd_obj(src_pobj);
1949 struct mdd_object *mdd_tpobj = md2mdd_obj(tgt_pobj);
1950 struct mdd_object *mdd_sobj = mdd_object_find(env, mdd, lf);
1951 struct mdd_object *mdd_tobj = NULL;
1952 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
1953 struct thandle *handle;
1958 LASSERT(ma->ma_attr.la_mode & S_IFMT);
1959 is_dir = S_ISDIR(ma->ma_attr.la_mode);
/* immutable/append-only sources may not be renamed */
1960 if (ma->ma_attr.la_valid & LA_FLAGS &&
1961 ma->ma_attr.la_flags & (LUSTRE_APPEND_FL | LUSTRE_IMMUTABLE_FL))
1962 GOTO(out, rc = -EPERM);
1965 mdd_tobj = md2mdd_obj(tobj);
1967 /*XXX: shouldn't this check be done under lock below? */
1968 rc = mdd_rename_sanity_check(env, mdd_spobj, mdd_tpobj,
1969 lf, is_dir, mdd_sobj, mdd_tobj);
1973 mdd_txn_param_build(env, MDD_TXN_RENAME_OP);
1974 handle = mdd_trans_start(env, mdd);
1976 GOTO(out, rc = PTR_ERR(handle));
1978 /*FIXME: Should consider tobj and sobj too in rename_lock*/
1979 rc = mdd_rename_lock(env, mdd, mdd_spobj, mdd_tpobj);
1981 GOTO(cleanup_unlocked, rc);
1983 rc = __mdd_index_delete(env, mdd_spobj, sname, is_dir, handle);
1987 /* tobj can be remote one,
1988 * so we do index_delete unconditionally and -ENOENT is allowed */
1989 rc = __mdd_index_delete(env, mdd_tpobj, tname, is_dir, handle);
1990 if (rc != 0 && rc != -ENOENT)
1993 rc = __mdd_index_insert(env, mdd_tpobj, lf, tname, is_dir, handle);
1997 *la_copy = ma->ma_attr;
1998 la_copy->la_valid = LA_CTIME;
2000 /*XXX: how to update ctime for remote sobj? */
2001 rc = mdd_attr_set_internal_locked(env, mdd_sobj, la_copy, handle);
/* a replaced target loses one link (two for a dir: entry + ".") */
2005 if (tobj && lu_object_exists(&tobj->mo_lu)) {
2006 mdd_write_lock(env, mdd_tobj);
2007 __mdd_ref_del(env, mdd_tobj, handle);
2008 /* remove dot reference */
2010 __mdd_ref_del(env, mdd_tobj, handle);
2012 la_copy->la_valid = LA_CTIME;
2013 rc = mdd_attr_set_internal(env, mdd_tobj, la_copy, handle);
2017 rc = __mdd_finish_unlink(env, mdd_tobj, ma, handle);
2018 mdd_write_unlock(env, mdd_tobj);
2023 la_copy->la_valid = LA_CTIME | LA_MTIME;
2024 rc = mdd_attr_set_internal(env, mdd_spobj, la_copy, handle);
2028 if (mdd_spobj != mdd_tpobj) {
2029 la_copy->la_valid = LA_CTIME | LA_MTIME;
2030 rc = mdd_attr_set_internal(env, mdd_tpobj, la_copy, handle);
2034 mdd_rename_unlock(env, mdd_spobj, mdd_tpobj);
2036 mdd_trans_stop(env, mdd, rc, handle);
2039 mdd_object_put(env, mdd_sobj);
/* Lockless lookup of @name in directory @pobj, storing the result fid
 * through @fid.  @mask is the permission to check on the directory
 * (MAY_EXEC uses the cheaper "lite" check). */
2044 __mdd_lookup(const struct lu_env *env, struct md_object *pobj,
2045 const char *name, const struct lu_fid* fid, int mask)
2047 struct mdd_object *mdd_obj = md2mdd_obj(pobj);
2048 struct dt_object *dir = mdd_object_child(mdd_obj);
2049 struct dt_rec *rec = (struct dt_rec *)fid;
2050 const struct dt_key *key = (const struct dt_key *)name;
2054 if (mdd_is_dead_obj(mdd_obj))
2057 if (mask == MAY_EXEC)
2058 rc = mdd_exec_permission_lite(env, mdd_obj);
2060 rc = mdd_permission_internal(env, mdd_obj, mask);
2064 if (S_ISDIR(mdd_object_type(mdd_obj)) && dt_try_as_dir(env, dir))
2065 rc = dir->do_index_ops->dio_lookup(env, dir, rec, key);
/* __mdd_lookup() wrapped in the directory's read lock. */
2073 __mdd_lookup_locked(const struct lu_env *env, struct md_object *pobj,
2074 const char *name, const struct lu_fid* fid, int mask)
2076 struct mdd_object *mdd_obj = md2mdd_obj(pobj);
2079 mdd_read_lock(env, mdd_obj);
2080 rc = __mdd_lookup(env, pobj, name, fid, mask);
2081 mdd_read_unlock(env, mdd_obj);
/* md_object_operations entry: locked lookup requiring MAY_EXEC. */
2086 static int mdd_lookup(const struct lu_env *env,
2087 struct md_object *pobj, const char *name,
2092 rc = __mdd_lookup_locked(env, pobj, name, fid, MAY_EXEC);
2097 * returns 1: if fid is ancestor of @mo;
2098 * returns 0: if fid is not a ancestor of @mo;
2100 * returns EREMOTE if remote object is found, fid of remote object is saved to
2103 * returns < 0: if error
/* Non-directories trivially have no ancestors below them. */
2105 static int mdd_is_subdir(const struct lu_env *env,
2106 struct md_object *mo, const struct lu_fid *fid,
2107 struct lu_fid *sfid)
2109 struct mdd_device *mdd = mdo2mdd(mo);
2113 if (!S_ISDIR(mdd_object_type(md2mdd_obj(mo))))
2116 rc = mdd_is_parent(env, mdd, md2mdd_obj(mo), fid, sfid);
/* Initialize a freshly created object: set its attributes from @ma, and
 * for directories add the "." self-entry (with its extra nlink) and the
 * ".." entry pointing at @pfid, rolling back "." on failure. */
2121 static int __mdd_object_initialize(const struct lu_env *env,
2122 const struct lu_fid *pfid,
2123 struct mdd_object *child,
2124 struct md_attr *ma, struct thandle *handle)
2129 /* update attributes for child.
2131 * (1) the valid bits should be converted between Lustre and Linux;
2132 * (2) maybe, the child attributes should be set in OSD when creation.
2135 rc = mdd_attr_set_internal(env, child, &ma->ma_attr, handle);
2139 if (S_ISDIR(ma->ma_attr.la_mode)) {
2140 /* add . and .. for newly created dir */
2141 __mdd_ref_add(env, child, handle);
2142 rc = __mdd_index_insert_only(env, child, mdo2fid(child),
2145 rc = __mdd_index_insert_only(env, child, pfid,
/* ".." insertion failed: undo the "." entry and its nlink */
2150 rc2 = __mdd_index_delete(env,
2151 child, dot, 0, handle);
2153 CERROR("Failure to cleanup after dotdot"
2154 " creation: %d (%d)\n", rc2, rc);
2156 __mdd_ref_del(env, child, handle);
2164 * XXX: Need MAY_WRITE to be checked?
/* Sanity check before create-data: object must exist, not be dead, and
 * the caller must have write permission on it. */
2166 static int mdd_cd_sanity_check(const struct lu_env *env,
2167 struct mdd_object *obj)
2173 if (!obj || mdd_is_dead_obj(obj))
2177 mdd_read_lock(env, obj);
2178 rc = mdd_permission_internal(env, obj, MAY_WRITE);
2179 mdd_read_unlock(env, obj);
/* Create the OST objects (LOV EA) for an already-created MDS inode,
 * e.g. on first open-for-write of a delay-created file.  The LOV create
 * happens before the transaction (no RPCs inside a txn); replayed
 * creates reuse the EA carried in @spec instead. */
2186 static int mdd_create_data(const struct lu_env *env,
2187 struct md_object *pobj, struct md_object *cobj,
2188 const struct md_create_spec *spec,
2191 struct mdd_device *mdd = mdo2mdd(cobj);
2192 struct mdd_object *mdd_pobj = md2mdd_obj(pobj);/* XXX maybe NULL */
2193 struct mdd_object *son = md2mdd_obj(cobj);
2194 struct lu_attr *attr = &ma->ma_attr;
2195 struct lov_mds_md *lmm = NULL;
2197 struct thandle *handle;
2201 rc = mdd_cd_sanity_check(env, son);
/* nothing to do for delayed-create or read-only opens */
2205 if (spec->sp_cr_flags & MDS_OPEN_DELAY_CREATE ||
2206 !(spec->sp_cr_flags & FMODE_WRITE))
2208 rc = mdd_lov_create(env, mdd, mdd_pobj, son, &lmm, &lmm_size, spec,
2213 mdd_txn_param_build(env, MDD_TXN_CREATE_DATA_OP);
2214 handle = mdd_trans_start(env, mdd);
2216 RETURN(rc = PTR_ERR(handle));
2218 /*XXX: setting the lov ea is not locked
2219 * but setting the attr is locked? */
2221 /* replay creates has objects already */
2222 if (spec->u.sp_ea.no_lov_create) {
2223 CDEBUG(D_INFO, "we already have lov ea\n");
2224 rc = mdd_lov_set_md(env, mdd_pobj, son,
2225 (struct lov_mds_md *)spec->u.sp_ea.eadata,
2226 spec->u.sp_ea.eadatalen, handle, 0);
2228 rc = mdd_lov_set_md(env, mdd_pobj, son, lmm,
2229 lmm_size, handle, 0);
2232 rc = mdd_attr_get_internal_locked(env, son, ma);
2234 /* finish mdd_lov_create() stuff */
2235 mdd_lov_create_finish(env, mdd, rc);
2236 mdd_trans_stop(env, mdd, rc, handle);
2238 OBD_FREE(lmm, lmm_size);
/* Pre-create checks: parent alive, @name does not already exist (the
 * lookup doubles as a MAY_WRITE|MAY_EXEC permission check), and SGID
 * inheritance: children take the parent's gid, directories also inherit
 * the SGID bit.  The trailing switch validates the file type. */
2242 static int mdd_create_sanity_check(const struct lu_env *env,
2243 struct md_object *pobj,
2244 const char *name, struct md_attr *ma)
2246 struct mdd_thread_info *info = mdd_env_info(env);
2247 struct lu_attr *la = &info->mti_la;
2248 struct lu_fid *fid = &info->mti_fid;
2249 struct mdd_object *obj = md2mdd_obj(pobj);
2254 if (mdd_is_dead_obj(obj))
2257 rc = __mdd_lookup_locked(env, pobj, name, fid,
2258 MAY_WRITE | MAY_EXEC);
/* lookup success means the name exists already */
2260 RETURN(rc ? : -EEXIST);
2263 mdd_read_lock(env, obj);
2264 rc = __mdd_la_get(env, obj, la);
2265 mdd_read_unlock(env, obj);
2269 if (la->la_mode & S_ISGID) {
2270 ma->ma_attr.la_gid = la->la_gid;
2271 if (S_ISDIR(ma->ma_attr.la_mode)) {
2272 ma->ma_attr.la_mode |= S_ISGID;
2273 ma->ma_attr.la_valid |= LA_MODE;
2277 switch (ma->ma_attr.la_mode & S_IFMT) {
2295 * Create object and insert it into namespace.
/* Main create path.  Order of operations: sanity check; LOV object
 * creation outside the txn (no RPCs inside); txn start; object create
 * then initialize then index insert ("creation-first", see comment
 * below); LOV EA write; symlink body write; parent time update; and on
 * error, rollback of the index entry and the created object's ref. */
2297 static int mdd_create(const struct lu_env *env,
2298 struct md_object *pobj, const char *name,
2299 struct md_object *child,
2300 const struct md_create_spec *spec,
2303 struct mdd_device *mdd = mdo2mdd(pobj);
2304 struct mdd_object *mdd_pobj = md2mdd_obj(pobj);
2305 struct mdd_object *son = md2mdd_obj(child);
2306 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
2307 struct lu_attr *attr = &ma->ma_attr;
2308 struct lov_mds_md *lmm = NULL;
2309 struct thandle *handle;
2310 int rc, created = 0, inserted = 0, lmm_size = 0;
2313 /* sanity checks before big job */
2314 rc = mdd_create_sanity_check(env, pobj, name, ma);
2318 /* no RPC inside the transaction, so OST objects should be created at
2320 if (S_ISREG(attr->la_mode)) {
2321 rc = mdd_lov_create(env, mdd, mdd_pobj, son, &lmm, &lmm_size,
2327 mdd_txn_param_build(env, MDD_TXN_MKDIR_OP);
2328 handle = mdd_trans_start(env, mdd);
2330 RETURN(PTR_ERR(handle));
2332 mdd_write_lock(env, mdd_pobj);
2335 * XXX check that link can be added to the parent in mkdir case.
2339 * Two operations have to be performed:
2341 * - allocation of new object (->do_create()), and
2343 * - insertion into parent index (->dio_insert()).
2345 * Due to locking, operation order is not important, when both are
2346 * successful, *but* error handling cases are quite different:
2348 * - if insertion is done first, and following object creation fails,
2349 * insertion has to be rolled back, but this operation might fail
2350 * also leaving us with dangling index entry.
2352 * - if creation is done first, is has to be undone if insertion
2353 * fails, leaving us with leaked space, which is neither good, nor
2356 * It seems that creation-first is simplest solution, but it is
2357 * sub-optimal in the frequent
2362 * case, because second mkdir is bound to create object, only to
2363 * destroy it immediately.
2365 * Note that local file systems do
2367 * 0. lookup -> -EEXIST
2373 * Maybe we should do the same. For now: creation-first.
2376 mdd_write_lock(env, son);
2377 rc = __mdd_object_create(env, son, ma, handle);
2379 mdd_write_unlock(env, son);
2385 rc = __mdd_object_initialize(env, mdo2fid(mdd_pobj),
2387 mdd_write_unlock(env, son);
2390 * Object has no links, so it will be destroyed when last
2391 * reference is released. (XXX not now.)
2395 rc = __mdd_index_insert(env, mdd_pobj, mdo2fid(son),
2396 name, S_ISDIR(attr->la_mode), handle);
2402 /* replay creates has objects already */
2403 if (spec->u.sp_ea.no_lov_create) {
2404 CDEBUG(D_INFO, "we already have lov ea\n");
2405 rc = mdd_lov_set_md(env, mdd_pobj, son,
2406 (struct lov_mds_md *)spec->u.sp_ea.eadata,
2407 spec->u.sp_ea.eadatalen, handle, 0);
2409 rc = mdd_lov_set_md(env, mdd_pobj, son, lmm,
2410 lmm_size, handle, 0);
2412 CERROR("error on stripe info copy %d \n", rc);
/* symlink target is written directly into the object body */
2416 if (S_ISLNK(attr->la_mode)) {
2417 struct dt_object *dt = mdd_object_child(son);
2418 const char *target_name = spec->u.sp_symname;
2419 int sym_len = strlen(target_name);
2422 rc = dt->do_body_ops->dbo_write(env, dt,
2423 mdd_buf_get_const(env,
2433 *la_copy = ma->ma_attr;
2434 la_copy->la_valid = LA_CTIME | LA_MTIME;
2435 rc = mdd_attr_set_internal(env, mdd_pobj, la_copy, handle);
2439 /* return attr back */
2440 rc = mdd_attr_get_internal_locked(env, son, ma);
/* error rollback: remove the index entry (if inserted) and drop the
 * created object's reference */
2442 if (rc && created) {
2446 rc2 = __mdd_index_delete(env, mdd_pobj, name,
2447 S_ISDIR(attr->la_mode),
2450 CERROR("error can not cleanup destroy %d\n",
2454 __mdd_ref_del(env, son, handle);
2456 /* finish mdd_lov_create() stuff */
2457 mdd_lov_create_finish(env, mdd, rc);
2459 OBD_FREE(lmm, lmm_size);
2460 mdd_write_unlock(env, mdd_pobj);
2461 mdd_trans_stop(env, mdd, rc, handle);
2465 /* partial operation */
/* Object-create sanity check: the object must not exist yet and the
 * requested mode must be a valid file type (switch body elided). */
2466 static int mdd_oc_sanity_check(const struct lu_env *env,
2467 struct mdd_object *obj,
2474 if (lu_object_exists(&obj->mod_obj.mo_lu))
2477 switch (ma->ma_attr.la_mode & S_IFMT) {
/* Partial create used for cross-MDS operations: create the local object
 * without any namespace insertion.  For CMD slave objects, the LMV EA
 * from @spec is stored and the parent fid is taken from the EA. */
2494 static int mdd_object_create(const struct lu_env *env,
2495 struct md_object *obj,
2496 const struct md_create_spec *spec,
2500 struct mdd_device *mdd = mdo2mdd(obj);
2501 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2502 struct thandle *handle;
2503 const struct lu_fid *pfid = spec->u.sp_pfid;
2507 rc = mdd_oc_sanity_check(env, mdd_obj, ma);
2511 mdd_txn_param_build(env, MDD_TXN_OBJECT_CREATE_OP);
2512 handle = mdd_trans_start(env, mdd);
2514 RETURN(PTR_ERR(handle));
2516 mdd_write_lock(env, mdd_obj);
2517 rc = __mdd_object_create(env, mdd_obj, ma, handle);
2518 if (rc == 0 && spec->sp_cr_flags & MDS_CREATE_SLAVE_OBJ) {
2519 /* if creating the slave object, set slave EA here */
2520 rc = __mdd_xattr_set(env, mdd_obj,
2521 mdd_buf_get_const(env,
2522 spec->u.sp_ea.eadata,
2523 spec->u.sp_ea.eadatalen),
2524 MDS_LMV_MD_NAME, 0, handle);
2525 pfid = spec->u.sp_ea.fid;
2526 CWARN("set slave ea "DFID" eadatalen %d rc %d \n",
2527 PFID(mdo2fid(mdd_obj)), spec->u.sp_ea.eadatalen, rc);
2531 rc = __mdd_object_initialize(env, pfid, mdd_obj, ma, handle);
2532 mdd_write_unlock(env, mdd_obj);
2535 rc = mdd_attr_get_internal_locked(env, mdd_obj, ma);
2537 mdd_trans_stop(env, mdd, rc, handle);
2542 * Partial operation. Be aware, this is called with write lock taken, so we use
2543 * locksless version of __mdd_lookup() here.
/* Name-insert precondition: parent alive and @name not present yet. */
2545 static int mdd_ni_sanity_check(const struct lu_env *env,
2546 struct md_object *pobj,
2548 const struct lu_fid *fid)
2550 struct mdd_object *obj = md2mdd_obj(pobj);
2555 if (mdd_is_dead_obj(obj))
2558 rc = __mdd_lookup(env, pobj, name, fid, MAY_WRITE | MAY_EXEC);
2560 RETURN(rc ? : -EEXIST);
/* Partial operation: insert (name -> fid) into @pobj for an object that
 * lives on another MDS.  Transactional; parent write-locked. */
2565 static int mdd_name_insert(const struct lu_env *env,
2566 struct md_object *pobj,
2567 const char *name, const struct lu_fid *fid,
2570 struct mdd_object *mdd_obj = md2mdd_obj(pobj);
2571 struct thandle *handle;
2575 mdd_txn_param_build(env, MDD_TXN_INDEX_INSERT_OP);
2576 handle = mdd_trans_start(env, mdo2mdd(pobj));
2578 RETURN(PTR_ERR(handle));
2580 mdd_write_lock(env, mdd_obj);
2581 rc = mdd_ni_sanity_check(env, pobj, name, fid);
2583 GOTO(out_unlock, rc);
2585 rc = __mdd_index_insert(env, mdd_obj, fid, name, isdir, handle);
2588 mdd_write_unlock(env, mdd_obj);
2590 mdd_trans_stop(env, mdo2mdd(pobj), rc, handle);
2595 * Be aware, this is called with write lock taken, so we use locksless version
2596 * of __mdd_lookup() here.
/* Name-remove precondition: parent alive and @name resolvable (the
 * lookup also verifies MAY_WRITE|MAY_EXEC on the parent). */
2598 static int mdd_nr_sanity_check(const struct lu_env *env,
2599 struct md_object *pobj,
2602 struct mdd_thread_info *info = mdd_env_info(env);
2603 struct lu_fid *fid = &info->mti_fid;
2604 struct mdd_object *obj = md2mdd_obj(pobj);
2609 if (mdd_is_dead_obj(obj))
2612 rc = __mdd_lookup(env, pobj, name, fid, MAY_WRITE | MAY_EXEC);
/* Partial operation: remove @name from @pobj without touching the
 * (possibly remote) object it refers to.  Mirror of mdd_name_insert. */
2616 static int mdd_name_remove(const struct lu_env *env,
2617 struct md_object *pobj,
2618 const char *name, int is_dir)
2620 struct mdd_device *mdd = mdo2mdd(pobj);
2621 struct mdd_object *mdd_obj = md2mdd_obj(pobj);
2622 struct thandle *handle;
2626 mdd_txn_param_build(env, MDD_TXN_INDEX_DELETE_OP);
2627 handle = mdd_trans_start(env, mdd);
2629 RETURN(PTR_ERR(handle));
2631 mdd_write_lock(env, mdd_obj);
2632 rc = mdd_nr_sanity_check(env, pobj, name);
2634 GOTO(out_unlock, rc);
2636 rc = __mdd_index_delete(env, mdd_obj, name, is_dir, handle);
2639 mdd_write_unlock(env, mdd_obj);
2641 mdd_trans_stop(env, mdd, rc, handle);
2645 static int mdd_rt_sanity_check(const struct lu_env *env,
2646 struct mdd_object *tgt_pobj,
2647 struct mdd_object *tobj,
2648 const struct lu_fid *sfid,
2649 const char *name, struct md_attr *ma)
2651 struct mdd_device *mdd = mdo2mdd(&tgt_pobj->mod_obj);
2656 if (mdd_is_dead_obj(tgt_pobj))
2659 src_is_dir = S_ISDIR(ma->ma_attr.la_mode);
2661 rc = mdd_may_delete(env, tgt_pobj, tobj, src_is_dir, 1);
2662 if (!rc && S_ISDIR(mdd_object_type(tobj)) &&
2663 mdd_dir_is_empty(env, tobj))
2666 rc = mdd_may_create(env, tgt_pobj, NULL, 1);
2669 /* source should not be ancestor of target dir */
2670 if (!rc &&& src_is_dir && mdd_is_parent(env, mdd, tgt_pobj, sfid, NULL))
/* Rename-target half of a cross-MDS rename: re-point @name in @pobj at
 * fid @lf, dropping one ref from a replaced @tobj.  No parent nlink
 * adjustment: the entry is re-inserted, not added/removed.
 * NOTE(review): both mdd_lock2 (2694) and mdd_write_lock (2696) appear;
 * in the full source these are presumably alternate #ifdef/#if branches
 * (matching the dual unlock at 2717/2719) -- confirm. */
2676 static int mdd_rename_tgt(const struct lu_env *env,
2677 struct md_object *pobj, struct md_object *tobj,
2678 const struct lu_fid *lf, const char *name,
2681 struct mdd_device *mdd = mdo2mdd(pobj);
2682 struct mdd_object *mdd_tpobj = md2mdd_obj(pobj);
2683 struct mdd_object *mdd_tobj = md2mdd_obj(tobj);
2684 struct thandle *handle;
2688 mdd_txn_param_build(env, MDD_TXN_RENAME_TGT_OP);
2689 handle = mdd_trans_start(env, mdd);
2691 RETURN(PTR_ERR(handle));
2694 mdd_lock2(env, mdd_tpobj, mdd_tobj);
2696 mdd_write_lock(env, mdd_tpobj);
2698 /*TODO rename sanity checking*/
2699 rc = mdd_rt_sanity_check(env, mdd_tpobj, mdd_tobj, lf, name, ma);
2703 /* if rename_tgt is called then we should just re-insert name with
2704 * correct fid, no need to dec/inc parent nlink if obj is dir */
2705 rc = __mdd_index_delete(env, mdd_tpobj, name, 0, handle);
2709 rc = __mdd_index_insert_only(env, mdd_tpobj, lf, name, handle);
2713 if (tobj && lu_object_exists(&tobj->mo_lu))
2714 __mdd_ref_del(env, mdd_tobj, handle);
2717 mdd_unlock2(env, mdd_tpobj, mdd_tobj);
2719 mdd_write_unlock(env, mdd_tpobj);
2720 mdd_trans_stop(env, mdd, rc, handle);
2725 * No permission check is needed.
/* Return the filesystem root fid cached on the mdd device. */
2727 static int mdd_root_get(const struct lu_env *env,
2728 struct md_device *m, struct lu_fid *f)
2730 struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
2733 *f = mdd->mdd_root_fid;
2738 * No permission check is needed.
/* Pass-through statfs to the underlying dt (OSD) device. */
2740 static int mdd_statfs(const struct lu_env *env, struct md_device *m,
2741 struct kstatfs *sfs)
2743 struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
2748 rc = mdd_child_ops(mdd)->dt_statfs(env, mdd->mdd_child, sfs);
2754 * No permission check is needed.
/* Report maximum LOV EA size and llog cookie size to the caller. */
2756 static int mdd_maxsize_get(const struct lu_env *env, struct md_device *m,
2757 int *md_size, int *cookie_size)
2759 struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
2762 *md_size = mdd_lov_mdsize(env, mdd);
2763 *cookie_size = mdd_lov_cookiesize(env, mdd);
/* Stash the capability keys pointer on the MDS obd for later use. */
2768 static int mdd_init_capa_keys(struct md_device *m,
2769 struct lustre_capa_key *keys)
2771 struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
2772 struct mds_obd *mds = &mdd2obd_dev(mdd)->u.mds;
2775 mds->mds_capa_keys = keys;
/* Propagate a new capability key to the OSTs via the LOV export. */
2779 static int mdd_update_capa_key(const struct lu_env *env,
2780 struct md_device *m,
2781 struct lustre_capa_key *key)
2783 struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
2784 struct obd_export *lov_exp = mdd2obd_dev(mdd)->u.mds.mds_osc_exp;
2788 rc = obd_set_info_async(lov_exp, strlen(KEY_CAPA_KEY), KEY_CAPA_KEY,
2789 sizeof(*key), key, NULL);
/* Increment @obj's link count via the underlying dt_object.  Object
 * must exist; caller holds the write lock and transaction. */
2793 static void __mdd_ref_add(const struct lu_env *env, struct mdd_object *obj,
2794 struct thandle *handle)
2796 struct dt_object *next;
2798 LASSERT(lu_object_exists(mdd2lu_obj(obj)));
2799 next = mdd_object_child(obj);
2800 next->do_ops->do_ref_add(env, next, handle);
2804 * XXX: if permission check is needed here?
/* Partial operation: bump @obj's nlink in its own transaction.
 * NOTE(review): no IS_ERR(handle) check visible after trans_start;
 * presumably on an elided line (2816-2818) -- confirm. */
2806 static int mdd_ref_add(const struct lu_env *env,
2807 struct md_object *obj)
2809 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2810 struct mdd_device *mdd = mdo2mdd(obj);
2811 struct thandle *handle;
2814 mdd_txn_param_build(env, MDD_TXN_XATTR_SET_OP);
2815 handle = mdd_trans_start(env, mdd);
2819 mdd_write_lock(env, mdd_obj);
2820 __mdd_ref_add(env, mdd_obj, handle);
2821 mdd_write_unlock(env, mdd_obj);
2823 mdd_trans_stop(env, mdd, 0, handle);
/* Decrement @obj's link count via the underlying dt_object; counterpart
 * of __mdd_ref_add(). */
2829 __mdd_ref_del(const struct lu_env *env, struct mdd_object *obj,
2830 struct thandle *handle)
2832 struct dt_object *next = mdd_object_child(obj);
2834 LASSERT(lu_object_exists(mdd2lu_obj(obj)));
2836 next->do_ops->do_ref_del(env, next, handle);
2839 /* do NOT or the MAY_*'s, you'll get the weakest */
/* Translate MDS open flags into a MAY_* access mask.
 * NOTE(review): `inode` and `current` are not in scope in this function
 * (the parameter is mdd_obj) -- this line cannot compile as shown; in
 * the full source it is likely inside #if 0 or uses the object's
 * attributes.  Needs the object's uid (via env) to implement the
 * owner-override correctly -- confirm against the full source. */
2840 static int accmode(struct mdd_object *mdd_obj, int flags)
2845 /* Sadly, NFSD reopens a file repeatedly during operation, so the
2846 * "acc_mode = 0" allowance for newly-created files isn't honoured.
2847 * NFSD uses the MDS_OPEN_OWNEROVERRIDE flag to say that a file
2848 * owner can write to a file even if it is marked readonly to hide
2849 * its brokenness. (bug 5781) */
2850 if (flags & MDS_OPEN_OWNEROVERRIDE && inode->i_uid == current->fsuid)
2853 if (flags & FMODE_READ)
2855 if (flags & (FMODE_WRITE | MDS_OPEN_TRUNC | MDS_OPEN_APPEND))
2857 if (flags & MDS_FMODE_EXEC)
/* Validate an open request: object alive, not a symlink, directories
 * not writable, permission per accmode(), with the usual exceptions for
 * special files, append-only objects, and O_NOATIME ownership. */
2862 static int mdd_open_sanity_check(const struct lu_env *env,
2863 struct mdd_object *obj, int flag)
2865 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
2866 int mode = accmode(obj, flag);
2871 if (mdd_is_dead_obj(obj))
2874 rc = __mdd_la_get(env, obj, tmp_la);
2878 if (S_ISLNK(tmp_la->la_mode))
2881 if (S_ISDIR(tmp_la->la_mode) && (mode & MAY_WRITE))
/* MDS_OPEN_CREATED means permission was already checked at create */
2884 if (!(flag & MDS_OPEN_CREATED)) {
2885 rc = __mdd_permission_internal(env, obj, mode, 0);
2891 * FIFO's, sockets and device files are special: they don't
2892 * actually live on the filesystem itself, and as such you
2893 * can write to them even if the filesystem is read-only.
2895 if (S_ISFIFO(tmp_la->la_mode) || S_ISSOCK(tmp_la->la_mode) ||
2896 S_ISBLK(tmp_la->la_mode) || S_ISCHR(tmp_la->la_mode))
2900 * An append-only file must be opened in append mode for writing.
2902 if (mdd_is_append(obj)) {
2903 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
2909 /* O_NOATIME can only be set by the owner or superuser */
2910 if (flag & O_NOATIME) {
2911 struct md_ucred *uc = md_ucred(env);
2913 if (uc->mu_fsuid != tmp_la->la_uid &&
2914 !mdd_capable(uc, CAP_FOWNER))
/*
 * md_object_operations::moo_open implementation: run the open sanity
 * checks and, on success, bump the object's open count — all under the
 * object write lock so the count and checks are atomic.
 */
2921 static int mdd_open(const struct lu_env *env, struct md_object *obj,
2924 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2927 mdd_write_lock(env, mdd_obj);
2929 rc = mdd_open_sanity_check(env, mdd_obj, flags);
2931 mdd_obj->mod_count ++;
2933 mdd_write_unlock(env, mdd_obj);
/*
 * md_object_operations::moo_close implementation.
2938 * No permission check is needed.
 *
 * Drops one open reference; when this was the last opener and the file
 * has no remaining links, the on-disk object is destroyed.
 */
2940 static int mdd_close(const struct lu_env *env, struct md_object *obj,
2944 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2947 mdd_write_lock(env, mdd_obj);
2948 /* release open count */
2949 mdd_obj->mod_count --;
2951 rc = __mdd_iattr_get(env, mdd_obj, ma);
/* last close of an unlinked file: reap the orphan object now */
2952 if (rc == 0 && mdd_obj->mod_count == 0) {
2953 if (ma->ma_attr.la_nlink == 0)
2954 rc = __mdd_object_kill(env, mdd_obj, ma);
2956 mdd_write_unlock(env, mdd_obj);
/*
 * Check that @obj is a directory usable as one by the dt layer and that
 * the caller has read permission on it; the non-directory error path is
 * on lines elided from this excerpt.
 */
2960 static int mdd_readpage_sanity_check(const struct lu_env *env,
2961 struct mdd_object *obj)
2963 struct dt_object *next = mdd_object_child(obj);
2967 if (S_ISDIR(mdd_object_type(obj)) &&
2968 dt_try_as_dir(env, next))
2969 rc = mdd_permission_internal(env, obj, MAY_READ);
/*
 * md_object_operations::moo_readpage implementation: after the sanity
 * check, delegate directory page reading to the underlying dt_object,
 * holding the object read lock for the duration.
 */
2976 static int mdd_readpage(const struct lu_env *env, struct md_object *obj,
2977 const struct lu_rdpg *rdpg)
2979 struct dt_object *next;
2980 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2984 LASSERT(lu_object_exists(mdd2lu_obj(mdd_obj)));
2985 next = mdd_object_child(mdd_obj);
2987 mdd_read_lock(env, mdd_obj);
2988 rc = mdd_readpage_sanity_check(env, mdd_obj);
2990 GOTO(out_unlock, rc);
2992 rc = next->do_ops->do_readpage(env, next, rdpg);
2995 mdd_read_unlock(env, mdd_obj);
2999 #ifdef CONFIG_FS_POSIX_ACL
3000 #include <linux/posix_acl_xattr.h>
3001 #include <linux/posix_acl.h>
/*
 * Check whether credentials @uc are granted @want access by the POSIX
 * ACL stored as @count xattr entries starting at @entry, for an object
 * whose basic attributes are @la.  The structure mirrors the kernel's
 * posix_acl_permission(): scan entries in order, match the caller
 * against user/group entries, then apply any ACL_MASK entry found later
 * in the list.  The switch/case tags are on lines elided from this
 * excerpt.
 */
3003 static int mdd_posix_acl_permission(struct md_ucred *uc, struct lu_attr *la,
3004 int want, posix_acl_xattr_entry *entry,
3007 posix_acl_xattr_entry *pa, *pe, *mask_obj;
3015 pe = &entry[count - 1];
3016 for (; pa <= pe; pa++) {
3019 /* (May have been checked already) */
/* owner entry: only applies when the caller is the file owner */
3020 if (la->la_uid == uc->mu_fsuid)
/* named-user entry matching the caller's fsuid */
3024 if (pa->e_id == uc->mu_fsuid)
/* owning-group entry: caller must be in the file's group */
3028 if (mdd_in_group_p(uc, la->la_gid)) {
3030 if ((pa->e_perm & want) == want)
/* named-group entry the caller belongs to */
3035 if (mdd_in_group_p(uc, pa->e_id)) {
3037 if ((pa->e_perm & want) == want)
/* a matching user/group entry is filtered through ACL_MASK, if any */
3055 for (mask_obj = pa + 1; mask_obj <= pe; mask_obj++) {
3056 if (mask_obj->e_tag == ACL_MASK) {
3057 if ((pa->e_perm & mask_obj->e_perm & want) == want)
/* no mask entry present: the matched entry's bits decide directly */
3065 if ((pa->e_perm & want) == want)
/*
 * Fetch @obj's access ACL xattr and evaluate it against @mask for the
 * current credentials.  Without CONFIG_FS_POSIX_ACL this compiles to the
 * stub on elided lines.  Returns the ACL decision, or an error when the
 * xattr cannot be read.
 */
3072 static int mdd_check_acl(const struct lu_env *env, struct mdd_object *obj,
3073 struct lu_attr* la, int mask)
3075 #ifdef CONFIG_FS_POSIX_ACL
3076 struct dt_object *next;
3077 struct lu_buf *buf = &mdd_env_info(env)->mti_buf;
3078 struct md_ucred *uc = md_ucred(env);
3079 posix_acl_xattr_entry *entry;
3084 next = mdd_object_child(obj);
/* reuse the per-thread xattr buffer to avoid allocation */
3086 buf->lb_buf = mdd_env_info(env)->mti_xattr_buf;
3087 buf->lb_len = sizeof(mdd_env_info(env)->mti_xattr_buf);
3088 rc = next->do_ops->do_xattr_get(env, next, buf,
3089 XATTR_NAME_ACL_ACCESS);
3091 RETURN(rc ? : -EACCES);
3093 entry = ((posix_acl_xattr_header *)(buf->lb_buf))->a_entries;
/* NOTE(review): magic 4 is presumably sizeof(posix_acl_xattr_header)
 * (the a_version field) subtracted from the xattr size in rc — better
 * spelled sizeof(posix_acl_xattr_header); confirm. */
3094 entry_count = (rc - 4) / sizeof(posix_acl_xattr_entry);
3096 rc = mdd_posix_acl_permission(uc, la, mask, entry, entry_count);
/*
 * Lightweight execute-permission check on @obj: pick the owner/group/
 * other mode bits that apply to the caller and test MAY_EXEC, with the
 * usual CAP_DAC_OVERRIDE / CAP_DAC_READ_SEARCH capability overrides.
 * ACLs are not consulted here (hence "lite").
 */
3104 static int mdd_exec_permission_lite(const struct lu_env *env,
3105 struct mdd_object *obj)
3107 struct lu_attr *la = &mdd_env_info(env)->mti_la;
3108 struct md_ucred *uc = md_ucred(env);
3113 /* These means unnecessary for permission check */
3114 if ((uc == NULL) || (uc->mu_valid == UCRED_INIT))
3117 /* Invalid user credit */
3118 if (uc->mu_valid == UCRED_INVALID)
3121 rc = __mdd_la_get(env, obj, la);
/* select which mode triplet (owner/group/other) applies to the caller */
3126 if (uc->mu_fsuid == la->la_uid)
3128 else if (mdd_in_group_p(uc, la->la_gid))
3131 if (mode & MAY_EXEC)
/* CAP_DAC_OVERRIDE allows exec if any exec bit is set, or on a dir */
3134 if (((la->la_mode & S_IXUGO) || S_ISDIR(la->la_mode)) &&
3135 mdd_capable(uc, CAP_DAC_OVERRIDE))
/* CAP_DAC_READ_SEARCH allows searching any directory */
3138 if (S_ISDIR(la->la_mode) && mdd_capable(uc, CAP_DAC_READ_SEARCH))
/*
 * Full DAC permission check of @mask on @obj, structurally similar to
 * the kernel's generic_permission(): choose the applicable mode bits
 * (owner / ACL / group / other), then fall back to capability checks.
 * @getattr appears to control whether attributes are (re)fetched —
 * the branch using it is on elided lines; confirm.
 */
3144 static int __mdd_permission_internal(const struct lu_env *env,
3145 struct mdd_object *obj,
3146 int mask, int getattr)
3148 struct lu_attr *la = &mdd_env_info(env)->mti_la;
3149 struct md_ucred *uc = md_ucred(env);
3158 /* These means unnecessary for permission check */
3159 if ((uc == NULL) || (uc->mu_valid == UCRED_INIT))
3162 /* Invalid user credit */
3163 if (uc->mu_valid == UCRED_INVALID)
3167 * Nobody gets write access to an immutable file.
3169 if ((mask & MAY_WRITE) && mdd_is_immutable(obj))
3173 rc = __mdd_la_get(env, obj, la);
/* owner: use the user triplet (shift is on an elided line) */
3179 if (uc->mu_fsuid == la->la_uid) {
/* group bits present: try the ACL before the owning-group triplet */
3182 if (mode & S_IRWXG) {
3183 if (((mode >> 3) & mask & S_IRWXO) != mask)
3186 rc = mdd_check_acl(env, obj, la, mask);
3188 goto check_capabilities;
/* -EAGAIN / -EOPNOTSUPP mean "no ACL"; fall through to mode bits */
3189 else if ((rc != -EAGAIN) && (rc != -EOPNOTSUPP))
3194 if (mdd_in_group_p(uc, la->la_gid))
3199 * If the DACs are ok we don't need any capability check.
3201 if (((mode & mask & S_IRWXO) == mask))
3207 * Read/write DACs are always overridable.
3208 * Executable DACs are overridable if at least one exec bit is set.
3209 * Dir's DACs are always overridable.
3211 if (!(mask & MAY_EXEC) ||
3212 (la->la_mode & S_IXUGO) || S_ISDIR(la->la_mode))
3213 if (mdd_capable(uc, CAP_DAC_OVERRIDE))
3217 * Searching includes executable on directories, else just read.
3219 if ((mask == MAY_READ) ||
3220 (S_ISDIR(la->la_mode) && !(mask & MAY_WRITE)))
3221 if (mdd_capable(uc, CAP_DAC_READ_SEARCH))
/*
 * Same as mdd_permission_internal() but takes the object read lock
 * around the check so the attributes cannot change underneath it.
 */
3227 static inline int mdd_permission_internal_locked(const struct lu_env *env,
3228 struct mdd_object *obj,
3233 mdd_read_lock(env, obj);
3234 rc = mdd_permission_internal(env, obj, mask);
3235 mdd_read_unlock(env, obj);
/*
 * md_object_operations::moo_permission entry point — thin wrapper over
 * the locked internal permission check.
 */
3240 static int mdd_permission(const struct lu_env *env, struct md_object *obj,
3243 struct mdd_object *mdd_obj = md2mdd_obj(obj);
3247 rc = mdd_permission_internal_locked(env, mdd_obj, mask);
/*
 * md_object_operations::moo_capa_get implementation: fill in @capa for
 * @obj, reusing a cached capability when one is found, otherwise signing
 * a fresh one with the current capability key.
 */
3252 static int mdd_capa_get(const struct lu_env *env, struct md_object *obj,
3253 struct lustre_capa *capa)
3255 struct mdd_object *mdd_obj = md2mdd_obj(obj);
3256 struct mdd_device *mdd = mdo2mdd(obj);
3257 struct lu_site *ls = mdd->mdd_md_dev.md_lu_dev.ld_site;
/* NOTE(review): hardcoded index [1] — presumably the "current" of two
 * rotating capa keys; confirm against ls_capa_keys management. */
3258 struct lustre_capa_key *key = &ls->ls_capa_keys[1];
3259 struct obd_capa *ocapa;
3263 LASSERT(lu_object_exists(mdd2lu_obj(mdd_obj)));
3265 capa->lc_fid = *mdo2fid(mdd_obj);
3266 if (ls->ls_capa_timeout < CAPA_TIMEOUT)
3267 capa->lc_flags |= CAPA_FL_SHORT_EXPIRY;
3268 if (lu_fid_eq(&capa->lc_fid, &mdd->mdd_root_fid))
3269 capa->lc_flags |= CAPA_FL_ROOT;
/* NOTE(review): BUG? this plain assignment overwrites the
 * CAPA_FL_SHORT_EXPIRY / CAPA_FL_ROOT bits OR'd in just above.
 * Either this should be "|=", or the alg must be assigned before the
 * flag bits are set. */
3270 capa->lc_flags = ls->ls_capa_alg << 24;
3272 /* TODO: get right permission here after remote uid landing */
3273 ocapa = capa_lookup(capa);
/* cache hit: reuse the still-valid cached capability */
3275 LASSERT(!capa_is_expired(ocapa));
3276 capa_cpy(capa, ocapa);
/* cache miss: stamp key id/expiry and HMAC-sign the capability */
3281 capa->lc_keyid = key->lk_keyid;
3282 capa->lc_expiry = CURRENT_SECONDS + ls->ls_capa_timeout;
3283 rc = capa_hmac(capa->lc_hmac, capa, key->lk_key);
/* Device-level metadata operations exported by the MDD layer. */
3291 struct md_device_operations mdd_ops = {
3292 .mdo_statfs = mdd_statfs,
3293 .mdo_root_get = mdd_root_get,
3294 .mdo_maxsize_get = mdd_maxsize_get,
3295 .mdo_init_capa_keys = mdd_init_capa_keys,
3296 .mdo_update_capa_key= mdd_update_capa_key,
/* Directory (namespace) operations: lookup, create, rename, link, etc. */
3299 static struct md_dir_operations mdd_dir_ops = {
3300 .mdo_is_subdir = mdd_is_subdir,
3301 .mdo_lookup = mdd_lookup,
3302 .mdo_create = mdd_create,
3303 .mdo_rename = mdd_rename,
3304 .mdo_link = mdd_link,
3305 .mdo_unlink = mdd_unlink,
3306 .mdo_name_insert = mdd_name_insert,
3307 .mdo_name_remove = mdd_name_remove,
3308 .mdo_rename_tgt = mdd_rename_tgt,
3309 .mdo_create_data = mdd_create_data
/* Per-object operations: attributes, xattrs, open/close, readpage, capa. */
3312 static struct md_object_operations mdd_obj_ops = {
3313 .moo_permission = mdd_permission,
3314 .moo_attr_get = mdd_attr_get,
3315 .moo_attr_set = mdd_attr_set,
3316 .moo_xattr_get = mdd_xattr_get,
3317 .moo_xattr_set = mdd_xattr_set,
3318 .moo_xattr_list = mdd_xattr_list,
3319 .moo_xattr_del = mdd_xattr_del,
3320 .moo_object_create = mdd_object_create,
3321 .moo_ref_add = mdd_ref_add,
3322 .moo_ref_del = mdd_ref_del,
3323 .moo_open = mdd_open,
3324 .moo_close = mdd_close,
3325 .moo_readpage = mdd_readpage,
3326 .moo_readlink = mdd_readlink,
3327 .moo_capa_get = mdd_capa_get
/* Minimal obd_ops: only module ownership, required by class_register_type(). */
3330 static struct obd_ops mdd_obd_device_ops = {
3331 .o_owner = THIS_MODULE
/*
 * lu_device_type_operations::ldto_device_alloc implementation: allocate
 * and initialize an mdd_device, returning its embedded lu_device (or an
 * ERR_PTR on allocation failure; the allocation itself is on an elided
 * line).
 */
3334 static struct lu_device *mdd_device_alloc(const struct lu_env *env,
3335 struct lu_device_type *t,
3336 struct lustre_cfg *lcfg)
3338 struct lu_device *l;
3339 struct mdd_device *m;
3343 l = ERR_PTR(-ENOMEM);
3345 md_device_init(&m->mdd_md_dev, t);
3347 l->ld_ops = &mdd_lu_ops;
3348 m->mdd_md_dev.md_ops = &mdd_ops;
/*
 * Counterpart of mdd_device_alloc(): finalize and free the mdd_device.
 * The refcount assertion guards against freeing a device still in use.
 */
3354 static void mdd_device_free(const struct lu_env *env,
3355 struct lu_device *lu)
3357 struct mdd_device *m = lu2mdd_dev(lu);
3359 LASSERT(atomic_read(&lu->ld_ref) == 0);
3360 md_device_fini(&m->mdd_md_dev);
/*
 * lu_context_key init/fini pair for per-session user credentials: the
 * init allocates a struct md_ucred (allocation on an elided line,
 * ERR_PTR(-ENOMEM) on failure), the fini frees it.
 */
3364 static void *mdd_ucred_key_init(const struct lu_context *ctx,
3365 struct lu_context_key *key)
3367 struct md_ucred *uc;
3371 uc = ERR_PTR(-ENOMEM);
3375 static void mdd_ucred_key_fini(const struct lu_context *ctx,
3376 struct lu_context_key *key, void *data)
3378 struct md_ucred *uc = data;
/* Session-scoped context key holding the caller's credentials. */
3382 static struct lu_context_key mdd_ucred_key = {
3383 .lct_tags = LCT_SESSION,
3384 .lct_init = mdd_ucred_key_init,
3385 .lct_fini = mdd_ucred_key_fini
/*
 * Retrieve the md_ucred stored in @env's session context.  Exported so
 * other MD-layer modules can access the same credentials.
 */
3388 struct md_ucred *md_ucred(const struct lu_env *env)
3390 LASSERT(env->le_ses != NULL);
3391 return lu_context_key_get(env->le_ses, &mdd_ucred_key);
3393 EXPORT_SYMBOL(md_ucred);
/*
 * Register/deregister the MDD context keys with the lu framework at
 * device-type init/fini time.  Fini degisters in reverse order of init.
 */
3395 static int mdd_type_init(struct lu_device_type *t)
3399 result = lu_context_key_register(&mdd_thread_key);
3401 result = lu_context_key_register(&mdd_ucred_key);
3405 static void mdd_type_fini(struct lu_device_type *t)
3407 lu_context_key_degister(&mdd_ucred_key);
3408 lu_context_key_degister(&mdd_thread_key);
/* lu_device_type plumbing: lifecycle callbacks and the type descriptor. */
3411 static struct lu_device_type_operations mdd_device_type_ops = {
3412 .ldto_init = mdd_type_init,
3413 .ldto_fini = mdd_type_fini,
3415 .ldto_device_alloc = mdd_device_alloc,
3416 .ldto_device_free = mdd_device_free,
3418 .ldto_device_init = mdd_device_init,
3419 .ldto_device_fini = mdd_device_fini
3422 static struct lu_device_type mdd_device_type = {
3423 .ldt_tags = LU_DEVICE_MD,
3424 .ldt_name = LUSTRE_MDD_NAME,
3425 .ldt_ops = &mdd_device_type_ops,
3426 .ldt_ctx_tags = LCT_MD_THREAD
/*
 * Per-thread context key: allocates/frees the scratch mdd_thread_info
 * (mti_la, mti_buf, mti_xattr_buf, ...) used throughout this file to
 * avoid stack/heap allocation on hot paths.
 */
3429 static void *mdd_key_init(const struct lu_context *ctx,
3430 struct lu_context_key *key)
3432 struct mdd_thread_info *info;
3434 OBD_ALLOC_PTR(info);
3436 info = ERR_PTR(-ENOMEM);
3440 static void mdd_key_fini(const struct lu_context *ctx,
3441 struct lu_context_key *key, void *data)
3443 struct mdd_thread_info *info = data;
3447 static struct lu_context_key mdd_thread_key = {
3448 .lct_tags = LCT_MD_THREAD,
3449 .lct_init = mdd_key_init,
3450 .lct_fini = mdd_key_fini
/* /proc variable tables for the MDD obd and module; any entries (and the
 * terminating sentinels) are on lines elided from this excerpt. */
3453 struct lprocfs_vars lprocfs_mdd_obd_vars[] = {
3457 struct lprocfs_vars lprocfs_mdd_module_vars[] = {
3461 LPROCFS_INIT_VARS(mdd, lprocfs_mdd_module_vars, lprocfs_mdd_obd_vars);
/*
 * Module entry/exit: register the "mdd" obd type (with its lprocfs vars
 * and lu device type) on load, and unregister it on unload.
 */
3463 static int __init mdd_mod_init(void)
3465 struct lprocfs_static_vars lvars;
3466 printk(KERN_INFO "Lustre: MetaData Device; info@clusterfs.com\n");
3467 lprocfs_init_vars(mdd, &lvars);
3468 return class_register_type(&mdd_obd_device_ops, NULL, lvars.module_vars,
3469 LUSTRE_MDD_NAME, &mdd_device_type);
3472 static void __exit mdd_mod_exit(void)
3474 class_unregister_type(LUSTRE_MDD_NAME);
/* Standard kernel module metadata and CFS module registration macro. */
3477 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
3478 MODULE_DESCRIPTION("Lustre Meta-data Device Prototype ("LUSTRE_MDD_NAME")");
3479 MODULE_LICENSE("GPL");
3481 cfs_module(mdd, "0.1.0", mdd_mod_init, mdd_mod_exit);