1 /* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
5 * Lustre Metadata Server (mdd) routines
7 * Copyright (C) 2006 Cluster File Systems, Inc.
8 * Author: fangyong@clusterfs.com
10 * This file is part of the Lustre file system, http://www.lustre.org
11 * Lustre is a trademark of Cluster File Systems, Inc.
13 * You may have signed or agreed to another license before downloading
14 * this software. If so, you are bound by the terms and conditions
15 * of that agreement, and the following does not apply to you. See the
16 * LICENSE file included with this distribution for more information.
18 * If you did not agree to a different license, then this copy of Lustre
19 * is open source software; you can redistribute it and/or modify it
20 * under the terms of version 2 of the GNU General Public License as
21 * published by the Free Software Foundation.
23 * In either case, Lustre is distributed in the hope that it will be
24 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
25 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * license text for more details.
29 # define EXPORT_SYMTAB
31 #define DEBUG_SUBSYSTEM S_MDS
33 #include <linux/module.h>
34 #include <linux/jbd.h>
36 #include <obd_class.h>
37 #include <lustre_ver.h>
38 #include <obd_support.h>
39 #include <lprocfs_status.h>
41 #include <linux/ldiskfs_fs.h>
42 #include <lustre_mds.h>
43 #include <lustre/lustre_idl.h>
45 #ifdef CONFIG_FS_POSIX_ACL
46 # include <linux/posix_acl_xattr.h>
47 # include <linux/posix_acl.h>
50 #include "mdd_internal.h"
/*
 * Helpers for working with a struct group_info:
 *  - mdd_get_group_info() atomically increments the usage counter;
 *  - mdd_put_group_info() atomically decrements the usage counter and calls
 *    groups_free() when the decrement brings it to zero;
 *  - MDD_NGROUPS_PER_BLOCK is the number of gid_t values that fit into
 *    CFS_PAGE_SIZE bytes.
 */
52 #define mdd_get_group_info(group_info) do { \
53 atomic_inc(&(group_info)->usage); \
56 #define mdd_put_group_info(group_info) do { \
57 if (atomic_dec_and_test(&(group_info)->usage)) \
58 groups_free(group_info); \
61 #define MDD_NGROUPS_PER_BLOCK ((int)(CFS_PAGE_SIZE / sizeof(gid_t)))
/*
 * Entry i of a group list: blocks[] is indexed first by block number
 * (i / MDD_NGROUPS_PER_BLOCK) and then by the offset inside that block
 * (i % MDD_NGROUPS_PER_BLOCK).
 */
63 #define MDD_GROUP_AT(gi, i) \
64 ((gi)->blocks[(i) / MDD_NGROUPS_PER_BLOCK][(i) % MDD_NGROUPS_PER_BLOCK])
67 * mdd_groups_search() is copied from groups_search() in the Linux kernel.
70 static int mdd_groups_search(struct group_info *group_info, gid_t grp)
78 right = group_info->ngroups;
79 while (left < right) {
80 int mid = (left + right) / 2;
81 int cmp = grp - MDD_GROUP_AT(group_info, mid);
/*
 * Group-membership test for the credential uc against the gid grp.
 *
 * Work is only done when grp differs from uc->mu_fsgid.  In that case the
 * two mu_suppgids[] slots are compared (for some kinds of credential, see
 * below) and a group_info list is looked up with mdd_groups_search() while
 * a reference on that list is held.
 */
93 int mdd_in_group_p(struct md_ucred *uc, gid_t grp)
97 if (grp != uc->mu_fsgid) {
98 struct group_info *group_info = NULL;
/*
 * mu_suppgids[] is compared only when the credential carries its own
 * group list, has no identity, or is marked UCRED_OLD.
 */
100 if (uc->mu_ginfo || !uc->mu_identity ||
101 uc->mu_valid == UCRED_OLD)
102 if (grp == uc->mu_suppgids[0] ||
103 grp == uc->mu_suppgids[1])
/* Choose the list to search: uc->mu_ginfo, else the identity's mi_ginfo. */
107 group_info = uc->mu_ginfo;
108 else if (uc->mu_identity)
109 group_info = uc->mu_identity->mi_ginfo;
/* Hold a reference on the list for the duration of the lookup. */
114 mdd_get_group_info(group_info);
115 rc = mdd_groups_search(group_info, grp);
116 mdd_put_group_info(group_info);
121 #ifdef CONFIG_FS_POSIX_ACL
/*
 * Convert the e_tag, e_perm and e_id fields of one ACL xattr entry from
 * little-endian to CPU byte order, in place.
 */
122 static inline void mdd_acl_le_to_cpu(posix_acl_xattr_entry *p)
124 p->e_tag = le16_to_cpu(p->e_tag);
125 p->e_perm = le16_to_cpu(p->e_perm);
126 p->e_id = le32_to_cpu(p->e_id);
/*
 * Convert the e_tag, e_perm and e_id fields of one ACL xattr entry from
 * CPU byte order to little-endian, in place (inverse of
 * mdd_acl_le_to_cpu()).
 */
129 static inline void mdd_acl_cpu_to_le(posix_acl_xattr_entry *p)
131 p->e_tag = cpu_to_le16(p->e_tag);
132 p->e_perm = cpu_to_le16(p->e_perm);
133 p->e_id = cpu_to_le32(p->e_id);
137 * Check permission based on POSIX ACL.
/*
 * Walk the ACL entries entry[0..count-1] and decide whether the credential
 * uc may perform the access bits in want on an object whose attributes are
 * la.
 *
 * Each entry is converted to CPU byte order in place as it is visited, so
 * the caller's buffer is modified.
 *
 * NOTE(review): the per-tag dispatch and the return statements are not
 * visible in this view; confirm the exact result codes against the full
 * function.
 */
139 static int mdd_posix_acl_permission(struct md_ucred *uc, struct lu_attr *la,
140 int want, posix_acl_xattr_entry *entry,
143 posix_acl_xattr_entry *pa, *pe, *mask_obj;
/* pe points at the last entry; the loop visits pa = first .. pe. */
150 for (pa = &entry[0], pe = &entry[count - 1]; pa <= pe; pa++) {
151 mdd_acl_le_to_cpu(pa);
154 /* (May have been checked already) */
/* Caller's fsuid is the owner of the object. */
155 if (la->la_uid == uc->mu_fsuid)
/* This entry names the caller's fsuid. */
159 if (pa->e_id == uc->mu_fsuid)
/* Caller belongs to the object's owning group. */
163 if (mdd_in_group_p(uc, la->la_gid)) {
165 if ((pa->e_perm & want) == want)
/* Caller belongs to the group named by this entry. */
170 if (mdd_in_group_p(uc, pa->e_id)) {
172 if ((pa->e_perm & want) == want)
/*
 * Look through the entries after pa for an ACL_MASK entry; when one is
 * found, the permissions of pa are ANDed with the mask before being
 * compared with want.
 */
190 for (mask_obj = pa + 1; mask_obj <= pe; mask_obj++) {
191 mdd_acl_le_to_cpu(mask_obj);
192 if (mask_obj->e_tag == ACL_MASK) {
193 if ((pa->e_perm & mask_obj->e_perm & want) == want)
/* Compare the permission bits of pa alone with want. */
201 if ((pa->e_perm & want) == want)
208 * Get default acl EA only.
209 * Hold read_lock for mdd_obj.
/*
 * Read the XATTR_NAME_ACL_DEFAULT extended attribute of mdd_obj into the
 * ma->ma_acl buffer (capacity ma->ma_acl_size).  The code below stores the
 * result of the read in ma->ma_acl_size and sets MA_ACL_DEF in
 * ma->ma_valid.
 *
 * NOTE(review): the conditions guarding those updates and the return
 * statements are not visible in this view.
 */
211 int mdd_acl_def_get(const struct lu_env *env, struct mdd_object *mdd_obj,
/* The default ACL is already flagged as valid in ma. */
218 if (ma->ma_valid & MA_ACL_DEF)
221 buf = mdd_buf_get(env, ma->ma_acl, ma->ma_acl_size);
222 rc = mdo_xattr_get(env, mdd_obj, buf, XATTR_NAME_ACL_DEFAULT,
/* Record the size reported by mdo_xattr_get() and mark the ACL valid. */
225 ma->ma_acl_size = rc;
226 ma->ma_valid |= MA_ACL_DEF;
/* -EOPNOTSUPP and -ENODATA get their own branch, apart from other errors. */
228 } else if ((rc == -EOPNOTSUPP) || (rc == -ENODATA)) {
235 * Modify the ACL for the chmod.
/*
 * Rewrite the permission bits of the count ACL entries at entry so that
 * they reflect the file mode given in mode.  Entries are converted to CPU
 * byte order, updated, and converted back to little-endian in place.
 *
 * NOTE(review): which entry tag selects each assignment below is not
 * visible in this view.
 */
237 static int mdd_posix_acl_chmod_masq(posix_acl_xattr_entry *entry,
238 __u32 mode, int count)
240 posix_acl_xattr_entry *group_obj = NULL, *mask_obj = NULL, *pa, *pe;
242 for (pa = &entry[0], pe = &entry[count - 1]; pa <= pe; pa++) {
243 mdd_acl_le_to_cpu(pa);
/* Owner (S_IRWXU) bits of mode, shifted down to the low three bits. */
246 pa->e_perm = (mode & S_IRWXU) >> 6;
/* "Other" (S_IRWXO) bits of mode. */
262 pa->e_perm = (mode & S_IRWXO);
268 mdd_acl_cpu_to_le(pa);
/*
 * The group (S_IRWXG) bits of mode are written, already in little-endian
 * form, to the mask entry or to the group entry.
 */
272 mask_obj->e_perm = cpu_to_le16((mode & S_IRWXG) >> 3);
276 group_obj->e_perm = cpu_to_le16((mode & S_IRWXG) >> 3);
283 * Hold write_lock for o.
/*
 * Bring the access ACL of object o in line with a new file mode.
 *
 * The XATTR_NAME_ACL_ACCESS attribute is read into the per-environment
 * mti_xattr_buf scratch buffer, its entries are adjusted by
 * mdd_posix_acl_chmod_masq(), and the result is written back with
 * mdo_xattr_set() under the transaction handle.
 *
 * NOTE(review): error-path returns are not visible in this view.
 */
285 int mdd_acl_chmod(const struct lu_env *env, struct mdd_object *o, __u32 mode,
286 struct thandle *handle)
289 posix_acl_xattr_header *head;
290 posix_acl_xattr_entry *entry;
296 buf = mdd_buf_get(env, mdd_env_info(env)->mti_xattr_buf,
297 sizeof(mdd_env_info(env)->mti_xattr_buf));
299 rc = mdo_xattr_get(env, o, buf, XATTR_NAME_ACL_ACCESS, BYPASS_CAPA);
/* -EOPNOTSUPP and -ENODATA are singled out from other results. */
300 if ((rc == -EOPNOTSUPP) || (rc == -ENODATA))
/* The xattr is a header followed by an array of entries. */
306 head = (posix_acl_xattr_header *)(buf->lb_buf);
307 entry = head->a_entries;
/* Number of entries = (length - version field) / entry size. */
308 entry_count = (buf->lb_len - sizeof(head->a_version)) /
309 sizeof(posix_acl_xattr_entry);
310 if (entry_count <= 0)
313 rc = mdd_posix_acl_chmod_masq(entry, mode, entry_count);
317 rc = mdo_xattr_set(env, o, buf, XATTR_NAME_ACL_ACCESS,
318 0, handle, BYPASS_CAPA);
323 * Modify acl when creating a new obj.
/*
 * Combine an ACL with the creation mode *mode_p of a new object.
 *
 * Entry permissions are restricted by the corresponding bits of the mode,
 * and the mode is in turn restricted by the resulting entry permissions.
 * On exit the S_IRWXUGO bits of *mode_p are replaced by the computed mode.
 * Entries are converted to CPU byte order and back in place.
 *
 * NOTE(review): which entry tag selects each assignment below is not
 * visible in this view.
 */
325 static int mdd_posix_acl_create_masq(posix_acl_xattr_entry *entry,
326 __u32 *mode_p, int count)
328 posix_acl_xattr_entry *group_obj = NULL, *mask_obj = NULL, *pa, *pe;
329 __u32 mode = *mode_p;
332 for (pa = &entry[0], pe = &entry[count - 1]; pa <= pe; pa++) {
333 mdd_acl_le_to_cpu(pa);
/* Owner bits: limit the entry by mode, then limit mode by the entry. */
336 pa->e_perm &= (mode >> 6) | ~S_IRWXO;
337 mode &= (pa->e_perm << 6) | ~S_IRWXU;
/* "Other" bits: same two-way restriction, no shift needed. */
350 pa->e_perm &= mode | ~S_IRWXO;
351 mode &= pa->e_perm | ~S_IRWXO;
362 mdd_acl_cpu_to_le(pa);
/*
 * Group bits via the mask entry: e_perm is converted to CPU order for the
 * computation and stored back as little-endian afterwards.
 */
366 mask_obj->e_perm = le16_to_cpu(mask_obj->e_perm) &
367 ((mode >> 3) | ~S_IRWXO);
368 mode &= (mask_obj->e_perm << 3) | ~S_IRWXG;
369 mask_obj->e_perm = cpu_to_le16(mask_obj->e_perm);
/* Group bits via the group entry, handled the same way. */
373 group_obj->e_perm = le16_to_cpu(group_obj->e_perm) &
374 ((mode >> 3) | ~S_IRWXO);
375 mode &= (group_obj->e_perm << 3) | ~S_IRWXG;
376 group_obj->e_perm = cpu_to_le16(group_obj->e_perm);
/* Keep the non-permission bits of *mode_p, replace the rwx bits. */
379 *mode_p = (*mode_p & ~S_IRWXUGO) | mode;
384 * Hold write_lock for obj.
/*
 * Install ACLs on a newly created object obj from the ACL held in buf.
 *
 * For a directory, buf is stored as the object's default ACL
 * (XATTR_NAME_ACL_DEFAULT).  The entries are then adjusted against *mode by
 * mdd_posix_acl_create_masq() (which may also update *mode) and stored as
 * the access ACL (XATTR_NAME_ACL_ACCESS).
 *
 * NOTE(review): error-path returns are not visible in this view.
 */
386 int __mdd_acl_init(const struct lu_env *env, struct mdd_object *obj,
387 struct lu_buf *buf, __u32 *mode, struct thandle *handle)
389 posix_acl_xattr_header *head;
390 posix_acl_xattr_entry *entry;
/* The xattr is a header followed by an array of entries. */
396 head = (posix_acl_xattr_header *)(buf->lb_buf);
397 entry = head->a_entries;
/* Number of entries = (length - version field) / entry size. */
398 entry_count = (buf->lb_len - sizeof(head->a_version)) /
399 sizeof(posix_acl_xattr_entry);
400 if (entry_count <= 0)
/* Directories also receive buf as their default ACL. */
403 if (S_ISDIR(*mode)) {
404 rc = mdo_xattr_set(env, obj, buf, XATTR_NAME_ACL_DEFAULT, 0,
405 handle, BYPASS_CAPA);
410 rc = mdd_posix_acl_create_masq(entry, mode, entry_count);
414 rc = mdo_xattr_set(env, obj, buf, XATTR_NAME_ACL_ACCESS, 0, handle,
420 * Hold read_lock for pobj.
421 * Hold write_lock for cobj.
/*
 * Initialise the ACLs of a new child object cobj from its parent pobj.
 *
 * The parent's default ACL (XATTR_NAME_ACL_DEFAULT) is read into the
 * per-environment mti_xattr_buf scratch buffer and handed to
 * __mdd_acl_init() together with the child's mode.
 *
 * NOTE(review): error-path returns are not visible in this view.
 */
423 int mdd_acl_init(const struct lu_env *env, struct mdd_object *pobj,
424 struct mdd_object *cobj, __u32 *mode, struct thandle *handle)
433 buf = mdd_buf_get(env, mdd_env_info(env)->mti_xattr_buf,
434 sizeof(mdd_env_info(env)->mti_xattr_buf));
435 rc = mdo_xattr_get(env, pobj, buf, XATTR_NAME_ACL_DEFAULT, BYPASS_CAPA);
/* -EOPNOTSUPP and -ENODATA are singled out from other results. */
436 if ((rc == -EOPNOTSUPP) || (rc == -ENODATA))
442 rc = __mdd_acl_init(env, cobj, buf, mode, handle);
448 * Hold read_lock for obj.
/*
 * ACL-based permission check for obj.
 *
 * With CONFIG_FS_POSIX_ACL, the access ACL (XATTR_NAME_ACL_ACCESS) is read
 * into the per-environment mti_xattr_buf scratch buffer using the object's
 * capability, and evaluated by mdd_posix_acl_permission() against the
 * caller's credential, the attributes la and the requested mask.
 *
 * NOTE(review): the branch taken without CONFIG_FS_POSIX_ACL is not visible
 * in this view.
 */
450 static int mdd_check_acl(const struct lu_env *env, struct mdd_object *obj,
451 struct lu_attr *la, int mask)
453 #ifdef CONFIG_FS_POSIX_ACL
454 struct md_ucred *uc = md_ucred(env);
455 posix_acl_xattr_header *head;
456 posix_acl_xattr_entry *entry;
462 buf = mdd_buf_get(env, mdd_env_info(env)->mti_xattr_buf,
463 sizeof(mdd_env_info(env)->mti_xattr_buf));
464 rc = mdo_xattr_get(env, obj, buf, XATTR_NAME_ACL_ACCESS,
465 mdd_object_capa(env, obj));
/* Return rc when it is non-zero, otherwise -EACCES (GNU "?:" shorthand). */
467 RETURN(rc ? : -EACCES);
/* The xattr is a header followed by an array of entries. */
470 head = (posix_acl_xattr_header *)(buf->lb_buf);
471 entry = head->a_entries;
/* Number of entries = (length - version field) / entry size. */
472 entry_count = (buf->lb_len - sizeof(head->a_version)) /
473 sizeof(posix_acl_xattr_entry);
475 rc = mdd_posix_acl_permission(uc, la, mask, entry, entry_count);
/*
 * Core permission check of the access bits in mask on obj for the
 * credential attached to env.
 *
 * Visible stages: credential validity, immutable-object write protection,
 * attribute fetch into the per-environment mti_la, owner / ACL / group /
 * other mode-bit tests, and finally capability-based overrides.
 *
 * NOTE(review): the conditions on la and needlock, the labels and the
 * return statements are not visible in this view.
 */
483 int __mdd_permission_internal(const struct lu_env *env, struct mdd_object *obj,
484 struct lu_attr *la, int mask, int needlock)
486 struct md_ucred *uc = md_ucred(env);
494 /* No permission check is needed in these cases */
495 if ((uc == NULL) || (uc->mu_valid == UCRED_INIT))
498 /* Invalid user credential */
499 if (uc->mu_valid == UCRED_INVALID)
503 * Nobody gets write access to an immutable file.
505 if ((mask & MAY_WRITE) && mdd_is_immutable(obj))
/* Fetch the object's attributes into the per-environment scratch lu_attr. */
509 la = &mdd_env_info(env)->mti_la;
510 rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
/* Caller is the owner of the object. */
516 if (uc->mu_fsuid == la->la_uid) {
/* The ACL is consulted only when some group permission bit is set. */
519 if (mode & S_IRWXG) {
521 mdd_read_lock(env, obj);
522 rc = mdd_check_acl(env, obj, la, mask);
524 mdd_read_unlock(env, obj);
526 goto check_capabilities;
527 else if ((rc != -EAGAIN) && (rc != -EOPNOTSUPP) &&
/* Caller belongs to the object's owning group. */
531 if (mdd_in_group_p(uc, la->la_gid))
/* All requested bits are granted by the selected mode bits. */
535 if (((mode & mask & S_IRWXO) == mask))
/*
 * CAP_DAC_OVERRIDE is considered unless execute is requested on a
 * non-directory that has no execute bit set at all.
 */
539 if (!(mask & MAY_EXEC) ||
540 (la->la_mode & S_IXUGO) || S_ISDIR(la->la_mode))
541 if (mdd_capable(uc, CAP_DAC_OVERRIDE))
/*
 * CAP_DAC_READ_SEARCH is considered for pure read requests, and for
 * directories when write access is not requested.
 */
544 if ((mask == MAY_READ) ||
545 (S_ISDIR(la->la_mode) && !(mask & MAY_WRITE)))
546 if (mdd_capable(uc, CAP_DAC_READ_SEARCH))
/*
 * Permission entry point operating on md_object handles.
 *
 * mask may carry, besides plain access bits, the operation flags
 * MAY_CREATE, MAY_LINK, MAY_UNLINK, MAY_RENAME_SRC, MAY_RENAME_TAR,
 * MAY_VTX_PART and MAY_VTX_FULL.  These are split off, the remaining access
 * bits are checked on cobj, and each requested operation is then checked in
 * turn as long as rc is still zero.
 *
 * For the sticky-bit checks the outcome is reported through the
 * MDS_VTX_BYPASS flag in ma->ma_attr_flags.
 *
 * NOTE(review): error-path returns are not visible in this view.
 */
552 int mdd_permission(const struct lu_env *env,
553 struct md_object *pobj, struct md_object *cobj,
554 struct md_attr *ma, int mask)
556 struct mdd_object *mdd_pobj, *mdd_cobj;
557 struct lu_attr *la = NULL;
558 int check_create, check_link;
560 int check_rename_src, check_rename_tar;
561 int check_vtx_part, check_vtx_full;
566 mdd_cobj = md2mdd_obj(cobj);
568 /* For cross_open case, the "mask" is open flags,
569 * so convert it to permission mask first.
570 * XXX: MDS_OPEN_CROSS must be NOT equal to permission mask MAY_*. */
571 if (unlikely(mask & MDS_OPEN_CROSS)) {
572 la = &mdd_env_info(env)->mti_la;
573 rc = mdd_la_get(env, mdd_cobj, la, BYPASS_CAPA);
577 mask = accmode(env, la, mask & ~MDS_OPEN_CROSS);
/* Remember which operation-specific checks were requested ... */
580 check_create = mask & MAY_CREATE;
581 check_link = mask & MAY_LINK;
582 check_unlink = mask & MAY_UNLINK;
583 check_rename_src = mask & MAY_RENAME_SRC;
584 check_rename_tar = mask & MAY_RENAME_TAR;
585 check_vtx_part = mask & MAY_VTX_PART;
586 check_vtx_full = mask & MAY_VTX_FULL;
/* ... and strip those flags so only plain access bits remain in mask. */
588 mask &= ~(MAY_CREATE | MAY_LINK |
590 MAY_RENAME_SRC | MAY_RENAME_TAR |
591 MAY_VTX_PART | MAY_VTX_FULL);
/* Plain access check on the child object. */
593 rc = mdd_permission_internal_locked(env, mdd_cobj, NULL, mask);
/* Create and link share mdd_may_create(); check_link is passed through. */
595 if (!rc && (check_create || check_link))
596 rc = mdd_may_create(env, mdd_cobj, NULL, 1, check_link);
598 if (!rc && check_unlink) {
600 rc = mdd_may_unlink(env, mdd_cobj, ma);
/* Rename checks involve the parent object as well. */
603 if (!rc && (check_rename_src || check_rename_tar)) {
606 mdd_pobj = md2mdd_obj(pobj);
607 rc = mdd_may_delete(env, mdd_pobj, mdd_cobj, ma, 1,
/* Sticky-bit (S_ISVTX) evaluation on cobj. */
611 if (!rc && (check_vtx_part || check_vtx_full)) {
612 struct md_ucred *uc = md_ucred(env);
616 la = &mdd_env_info(env)->mti_la;
617 rc = mdd_la_get(env, mdd_cobj, la, BYPASS_CAPA);
/*
 * MDS_VTX_BYPASS is set when cobj has no sticky bit, when the caller owns
 * cobj, or (full check only) when ma carries a valid uid equal to the
 * caller's fsuid; otherwise the flag is cleared.
 */
622 if (!(la->la_mode & S_ISVTX) || (la->la_uid == uc->mu_fsuid) ||
623 (check_vtx_full && (ma->ma_attr.la_valid & LA_UID) &&
624 (ma->ma_attr.la_uid == uc->mu_fsuid))) {
625 ma->ma_attr_flags |= MDS_VTX_BYPASS;
627 ma->ma_attr_flags &= ~MDS_VTX_BYPASS;
636 int mdd_capa_get(const struct lu_env *env, struct md_object *obj,
637 struct lustre_capa *capa, int renewal)
639 struct mdd_object *mdd_obj = md2mdd_obj(obj);
644 oc = mdo_capa_get(env, mdd_obj, renewal ? capa : NULL, capa->lc_uid,