From a514419ef0b421a2a6ca07e490567c839b19d21c Mon Sep 17 00:00:00 2001 From: fanyong Date: Sun, 24 Sep 2006 14:11:02 +0000 Subject: [PATCH] (1) Remote/local user (2) Permission check --- lustre/cmm/cmm_device.c | 16 +- lustre/cmm/cmm_internal.h | 3 +- lustre/cmm/cmm_object.c | 226 ++++--- lustre/cmm/cmm_split.c | 57 +- lustre/cmm/mdc_internal.h | 6 +- lustre/cmm/mdc_object.c | 79 ++- lustre/include/linux/lvfs.h | 15 +- lustre/include/lustre/liblustreapi.h | 2 + lustre/include/lustre/lustre_idl.h | 38 +- lustre/include/lustre/lustre_user.h | 49 +- lustre/include/lustre_cfg.h | 5 + lustre/include/lustre_disk.h | 2 + lustre/include/lustre_dlm.h | 1 + lustre/include/lustre_export.h | 11 + lustre/include/lustre_lib.h | 4 + lustre/include/lustre_mdt.h | 22 + lustre/include/lustre_net.h | 2 +- lustre/include/lustre_param.h | 5 + lustre/include/lustre_ucache.h | 75 ++- lustre/include/md_object.h | 425 +++++++++---- lustre/include/obd.h | 19 +- lustre/include/obd_class.h | 13 +- lustre/include/obd_support.h | 7 +- lustre/ldlm/ldlm_lockd.c | 56 ++ lustre/llite/Makefile.in | 2 +- lustre/llite/dir.c | 15 + lustre/llite/file.c | 21 + lustre/llite/llite_internal.h | 31 + lustre/llite/llite_lib.c | 121 +++- lustre/llite/namei.c | 3 + lustre/llite/remote_perm.c | 285 +++++++++ lustre/llite/super.c | 33 + lustre/llite/super25.c | 31 + lustre/lmv/lmv_obd.c | 32 +- lustre/lvfs/lvfs_linux.c | 26 +- lustre/lvfs/upcall_cache.c | 346 +++++------ lustre/mdc/mdc_internal.h | 6 + lustre/mdc/mdc_locks.c | 45 +- lustre/mdc/mdc_request.c | 63 +- lustre/mdd/mdd_handler.c | 1113 +++++++++++++++++++++++++++++----- lustre/mdd/mdd_internal.h | 2 + lustre/mdd/mdd_lov.c | 5 +- lustre/mds/handler.c | 12 +- lustre/mds/lproc_mds.c | 4 + lustre/mds/mds_lib.c | 18 + lustre/mds/mds_reint.c | 6 + lustre/mds/mds_xattr.c | 4 +- lustre/mdt/Makefile.in | 1 + lustre/mdt/mdt_handler.c | 315 ++++++++-- lustre/mdt/mdt_identity.c | 307 ++++++++++ lustre/mdt/mdt_idmap.c | 739 ++++++++++++++++++++++ 
lustre/mdt/mdt_internal.h | 72 ++- lustre/mdt/mdt_lib.c | 418 +++++++++++++ lustre/mdt/mdt_open.c | 66 +- lustre/mdt/mdt_recovery.c | 15 +- lustre/mdt/mdt_reint.c | 51 +- lustre/mdt/mdt_rmtacl.c | 248 ++++++++ lustre/mdt/mdt_xattr.c | 113 +++- lustre/mgs/mgs_llog.c | 22 + lustre/obdclass/lprocfs_status.c | 5 +- lustre/obdclass/obd_mount.c | 9 + lustre/ptlrpc/gss/sec_gss.c | 2 +- lustre/ptlrpc/pack_generic.c | 18 + lustre/ptlrpc/ptlrpc_module.c | 2 + lustre/tests/cfg/local.sh | 2 + lustre/tests/cfg/lov.sh | 2 + lustre/tests/runas.c | 60 +- lustre/tests/sanity-sec.sh | 285 +++++++++ lustre/utils/Makefile.am | 10 +- lustre/utils/l_facl.c | 242 ++++++++ lustre/utils/l_getidentity.c | 356 +++++++++++ lustre/utils/lfs.c | 122 ++++ lustre/utils/liblustreapi.c | 57 +- 73 files changed, 6094 insertions(+), 807 deletions(-) create mode 100644 lustre/llite/remote_perm.c create mode 100644 lustre/mdt/mdt_identity.c create mode 100644 lustre/mdt/mdt_idmap.c create mode 100644 lustre/mdt/mdt_rmtacl.c create mode 100644 lustre/tests/sanity-sec.sh create mode 100644 lustre/utils/l_facl.c create mode 100644 lustre/utils/l_getidentity.c diff --git a/lustre/cmm/cmm_device.c b/lustre/cmm/cmm_device.c index 2b6c74e..8a996aa 100644 --- a/lustre/cmm/cmm_device.c +++ b/lustre/cmm/cmm_device.c @@ -51,37 +51,37 @@ static inline int lu_device_is_cmm(struct lu_device *d) return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &cmm_lu_ops); } -static int cmm_root_get(const struct lu_context *ctx, struct md_device *md, - struct lu_fid *fid) +int cmm_root_get(const struct lu_context *ctx, struct md_device *md, + struct lu_fid *fid, struct md_ucred *uc) { struct cmm_device *cmm_dev = md2cmm_dev(md); /* valid only on master MDS */ if (cmm_dev->cmm_local_num == 0) return cmm_child_ops(cmm_dev)->mdo_root_get(ctx, - cmm_dev->cmm_child, fid); + cmm_dev->cmm_child, fid, uc); else return -EINVAL; } static int cmm_statfs(const struct lu_context *ctxt, struct md_device *md, - struct kstatfs *sfs) { + 
struct kstatfs *sfs, struct md_ucred *uc) { struct cmm_device *cmm_dev = md2cmm_dev(md); int rc; ENTRY; rc = cmm_child_ops(cmm_dev)->mdo_statfs(ctxt, - cmm_dev->cmm_child, sfs); + cmm_dev->cmm_child, sfs, uc); RETURN (rc); } static int cmm_maxsize_get(const struct lu_context *ctxt, struct md_device *md, - int *md_size, int *cookie_size) + int *md_size, int *cookie_size, struct md_ucred *uc) { struct cmm_device *cmm_dev = md2cmm_dev(md); int rc; ENTRY; - rc = cmm_child_ops(cmm_dev)->mdo_maxsize_get(ctxt, - cmm_dev->cmm_child, md_size, cookie_size); + rc = cmm_child_ops(cmm_dev)->mdo_maxsize_get(ctxt, cmm_dev->cmm_child, + md_size, cookie_size, uc); RETURN(rc); } diff --git a/lustre/cmm/cmm_internal.h b/lustre/cmm/cmm_internal.h index a429981..8e4a15b 100644 --- a/lustre/cmm/cmm_internal.h +++ b/lustre/cmm/cmm_internal.h @@ -127,7 +127,8 @@ int cmm_upcall(const struct lu_context *ctxt, struct md_device *md, enum md_upcall_event ev); #ifdef HAVE_SPLIT_SUPPORT /* cmm_split.c */ -int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo); +int cml_try_to_split(const struct lu_context *ctx, + struct md_object *mo, struct md_ucred *uc); #endif #endif /* __KERNEL__ */ diff --git a/lustre/cmm/cmm_object.c b/lustre/cmm/cmm_object.c index 1b88938..77f8115 100644 --- a/lustre/cmm/cmm_object.c +++ b/lustre/cmm/cmm_object.c @@ -209,122 +209,136 @@ static struct lu_object_operations cml_obj_ops = { static int cml_object_create(const struct lu_context *ctx, struct md_object *mo, const struct md_create_spec *spec, - struct md_attr *attr) + struct md_attr *attr, + struct md_ucred *uc) { int rc; ENTRY; - rc = mo_object_create(ctx, md_object_next(mo), spec, attr); + rc = mo_object_create(ctx, md_object_next(mo), spec, attr, uc); + RETURN(rc); +} + +static int cml_permission(const struct lu_context *ctx, + struct md_object *mo, int mask, struct md_ucred *uc) +{ + int rc; + ENTRY; + rc = mo_permission(ctx, md_object_next(mo), mask, uc); RETURN(rc); } static int 
cml_attr_get(const struct lu_context *ctx, struct md_object *mo, - struct md_attr *attr) + struct md_attr *attr, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_attr_get(ctx, md_object_next(mo), attr); + rc = mo_attr_get(ctx, md_object_next(mo), attr, uc); RETURN(rc); } static int cml_attr_set(const struct lu_context *ctx, struct md_object *mo, - const struct md_attr *attr) + const struct md_attr *attr, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_attr_set(ctx, md_object_next(mo), attr); + rc = mo_attr_set(ctx, md_object_next(mo), attr, uc); RETURN(rc); } static int cml_xattr_get(const struct lu_context *ctx, struct md_object *mo, - void *buf, int buflen, const char *name) + void *buf, int buflen, const char *name, + struct md_ucred *uc) { int rc; ENTRY; - rc = mo_xattr_get(ctx, md_object_next(mo), buf, buflen, name); + rc = mo_xattr_get(ctx, md_object_next(mo), buf, buflen, name, uc); RETURN(rc); } static int cml_readlink(const struct lu_context *ctx, struct md_object *mo, - void *buf, int buflen) + void *buf, int buflen, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_readlink(ctx, md_object_next(mo), buf, buflen); + rc = mo_readlink(ctx, md_object_next(mo), buf, buflen, uc); RETURN(rc); } static int cml_xattr_list(const struct lu_context *ctx, struct md_object *mo, - void *buf, int buflen) + void *buf, int buflen, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_xattr_list(ctx, md_object_next(mo), buf, buflen); + rc = mo_xattr_list(ctx, md_object_next(mo), buf, buflen, uc); RETURN(rc); } static int cml_xattr_set(const struct lu_context *ctx, struct md_object *mo, - const void *buf, int buflen, const char *name, int fl) + const void *buf, int buflen, + const char *name, int fl, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_xattr_set(ctx, md_object_next(mo), buf, buflen, name, fl); + rc = mo_xattr_set(ctx, md_object_next(mo), buf, buflen, name, fl, uc); RETURN(rc); } static int cml_xattr_del(const struct lu_context *ctx, struct md_object *mo, - const char 
*name) + const char *name, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_xattr_del(ctx, md_object_next(mo), name); + rc = mo_xattr_del(ctx, md_object_next(mo), name, uc); RETURN(rc); } -static int cml_ref_add(const struct lu_context *ctx, struct md_object *mo) +static int cml_ref_add(const struct lu_context *ctx, struct md_object *mo, + struct md_ucred *uc) { int rc; ENTRY; - rc = mo_ref_add(ctx, md_object_next(mo)); + rc = mo_ref_add(ctx, md_object_next(mo), uc); RETURN(rc); } static int cml_ref_del(const struct lu_context *ctx, struct md_object *mo, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_ref_del(ctx, md_object_next(mo), ma); + rc = mo_ref_del(ctx, md_object_next(mo), ma, uc); RETURN(rc); } static int cml_open(const struct lu_context *ctx, struct md_object *mo, - int flags) + int flags, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_open(ctx, md_object_next(mo), flags); + rc = mo_open(ctx, md_object_next(mo), flags, uc); RETURN(rc); } static int cml_close(const struct lu_context *ctx, struct md_object *mo, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_close(ctx, md_object_next(mo), ma); + rc = mo_close(ctx, md_object_next(mo), ma, uc); RETURN(rc); } static int cml_readpage(const struct lu_context *ctxt, struct md_object *mo, - const struct lu_rdpg *rdpg) + const struct lu_rdpg *rdpg, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_readpage(ctxt, md_object_next(mo), rdpg); + rc = mo_readpage(ctxt, md_object_next(mo), rdpg, uc); RETURN(rc); } static struct md_object_operations cml_mo_ops = { + .moo_permission = cml_permission, .moo_attr_get = cml_attr_get, .moo_attr_set = cml_attr_set, .moo_xattr_get = cml_xattr_get, @@ -342,67 +356,67 @@ static struct md_object_operations cml_mo_ops = { /* md_dir operations */ static int cml_lookup(const struct lu_context *ctx, struct md_object *mo_p, - const char *name, struct lu_fid *lf) + const char *name, struct lu_fid *lf, 
struct md_ucred *uc) { int rc; ENTRY; - rc = mdo_lookup(ctx, md_object_next(mo_p), name, lf); + rc = mdo_lookup(ctx, md_object_next(mo_p), name, lf, uc); RETURN(rc); } -static int cml_create(const struct lu_context *ctx, struct md_object *mo_p, - const char *child_name, struct md_object *mo_c, - const struct md_create_spec *spec, - struct md_attr *ma) +static int cml_create(const struct lu_context *ctx, + struct md_object *mo_p, const char *child_name, + struct md_object *mo_c, const struct md_create_spec *spec, + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; #ifdef HAVE_SPLIT_SUPPORT - rc = cml_try_to_split(ctx, mo_p); + rc = cml_try_to_split(ctx, mo_p, uc); if (rc) RETURN(rc); #endif rc = mdo_create(ctx, md_object_next(mo_p), child_name, - md_object_next(mo_c), spec, ma); + md_object_next(mo_c), spec, ma, uc); RETURN(rc); } -static int cml_create_data(const struct lu_context *ctx, - struct md_object *p, struct md_object *o, +static int cml_create_data(const struct lu_context *ctx, struct md_object *p, + struct md_object *o, const struct md_create_spec *spec, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; rc = mdo_create_data(ctx, md_object_next(p), md_object_next(o), - spec, ma); + spec, ma, uc); RETURN(rc); } static int cml_link(const struct lu_context *ctx, struct md_object *mo_p, struct md_object *mo_s, const char *name, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; rc = mdo_link(ctx, md_object_next(mo_p), md_object_next(mo_s), - name, ma); + name, ma, uc); RETURN(rc); } static int cml_unlink(const struct lu_context *ctx, struct md_object *mo_p, struct md_object *mo_c, const char *name, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; rc = mdo_unlink(ctx, md_object_next(mo_p), md_object_next(mo_c), - name, ma); + name, ma, uc); RETURN(rc); } @@ -426,7 +440,8 @@ struct md_object *md_object_find(const struct lu_context *ctx, } static int 
__cmm_mode_get(const struct lu_context *ctx, struct md_device *md, - const struct lu_fid *lf, struct md_attr *ma) + const struct lu_fid *lf, struct md_attr *ma, + struct md_ucred *uc) { struct cmm_thread_info *cmi; struct md_object *mo_s = md_object_find(ctx, md, lf); @@ -443,7 +458,7 @@ static int __cmm_mode_get(const struct lu_context *ctx, struct md_device *md, tmp_ma->ma_need = MA_INODE; /* get type from src, can be remote req */ - rc = mo_attr_get(ctx, md_object_next(mo_s), tmp_ma); + rc = mo_attr_get(ctx, md_object_next(mo_s), tmp_ma, uc); if (rc == 0) { ma->ma_attr.la_mode = tmp_ma->ma_attr.la_mode; ma->ma_attr.la_flags = tmp_ma->ma_attr.la_flags; @@ -454,74 +469,76 @@ static int __cmm_mode_get(const struct lu_context *ctx, struct md_device *md, } static int cml_rename(const struct lu_context *ctx, struct md_object *mo_po, - struct md_object *mo_pn, const struct lu_fid *lf, - const char *s_name, struct md_object *mo_t, - const char *t_name, struct md_attr *ma) + struct md_object *mo_pn, const struct lu_fid *lf, + const char *s_name, struct md_object *mo_t, + const char *t_name, struct md_attr *ma, + struct md_ucred *uc) { int rc; ENTRY; - rc = __cmm_mode_get(ctx, md_obj2dev(mo_po), lf, ma); + rc = __cmm_mode_get(ctx, md_obj2dev(mo_po), lf, ma, uc); if (rc != 0) RETURN(rc); if (mo_t && lu_object_exists(&mo_t->mo_lu) < 0) { /* mo_t is remote object and there is RPC to unlink it */ - rc = mo_ref_del(ctx, md_object_next(mo_t), ma); + rc = mo_ref_del(ctx, md_object_next(mo_t), ma, uc); if (rc) RETURN(rc); mo_t = NULL; } - + /* local rename, mo_t can be NULL */ rc = mdo_rename(ctx, md_object_next(mo_po), md_object_next(mo_pn), lf, s_name, - md_object_next(mo_t), t_name, ma); + md_object_next(mo_t), t_name, ma, uc); RETURN(rc); } -static int cml_rename_tgt(const struct lu_context *ctx, - struct md_object *mo_p, struct md_object *mo_t, - const struct lu_fid *lf, const char *name, - struct md_attr *ma) +static int cml_rename_tgt(const struct lu_context *ctx, struct 
md_object *mo_p, + struct md_object *mo_t, const struct lu_fid *lf, + const char *name, struct md_attr *ma, + struct md_ucred *uc) { int rc; ENTRY; rc = mdo_rename_tgt(ctx, md_object_next(mo_p), - md_object_next(mo_t), lf, name, ma); + md_object_next(mo_t), lf, name, ma, uc); RETURN(rc); } /* used only in case of rename_tgt() when target is not exist */ -static int cml_name_insert(const struct lu_context *ctx, - struct md_object *p, const char *name, - const struct lu_fid *lf, int isdir) +static int cml_name_insert(const struct lu_context *ctx, struct md_object *p, + const char *name, const struct lu_fid *lf, int isdir, + struct md_ucred *uc) { int rc; ENTRY; - rc = mdo_name_insert(ctx, md_object_next(p), name, lf, isdir); + rc = mdo_name_insert(ctx, md_object_next(p), name, lf, isdir, uc); RETURN(rc); } /* Common method for remote and local use. */ static int cmm_is_subdir(const struct lu_context *ctx, struct md_object *mo, - const struct lu_fid *fid, struct lu_fid *sfid) + const struct lu_fid *fid, struct lu_fid *sfid, + struct md_ucred *uc) { struct cmm_thread_info *cmi; int rc; ENTRY; cmi = lu_context_key_get(ctx, &cmm_thread_key); - rc = __cmm_mode_get(ctx, md_obj2dev(mo), fid, &cmi->cmi_ma); + rc = __cmm_mode_get(ctx, md_obj2dev(mo), fid, &cmi->cmi_ma, uc); if (rc) RETURN(rc); if (!S_ISDIR(cmi->cmi_ma.ma_attr.la_mode)) RETURN(0); - rc = mdo_is_subdir(ctx, md_object_next(mo), fid, sfid); + rc = mdo_is_subdir(ctx, md_object_next(mo), fid, sfid, uc); RETURN(rc); } @@ -621,83 +638,94 @@ static struct lu_object_operations cmr_obj_ops = { static int cmr_object_create(const struct lu_context *ctx, struct md_object *mo, const struct md_create_spec *spec, - struct md_attr *ma) + struct md_attr *ma, + struct md_ucred *uc) { RETURN(-EFAULT); } +static int cmr_permission(const struct lu_context *ctx, struct md_object *mo, + int mask, struct md_ucred *uc) +{ + RETURN(-EREMOTE); +} + static int cmr_attr_get(const struct lu_context *ctx, struct md_object *mo, - struct 
md_attr *attr) + struct md_attr *attr, struct md_ucred *uc) { RETURN(-EREMOTE); } static int cmr_attr_set(const struct lu_context *ctx, struct md_object *mo, - const struct md_attr *attr) + const struct md_attr *attr, struct md_ucred *uc) { RETURN(-EFAULT); } static int cmr_xattr_get(const struct lu_context *ctx, struct md_object *mo, - void *buf, int buflen, const char *name) + void *buf, int buflen, const char *name, + struct md_ucred *uc) { RETURN(-EFAULT); } static int cmr_readlink(const struct lu_context *ctx, struct md_object *mo, - void *buf, int buflen) + void *buf, int buflen, struct md_ucred *uc) { RETURN(-EFAULT); } static int cmr_xattr_list(const struct lu_context *ctx, struct md_object *mo, - void *buf, int buflen) + void *buf, int buflen, struct md_ucred *uc) { RETURN(-EFAULT); } static int cmr_xattr_set(const struct lu_context *ctx, struct md_object *mo, - const void *buf, int buflen, const char *name, int fl) + const void *buf, int buflen, const char *name, int fl, + struct md_ucred *uc) { RETURN(-EFAULT); } static int cmr_xattr_del(const struct lu_context *ctx, struct md_object *mo, - const char *name) + const char *name, struct md_ucred *uc) { RETURN(-EFAULT); } -static int cmr_ref_add(const struct lu_context *ctx, struct md_object *mo) +static int cmr_ref_add(const struct lu_context *ctx, struct md_object *mo, + struct md_ucred *uc) { RETURN(-EFAULT); } static int cmr_ref_del(const struct lu_context *ctx, struct md_object *mo, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { RETURN(-EFAULT); } static int cmr_open(const struct lu_context *ctx, struct md_object *mo, - int flags) + int flags, struct md_ucred *uc) { RETURN(-EREMOTE); } static int cmr_close(const struct lu_context *ctx, struct md_object *mo, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { RETURN(-EFAULT); } static int cmr_readpage(const struct lu_context *ctxt, struct md_object *mo, - const struct lu_rdpg *rdpg) + const struct lu_rdpg *rdpg, struct 
md_ucred *uc) { RETURN(-EREMOTE); } static struct md_object_operations cmr_mo_ops = { + .moo_permission = cmr_permission, .moo_attr_get = cmr_attr_get, .moo_attr_set = cmr_attr_set, .moo_xattr_get = cmr_xattr_get, @@ -715,7 +743,7 @@ static struct md_object_operations cmr_mo_ops = { /* remote part of md_dir operations */ static int cmr_lookup(const struct lu_context *ctx, struct md_object *mo_p, - const char *name, struct lu_fid *lf) + const char *name, struct lu_fid *lf, struct md_ucred *uc) { /* * This can happens while rename() If new parent is remote dir, lookup @@ -738,7 +766,7 @@ static int cmr_lookup(const struct lu_context *ctx, struct md_object *mo_p, static int cmr_create(const struct lu_context *ctx, struct md_object *mo_p, const char *child_name, struct md_object *mo_c, const struct md_create_spec *spec, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { struct cmm_thread_info *cmi; struct md_attr *tmp_ma; @@ -750,7 +778,7 @@ static int cmr_create(const struct lu_context *ctx, struct md_object *mo_p, LASSERT(cmi); tmp_ma = &cmi->cmi_ma; tmp_ma->ma_need = MA_INODE; - rc = mo_attr_get(ctx, md_object_next(mo_p), tmp_ma); + rc = mo_attr_get(ctx, md_object_next(mo_p), tmp_ma, uc); if (rc) RETURN(rc); @@ -762,11 +790,11 @@ static int cmr_create(const struct lu_context *ctx, struct md_object *mo_p, } } /* remote object creation and local name insert */ - rc = mo_object_create(ctx, md_object_next(mo_c), spec, ma); + rc = mo_object_create(ctx, md_object_next(mo_c), spec, ma, uc); if (rc == 0) { rc = mdo_name_insert(ctx, md_object_next(mo_p), child_name, lu_object_fid(&mo_c->mo_lu), - S_ISDIR(ma->ma_attr.la_mode)); + S_ISDIR(ma->ma_attr.la_mode), uc); } RETURN(rc); @@ -774,17 +802,17 @@ static int cmr_create(const struct lu_context *ctx, struct md_object *mo_p, static int cmr_link(const struct lu_context *ctx, struct md_object *mo_p, struct md_object *mo_s, const char *name, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int 
rc; ENTRY; //XXX: make sure that MDT checks name isn't exist - rc = mo_ref_add(ctx, md_object_next(mo_s)); + rc = mo_ref_add(ctx, md_object_next(mo_s), uc); if (rc == 0) { rc = mdo_name_insert(ctx, md_object_next(mo_p), - name, lu_object_fid(&mo_s->mo_lu), 0); + name, lu_object_fid(&mo_s->mo_lu), 0, uc); } RETURN(rc); @@ -792,30 +820,30 @@ static int cmr_link(const struct lu_context *ctx, struct md_object *mo_p, static int cmr_unlink(const struct lu_context *ctx, struct md_object *mo_p, struct md_object *mo_c, const char *name, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; - rc = mo_ref_del(ctx, md_object_next(mo_c), ma); + rc = mo_ref_del(ctx, md_object_next(mo_c), ma, uc); if (rc == 0) { - rc = mdo_name_remove(ctx, md_object_next(mo_p), - name); + rc = mdo_name_remove(ctx, md_object_next(mo_p), name, uc); } RETURN(rc); } -static int cmr_rename(const struct lu_context *ctx, struct md_object *mo_po, - struct md_object *mo_pn, const struct lu_fid *lf, - const char *s_name, struct md_object *mo_t, - const char *t_name, struct md_attr *ma) +static int cmr_rename(const struct lu_context *ctx, + struct md_object *mo_po, struct md_object *mo_pn, + const struct lu_fid *lf, const char *s_name, + struct md_object *mo_t, const char *t_name, + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; /* get real type of src */ - rc = __cmm_mode_get(ctx, md_obj2dev(mo_po), lf, ma); + rc = __cmm_mode_get(ctx, md_obj2dev(mo_po), lf, ma, uc); if (rc != 0) RETURN(rc); @@ -824,11 +852,11 @@ static int cmr_rename(const struct lu_context *ctx, struct md_object *mo_po, * mo_t or not. 
Therefore mo_t is NULL here but remote server should do * lookup and process this further */ rc = mdo_rename_tgt(ctx, md_object_next(mo_pn), - NULL/* mo_t */, lf, t_name, ma); + NULL/* mo_t */, lf, t_name, ma, uc); /* only old name is removed localy */ if (rc == 0) rc = mdo_name_remove(ctx, md_object_next(mo_po), - s_name); + s_name, uc); RETURN(rc); } @@ -838,16 +866,16 @@ static int cmr_rename(const struct lu_context *ctx, struct md_object *mo_po, static int cmr_rename_tgt(const struct lu_context *ctx, struct md_object *mo_p, struct md_object *mo_t, const struct lu_fid *lf, const char *name, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int rc; ENTRY; /* target object is remote one */ - rc = mo_ref_del(ctx, md_object_next(mo_t), ma); + rc = mo_ref_del(ctx, md_object_next(mo_t), ma, uc); /* continue locally with name handling only */ if (rc == 0) rc = mdo_rename_tgt(ctx, md_object_next(mo_p), - NULL, lf, name, ma); + NULL, lf, name, ma, uc); RETURN(rc); } @@ -860,5 +888,3 @@ static struct md_dir_operations cmr_dir_ops = { .mdo_rename = cmr_rename, .mdo_rename_tgt = cmr_rename_tgt, }; - - diff --git a/lustre/cmm/cmm_split.c b/lustre/cmm/cmm_split.c index 061794d..4f82634 100644 --- a/lustre/cmm/cmm_split.c +++ b/lustre/cmm/cmm_split.c @@ -54,7 +54,9 @@ static inline struct lu_fid* cmm2_fid(struct cmm_object *obj) } static int cmm_expect_splitting(const struct lu_context *ctx, - struct md_object *mo, struct md_attr *ma) + struct md_object *mo, + struct md_attr *ma, + struct md_ucred *uc) { struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); struct lu_fid *fid = NULL; @@ -70,8 +72,7 @@ static int cmm_expect_splitting(const struct lu_context *ctx, if (ma->ma_lmv_size) GOTO(cleanup, rc = CMM_NO_SPLIT_EXPECTED); OBD_ALLOC_PTR(fid); - rc = cmm_child_ops(cmm)->mdo_root_get(ctx, cmm->cmm_child, - fid); + rc = cmm_child_ops(cmm)->mdo_root_get(ctx, cmm->cmm_child, fid, uc); if (rc) GOTO(cleanup, rc); @@ -152,7 +153,7 @@ static int 
cmm_creat_remote_obj(const struct lu_context *ctx, struct cmm_device *cmm, struct lu_fid *fid, struct md_attr *ma, const struct lmv_stripe_md *lmv, - int lmv_size) + int lmv_size, struct md_ucred *uc) { struct cmm_object *obj; struct md_create_spec *spec; @@ -170,7 +171,7 @@ static int cmm_creat_remote_obj(const struct lu_context *ctx, spec->u.sp_ea.eadatalen = lmv_size; spec->sp_cr_flags |= MDS_CREATE_SLAVE_OBJ; rc = mo_object_create(ctx, md_object_next(&obj->cmo_obj), - spec, ma); + spec, ma, uc); OBD_FREE_PTR(spec); cmm_object_put(ctx, obj); @@ -178,7 +179,8 @@ static int cmm_creat_remote_obj(const struct lu_context *ctx, } static int cmm_create_slave_objects(const struct lu_context *ctx, - struct md_object *mo, struct md_attr *ma) + struct md_object *mo, struct md_attr *ma, + struct md_ucred *uc) { struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); struct lmv_stripe_md *lmv = NULL, *slave_lmv = NULL; @@ -213,7 +215,7 @@ static int cmm_create_slave_objects(const struct lu_context *ctx, slave_lmv->mea_count = 0; for (i = 1; i < cmm->cmm_tgt_count + 1; i ++) { rc = cmm_creat_remote_obj(ctx, cmm, &lmv->mea_ids[i], ma, - slave_lmv, sizeof(slave_lmv)); + slave_lmv, sizeof(slave_lmv), uc); if (rc) GOTO(cleanup, rc); } @@ -228,7 +230,8 @@ cleanup: static int cmm_send_split_pages(const struct lu_context *ctx, struct md_object *mo, struct lu_rdpg *rdpg, - struct lu_fid *fid, int len) + struct lu_fid *fid, int len, + struct md_ucred *uc) { struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); struct cmm_object *obj; @@ -240,14 +243,14 @@ static int cmm_send_split_pages(const struct lu_context *ctx, RETURN(PTR_ERR(obj)); rc = mdc_send_page(cmm, ctx, md_object_next(&obj->cmo_obj), - rdpg->rp_pages[0], len); + rdpg->rp_pages[0], len, uc); cmm_object_put(ctx, obj); RETURN(rc); } static int cmm_remove_entries(const struct lu_context *ctx, struct md_object *mo, struct lu_rdpg *rdpg, - __u32 hash_end, __u32 *len) + __u32 hash_end, __u32 *len, struct md_ucred *uc) { struct 
lu_dirpage *dp; struct lu_dirent *ent; @@ -268,7 +271,7 @@ static int cmm_remove_entries(const struct lu_context *ctx, OBD_ALLOC(name, ent->lde_namelen + 1); memcpy(name, ent->lde_name, ent->lde_namelen); rc = mdo_name_remove(ctx, md_object_next(mo), - name); + name, uc); OBD_FREE(name, ent->lde_namelen + 1); } if (rc) { @@ -292,9 +295,9 @@ unmap: RETURN(rc); } -static int cmm_split_entries(const struct lu_context *ctx, struct md_object *mo, - struct lu_rdpg *rdpg, struct lu_fid *lf, - __u32 end) +static int cmm_split_entries(const struct lu_context *ctx, + struct md_object *mo, struct lu_rdpg *rdpg, + struct lu_fid *lf, __u32 end, struct md_ucred *uc) { int rc, done = 0; ENTRY; @@ -310,7 +313,7 @@ static int cmm_split_entries(const struct lu_context *ctx, struct md_object *mo, memset(kmap(rdpg->rp_pages[0]), 0, CFS_PAGE_SIZE); kunmap(rdpg->rp_pages[0]); - rc = mo_readpage(ctx, md_object_next(mo), rdpg); + rc = mo_readpage(ctx, md_object_next(mo), rdpg, uc); /* -E2BIG means it already reach the end of the dir */ if (rc) { if (rc != -ERANGE) { @@ -321,13 +324,13 @@ static int cmm_split_entries(const struct lu_context *ctx, struct md_object *mo, } /* Remove the old entries */ - rc = cmm_remove_entries(ctx, mo, rdpg, end, &len); + rc = cmm_remove_entries(ctx, mo, rdpg, end, &len, uc); if (rc) RETURN(rc); /* Send page to slave object */ if (len > 0) { - rc = cmm_send_split_pages(ctx, mo, rdpg, lf, len); + rc = cmm_send_split_pages(ctx, mo, rdpg, lf, len, uc); if (rc) RETURN(rc); } @@ -345,7 +348,8 @@ static int cmm_split_entries(const struct lu_context *ctx, struct md_object *mo, } #define SPLIT_PAGE_COUNT 1 static int cmm_scan_and_split(const struct lu_context *ctx, - struct md_object *mo, struct md_attr *ma) + struct md_object *mo, struct md_attr *ma, + struct md_ucred *uc) { struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); __u32 hash_segement; @@ -376,7 +380,7 @@ static int cmm_scan_and_split(const struct lu_context *ctx, rdpg->rp_hash = i * hash_segement; 
hash_end = rdpg->rp_hash + hash_segement; - rc = cmm_split_entries(ctx, mo, rdpg, lf, hash_end); + rc = cmm_split_entries(ctx, mo, rdpg, lf, hash_end, uc); if (rc) GOTO(cleanup, rc); } @@ -394,7 +398,8 @@ free_rdpg: RETURN(rc); } -int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo) +int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo, + struct md_ucred *uc) { struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); struct md_attr *ma; @@ -408,12 +413,12 @@ int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo) RETURN(-ENOMEM); ma->ma_need = MA_INODE|MA_LMV; - rc = mo_attr_get(ctx, mo, ma); + rc = mo_attr_get(ctx, mo, ma, uc); if (rc) GOTO(cleanup, ma); /* step1: checking whether the dir need to be splitted */ - rc = cmm_expect_splitting(ctx, mo, ma); + rc = cmm_expect_splitting(ctx, mo, ma, uc); if (rc != CMM_EXPECT_SPLIT) GOTO(cleanup, rc = 0); @@ -425,18 +430,18 @@ int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo) GOTO(cleanup, rc = 0); /* step2: create slave objects */ - rc = cmm_create_slave_objects(ctx, mo, ma); + rc = cmm_create_slave_objects(ctx, mo, ma, uc); if (rc) GOTO(cleanup, ma); /* step3: scan and split the object */ - rc = cmm_scan_and_split(ctx, mo, ma); + rc = cmm_scan_and_split(ctx, mo, ma, uc); if (rc) GOTO(cleanup, ma); /* step4: set mea to the master object */ - rc = mo_xattr_set(ctx, md_object_next(mo), ma->ma_lmv, ma->ma_lmv_size, - MDS_LMV_MD_NAME, 0); + rc = mo_xattr_set(ctx, md_object_next(mo), ma->ma_lmv, + ma->ma_lmv_size, MDS_LMV_MD_NAME, 0, uc); if (rc == -ERESTART) CWARN("Dir"DFID" has been split \n", diff --git a/lustre/cmm/mdc_internal.h b/lustre/cmm/mdc_internal.h index a86f358..5483b2b 100644 --- a/lustre/cmm/mdc_internal.h +++ b/lustre/cmm/mdc_internal.h @@ -35,6 +35,7 @@ #include #include #include + struct mdc_cli_desc { struct lustre_handle cl_conn; /* uuid of remote MDT to connect */ @@ -96,8 +97,9 @@ struct lu_object *mdc_object_alloc(const struct 
lu_context *, const struct lu_object_header *, struct lu_device *); #ifdef HAVE_SPLIT_SUPPORT -int mdc_send_page(struct cmm_device *cmm, const struct lu_context *ctx, - struct md_object *mo, struct page *page, __u32 end); +int mdc_send_page(struct cmm_device *cmm, const struct lu_context *ctx, + struct md_object *mo, struct page *page, __u32 end, + struct md_ucred *uc); #endif #endif /* __KERNEL__ */ diff --git a/lustre/cmm/mdc_object.c b/lustre/cmm/mdc_object.c index 1c5e3b5..b1f45da 100644 --- a/lustre/cmm/mdc_object.c +++ b/lustre/cmm/mdc_object.c @@ -203,7 +203,7 @@ static int mdc_req2attr_update(const struct lu_context *ctx, } static int mdc_attr_get(const struct lu_context *ctx, struct md_object *mo, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); struct mdc_thread_info *mci; @@ -232,15 +232,19 @@ static int mdc_attr_get(const struct lu_context *ctx, struct md_object *mo, static int mdc_object_create(const struct lu_context *ctx, - struct md_object *mo, + struct md_object *mo, const struct md_create_spec *spec, - struct md_attr *ma) + struct md_attr *ma, + struct md_ucred *uc) { struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); struct lu_attr *la = &ma->ma_attr; struct mdc_thread_info *mci; const void *symname; int rc, symlen; + uid_t uid; + gid_t gid; + __u32 cap; ENTRY; LASSERT(spec->u.sp_pfid != NULL); @@ -249,6 +253,16 @@ static int mdc_object_create(const struct lu_context *ctx, /* parent fid is needed to create dotdot on the remote node */ mci->mci_opdata.fid1 = *(spec->u.sp_pfid); mci->mci_opdata.mod_time = la->la_mtime; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + uid = uc->mu_fsuid; + gid = uc->mu_fsgid; + cap = uc->mu_cap; + } else { + uid = la->la_uid; + gid = la->la_gid; + cap = 0; + } /* get data from spec */ if (spec->sp_cr_flags & MDS_CREATE_SLAVE_OBJ) { @@ -263,7 +277,7 @@ static int mdc_object_create(const struct lu_context *ctx, rc = 
md_create(mc->mc_desc.cl_exp, &mci->mci_opdata, symname, symlen, - la->la_mode, la->la_uid, la->la_gid, 0, la->la_rdev, + la->la_mode, uid, gid, cap, la->la_rdev, &mci->mci_req); if (rc == 0) { @@ -276,7 +290,8 @@ static int mdc_object_create(const struct lu_context *ctx, RETURN(rc); } -static int mdc_ref_add(const struct lu_context *ctx, struct md_object *mo) +static int mdc_ref_add(const struct lu_context *ctx, struct md_object *mo, + struct md_ucred *uc) { struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); struct mdc_thread_info *mci; @@ -291,6 +306,18 @@ static int mdc_ref_add(const struct lu_context *ctx, struct md_object *mo) //mci->mci_opdata.mod_time = la->la_ctime; //mci->mci_opdata.fsuid = la->la_uid; //mci->mci_opdata.fsgid = la->la_gid; + mci->mci_opdata.mod_time = CURRENT_SECONDS; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + mci->mci_opdata.fsuid = uc->mu_fsuid; + mci->mci_opdata.fsgid = uc->mu_fsgid; + mci->mci_opdata.cap = uc->mu_cap; + } else { + mci->mci_opdata.fsuid = current->fsuid; + mci->mci_opdata.fsgid = current->fsgid; + mci->mci_opdata.cap = current->cap_effective; + } + rc = md_link(mc->mc_desc.cl_exp, &mci->mci_opdata, &mci->mci_req); @@ -300,7 +327,7 @@ static int mdc_ref_add(const struct lu_context *ctx, struct md_object *mo) } static int mdc_ref_del(const struct lu_context *ctx, struct md_object *mo, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); struct lu_attr *la = &ma->ma_attr; @@ -312,8 +339,17 @@ static int mdc_ref_del(const struct lu_context *ctx, struct md_object *mo, mci->mci_opdata.fid1 = *lu_object_fid(&mo->mo_lu); mci->mci_opdata.create_mode = la->la_mode; mci->mci_opdata.mod_time = la->la_ctime; - mci->mci_opdata.fsuid = la->la_uid; - mci->mci_opdata.fsgid = la->la_gid; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + mci->mci_opdata.fsuid = uc->mu_fsuid; + mci->mci_opdata.fsgid = 
uc->mu_fsgid; + mci->mci_opdata.cap = uc->mu_cap; + } else { + mci->mci_opdata.fsuid = la->la_uid; + mci->mci_opdata.fsgid = la->la_gid; + mci->mci_opdata.cap = current->cap_effective; + } + rc = md_unlink(mc->mc_desc.cl_exp, &mci->mci_opdata, &mci->mci_req); if (rc == 0) { /* get attr from request */ @@ -327,7 +363,8 @@ static int mdc_ref_del(const struct lu_context *ctx, struct md_object *mo, #ifdef HAVE_SPLIT_SUPPORT int mdc_send_page(struct cmm_device *cm, const struct lu_context *ctx, - struct md_object *mo, struct page *page, __u32 offset) + struct md_object *mo, struct page *page, __u32 offset, + struct md_ucred *uc) { struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); int rc; @@ -349,10 +386,10 @@ static struct md_object_operations mdc_mo_ops = { }; /* md_dir_operations */ -static int mdc_rename_tgt(const struct lu_context *ctx, - struct md_object *mo_p, struct md_object *mo_t, - const struct lu_fid *lf, const char *name, - struct md_attr *ma) +static int mdc_rename_tgt(const struct lu_context *ctx, struct md_object *mo_p, + struct md_object *mo_t, const struct lu_fid *lf, + const char *name, struct md_attr *ma, + struct md_ucred *uc) { struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo_p)); struct lu_attr *la = &ma->ma_attr; @@ -365,8 +402,16 @@ static int mdc_rename_tgt(const struct lu_context *ctx, mci->mci_opdata.fid2 = *lf; mci->mci_opdata.create_mode = la->la_mode; mci->mci_opdata.mod_time = la->la_ctime; - mci->mci_opdata.fsuid = la->la_uid; - mci->mci_opdata.fsgid = la->la_gid; + if (uc && + ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) { + mci->mci_opdata.fsuid = uc->mu_fsuid; + mci->mci_opdata.fsgid = uc->mu_fsgid; + mci->mci_opdata.cap = uc->mu_cap; + } else { + mci->mci_opdata.fsuid = la->la_uid; + mci->mci_opdata.fsgid = la->la_gid; + mci->mci_opdata.cap = current->cap_effective; + } rc = md_rename(mc->mc_desc.cl_exp, &mci->mci_opdata, NULL, 0, name, strlen(name), &mci->mci_req); @@ -381,7 +426,8 @@ static int 
mdc_rename_tgt(const struct lu_context *ctx, } static int mdc_is_subdir(const struct lu_context *ctx, struct md_object *mo, - const struct lu_fid *fid, struct lu_fid *sfid) + const struct lu_fid *fid, struct lu_fid *sfid, + struct md_ucred *uc) { struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo)); struct mdc_thread_info *mci; @@ -419,4 +465,3 @@ static struct md_dir_operations mdc_dir_ops = { .mdo_is_subdir = mdc_is_subdir, .mdo_rename_tgt = mdc_rename_tgt }; - diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index cf341fb..11ca3a3 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -41,13 +41,14 @@ struct group_info { /* unused */ }; /* simple.c */ struct lvfs_ucred { - struct upcall_cache_entry *luc_uce; - __u32 luc_fsuid; - __u32 luc_fsgid; - __u32 luc_cap; - __u32 luc_suppgid1; - __u32 luc_suppgid2; - __u32 luc_umask; + __u32 luc_uid; + __u32 luc_gid; + __u32 luc_fsuid; + __u32 luc_fsgid; + __u32 luc_cap; + __u32 luc_umask; + struct group_info *luc_ginfo; + struct mdt_identity *luc_identity; }; struct lvfs_callback_ops { diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index e4730f8..1cc81d1 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -61,4 +61,6 @@ extern int llapi_quotacheck(char *mnt, int check_type); extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk); extern int llapi_quotactl(char *mnt, struct if_quotactl *qctl); extern int llapi_target_iterate(int type_num, char **obd_type, void *args, llapi_cb_t cb); +extern int llapi_getfacl(char *fname, char *cmd); +extern int llapi_setfacl(char *fname, char *cmd); #endif diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index eb7a1d7..8cc1454 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -449,7 +449,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define 
MDT_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \ - OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID) + OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID | \ + OBD_CONNECT_LCL_CLIENT | OBD_CONNECT_RMT_CLIENT) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ @@ -605,6 +606,7 @@ struct md_op_data { __u32 suppgids[2]; __u32 fsuid; __u32 fsgid; + __u32 cap; /* iattr fields and blocks. */ struct iattr attr; @@ -657,6 +659,9 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" #define XATTR_NAME_LOV "trusted.lov" +/* remote ACL */ +#define XATTR_NAME_LUSTRE_ACL "system.lustre_acl" + #define OBD_MD_FLID (0x00000001ULL) /* object ID */ #define OBD_MD_FLATIME (0x00000002ULL) /* access time */ #define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */ @@ -698,6 +703,7 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */ #define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ #define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ +#define OBD_MD_FLRMTPERM (0x0000010000000000ULL) /* remote permission */ #define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \ OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \ @@ -1022,6 +1028,7 @@ struct lustre_md { #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl; #endif + struct mdt_remote_perm *remote_perm; }; #define Q_QUOTACHECK 0x800100 @@ -1046,6 +1053,33 @@ struct obd_quotactl { extern void lustre_swab_obd_quotactl(struct obd_quotactl *q); +/* inode access permission for remote user, the inode info are omitted, + * for client knows them. 
*/ +struct mds_remote_perm { + __u32 rp_uid; + __u32 rp_gid; + __u32 rp_fsuid; + __u32 rp_fsgid; + __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */ +}; + +/* setxid permissions for mds_setxid_perm.mp_perm */ +#define LUSTRE_SETUID_PERM 0x01 +#define LUSTRE_SETGID_PERM 0x02 +#define LUSTRE_SETGRP_PERM 0x04 + +extern void lustre_swab_mds_remote_perm(struct mds_remote_perm *p); + +struct mdt_remote_perm { + __u32 rp_uid; + __u32 rp_gid; + __u32 rp_fsuid; + __u32 rp_fsgid; + __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */ +}; + +extern void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p); + struct mds_rec_setattr { __u32 sa_opcode; __u32 sa_fsuid; @@ -1104,6 +1138,8 @@ extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa); #define FMODE_SOM 04000000 #define FMODE_CLOSED 0 +#define MDS_OPEN_CREATED 00000010 + #define MDS_FMODE_EXEC 00000004 #define MDS_OPEN_CREAT 00000100 #define MDS_OPEN_EXCL 00000200 diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 7c76d9d..158be62 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -59,6 +59,8 @@ struct obd_statfs; #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) #define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *) #define LL_IOC_FLUSHCTX _IOW ('f', 166, long) +#define LL_IOC_GETFACL _IOWR('f', 167, struct rmtacl_ioctl_data *) +#define LL_IOC_SETFACL _IOWR('f', 168, struct rmtacl_ioctl_data *) #define LL_STATFS_MDC 1 #define LL_STATFS_LOV 2 @@ -74,8 +76,8 @@ struct obd_statfs; #define IOC_MDC_GETSTRIPE IOC_MDC_GETFILESTRIPE #define O_LOV_DELAY_CREATE 0100000000 /* hopefully this does not conflict */ -#define O_JOIN_FILE 0400000000 /* hopefully this does not conflict */ #define O_CHECK_STALE 0200000000 /* hopefully this does not conflict */ +#define O_JOIN_FILE 0400000000 /* hopefully this does not conflict */ #define LL_FILE_IGNORE_LOCK 0x00000001 #define LL_FILE_GROUP_LOCKED 0x00000002 @@ -177,15 
+179,36 @@ struct if_quotacheck { struct obd_uuid obd_uuid; }; -#define MDS_GRP_DOWNCALL_MAGIC 0x6d6dd620 +#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd620 + +/* setxid permission */ +#define N_SETXID_PERMS_MAX 64 + +struct setxid_perm_downcall_data { + __u64 pdd_nid; + __u32 pdd_perm; +}; + +struct identity_downcall_data { + __u32 idd_magic; + __u32 idd_err; + __u32 idd_uid; + __u32 idd_gid; + __u32 idd_nperms; + struct setxid_perm_downcall_data idd_perms[N_SETXID_PERMS_MAX]; + __u32 idd_ngroups; + __u32 idd_groups[0]; +}; + +#define RMTACL_DOWNCALL_MAGIC 0x6d6dd620 +#define RMTACL_SIZE_MAX (4096) -struct mds_grp_downcall_data { - __u32 mgd_magic; - __u32 mgd_err; - __u32 mgd_uid; - __u32 mgd_gid; - __u32 mgd_ngroups; - __u32 mgd_groups[0]; +struct rmtacl_downcall_data { + __u32 add_magic; + __u32 add_handle; + __u64 add_ino; + __u32 add_buflen; + __u8 add_buf[0]; }; #ifdef NEED_QUOTA_DEFS @@ -253,4 +276,12 @@ struct if_quotactl { # define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) #endif +/* remote acl ioctl */ +struct rmtacl_ioctl_data { + char *cmd; /* IN */ + unsigned long cmd_len; + char *res; /* OUT */ + unsigned long res_len; +}; + #endif /* _LUSTRE_USER_H */ diff --git a/lustre/include/lustre_cfg.h b/lustre/include/lustre_cfg.h index f916bc7..13fa4e5 100644 --- a/lustre/include/lustre_cfg.h +++ b/lustre/include/lustre_cfg.h @@ -256,4 +256,9 @@ static inline int lustre_cfg_sanity_check(void *buf, int len) RETURN(0); } +/* default value for nllu/nllg for llite */ +#define NOBODY_UID 99 +#define NOBODY_GID 99 +#define INVALID_UID (-1) + #endif // _LUSTRE_CFG_H diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 73fb1d8..e445bf4 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -141,6 +141,8 @@ struct lustre_mount_data { char *lmd_profile; /* client only */ char *lmd_sec_mdt; /* sec from mdt (to ost/mdt) */ char *lmd_sec_cli; /* sec from client (to ost/mdt) */ + uid_t lmd_nllu; /* 
non-lustre-local-user id */ + gid_t lmd_nllg; /* non-lustre-local-group id */ char *lmd_opts; /* lustre mount options (as opposed to _device_ mount options) */ __u32 *lmd_exclude; /* array of OSTs to ignore */ diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 5ee5a81..b5625c1 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -432,6 +432,7 @@ int ldlm_handle_convert(struct ptlrpc_request *req); int ldlm_handle_cancel(struct ptlrpc_request *req); int ldlm_del_waiting_lock(struct ldlm_lock *lock); int ldlm_refresh_waiting_lock(struct ldlm_lock *lock); +void ldlm_revoke_export_locks(struct obd_export *exp); int ldlm_get_ref(void); void ldlm_put_ref(int force); diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index ca5cabb..896d3e8 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -11,6 +11,8 @@ /* Data stored per client in the last_rcvd file. In le32 order. */ struct mds_client_data; struct mdt_client_data; +struct mds_idmap_table; +struct mdt_idmap_table; struct mds_export_data { struct list_head med_open_head; @@ -19,6 +21,10 @@ struct mds_export_data { __u64 med_ibits_known; loff_t med_lr_off; int med_lr_idx; + unsigned int med_rmtclient:1; /* remote client? */ + __u32 med_nllu; + __u32 med_nllg; + struct mds_idmap_table *med_idmap; }; struct mdt_export_data { @@ -29,7 +35,12 @@ struct mdt_export_data { __u64 med_ibits_known; loff_t med_lr_off; int med_lr_idx; + unsigned int med_rmtclient:1; /* remote client? 
*/ + __u32 med_nllu; + __u32 med_nllg; + struct mdt_idmap_table *med_idmap; }; + struct osc_creator { spinlock_t oscc_lock; struct list_head oscc_list; diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index b9a6903..8e865ca 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -449,8 +449,12 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE) #define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE) +#define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) + #define OBD_GET_VERSION _IOWR ('f', 144, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE) + #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) diff --git a/lustre/include/lustre_mdt.h b/lustre/include/lustre_mdt.h index 732d3a4..911c8b4 100644 --- a/lustre/include/lustre_mdt.h +++ b/lustre/include/lustre_mdt.h @@ -40,4 +40,26 @@ struct com_thread_info { struct req_capsule cti_pill; }; +/* id map */ +#define MDT_IDMAP_HASHSIZE (32) +#define MDT_IDMAP_HASHFUNC(id) ((id) & (MDT_IDMAP_HASHSIZE - 1)) + +enum mdt_idmap_idx { + RMT_UIDMAP_IDX, + LCL_UIDMAP_IDX, + RMT_GIDMAP_IDX, + LCL_GIDMAP_IDX, + MDT_IDMAP_N_HASHES +}; + +struct mdt_idmap_table { + spinlock_t mit_lock; + struct list_head mit_idmaps[MDT_IDMAP_N_HASHES] + [MDT_IDMAP_HASHSIZE]; +}; + +/* remote perm */ +extern int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, + struct ptlrpc_request **request); + #endif diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 8bf199e..3c65717 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -363,7 +363,7 @@ struct ptlrpc_request { rq_auth_gss:1, /* authenticated by gss */ rq_auth_remote:1, /* authed as remote user */ rq_auth_usr_root:1, /* authed as root */ - rq_auth_usr_mds:1; /* authed as mds */ + 
rq_auth_usr_mdt:1; /* authed as mdt */ uid_t rq_auth_uid; /* authed uid */ uid_t rq_auth_mapped_uid; /* authed uid mapped to */ diff --git a/lustre/include/lustre_param.h b/lustre/include/lustre_param.h index 95f8a73..c95744f 100644 --- a/lustre/include/lustre_param.h +++ b/lustre/include/lustre_param.h @@ -52,5 +52,10 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, #define PARAM_SEC_RPC PARAM_SEC"rpc." #define PARAM_SEC_RPC_MDT PARAM_SEC_RPC"mdt=" #define PARAM_SEC_RPC_CLI PARAM_SEC_RPC"cli=" +#define PARAM_ROOTSQUASH PARAM_SEC"rootsquash." +#define PARAM_ROOTSQUASH_UID PARAM_ROOTSQUASH"uid=" +#define PARAM_ROOTSQUASH_GID PARAM_ROOTSQUASH"gid=" +#define PARAM_ROOTSQUASH_SKIPS PARAM_ROOTSQUASH"skips=" +#define PARAM_GSS_SUPPORT PARAM_SEC"gss=" #endif // _LUSTRE_PARAM_H diff --git a/lustre/include/lustre_ucache.h b/lustre/include/lustre_ucache.h index 16b5c1a..5e33e88 100644 --- a/lustre/include/lustre_ucache.h +++ b/lustre/include/lustre_ucache.h @@ -5,6 +5,12 @@ #ifndef _UPCALL_CACHE_H #define _UPCALL_CACHE_H +#ifdef __KERNEL__ +#include +#else +struct group_info {}; +#endif + #define UC_CACHE_NEW 0x01 #define UC_CACHE_ACQUIRING 0x02 #define UC_CACHE_INVALID 0x04 @@ -27,22 +33,71 @@ #define UC_CACHE_CLEAR_INVALID(i) (i)->ue_flags &= ~UC_CACHE_INVALID #define UC_CACHE_CLEAR_EXPIRED(i) (i)->ue_flags &= ~UC_CACHE_EXPIRED +struct upcall_cache_entry; + +struct mdt_setxid_perm { + lnet_nid_t mp_nid; + __u32 mp_perm; +}; + +struct mdt_identity { + struct upcall_cache_entry *mi_uc_entry; + uid_t mi_uid; + gid_t mi_gid; + struct group_info *mi_ginfo; + int mi_nperms; + struct mdt_setxid_perm *mi_perms; +}; + +struct rmtacl_upcall_data { + char *aud_cmd; +}; + +struct mdt_rmtacl { + unsigned long ra_ino; + __u32 ra_handle; + char *ra_cmd; + char *ra_buf; +}; + struct upcall_cache_entry { struct list_head ue_hash; __u64 ue_key; - __u64 ue_primary; - struct group_info *ue_group_info; +// __u64 ue_primary; +// struct group_info *ue_group_info; atomic_t ue_refcount; int 
ue_flags; cfs_waitq_t ue_waitq; cfs_time_t ue_acquire_expire; cfs_time_t ue_expire; + union { + struct mdt_identity identity; + struct mdt_rmtacl acl; + } u; }; #define UC_CACHE_HASH_SIZE (128) #define UC_CACHE_HASH_INDEX(id) ((id) & (UC_CACHE_HASH_SIZE - 1)) #define UC_CACHE_UPCALL_MAXPATH (1024UL) +struct upcall_cache; + +struct upcall_cache_ops { + void (*init_entry)(struct upcall_cache_entry *, void *args); + void (*free_entry)(struct upcall_cache *, + struct upcall_cache_entry *); + int (*upcall_compare)(struct upcall_cache *, + struct upcall_cache_entry *, + __u64 key, void *args); + int (*downcall_compare)(struct upcall_cache *, + struct upcall_cache_entry *, + __u64 key, void *args); + int (*do_upcall)(struct upcall_cache *, + struct upcall_cache_entry *); + int (*parse_downcall)(struct upcall_cache *, + struct upcall_cache_entry *, void *); +}; + struct upcall_cache { struct list_head uc_hashtable[UC_CACHE_HASH_SIZE]; spinlock_t uc_lock; @@ -51,8 +106,23 @@ struct upcall_cache { char uc_upcall[UC_CACHE_UPCALL_MAXPATH]; cfs_time_t uc_acquire_expire; /* jiffies */ cfs_time_t uc_entry_expire; /* jiffies */ + struct upcall_cache_ops *uc_ops; }; +struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache, + __u64 key, void *args); +void upcall_cache_put_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry); +int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key, + void *args); +void upcall_cache_flush_idle(struct upcall_cache *cache); +void upcall_cache_flush_all(struct upcall_cache *cache); +void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args); +struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, + struct upcall_cache_ops *ops); +void upcall_cache_cleanup(struct upcall_cache *cache); + +#if 0 struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash, __u64 key, __u32 primary, __u32 ngroups, __u32 *groups); @@ -65,4 +135,5 @@ void 
upcall_cache_flush_all(struct upcall_cache *cache); struct upcall_cache *upcall_cache_init(const char *name); void upcall_cache_cleanup(struct upcall_cache *hash); +#endif #endif /* _UPCALL_CACHE_H */ diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index c2a2c4d..d049892 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -40,12 +40,36 @@ * super-class definitions. */ #include +#include struct md_device; struct md_device_operations; struct md_object; +typedef enum { + UCRED_INVALID = -1, + UCRED_INIT = 0, + UCRED_OLD = 1, + UCRED_NEW = 2, +} ucred_t; + +struct md_ucred { + ucred_t mu_valid; + __u32 mu_o_uid; + __u32 mu_o_gid; + __u32 mu_o_fsuid; + __u32 mu_o_fsgid; + __u32 mu_uid; + __u32 mu_gid; + __u32 mu_fsuid; + __u32 mu_fsgid; + __u32 mu_cap; + __u32 mu_umask; + struct group_info *mu_ginfo; + struct mdt_identity *mu_identity; +}; + /* metadata attributes */ enum ma_valid { MA_INODE = (1 << 0), @@ -92,96 +116,179 @@ struct md_create_spec { * Operations implemented for each md object (both directory and leaf). 
*/ struct md_object_operations { - int (*moo_attr_get)(const struct lu_context *ctxt, struct md_object *obj, - struct md_attr *attr); + int (*moo_permission)(const struct lu_context *ctxt, + struct md_object *obj, + int mask, + struct md_ucred *uc); + + int (*moo_attr_get)(const struct lu_context *ctxt, + struct md_object *obj, + struct md_attr *attr, + struct md_ucred *uc); - int (*moo_attr_set)(const struct lu_context *ctxt, struct md_object *obj, - const struct md_attr *attr); + int (*moo_attr_set)(const struct lu_context *ctxt, + struct md_object *obj, + const struct md_attr *attr, + struct md_ucred *uc); int (*moo_xattr_get)(const struct lu_context *ctxt, struct md_object *obj, - void *buf, int buf_len, const char *name); + void *buf, + int buf_len, + const char *name, + struct md_ucred *uc); int (*moo_xattr_list)(const struct lu_context *ctxt, struct md_object *obj, - void *buf, int buf_len); + void *buf, + int buf_len, + struct md_ucred *uc); int (*moo_xattr_set)(const struct lu_context *ctxt, - struct md_object *obj, const void *buf, - int buf_len, const char *name, int fl); + struct md_object *obj, + const void *buf, + int buf_len, + const char *name, + int fl, + struct md_ucred *uc); + int (*moo_xattr_del)(const struct lu_context *ctxt, - struct md_object *obj, const char *name); + struct md_object *obj, + const char *name, + struct md_ucred *uc); - int (*moo_readpage)(const struct lu_context *, struct md_object *, - const struct lu_rdpg *); + int (*moo_readpage)(const struct lu_context *ctxt, + struct md_object *obj, + const struct lu_rdpg *rdpg, + struct md_ucred *uc); int (*moo_readlink)(const struct lu_context *ctxt, struct md_object *obj, - void *buf, int buf_len); + void *buf, + int buf_len, + struct md_ucred *uc); /* part of cross-ref operation */ - int (*moo_object_create)(const struct lu_context *, - struct md_object *, + int (*moo_object_create)(const struct lu_context *ctxt, + struct md_object *obj, const struct md_create_spec *spec, - struct 
md_attr *); - int (*moo_ref_add)(const struct lu_context *, struct md_object *); - int (*moo_ref_del)(const struct lu_context *, struct md_object *, - struct md_attr *); - int (*moo_open)(const struct lu_context *, struct md_object *, int flags); - int (*moo_close)(const struct lu_context *, struct md_object *, - struct md_attr *); + struct md_attr *ma, + struct md_ucred *uc); + + int (*moo_ref_add)(const struct lu_context * ctxt, + struct md_object *obj, + struct md_ucred *uc); + + int (*moo_ref_del)(const struct lu_context *ctxt, + struct md_object *obj, + struct md_attr *ma, + struct md_ucred *uc); + + int (*moo_open)(const struct lu_context *ctxt, + struct md_object *obj, + int flag, + struct md_ucred *uc); + + int (*moo_close)(const struct lu_context *ctxt, + struct md_object *obj, + struct md_attr *ma, + struct md_ucred *uc); }; /* * Operations implemented for each directory object. */ struct md_dir_operations { - int (*mdo_is_subdir) (const struct lu_context *, struct md_object *, - const struct lu_fid *, struct lu_fid *); + int (*mdo_is_subdir) (const struct lu_context *ctxt, + struct md_object *obj, + const struct lu_fid *fid, + struct lu_fid *sfid, + struct md_ucred *uc); - int (*mdo_lookup)(const struct lu_context *, struct md_object *, - const char *, struct lu_fid *); - - int (*mdo_create)(const struct lu_context *, struct md_object *, - const char *child_name, struct md_object *, + int (*mdo_lookup)(const struct lu_context *ctxt, + struct md_object *obj, + const char *name, + struct lu_fid *fid, + struct md_ucred *uc); + + int (*mdo_create)(const struct lu_context *ctxt, + struct md_object *pobj, + const char *name, + struct md_object *child, const struct md_create_spec *spec, - struct md_attr *); + struct md_attr *ma, + struct md_ucred *uc); + /* This method is used for creating data object for this meta object*/ - int (*mdo_create_data)(const struct lu_context *cx, struct md_object *p, + int (*mdo_create_data)(const struct lu_context *ctxt, + struct 
md_object *p, struct md_object *o, const struct md_create_spec *spec, - struct md_attr *ma); - int (*mdo_rename)(const struct lu_context *ctxt, - struct md_object *spobj, struct md_object *tpobj, - const struct lu_fid *lf, const char *sname, - struct md_object *tobj, const char *tname, - struct md_attr *); - - int (*mdo_link)(const struct lu_context *, struct md_object *, - struct md_object *, const char *, struct md_attr *); + struct md_attr *ma, + struct md_ucred *uc); - int (*mdo_unlink)(const struct lu_context *, struct md_object *, - struct md_object *, const char *, struct md_attr *); + int (*mdo_rename)(const struct lu_context *ctxt, + struct md_object *spobj, + struct md_object *tpobj, + const struct lu_fid *lf, + const char *sname, + struct md_object *tobj, + const char *tname, + struct md_attr *ma, + struct md_ucred *uc); + + int (*mdo_link)(const struct lu_context *ctxt, + struct md_object *tgt_obj, + struct md_object *src_obj, + const char *name, + struct md_attr *ma, + struct md_ucred *uc); + + int (*mdo_unlink)(const struct lu_context *ctxt, + struct md_object *pobj, + struct md_object *cobj, + const char *name, + struct md_attr *ma, + struct md_ucred *uc); /* partial ops for cross-ref case */ - int (*mdo_name_insert)(const struct lu_context *, struct md_object *, - const char *, const struct lu_fid *, int); - int (*mdo_name_remove)(const struct lu_context *, struct md_object *, - const char *); - int (*mdo_rename_tgt)(const struct lu_context *, struct md_object *, - struct md_object *, const struct lu_fid *, - const char *, struct md_attr *); + int (*mdo_name_insert)(const struct lu_context *ctxt, + struct md_object *obj, + const char *name, + const struct lu_fid *fid, + int isdir, + struct md_ucred *uc); + + int (*mdo_name_remove)(const struct lu_context *ctxt, + struct md_object *obj, const char *name, + struct md_ucred *uc); + + int (*mdo_rename_tgt)(const struct lu_context *ctxt, + struct md_object *pobj, + struct md_object *tobj, + const struct 
lu_fid *fid, + const char *name, + struct md_attr *ma, + struct md_ucred *uc); }; struct md_device_operations { /* meta-data device related handlers. */ int (*mdo_root_get)(const struct lu_context *ctx, - struct md_device *m, struct lu_fid *f); + struct md_device *m, + struct lu_fid *f, + struct md_ucred *uc); + int (*mdo_maxsize_get)(const struct lu_context *ctx, - struct md_device *m, int *md_size, - int *cookie_size); + struct md_device *m, + int *md_size, + int *cookie_size, + struct md_ucred *uc); + int (*mdo_statfs)(const struct lu_context *ctx, - struct md_device *m, struct kstatfs *sfs); + struct md_device *m, + struct kstatfs *sfs, + struct md_ucred *uc); }; enum md_upcall_event { @@ -252,187 +359,255 @@ static inline void md_device_fini(struct md_device *md) } /* md operations */ -static inline int mo_attr_get(const struct lu_context *cx, struct md_object *m, - struct md_attr *at) +static inline int mo_permission(const struct lu_context *cx, + struct md_object *m, + int mask, + struct md_ucred *uc) { - LASSERT(m->mo_ops->moo_attr_get); - return m->mo_ops->moo_attr_get(cx, m, at); + LASSERT(m->mo_ops->moo_permission); + return m->mo_ops->moo_permission(cx, m, mask, uc); } -static inline int mo_readlink(const struct lu_context *cx, struct md_object *m, - void *buf, int buf_len) +static inline int mo_attr_get(const struct lu_context *cx, + struct md_object *m, + struct md_attr *at, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_attr_get); - return m->mo_ops->moo_readlink(cx, m, buf, buf_len); + return m->mo_ops->moo_attr_get(cx, m, at, uc); +} + +static inline int mo_readlink(const struct lu_context *cx, + struct md_object *m, + void *buf, + int buf_len, + struct md_ucred *uc) +{ + LASSERT(m->mo_ops->moo_readlink); + return m->mo_ops->moo_readlink(cx, m, buf, buf_len, uc); } -static inline int mo_attr_set(const struct lu_context *cx, struct md_object *m, - const struct md_attr *at) +static inline int mo_attr_set(const struct lu_context *cx, + struct 
md_object *m, + const struct md_attr *at, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_attr_set); - return m->mo_ops->moo_attr_set(cx, m, at); + return m->mo_ops->moo_attr_set(cx, m, at, uc); } static inline int mo_xattr_get(const struct lu_context *cx, struct md_object *m, - void *buf, int buf_len, const char *name) + void *buf, + int buf_len, + const char *name, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_xattr_get); - return m->mo_ops->moo_xattr_get(cx, m, buf, buf_len, name); + return m->mo_ops->moo_xattr_get(cx, m, buf, buf_len, name, uc); } static inline int mo_xattr_del(const struct lu_context *cx, struct md_object *m, - const char *name) + const char *name, + struct md_ucred *uc) { - LASSERT(m->mo_ops->moo_xattr_set); - return m->mo_ops->moo_xattr_del(cx, m, name); + LASSERT(m->mo_ops->moo_xattr_del); + return m->mo_ops->moo_xattr_del(cx, m, name, uc); } static inline int mo_xattr_set(const struct lu_context *cx, - struct md_object *m, const void *buf, - int buf_len, const char *name, int flags) + struct md_object *m, + const void *buf, + int buf_len, + const char *name, + int flags, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_xattr_set); - return m->mo_ops->moo_xattr_set(cx, m, buf, buf_len, name, flags); + return m->mo_ops->moo_xattr_set(cx, m, buf, buf_len, name, flags, uc); } static inline int mo_xattr_list(const struct lu_context *cx, - struct md_object *m, - void *buf, int buf_len) + struct md_object *m, + void *buf, + int buf_len, + struct md_ucred *uc) { - LASSERT(m->mo_ops->moo_xattr_get); - return m->mo_ops->moo_xattr_list(cx, m, buf, buf_len); + LASSERT(m->mo_ops->moo_xattr_list); + return m->mo_ops->moo_xattr_list(cx, m, buf, buf_len, uc); } -static inline int mo_open(const struct lu_context *cx, struct md_object *m, - int flags) +static inline int mo_open(const struct lu_context *cx, + struct md_object *m, + int flags, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_open); - return m->mo_ops->moo_open(cx, m, flags); + return 
m->mo_ops->moo_open(cx, m, flags, uc); } -static inline int mo_close(const struct lu_context *cx, struct md_object *m, - struct md_attr *ma) +static inline int mo_close(const struct lu_context *cx, + struct md_object *m, + struct md_attr *ma, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_close); - return m->mo_ops->moo_close(cx, m, ma); + return m->mo_ops->moo_close(cx, m, ma, uc); } -static inline int mo_readpage(const struct lu_context *cx, struct md_object *m, - const struct lu_rdpg *rdpg) +static inline int mo_readpage(const struct lu_context *cx, + struct md_object *m, + const struct lu_rdpg *rdpg, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_readpage); - return m->mo_ops->moo_readpage(cx, m, rdpg); + return m->mo_ops->moo_readpage(cx, m, rdpg, uc); } static inline int mo_object_create(const struct lu_context *cx, struct md_object *m, const struct md_create_spec *spc, - struct md_attr *at) + struct md_attr *at, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_object_create); - return m->mo_ops->moo_object_create(cx, m, spc, at); + return m->mo_ops->moo_object_create(cx, m, spc, at, uc); } static inline int mo_ref_add(const struct lu_context *cx, - struct md_object *m) + struct md_object *m, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_ref_add); - return m->mo_ops->moo_ref_add(cx, m); + return m->mo_ops->moo_ref_add(cx, m, uc); } static inline int mo_ref_del(const struct lu_context *cx, - struct md_object *m, struct md_attr *ma) + struct md_object *m, + struct md_attr *ma, + struct md_ucred *uc) { LASSERT(m->mo_ops->moo_ref_del); - return m->mo_ops->moo_ref_del(cx, m, ma); + return m->mo_ops->moo_ref_del(cx, m, ma, uc); } -static inline int mdo_lookup(const struct lu_context *cx, struct md_object *p, - const char *name, struct lu_fid *f) +static inline int mdo_lookup(const struct lu_context *cx, + struct md_object *p, + const char *name, + struct lu_fid *f, + struct md_ucred *uc) { LASSERT(p->mo_dir_ops->mdo_lookup); - return 
p->mo_dir_ops->mdo_lookup(cx, p, name, f); + return p->mo_dir_ops->mdo_lookup(cx, p, name, f, uc); } -static inline int mdo_create(const struct lu_context *cx, struct md_object *p, - const char *child_name, struct md_object *c, +static inline int mdo_create(const struct lu_context *cx, + struct md_object *p, + const char *child_name, + struct md_object *c, const struct md_create_spec *spc, - struct md_attr *at) + struct md_attr *at, + struct md_ucred *uc) { LASSERT(c->mo_dir_ops->mdo_create); - return c->mo_dir_ops->mdo_create(cx, p, child_name, c, spc, at); + return c->mo_dir_ops->mdo_create(cx, p, child_name, c, spc, at, uc); } + static inline int mdo_create_data(const struct lu_context *cx, - struct md_object *p, struct md_object *c, + struct md_object *p, + struct md_object *c, const struct md_create_spec *spec, - struct md_attr *ma) + struct md_attr *ma, + struct md_ucred *uc) { LASSERT(c->mo_dir_ops->mdo_create_data); - return c->mo_dir_ops->mdo_create_data(cx, p, c, spec, ma); + return c->mo_dir_ops->mdo_create_data(cx, p, c, spec, ma, uc); } static inline int mdo_rename(const struct lu_context *cx, - struct md_object *sp, struct md_object *tp, - const struct lu_fid *lf, const char *sname, - struct md_object *t, const char *tname, - struct md_attr *ma) + struct md_object *sp, + struct md_object *tp, + const struct lu_fid *lf, + const char *sname, + struct md_object *t, + const char *tname, + struct md_attr *ma, + struct md_ucred *uc) { LASSERT(tp->mo_dir_ops->mdo_rename); - return tp->mo_dir_ops->mdo_rename(cx, sp, tp, lf, sname, t, tname, ma); + return tp->mo_dir_ops->mdo_rename(cx, sp, tp, lf, sname, t, tname, + ma, uc); } -static inline int mdo_is_subdir(const struct lu_context *cx, struct md_object *mo, - const struct lu_fid *fid, struct lu_fid *sfid) +static inline int mdo_is_subdir(const struct lu_context *cx, + struct md_object *mo, + const struct lu_fid *fid, + struct lu_fid *sfid, + struct md_ucred *uc) { LASSERT(mo->mo_dir_ops->mdo_is_subdir); - 
return mo->mo_dir_ops->mdo_is_subdir(cx, mo, fid, sfid); + return mo->mo_dir_ops->mdo_is_subdir(cx, mo, fid, sfid, uc); } -static inline int mdo_link(const struct lu_context *cx, struct md_object *p, - struct md_object *s, const char *name, - struct md_attr *ma) +static inline int mdo_link(const struct lu_context *cx, + struct md_object *p, + struct md_object *s, + const char *name, + struct md_attr *ma, + struct md_ucred *uc) { LASSERT(s->mo_dir_ops->mdo_link); - return s->mo_dir_ops->mdo_link(cx, p, s, name, ma); + return s->mo_dir_ops->mdo_link(cx, p, s, name, ma, uc); } -static inline int mdo_unlink(const struct lu_context *cx, struct md_object *p, - struct md_object *c, const char *name, - struct md_attr *ma) +static inline int mdo_unlink(const struct lu_context *cx, + struct md_object *p, + struct md_object *c, + const char *name, + struct md_attr *ma, + struct md_ucred *uc) { LASSERT(c->mo_dir_ops->mdo_unlink); - return c->mo_dir_ops->mdo_unlink(cx, p, c, name, ma); + return c->mo_dir_ops->mdo_unlink(cx, p, c, name, ma, uc); } static inline int mdo_name_insert(const struct lu_context *cx, - struct md_object *p, const char *name, - const struct lu_fid *f, int isdir) + struct md_object *p, + const char *name, + const struct lu_fid *f, + int isdir, + struct md_ucred *uc) { LASSERT(p->mo_dir_ops->mdo_name_insert); - return p->mo_dir_ops->mdo_name_insert(cx, p, name, f, isdir); + return p->mo_dir_ops->mdo_name_insert(cx, p, name, f, isdir, uc); } static inline int mdo_name_remove(const struct lu_context *cx, struct md_object *p, - const char *name) + const char *name, + struct md_ucred *uc) { LASSERT(p->mo_dir_ops->mdo_name_remove); - return p->mo_dir_ops->mdo_name_remove(cx, p, name); + return p->mo_dir_ops->mdo_name_remove(cx, p, name, uc); } static inline int mdo_rename_tgt(const struct lu_context *cx, - struct md_object *p, struct md_object *t, - const struct lu_fid *lf, const char *name, - struct md_attr *ma) + struct md_object *p, + struct md_object *t, + 
const struct lu_fid *lf, + const char *name, + struct md_attr *ma, + struct md_ucred *uc) { if (t) { LASSERT(t->mo_dir_ops->mdo_rename_tgt); - return t->mo_dir_ops->mdo_rename_tgt(cx, p, t, lf, name, ma); + return t->mo_dir_ops->mdo_rename_tgt(cx, p, t, lf, name, + ma, uc); } else { LASSERT(p->mo_dir_ops->mdo_rename_tgt); - return p->mo_dir_ops->mdo_rename_tgt(cx, p, t, lf, name, ma); + return p->mo_dir_ops->mdo_rename_tgt(cx, p, t, lf, name, + ma, uc); } } diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 140c51f..65cd41a 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -507,6 +507,15 @@ struct mgs_obd { struct semaphore mgs_sem; }; +/* hah, upper limit 64 should be enough */ +#define N_NOSQUASH_NIDS 64 +struct rootsquash_info { + uid_t rsi_uid; + gid_t rsi_gid; + int rsi_n_nosquash_nids; + lnet_nid_t rsi_nosquash_nids[N_NOSQUASH_NIDS]; +}; + struct mds_obd { /* NB this field MUST be first */ struct obd_device_target mds_obt; @@ -545,7 +554,7 @@ struct mds_obd { struct file *mds_lov_objid_filp; struct file *mds_health_check_filp; unsigned long *mds_client_bitmap; - struct upcall_cache *mds_group_hash; +// struct upcall_cache *mds_group_hash; struct lustre_quota_info mds_quota_info; struct semaphore mds_qonoff_sem; @@ -557,6 +566,11 @@ struct mds_obd { /* For CMD add mds_num */ int mds_num; + struct upcall_cache *mds_identity_cache; + struct upcall_cache *mds_rmtacl_cache; + + /* root squash */ + struct rootsquash_info *mds_rootsquash_info; }; struct echo_obd { @@ -1190,6 +1204,9 @@ struct md_ops { int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *, int flags, void *opaque); + int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *, + struct ptlrpc_request **); + /* * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. 
Also, add a diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index e7602db..94b3743 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -415,8 +415,9 @@ static inline int obd_precleanup(struct obd_device *obd, lu_context_exit(&ctx); lu_context_fini(&ctx); } - } else + } else { rc = 0; + } } else #endif { @@ -1896,6 +1897,16 @@ static inline int md_init_ea_size(struct obd_export *exp, cookiesize)); } +static inline int md_get_remote_perm(struct obd_export *exp, + const struct lu_fid *fid, + struct ptlrpc_request **request) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, get_remote_perm); + MD_COUNTER_INCREMENT(exp->exp_obd, get_remote_perm); + RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, request)); +} + /* OBD Metadata Support */ extern int obd_init_caches(void); extern void obd_cleanup_caches(void); diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 3cfed08..955c6cd 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -184,9 +184,10 @@ extern int obd_race_state; #define OBD_FAIL_FLD 0x1100 #define OBD_FAIL_FLD_QUERY_NET 0x1101 -#define OBD_FAIL_SEC_CTX_INIT_NET 0x1200 -#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1210 -#define OBD_FAIL_SEC_CTX_FINI_NET 0x1220 +#define OBD_FAIL_SEC_CTX 0x1200 +#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201 +#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1202 +#define OBD_FAIL_SEC_CTX_FINI_NET 0x1203 #define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00 diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index e683de7..29c9360 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1500,6 +1500,61 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) RETURN(0); } +void ldlm_revoke_export_locks(struct obd_export *exp) +{ + struct list_head *locklist = &exp->exp_ldlm_data.led_held_locks; + struct list_head rpc_list; + struct ldlm_lock *lock, *next; + struct ldlm_lock_desc desc; + + ENTRY; + INIT_LIST_HEAD(&rpc_list); + + 
spin_lock(&exp->exp_ldlm_data.led_lock); + list_for_each_entry_safe(lock, next, locklist, l_export_chain) { + lock_res_and_lock(lock); + if (lock->l_req_mode != lock->l_granted_mode) { + unlock_res_and_lock(lock); + continue; + } + + LASSERT(lock->l_resource); + if (lock->l_resource->lr_type != LDLM_IBITS && + lock->l_resource->lr_type != LDLM_PLAIN) { + unlock_res_and_lock(lock); + continue; + } + + if (lock->l_flags & LDLM_FL_AST_SENT) { + unlock_res_and_lock(lock); + continue; + } + + LASSERT(lock->l_blocking_ast); + LASSERT(!lock->l_blocking_lock); + + lock->l_flags |= LDLM_FL_AST_SENT; + unlock_res_and_lock(lock); + + list_move(&lock->l_export_chain, &rpc_list); + } + spin_unlock(&exp->exp_ldlm_data.led_lock); + + while (!list_empty(&rpc_list)) { + lock = list_entry(rpc_list.next, struct ldlm_lock, + l_export_chain); + list_del_init(&lock->l_export_chain); + + /* the desc just pretend to exclusive */ + ldlm_lock2desc(lock, &desc); + desc.l_req_mode = LCK_EX; + desc.l_granted_mode = 0; + + lock->l_blocking_ast(lock, &desc, NULL, LDLM_CB_BLOCKING); + } + EXIT; +} + #ifdef __KERNEL__ static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp) { @@ -1853,6 +1908,7 @@ EXPORT_SYMBOL(ldlm_del_waiting_lock); EXPORT_SYMBOL(ldlm_get_ref); EXPORT_SYMBOL(ldlm_put_ref); EXPORT_SYMBOL(ldlm_refresh_waiting_lock); +EXPORT_SYMBOL(ldlm_revoke_export_locks); /* ldlm_resource.c */ EXPORT_SYMBOL(ldlm_namespace_new); diff --git a/lustre/llite/Makefile.in b/lustre/llite/Makefile.in index d67f148..2a671f3 100644 --- a/lustre/llite/Makefile.in +++ b/lustre/llite/Makefile.in @@ -1,5 +1,5 @@ MODULES := lustre -lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o +lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o remote_perm.o ifeq ($(PATCHLEVEL),4) lustre-objs += rw24.o super.o diff 
--git a/lustre/llite/dir.c b/lustre/llite/dir.c index 2e86f72..81f0dc7 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -1100,7 +1100,22 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, } case LL_IOC_FLUSHCTX: RETURN(ll_flush_ctx(inode)); + case LL_IOC_GETFACL: { + struct rmtacl_ioctl_data ioc; + if (copy_from_user(&ioc, (void *)arg, sizeof(ioc))) + RETURN(-EFAULT); + + RETURN(ll_ioctl_getfacl(inode, &ioc)); + } + case LL_IOC_SETFACL: { + struct rmtacl_ioctl_data ioc; + + if (copy_from_user(&ioc, (void *)arg, sizeof(ioc))) + RETURN(-EFAULT); + + RETURN(ll_ioctl_setfacl(inode, &ioc)); + } default: RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg)); } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 9af7595..0f55396 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2139,6 +2139,22 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, */ case LL_IOC_FLUSHCTX: RETURN(ll_flush_ctx(inode)); + case LL_IOC_GETFACL: { + struct rmtacl_ioctl_data ioc; + + if (copy_from_user(&ioc, (void *)arg, sizeof(ioc))) + RETURN(-EFAULT); + + RETURN(ll_ioctl_getfacl(inode, &ioc)); + } + case LL_IOC_SETFACL: { + struct rmtacl_ioctl_data ioc; + + if (copy_from_user(&ioc, (void *)arg, sizeof(ioc))) + RETURN(-EFAULT); + + RETURN(ll_ioctl_setfacl(inode, &ioc)); + } default: RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, (void *)arg)); @@ -2550,6 +2566,8 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) { CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n", inode->i_ino, inode->i_generation, inode, mask); + if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) + return lustre_check_remote_perm(inode, mask); return generic_permission(inode, mask, lustre_check_acl); } #else @@ -2565,6 +2583,9 @@ int ll_inode_permission(struct inode *inode, int mask) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n", inode->i_ino, inode->i_generation, inode, mask); + if 
(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) + return lustre_check_remote_perm(inode, mask); + if ((mask & MAY_WRITE) && IS_RDONLY(inode) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 54c5c07..8eff9fa 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -57,6 +57,21 @@ extern struct file_operations ll_pgcache_seq_fops; #define LLI_INODE_MAGIC 0x111d0de5 #define LLI_INODE_DEAD 0xdeadd00d +/* remote client permission cache */ +#define REMOTE_PERM_HASHSIZE 16 + +/* llite setxid/access permission for user on remote client */ +struct ll_remote_perm { + struct hlist_node lrp_list; + uid_t lrp_uid; + gid_t lrp_gid; + uid_t lrp_fsuid; + gid_t lrp_fsgid; + int lrp_access_perm; /* MAY_READ/WRITE/EXEC, this + is access permission with + lrp_fsuid/lrp_fsgid. */ +}; + enum lli_flags { /* MDS has an authority for the Size-on-MDS attributes. */ LLIF_MDS_SIZE_LOCK = (1 << 0), @@ -96,6 +111,10 @@ struct ll_inode_info { struct posix_acl *lli_posix_acl; + /* remote permission hash */ + struct hlist_head *lli_remote_perms; + struct semaphore lli_rmtperm_sem; + struct list_head lli_dead_list; struct semaphore lli_och_sem; /* Protects access to och pointers @@ -205,6 +224,7 @@ struct ll_rw_process_info { #define LL_SBI_USER_XATTR 0x08 /* support user xattr */ #define LL_SBI_ACL 0x10 /* support ACL */ #define LL_SBI_JOIN 0x20 /* support JOIN */ +#define LL_SBI_RMT_CLIENT 0x40 /* remote client */ struct ll_sb_info { struct list_head ll_list; @@ -547,6 +567,8 @@ struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi, int ll_obd_statfs(struct inode *inode, void *arg); int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); int ll_process_config(struct lustre_cfg *lcfg); +int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc); +int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc); /* 
llite/llite_nfs.c */ extern struct export_operations lustre_export_operations; @@ -685,6 +707,15 @@ ssize_t ll_getxattr(struct dentry *dentry, const char *name, ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); int ll_removexattr(struct dentry *dentry, const char *name); +/* llite/remote_perm.c */ +extern kmem_cache_t *ll_remote_perm_cachep; +extern kmem_cache_t *ll_rmtperm_hash_cachep; + +struct hlist_head *alloc_rmtperm_hash(void); +void free_rmtperm_hash(struct hlist_head *hash); +int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm); +int lustre_check_remote_perm(struct inode *inode, int mask); + /* llite/llite_fid.c*/ int ll_fid_md_init(struct ll_sb_info *sbi); int ll_fid_dt_init(struct ll_sb_info *sbi); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 87b7b42..bd24410 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -149,7 +149,8 @@ static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) } static int client_common_fill_super(struct super_block *sb, - char *md, char *dt) + char *md, char *dt, + uid_t nllu, gid_t nllg) { struct inode *root = 0; struct ll_sb_info *sbi = ll_s2sbi(sb); @@ -200,6 +201,13 @@ static int client_common_fill_super(struct super_block *sb, /* real client */ data->ocd_connect_flags |= OBD_CONNECT_REAL; + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT; + data->ocd_nllu = nllu; + data->ocd_nllg = nllg; + } else { + data->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT; + } err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data); if (err == -EBUSY) { @@ -238,12 +246,23 @@ static int client_common_fill_super(struct super_block *sb, sb->s_flags |= MS_POSIXACL; #endif sbi->ll_flags |= LL_SBI_ACL; - } else + } else { sbi->ll_flags &= ~LL_SBI_ACL; + } if (data->ocd_connect_flags & OBD_CONNECT_JOIN) sbi->ll_flags |= LL_SBI_JOIN; + if ((sbi->ll_flags & LL_SBI_RMT_CLIENT) && + 
!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT)) { + /* sometimes local client claims to be remote, but mds + * will disagree when client gss not applied. */ + LCONSOLE_INFO("client claims to be remote, but server " + "rejected, forced to be local\n"); + /* ll_flags holds LL_SBI_* bits, not OBD_CONNECT_* bits */ + sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; + } + #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) /* We set sb->s_dev equal on all lustre clients in order to support * NFS export clustering. NFSD requires that the FSID be the same @@ -687,6 +706,11 @@ static int ll_options(char *options, int *flags) if (tmp) { goto next; } + tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT); + if (tmp) { + *flags |= tmp; + goto next; + } LCONSOLE_ERROR("Unknown option '%s', won't mount.\n", s1); RETURN(-EINVAL); @@ -718,6 +742,7 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0; lli->lli_open_fd_exec_count = 0; INIT_LIST_HEAD(&lli->lli_dead_list); + sema_init(&lli->lli_rmtperm_sem, 1); } /* COMPAT_146 */ @@ -991,7 +1016,9 @@ int ll_fill_super(struct super_block *sb) sprintf(md, "%s-%s", lprof->lp_md, ll_instance); /* connections, registrations, sb setup */ - err = client_common_fill_super(sb, md, dt); + err = client_common_fill_super(sb, md, dt, + lsi->lsi_lmd->lmd_nllu, + lsi->lsi_lmd->lmd_nllg); out_free: if (md) @@ -1167,10 +1194,17 @@ void ll_clear_inode(struct inode *inode) #ifdef CONFIG_FS_POSIX_ACL if (lli->lli_posix_acl) { LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1); +// LASSERT(lli->lli_remote_perms == NULL); posix_acl_release(lli->lli_posix_acl); lli->lli_posix_acl = NULL; } #endif + if (lli->lli_remote_perms) { + LASSERT(sbi->ll_flags & LL_SBI_RMT_CLIENT); + LASSERT(lli->lli_posix_acl == NULL); + free_rmtperm_hash(lli->lli_remote_perms); + lli->lli_remote_perms = NULL; + } lli->lli_inode_magic = LLI_INODE_DEAD; @@ -1670,6 +1704,8 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
spin_unlock(&lli->lli_lock); } #endif + if (body->valid & OBD_MD_FLRMTPERM) + ll_update_remote_perm(inode, md->remote_perm); if (body->valid & OBD_MD_FLATIME && body->atime > LTIME_S(inode->i_atime)) @@ -2149,3 +2185,87 @@ int ll_process_config(struct lustre_cfg *lcfg) return(rc); } +int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc) +{ + struct ptlrpc_request *req = NULL; + struct mds_body *body; + char *cmd, *buf; + int rc, buflen; + ENTRY; + + /* ioc comes from userspace: fail the ioctl instead of asserting */ + if (!ioc->cmd || !ioc->cmd_len || !ioc->res || !ioc->res_len) + RETURN(-EINVAL); + + OBD_ALLOC(cmd, ioc->cmd_len); + if (!cmd) + RETURN(-ENOMEM); + if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len)) + GOTO(out, rc = -EFAULT); + + rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), + OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd, + ioc->cmd_len, ioc->res_len, 0, &req); + if (rc < 0) { + CERROR("mdc_getxattr %s [%s] failed: %d\n", + XATTR_NAME_LUSTRE_ACL, cmd, rc); + GOTO(out, rc); + } + + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); + LASSERT(body); + + /* the reply string is the buffer after the mds_body */ + buflen = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF + 1); + LASSERT(buflen <= ioc->res_len); + buf = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF + 1, ioc->res_len); + LASSERT(buf); + if (copy_to_user(ioc->res, buf, buflen)) + GOTO(out, rc = -EFAULT); + EXIT; +out: + if (req) + ptlrpc_req_finished(req); + OBD_FREE(cmd, ioc->cmd_len); + return rc; +} + +int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc) +{ + struct ptlrpc_request *req = NULL; + char *cmd, *buf; + int buflen, rc; + ENTRY; + + /* ioc comes from userspace: fail the ioctl instead of asserting */ + if (!ioc->cmd || !ioc->cmd_len || !ioc->res || !ioc->res_len) + RETURN(-EINVAL); + + OBD_ALLOC(cmd, ioc->cmd_len); + if (!cmd) + RETURN(-ENOMEM); + if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len)) + GOTO(out, rc = -EFAULT); + + rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), + OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd, + ioc->cmd_len, ioc->res_len, 0, &req); + if (rc) { + CERROR("mdc_setxattr %s [%s] failed: %d\n", + 
XATTR_NAME_LUSTRE_ACL, cmd, rc); + GOTO(out, rc); + } + + buflen = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF); + LASSERT(buflen <= ioc->res_len); + buf = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF, ioc->res_len); + LASSERT(buf); + if (copy_to_user(ioc->res, buf, buflen)) + GOTO(out, rc = -EFAULT); + EXIT; +out: + if (req) + ptlrpc_req_finished(req); + OBD_FREE(cmd, ioc->cmd_len); + return rc; +} diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 12f06f5..d9f4c15 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -315,6 +315,9 @@ void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1, op_data->namelen = namelen; op_data->create_mode = mode; op_data->mod_time = CURRENT_SECONDS; + op_data->fsuid = current->fsuid; + op_data->fsgid = current->fsgid; + op_data->cap = current->cap_effective; } static void ll_d_add(struct dentry *de, struct inode *inode) diff --git a/lustre/llite/remote_perm.c b/lustre/llite/remote_perm.c new file mode 100644 index 0000000..6eaf01e --- /dev/null +++ b/lustre/llite/remote_perm.c @@ -0,0 +1,285 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Permission Cache for Remote Client + * Author: Lai Siyao + * Author: Fan Yong + * + * Copyright (c) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "llite_internal.h" + +kmem_cache_t *ll_remote_perm_cachep = NULL; +kmem_cache_t *ll_rmtperm_hash_cachep = NULL; + +static inline struct ll_remote_perm *alloc_ll_remote_perm(void) +{ + struct ll_remote_perm *lrp; + + OBD_SLAB_ALLOC(lrp, ll_remote_perm_cachep, SLAB_KERNEL, sizeof(*lrp)); + if (lrp) + INIT_HLIST_NODE(&lrp->lrp_list); + return lrp; +} + +static inline void free_ll_remote_perm(struct ll_remote_perm *lrp) +{ + if (!hlist_unhashed(&lrp->lrp_list)) + hlist_del(&lrp->lrp_list); + OBD_SLAB_FREE(lrp, ll_remote_perm_cachep, sizeof(*lrp)); +} + +struct hlist_head *alloc_rmtperm_hash(void) +{ + struct hlist_head *hash; + int i; + + OBD_SLAB_ALLOC(hash, ll_rmtperm_hash_cachep, SLAB_KERNEL, + REMOTE_PERM_HASHSIZE * sizeof(*hash)); + + if (!hash) + return NULL; + + for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) + INIT_HLIST_HEAD(hash + i); + + return hash; +} + +void free_rmtperm_hash(struct hlist_head *hash) +{ + int i; + struct ll_remote_perm *lrp; + struct hlist_node *node, *next; + + for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) + hlist_for_each_entry_safe(lrp, node, next, hash + i, lrp_list) + free_ll_remote_perm(lrp); + OBD_SLAB_FREE(hash, ll_rmtperm_hash_cachep, + REMOTE_PERM_HASHSIZE * sizeof(*hash)); +} + +static inline int remote_perm_hashfunc(uid_t uid) +{ + return uid & (REMOTE_PERM_HASHSIZE - 1); +} + +/* NB: setxid permission is not checked here, instead it's done on + * MDS when client get remote permission. (lookup/mdc_get_remote_perm). 
*/ +static int do_check_remote_perm(struct ll_inode_info *lli, int mask) +{ + struct hlist_head *head; + struct ll_remote_perm *lrp; + struct hlist_node *node; + int found = 0, rc; + ENTRY; + + if (!lli->lli_remote_perms) + RETURN(-ENOENT); + + head = lli->lli_remote_perms + remote_perm_hashfunc(current->uid); + + spin_lock(&lli->lli_lock); + hlist_for_each_entry(lrp, node, head, lrp_list) { + if (lrp->lrp_uid != current->uid) + continue; + if (lrp->lrp_gid != current->gid) + continue; + if (lrp->lrp_fsuid != current->fsuid) + continue; + if (lrp->lrp_fsgid != current->fsgid) + continue; + found = 1; + break; + } + + if (!found) + GOTO(out, rc = -ENOENT); + + CDEBUG(D_SEC, "found remote perm: %u/%u/%u/%u - %#x\n", + lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid, + lrp->lrp_access_perm); + rc = (lrp->lrp_access_perm & mask) == mask ? 0 : -EACCES; + GOTO(out, rc); +out: + spin_unlock(&lli->lli_lock); + return rc; +} + +int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_remote_perm *lrp, *tmp = NULL; + struct hlist_head *head, *perm_hash = NULL; + struct hlist_node *node; + ENTRY; + + LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT); + + if (perm->rp_uid != current->uid || + perm->rp_gid != current->gid || + perm->rp_fsuid != current->fsuid || + perm->rp_fsgid != current->fsgid) { + /* user might setxid in this small period */ + CDEBUG(D_SEC, + "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n", + perm->rp_uid, perm->rp_gid, perm->rp_fsuid, + perm->rp_fsgid, current->uid, current->gid, + current->fsuid, current->fsgid); + RETURN(-EAGAIN); + } + + if (!lli->lli_remote_perms) { + perm_hash = alloc_rmtperm_hash(); + if (perm_hash == NULL) { + CERROR("alloc lli_remote_perms failed!\n"); + RETURN(-ENOMEM); + } + } + lrp = alloc_ll_remote_perm(); + if (!lrp) { + CERROR("alloc memory for ll_remote_perm failed!\n"); + if (perm_hash) /* never published to lli, drop it */ + free_rmtperm_hash(perm_hash); + RETURN(-ENOMEM); + } + 
spin_lock(&lli->lli_lock); + + if (!lli->lli_remote_perms) + lli->lli_remote_perms = perm_hash; + else if (perm_hash) + free_rmtperm_hash(perm_hash); + + head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid); + hlist_for_each_entry(tmp, node, head, lrp_list) { + if (tmp->lrp_uid != current->uid) + continue; + if (tmp->lrp_gid != current->gid) + continue; + if (tmp->lrp_fsuid != current->fsuid) + continue; + if (tmp->lrp_fsgid != current->fsgid) + continue; + free_ll_remote_perm(lrp); + lrp = tmp; + break; + } + + lrp->lrp_uid = perm->rp_uid; + lrp->lrp_gid = perm->rp_gid; + lrp->lrp_fsuid = perm->rp_fsuid; + lrp->lrp_fsgid = perm->rp_fsgid; + lrp->lrp_access_perm = perm->rp_access_perm; + if (lrp != tmp) + hlist_add_head(&lrp->lrp_list, head); + spin_unlock(&lli->lli_lock); + + CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n", + lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid, + lrp->lrp_access_perm); + + RETURN(0); +} + +int lustre_check_remote_perm(struct inode *inode, int mask) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ptlrpc_request *req = NULL; + struct mdt_remote_perm *perm; + int i = 0, rc; + ENTRY; + +check: + rc = do_check_remote_perm(lli, mask); + if (rc != -ENOENT) + RETURN(rc); + + might_sleep(); + + down(&lli->lli_rmtperm_sem); + /* check again */ + rc = do_check_remote_perm(lli, mask); + if (rc != -ENOENT) { + up(&lli->lli_rmtperm_sem); + RETURN(rc); + } + + if (i++ > 5) { + CERROR("check remote perm falls in dead loop!\n"); + LBUG(); + } + + rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), &req); + if (rc) { + up(&lli->lli_rmtperm_sem); + RETURN(rc); + } + + perm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, sizeof(*perm)); + LASSERT(perm); + LASSERT_REPSWABBED(req, REPLY_REC_OFF + 1); + + rc = ll_update_remote_perm(inode, perm); + up(&lli->lli_rmtperm_sem); + + ptlrpc_req_finished(req); + + if (rc == -ENOMEM) + RETURN(rc); + + goto 
check; +} + +#if 0 /* NB: remote perms can't be freed in ll_mdc_blocking_ast of UPDATE lock, + * because it will fail sanity test 48. + */ +void ll_free_remote_perms(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct hlist_head *hash = lli->lli_remote_perms; + struct ll_remote_perm *lrp; + struct hlist_node *node, *next; + int i; + + LASSERT(hash); + + spin_lock(&lli->lli_lock); + + for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) { + hlist_for_each_entry_safe(lrp, node, next, hash + i, lrp_list) + free_ll_remote_perm(lrp); + } + + spin_unlock(&lli->lli_lock); +} +#endif diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 1b1351c..77cc526 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -71,6 +71,31 @@ static int __init init_lustre_lite(void) if (ll_file_data_slab == NULL) return -ENOMEM; + LASSERT(ll_remote_perm_cachep == NULL); + ll_remote_perm_cachep = kmem_cache_create("ll_remote_perm", + sizeof(struct ll_remote_perm), + 0, SLAB_HWCACHE_ALIGN, NULL, + NULL); + if (!ll_remote_perm_cachep) { + kmem_cache_destroy(ll_file_data_slab); + ll_file_data_slab = NULL; + return -ENOMEM; + } + + LASSERT(ll_rmtperm_hash_cachep == NULL); + ll_rmtperm_hash_cachep = kmem_cache_create("ll_rmtperm_hash", + REMOTE_PERM_HASHSIZE * + sizeof(struct list_head), + 0, SLAB_HWCACHE_ALIGN, NULL, + NULL); + if (!ll_rmtperm_hash_cachep) { + kmem_cache_destroy(ll_remote_perm_cachep); + kmem_cache_destroy(ll_file_data_slab); + ll_remote_perm_cachep = NULL; + ll_file_data_slab = NULL; + return -ENOMEM; + } + if (proc_lustre_root) proc_lustre_fs_root = proc_mkdir("llite", proc_lustre_root); @@ -94,6 +119,14 @@ static void __exit exit_lustre_lite(void) ll_unregister_cache(&ll_cache_definition); + rc = kmem_cache_destroy(ll_rmtperm_hash_cachep); + LASSERTF(rc == 0, "couldn't destroy ll_rmtperm_hash_cachep\n"); + ll_rmtperm_hash_cachep = NULL; + + rc = kmem_cache_destroy(ll_remote_perm_cachep); + LASSERTF(rc == 0, "couldn't destroy 
ll_remote_perm_cachep\n"); + ll_remote_perm_cachep = NULL; + rc = kmem_cache_destroy(ll_file_data_slab); LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n"); if (ll_async_page_slab) { diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c index 2dd87ad..1e6052c 100644 --- a/lustre/llite/super25.c +++ b/lustre/llite/super25.c @@ -116,6 +116,29 @@ static int __init init_lustre_lite(void) return -ENOMEM; } + LASSERT(ll_remote_perm_cachep == NULL); + ll_remote_perm_cachep = kmem_cache_create("ll_remote_perm_cache", + sizeof(struct ll_remote_perm), + 0, 0, NULL, NULL); + if (!ll_remote_perm_cachep) { + kmem_cache_destroy(ll_file_data_slab); + ll_file_data_slab = NULL; + return -ENOMEM; + } + + LASSERT(ll_rmtperm_hash_cachep == NULL); + ll_rmtperm_hash_cachep = kmem_cache_create("ll_rmtperm_hash_cache", + REMOTE_PERM_HASHSIZE * + sizeof(struct list_head), + 0, 0, NULL, NULL); + if (!ll_rmtperm_hash_cachep) { + kmem_cache_destroy(ll_remote_perm_cachep); + kmem_cache_destroy(ll_file_data_slab); + ll_remote_perm_cachep = NULL; + ll_file_data_slab = NULL; + return -ENOMEM; + } + proc_lustre_fs_root = proc_lustre_root ? 
proc_mkdir("llite", proc_lustre_root) : NULL; @@ -141,6 +164,14 @@ static void __exit exit_lustre_lite(void) ll_destroy_inodecache(); + rc = kmem_cache_destroy(ll_rmtperm_hash_cachep); + LASSERTF(rc == 0, "couldn't destroy ll_rmtperm_hash_cachep\n"); + ll_rmtperm_hash_cachep = NULL; + + rc = kmem_cache_destroy(ll_remote_perm_cachep); + LASSERTF(rc == 0, "couldn't destroy ll_remote_perm_cachep\n"); + ll_remote_perm_cachep = NULL; + rc = kmem_cache_destroy(ll_file_data_slab); LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n"); if (ll_async_page_slab) { diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index b103c42..83ed667 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1677,6 +1677,7 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data, op_data->fsuid = current->fsuid; op_data->fsgid = current->fsgid; + op_data->cap = current->cap_effective; rc = md_link(lmv->tgts[mds].ltd_exp, op_data, request); RETURN(rc); @@ -1776,6 +1777,7 @@ request: } op_data->fsuid = current->fsuid; op_data->fsgid = current->fsgid; + op_data->cap = current->cap_effective; rc = md_rename(lmv->tgts[mds].ltd_exp, op_data, old, oldlen, new, newlen, request); RETURN(rc); @@ -2121,6 +2123,7 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, RETURN(PTR_ERR(tgt_exp)); op_data->fsuid = current->fsuid; op_data->fsgid = current->fsgid; + op_data->cap = current->cap_effective; rc = md_unlink(tgt_exp, op_data, request); RETURN(rc); } @@ -2452,8 +2455,9 @@ int lmv_set_open_replay_data(struct obd_export *exp, struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; struct obd_export *tgt_exp; + ENTRY; - + tgt_exp = lmv_get_export(lmv, och->och_fid); if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); @@ -2470,10 +2474,33 @@ int lmv_clear_open_replay_data(struct obd_export *exp, ENTRY; tgt_exp = lmv_get_export(lmv, och->och_fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + + RETURN(md_clear_open_replay_data(tgt_exp, och)); +} + +static int
lmv_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_get_export(lmv, fid); if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); - RETURN(md_clear_open_replay_data(tgt_exp, och)); + rc = md_get_remote_perm(tgt_exp, fid, request); + + RETURN(rc); } struct obd_ops lmv_obd_ops = { @@ -2524,7 +2549,8 @@ struct md_ops lmv_md_ops = { .m_get_lustre_md = lmv_get_lustre_md, .m_free_lustre_md = lmv_free_lustre_md, .m_set_open_replay_data = lmv_set_open_replay_data, - .m_clear_open_replay_data = lmv_clear_open_replay_data + .m_clear_open_replay_data = lmv_clear_open_replay_data, + .m_get_remote_perm = lmv_get_remote_perm }; int __init lmv_init(void) diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index f89cc99..8ae1790 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -66,10 +66,8 @@ int obd_memmax; #endif static void push_group_info(struct lvfs_run_ctxt *save, - struct upcall_cache_entry *uce) + struct group_info *ginfo) { - struct group_info *ginfo = uce ? uce->ue_group_info : NULL; - if (!ginfo) { save->ngroups = current_ngroups; current_ngroups = 0; @@ -97,10 +95,8 @@ static void push_group_info(struct lvfs_run_ctxt *save, } static void pop_group_info(struct lvfs_run_ctxt *save, - struct upcall_cache_entry *uce) + struct group_info *ginfo) { - struct group_info *ginfo = uce ? 
uce->ue_group_info : NULL; - if (!ginfo) { current_ngroups = save->ngroups; } else { @@ -142,6 +138,7 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, save->pwd = dget(current->fs->pwd); save->pwdmnt = mntget(current->fs->pwdmnt); save->luc.luc_umask = current->fs->umask; + save->ngroups = current->group_info->ngroups; LASSERT(save->pwd); LASSERT(save->pwdmnt); @@ -149,14 +146,22 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, LASSERT(new_ctx->pwdmnt); if (uc) { + save->luc.luc_uid = current->uid; + save->luc.luc_gid = current->gid; save->luc.luc_fsuid = current->fsuid; save->luc.luc_fsgid = current->fsgid; save->luc.luc_cap = current->cap_effective; + current->uid = uc->luc_uid; + current->gid = uc->luc_gid; current->fsuid = uc->luc_fsuid; current->fsgid = uc->luc_fsgid; current->cap_effective = uc->luc_cap; - push_group_info(save, uc->luc_uce); + + push_group_info(save, + uc->luc_ginfo ?: + uc->luc_identity ? uc->luc_identity->mi_ginfo : + NULL); } current->fs->umask = 0; /* umask already applied on client */ set_fs(new_ctx->fs); @@ -206,10 +211,15 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, mntput(saved->pwdmnt); current->fs->umask = saved->luc.luc_umask; if (uc) { + current->uid = saved->luc.luc_uid; + current->gid = saved->luc.luc_gid; current->fsuid = saved->luc.luc_fsuid; current->fsgid = saved->luc.luc_fsgid; current->cap_effective = saved->luc.luc_cap; - pop_group_info(saved, uc->luc_uce); + pop_group_info(saved, + uc->luc_ginfo ?: + uc->luc_identity ? 
uc->luc_identity->mi_ginfo : + NULL); } /* diff --git a/lustre/lvfs/upcall_cache.c b/lustre/lvfs/upcall_cache.c index d2f5b0c..9c94c77 100644 --- a/lustre/lvfs/upcall_cache.c +++ b/lustre/lvfs/upcall_cache.c @@ -74,11 +74,12 @@ void groups_free(struct group_info *ginfo) } #endif -static struct upcall_cache_entry *alloc_entry(__u64 key) +static struct upcall_cache_entry *alloc_entry(struct upcall_cache *cache, + __u64 key, void *args) { struct upcall_cache_entry *entry; - OBD_ALLOC(entry, sizeof(*entry)); + OBD_ALLOC_PTR(entry); if (!entry) return NULL; @@ -87,34 +88,66 @@ static struct upcall_cache_entry *alloc_entry(__u64 key) entry->ue_key = key; atomic_set(&entry->ue_refcount, 0); init_waitqueue_head(&entry->ue_waitq); + if (cache->uc_ops->init_entry) + cache->uc_ops->init_entry(entry, args); return entry; } -/* protected by hash lock */ -static void free_entry(struct upcall_cache_entry *entry) +/* protected by cache lock */ +static void free_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry) { - if (entry->ue_group_info) - groups_free(entry->ue_group_info); + if (cache->uc_ops->free_entry) + cache->uc_ops->free_entry(cache, entry); + list_del(&entry->ue_hash); CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n", entry, entry->ue_key); - OBD_FREE(entry, sizeof(*entry)); + OBD_FREE_PTR(entry); +} + +static inline int upcall_compare(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + __u64 key, void *args) +{ + if (entry->ue_key != key) + return -1; + + if (cache->uc_ops->upcall_compare) + return cache->uc_ops->upcall_compare(cache, entry, key, args); + + return 0; +} + +static inline int downcall_compare(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + __u64 key, void *args) +{ + if (entry->ue_key != key) + return -1; + + if (cache->uc_ops->downcall_compare) + return cache->uc_ops->downcall_compare(cache, entry, key, args); + + return 0; } -static void get_entry(struct upcall_cache_entry *entry) +static 
inline void get_entry(struct upcall_cache_entry *entry) { atomic_inc(&entry->ue_refcount); } -static void put_entry(struct upcall_cache_entry *entry) +static inline void put_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry) { if (atomic_dec_and_test(&entry->ue_refcount) && (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) { - free_entry(entry); + free_entry(cache, entry); } } -static int check_unlink_entry(struct upcall_cache_entry *entry) +static int check_unlink_entry(struct upcall_cache *cache, + struct upcall_cache_entry *entry) { if (UC_CACHE_IS_VALID(entry) && time_before(jiffies, entry->ue_expire)) @@ -132,86 +165,19 @@ static int check_unlink_entry(struct upcall_cache_entry *entry) list_del_init(&entry->ue_hash); if (!atomic_read(&entry->ue_refcount)) - free_entry(entry); + free_entry(cache, entry); return 1; } -static int refresh_entry(struct upcall_cache *hash, +static inline int refresh_entry(struct upcall_cache *cache, struct upcall_cache_entry *entry) { - char *argv[4]; - char *envp[3]; - char keystr[16]; - int rc; - ENTRY; - - snprintf(keystr, 16, LPU64, entry->ue_key); - - CDEBUG(D_INFO, "The groups upcall is: %s \n", hash->uc_upcall); - argv[0] = hash->uc_upcall; - argv[1] = hash->uc_name; - argv[2] = keystr; - argv[3] = NULL; - - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/usr/sbin"; - envp[2] = NULL; - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; " - "check /proc/fs/lustre/mds/%s/group_upcall\n", - hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]); - } else { - CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name, - argv[0], argv[1], argv[2]); - rc = 0; - } - RETURN(rc); + LASSERT(cache->uc_ops->do_upcall); + return cache->uc_ops->do_upcall(cache, entry); } -static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary, - __u32 ngroups, __u32 *groups) -{ - struct group_info *ginfo; - int i, j; - ENTRY; - -#if 
LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) - if (ngroups > NGROUPS) - ngroups = NGROUPS; -#endif - - if (ngroups > NGROUPS_MAX) { - CERROR("using first %d supplementary groups for uid "LPU64"\n", - NGROUPS_MAX, entry->ue_key); - ngroups = NGROUPS_MAX; - } - - ginfo = groups_alloc(ngroups); - if (!ginfo) { - CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n", - entry->ue_key, ngroups); - RETURN(-ENOMEM); - } - entry->ue_group_info = ginfo; - entry->ue_primary = primary; - - for (i = 0; i < ginfo->nblocks; i++) { - int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups); - int off = i * NGROUPS_PER_BLOCK; - - for (j = 0; j < cp_count; j++) - ginfo->blocks[i][j] = groups[off + j]; - - ngroups -= cp_count; - } - RETURN(0); -} - -struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash, - __u64 key, __u32 primary, - __u32 ngroups, __u32 *groups) +struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache, + __u64 key, void *args) { struct upcall_cache_entry *entry = NULL, *new = NULL, *next; struct list_head *head; @@ -219,49 +185,17 @@ struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash, int rc, found; ENTRY; - LASSERT(hash); + LASSERT(cache); - if (strcmp(hash->uc_upcall, "NONE") == 0) { - new = alloc_entry(key); - if (!new) { - CERROR("fail to alloc entry\n"); - RETURN(NULL); - } - get_entry(new); - - /* We have to sort the groups for 2.6 kernels */ - LASSERT(ngroups <= 2); - if (ngroups == 2 && groups[1] == -1) - ngroups--; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) - /* 2.6 needs groups array sorted */ - if (ngroups == 2 && groups[0] > groups[1]) { - __u32 tmp = groups[1]; - groups[1] = groups[0]; - groups[0] = tmp; - } -#endif - if (ngroups > 0 && groups[0] == -1) { - groups[0] = groups[1]; - ngroups--; - } - - rc = entry_set_group_info(new, primary, ngroups, groups); - - /* We can't cache this entry as it only has a subset of - * the user's groups, as sent in suppgid1, suppgid2. 
*/ - UC_CACHE_SET_EXPIRED(new); - RETURN(new); - } - head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; + head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; find_again: found = 0; - spin_lock(&hash->uc_lock); + spin_lock(&cache->uc_lock); list_for_each_entry_safe(entry, next, head, ue_hash) { /* check invalid & expired items */ - if (check_unlink_entry(entry)) + if (check_unlink_entry(cache, entry)) continue; - if (entry->ue_key == key) { + if (upcall_compare(cache, entry, key, args) == 0) { found = 1; break; } @@ -269,8 +203,8 @@ find_again: if (!found) { /* didn't find it */ if (!new) { - spin_unlock(&hash->uc_lock); - new = alloc_entry(key); + spin_unlock(&cache->uc_lock); + new = alloc_entry(cache, key, args); if (!new) { CERROR("fail to alloc entry\n"); RETURN(ERR_PTR(-ENOMEM)); @@ -282,7 +216,7 @@ find_again: } } else { if (new) { - free_entry(new); + free_entry(cache, new); new = NULL; } list_move(&entry->ue_hash, head); @@ -293,10 +227,10 @@ find_again: if (UC_CACHE_IS_NEW(entry)) { UC_CACHE_SET_ACQUIRING(entry); UC_CACHE_CLEAR_NEW(entry); - entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire; - spin_unlock(&hash->uc_lock); - rc = refresh_entry(hash, entry); - spin_lock(&hash->uc_lock); + entry->ue_acquire_expire = jiffies + cache->uc_acquire_expire; + spin_unlock(&cache->uc_lock); + rc = refresh_entry(cache, entry); + spin_lock(&cache->uc_lock); if (rc < 0) { UC_CACHE_CLEAR_ACQUIRING(entry); UC_CACHE_SET_INVALID(entry); @@ -307,32 +241,31 @@ find_again: * this item, just wait it complete */ if (UC_CACHE_IS_ACQUIRING(entry)) { + unsigned long expiry = jiffies + cache->uc_acquire_expire; + init_waitqueue_entry(&wait, current); add_wait_queue(&entry->ue_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); - schedule_timeout(hash->uc_acquire_expire); + schedule_timeout(cache->uc_acquire_expire); - spin_lock(&hash->uc_lock); + spin_lock(&cache->uc_lock); 
remove_wait_queue(&entry->ue_waitq, &wait); if (UC_CACHE_IS_ACQUIRING(entry)) { - static unsigned long next; /* we're interrupted or upcall failed in the middle */ - if (time_after(jiffies, next)) { - CERROR("key "LPU64" update failed: check %s\n", - entry->ue_key, hash->uc_upcall); - next = jiffies + 1800; - } - put_entry(entry); - GOTO(out, entry = ERR_PTR(-EIDRM)); + rc = time_before(jiffies, expiry) ? -EINTR : -ETIMEDOUT; + CERROR("key "LPU64" update failed: rc = %d, check %s\n", + entry->ue_key, rc, cache->uc_upcall); + put_entry(cache, entry); /* may free entry */ + GOTO(out, entry = ERR_PTR(rc)); } /* fall through */ } /* invalid means error, don't need to try again */ if (UC_CACHE_IS_INVALID(entry)) { - put_entry(entry); + put_entry(cache, entry); GOTO(out, entry = ERR_PTR(-EIDRM)); } @@ -340,15 +273,15 @@ find_again: * We can't refresh the existing one because some * memory might be shared by multiple processes. */ - if (check_unlink_entry(entry)) { + if (check_unlink_entry(cache, entry)) { /* if expired, try again. but if this entry is * created by me but too quickly turn to expired * without any error, should at least give a * chance to use it once. 
*/ if (entry != new) { - put_entry(entry); - spin_unlock(&hash->uc_lock); + put_entry(cache, entry); + spin_unlock(&cache->uc_lock); new = NULL; goto find_again; } @@ -356,12 +289,12 @@ find_again: /* Now we know it's good */ out: - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); RETURN(entry); } EXPORT_SYMBOL(upcall_cache_get_entry); -void upcall_cache_put_entry(struct upcall_cache *hash, +void upcall_cache_put_entry(struct upcall_cache *cache, struct upcall_cache_entry *entry) { ENTRY; @@ -372,28 +305,28 @@ void upcall_cache_put_entry(struct upcall_cache *hash, } LASSERT(atomic_read(&entry->ue_refcount) > 0); - spin_lock(&hash->uc_lock); - put_entry(entry); - spin_unlock(&hash->uc_lock); + spin_lock(&cache->uc_lock); + put_entry(cache, entry); + spin_unlock(&cache->uc_lock); EXIT; } EXPORT_SYMBOL(upcall_cache_put_entry); -int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key, - __u32 primary, __u32 ngroups, __u32 *groups) +int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key, + void *args) { struct upcall_cache_entry *entry = NULL; struct list_head *head; int found = 0, rc = 0; ENTRY; - LASSERT(hash); + LASSERT(cache); - head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; + head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; - spin_lock(&hash->uc_lock); + spin_lock(&cache->uc_lock); list_for_each_entry(entry, head, ue_hash) { - if (entry->ue_key == key) { + if (downcall_compare(cache, entry, key, args) == 0) { found = 1; get_entry(entry); break; @@ -402,73 +335,74 @@ int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key, if (!found) { CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n", - hash->uc_name, entry->ue_key); + cache->uc_name, key); /* haven't found, it's possible */ - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); RETURN(-EINVAL); } if (err) { CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n", - hash->uc_name, entry->ue_key, err); + 
cache->uc_name, entry->ue_key, err); GOTO(out, rc = -EINVAL); } if (!UC_CACHE_IS_ACQUIRING(entry)) { - CWARN("%s: found uptodate entry %p (key "LPU64") in ioctl\n", - hash->uc_name, entry, entry->ue_key); + CERROR("%s: found uptodate entry %p (key "LPU64") in ioctl\n", + cache->uc_name, entry, entry->ue_key); GOTO(out, rc = 0); } if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) { CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n", - hash->uc_name, entry, entry->ue_key); + cache->uc_name, entry, entry->ue_key); GOTO(out, rc = -EINVAL); } - spin_unlock(&hash->uc_lock); - rc = entry_set_group_info(entry, primary, ngroups, groups); - spin_lock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); + if (cache->uc_ops->parse_downcall) + rc = cache->uc_ops->parse_downcall(cache, entry, args); + spin_lock(&cache->uc_lock); if (rc) GOTO(out, rc); - entry->ue_expire = jiffies + hash->uc_entry_expire; + entry->ue_expire = jiffies + cache->uc_entry_expire; UC_CACHE_SET_VALID(entry); CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n", - hash->uc_name, entry, entry->ue_key); + cache->uc_name, entry, entry->ue_key); out: if (rc) { UC_CACHE_SET_INVALID(entry); list_del_init(&entry->ue_hash); } UC_CACHE_CLEAR_ACQUIRING(entry); - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); wake_up_all(&entry->ue_waitq); - put_entry(entry); + put_entry(cache, entry); RETURN(rc); } EXPORT_SYMBOL(upcall_cache_downcall); -static void cache_flush(struct upcall_cache *hash, int force) +static void cache_flush(struct upcall_cache *cache, int force) { struct upcall_cache_entry *entry, *next; int i; ENTRY; - spin_lock(&hash->uc_lock); + spin_lock(&cache->uc_lock); for (i = 0; i < UC_CACHE_HASH_SIZE; i++) { list_for_each_entry_safe(entry, next, - &hash->uc_hashtable[i], ue_hash) { + &cache->uc_hashtable[i], ue_hash) { if (!force && atomic_read(&entry->ue_refcount)) { UC_CACHE_SET_EXPIRED(entry); continue; } LASSERT(!atomic_read(&entry->ue_refcount)); - 
free_entry(entry); + free_entry(cache, entry); } } - spin_unlock(&hash->uc_lock); + spin_unlock(&cache->uc_lock); EXIT; } @@ -484,34 +418,68 @@ void upcall_cache_flush_all(struct upcall_cache *cache) } EXPORT_SYMBOL(upcall_cache_flush_all); -struct upcall_cache *upcall_cache_init(const char *name) +void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args) +{ + struct list_head *head; + struct upcall_cache_entry *entry; + int found = 0; + ENTRY; + + head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)]; + + spin_lock(&cache->uc_lock); + list_for_each_entry(entry, head, ue_hash) { + if (upcall_compare(cache, entry, key, args) == 0) { + found = 1; + break; + } + } + + if (found) { + CWARN("%s: flush entry %p: key "LPU64", ref %d, fl %x, " + "cur %lu, ex %ld/%ld\n", + cache->uc_name, entry, entry->ue_key, + atomic_read(&entry->ue_refcount), entry->ue_flags, + get_seconds(), entry->ue_acquire_expire, + entry->ue_expire); + UC_CACHE_SET_EXPIRED(entry); + if (!atomic_read(&entry->ue_refcount)) + free_entry(cache, entry); + } + spin_unlock(&cache->uc_lock); +} +EXPORT_SYMBOL(upcall_cache_flush_one); + +struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, + struct upcall_cache_ops *ops) { - struct upcall_cache *hash; + struct upcall_cache *cache; int i; ENTRY; - OBD_ALLOC(hash, sizeof(*hash)); - if (!hash) + OBD_ALLOC(cache, sizeof(*cache)); + if (!cache) RETURN(ERR_PTR(-ENOMEM)); - spin_lock_init(&hash->uc_lock); + spin_lock_init(&cache->uc_lock); for (i = 0; i < UC_CACHE_HASH_SIZE; i++) - INIT_LIST_HEAD(&hash->uc_hashtable[i]); - strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1); - /* set default value, proc tunable */ - strcpy(hash->uc_upcall, "NONE"); - hash->uc_entry_expire = 5 * 60 * HZ; - hash->uc_acquire_expire = 5 * HZ; - - RETURN(hash); + INIT_LIST_HEAD(&cache->uc_hashtable[i]); + strncpy(cache->uc_name, name, sizeof(cache->uc_name) - 1); + /* upcall pathname proc tunable */ + strncpy(cache->uc_upcall, upcall, 
sizeof(cache->uc_upcall) - 1); + cache->uc_entry_expire = 5 * 60 * HZ; + cache->uc_acquire_expire = 5 * HZ; + cache->uc_ops = ops; + + RETURN(cache); } EXPORT_SYMBOL(upcall_cache_init); -void upcall_cache_cleanup(struct upcall_cache *hash) +void upcall_cache_cleanup(struct upcall_cache *cache) { - if (!hash) + if (!cache) return; - upcall_cache_flush_all(hash); - OBD_FREE(hash, sizeof(*hash)); + upcall_cache_flush_all(cache); + OBD_FREE(cache, sizeof(*cache)); } EXPORT_SYMBOL(upcall_cache_cleanup); diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index fc33ca9..311f5df 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -101,6 +101,12 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, EXIT; } +static inline int client_is_remote(struct obd_export *exp) +{ + return class_exp2cliimp(exp)->imp_connect_data.ocd_connect_flags & + OBD_CONNECT_RMT_CLIENT ? 1 : 0; +} + /* Quota stuff */ extern quota_interface_t *quota_interface; diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index ad24dbd..6b688eb 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -366,7 +366,10 @@ int mdc_enqueue(struct obd_export *exp, it->it_create_mode, 0, it->it_flags, lmm, lmmsize); - repsize[repbufcnt++] = LUSTRE_POSIX_ACL_MAX_SIZE; + /* for remote client, fetch remote perm for current user */ + repsize[repbufcnt++] = client_is_remote(exp) ? + sizeof(struct mdt_remote_perm) : + LUSTRE_POSIX_ACL_MAX_SIZE; } else if (it->it_op & IT_UNLINK) { size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_unlink); size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1; @@ -387,8 +390,9 @@ int mdc_enqueue(struct obd_export *exp, repsize[repbufcnt++] = obddev->u.cli.cl_max_mds_cookiesize; } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { obd_valid valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE | - OBD_MD_FLACL | OBD_MD_FLMODEASIZE | - OBD_MD_FLDIREA; + OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA; + valid |= client_is_remote(exp) ? 
OBD_MD_FLRMTPERM : + OBD_MD_FLACL; size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_body); size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1; @@ -409,7 +413,9 @@ int mdc_enqueue(struct obd_export *exp, mdc_getattr_pack(req, DLM_INTENT_REC_OFF, valid, it->it_flags, op_data); - repsize[repbufcnt++] = LUSTRE_POSIX_ACL_MAX_SIZE; + repsize[repbufcnt++] = client_is_remote(exp) ? + sizeof(struct mdt_remote_perm) : + LUSTRE_POSIX_ACL_MAX_SIZE; } else if (it->it_op == IT_READDIR) { policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, @@ -493,8 +499,9 @@ int mdc_enqueue(struct obd_export *exp, LASSERT(repbufcnt == 5 || repbufcnt == 2); if (repbufcnt == 5) { struct mdt_body *body; + int offset = DLM_REPLY_REC_OFF; - body = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, sizeof(*body), + body = lustre_swab_repbuf(req, offset++, sizeof(*body), lustre_swab_mdt_body); if (body == NULL) { CERROR ("Can't swab mdt_body\n"); @@ -511,8 +518,7 @@ int mdc_enqueue(struct obd_export *exp, if ((body->valid & OBD_MD_FLDIREA) != 0) { if (body->eadatasize) { - eadata = lustre_swab_repbuf(req, - DLM_REPLY_REC_OFF + 1, + eadata = lustre_swab_repbuf(req, offset++, body->eadatasize, NULL); if (eadata == NULL) { CERROR ("Missing/short eadata\n"); @@ -523,7 +529,7 @@ int mdc_enqueue(struct obd_export *exp, if ((body->valid & OBD_MD_FLEASIZE)) { /* The eadata is opaque; just check that it is there. * Eventually, obd_unpackmd() will check the contents */ - eadata = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF + 1, + eadata = lustre_swab_repbuf(req, offset++, body->eadatasize, NULL); if (eadata == NULL) { CERROR ("Missing/short eadata\n"); @@ -550,18 +556,29 @@ int mdc_enqueue(struct obd_export *exp, * large enough request buffer above we need to * reallocate it here to hold the actual LOV EA. 
*/ if (it->it_op & IT_OPEN) { - int offset = DLM_INTENT_REC_OFF + 2; - - if (lustre_msg_buflen(req->rq_reqmsg, offset) < + if (lustre_msg_buflen(req->rq_reqmsg, + DLM_INTENT_REC_OFF + 2) < body->eadatasize) mdc_realloc_openmsg(req, body, size); - lmm = lustre_msg_buf(req->rq_reqmsg, offset, + lmm = lustre_msg_buf(req->rq_reqmsg, + DLM_INTENT_REC_OFF + 2, body->eadatasize); if (lmm) memcpy(lmm, eadata, body->eadatasize); } } + if (body->valid & OBD_MD_FLRMTPERM) { + struct mdt_remote_perm *perm; + + LASSERT(client_is_remote(exp)); + perm = lustre_swab_repbuf(req, offset++, sizeof(*perm), + lustre_swab_mdt_remote_perm); + if (perm == NULL) { + CERROR("missing remote permission!\n"); + RETURN(-EPROTO); + } + } } RETURN(rc); @@ -628,10 +645,10 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, /* As not all attributes are kept under update lock, e.g. owner/group/acls are under lookup lock, we need both ibits for GETATTR. */ - + /* For CMD, UPDATE lock and LOOKUP lock can not be got * at the same for cross-object, so we can not match - * the 2 lock at the same time FIXME: but how to handle + * the 2 lock at the same time FIXME: but how to handle * the above situation */ policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ? 
MDS_INODELOCK_UPDATE : MDS_INODELOCK_LOOKUP; diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index ed773ca..2871f15 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -284,7 +284,7 @@ int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid, struct ptlrpc_request *req; int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; // int size[3] = {sizeof(struct mdt_body)}, bufcnt = 1; - int rc, xattr_namelen = 0, bufcnt = 2, offset; + int rc, xattr_namelen = 0, bufcnt = 2, offset, remote_acl = 0; void *tmp; ENTRY; @@ -310,6 +310,8 @@ int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid, if (xattr_name) { tmp = lustre_msg_buf(req->rq_reqmsg, offset++, xattr_namelen); memcpy(tmp, xattr_name, xattr_namelen); + if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL)) + remote_acl = 1; } if (input_size) { tmp = lustre_msg_buf(req->rq_reqmsg, offset++, input_size); @@ -329,12 +331,15 @@ int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid, ptlrpc_req_set_repsize(req, bufcnt, size); /* make rpc */ - if (opcode == MDS_SETXATTR) + /* NB: set remote acl doesn't need hold rpc lock, because it just + * send command to MDS, and when it's executed on mountpoint on MDS, + * another mdc_xattr_common() will be invoked there. 
*/ + if (opcode == MDS_SETXATTR && !remote_acl) mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); - if (opcode == MDS_SETXATTR) + if (opcode == MDS_SETXATTR && !remote_acl) mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc != 0) @@ -496,6 +501,14 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, GOTO(err_out, rc); offset++; } + + /* remote permission */ + if (md->body->valid & OBD_MD_FLRMTPERM) { + md->remote_perm = lustre_msg_buf(req->rq_repmsg, offset, + sizeof(struct mdt_remote_perm)); + LASSERT(md->remote_perm); + offset++; + } out: RETURN(rc); @@ -1447,6 +1460,47 @@ static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf) return(rc); } +/* get remote permission for current user on fid */ +int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, + struct ptlrpc_request **request) +{ + struct ptlrpc_request *req; + struct mdt_body *body; + struct mdt_remote_perm *perm; + int size[3] = { sizeof(struct ptlrpc_body), + sizeof(*body), + sizeof(*perm) }; + int rc; + ENTRY; + + *request = NULL; + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_GETATTR, 2, size, NULL); + if (!req) + RETURN(-ENOMEM); + + mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, 0, 0); + + ptlrpc_req_set_repsize(req, 3, size); + rc = ptlrpc_queue_wait(req); + if (rc) { + ptlrpc_req_finished(req); + RETURN(rc); + } + + body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), + lustre_swab_mdt_body); + LASSERT(body); + LASSERT(body->valid & OBD_MD_FLRMTPERM); + + perm = lustre_swab_repbuf(req, REPLY_REC_OFF + 1, sizeof(*perm), + lustre_swab_mdt_remote_perm); + LASSERT(perm); + + *request = req; + RETURN(0); +} + struct obd_ops mdc_obd_ops = { .o_owner = THIS_MODULE, .o_setup = mdc_setup, @@ -1497,7 +1551,8 @@ struct md_ops mdc_md_ops = { .m_get_lustre_md = mdc_get_lustre_md, .m_free_lustre_md = mdc_free_lustre_md, .m_set_open_replay_data = 
mdc_set_open_replay_data, - .m_clear_open_replay_data = mdc_clear_open_replay_data + .m_clear_open_replay_data = mdc_clear_open_replay_data, + .m_get_remote_perm = mdc_get_remote_perm }; extern quota_interface_t mdc_quota_interface; diff --git a/lustre/mdd/mdd_handler.c b/lustre/mdd/mdd_handler.c index 3ff593e..d707dca 100644 --- a/lustre/mdd/mdd_handler.c +++ b/lustre/mdd/mdd_handler.c @@ -54,8 +54,17 @@ static void __mdd_ref_add(const struct lu_context *ctxt, struct mdd_object *obj, struct thandle *handle); static void __mdd_ref_del(const struct lu_context *ctxt, struct mdd_object *obj, struct thandle *handle); -static int mdd_lookup(const struct lu_context *ctxt, struct md_object *pobj, - const char *name, struct lu_fid* fid); +static int mdd_lookup_intent(const struct lu_context *ctxt, + struct md_object *pobj, + const char *name, const struct lu_fid* fid, + int mask, struct md_ucred *uc); +static int mdd_exec_permission_lite(const struct lu_context *ctxt, + struct mdd_object *obj, + struct md_ucred *uc); +static int __mdd_permission_internal(const struct lu_context *ctxt, + struct mdd_object *obj, + int mask, int getattr, + struct md_ucred *uc); static struct md_object_operations mdd_obj_ops; static struct md_dir_operations mdd_dir_ops; @@ -68,6 +77,76 @@ static const char dot[] = "."; static const char dotdot[] = ".."; +#define mdd_get_group_info(group_info) do { \ + atomic_inc(&(group_info)->usage); \ +} while (0) + +#define mdd_put_group_info(group_info) do { \ + if (atomic_dec_and_test(&(group_info)->usage)) \ + groups_free(group_info); \ +} while (0) + +#define MDD_NGROUPS_PER_BLOCK ((int)(CFS_PAGE_SIZE / sizeof(gid_t))) + +#define MDD_GROUP_AT(gi, i) \ + ((gi)->blocks[(i) / MDD_NGROUPS_PER_BLOCK][(i) % MDD_NGROUPS_PER_BLOCK]) + +/* groups_search() is copied from linux kernel! 
*/ +/* a simple bsearch */ +static int mdd_groups_search(struct group_info *group_info, gid_t grp) +{ + int left, right; + + if (!group_info) + return 0; + + left = 0; + right = group_info->ngroups; + while (left < right) { + int mid = (left + right) / 2; + int cmp = grp - MDD_GROUP_AT(group_info, mid); + + if (cmp > 0) + left = mid + 1; + else if (cmp < 0) + right = mid; + else + return 1; + } + return 0; +} + +static int mdd_in_group_p(struct md_ucred *uc, gid_t grp) +{ + int rc = 1; + + if (grp != uc->mu_fsgid) { + struct group_info *group_info = NULL; + + if (uc->mu_ginfo) + group_info = uc->mu_ginfo; +#if 0 + else if (uc->mu_identity) + group_info = uc->mu_identity->mi_ginfo; +#endif + + if (!group_info) + return 0; + + mdd_get_group_info(group_info); + rc = mdd_groups_search(group_info, grp); + mdd_put_group_info(group_info); + } + return rc; +} + +static inline int mdd_permission_internal(const struct lu_context *ctxt, + struct mdd_object *obj, int mask, + struct md_ucred *uc) +{ + return __mdd_permission_internal(ctxt, obj, mask, 1, uc); +} + struct mdd_thread_info *mdd_ctx_info(const struct lu_context *ctx) { struct mdd_thread_info *info; @@ -178,9 +257,12 @@ static inline int mdd_is_dead_obj(struct mdd_object *obj) /*Check whether it may create the cobj under the pobj*/ static int mdd_may_create(const struct lu_context *ctxt, - struct mdd_object *pobj, struct mdd_object *cobj) + struct mdd_object *pobj, struct mdd_object *cobj, + int need_check, struct md_ucred *uc) { + int rc = 0; ENTRY; + if (cobj && lu_object_exists(&cobj->mod_obj.mo_lu)) RETURN(-EEXIST); @@ -188,7 +270,11 @@ static int mdd_may_create(const struct lu_context *ctxt, RETURN(-ENOENT); /*check pobj may create or not*/ - RETURN(0); + if (need_check) + rc = mdd_permission_internal(ctxt, pobj, + MAY_WRITE | MAY_EXEC, uc); + + RETURN(rc); } static inline int __mdd_la_get(const struct lu_context *ctxt, @@ -224,10 +310,54 @@ static int mdd_get_flags(const struct lu_context *ctxt, struct 
mdd_object *obj) RETURN(rc); } +#define mdd_cap_t(x) (x) + +#define MDD_CAP_TO_MASK(x) (1 << (x)) + +#define mdd_cap_raised(c, flag) (mdd_cap_t(c) & MDD_CAP_TO_MASK(flag)) + +/* capable() is copied from linux kernel! */ +static inline int mdd_capable(struct md_ucred *uc, int cap) +{ + if (mdd_cap_raised(uc->mu_cap, cap)) + return 1; + return 0; +} + +/* + * It's inline, so penalty for filesystems that don't use sticky bit is + * minimal. + */ +static inline int mdd_is_sticky(const struct lu_context *ctxt, + struct mdd_object *pobj, + struct mdd_object *cobj, + struct md_ucred *uc) +{ + struct lu_attr *tmp_la = &mdd_ctx_info(ctxt)->mti_la; + int rc; + + rc = __mdd_la_get(ctxt, cobj, tmp_la); + if (rc) { + return rc; + } else if (tmp_la->la_uid == uc->mu_fsuid) { + return 0; + } else { + rc = __mdd_la_get(ctxt, pobj, tmp_la); + if (rc) + return rc; + else if (!(tmp_la->la_mode & S_ISVTX)) + return 0; + else if (tmp_la->la_uid == uc->mu_fsuid) + return 0; + else + return !mdd_capable(uc, CAP_FOWNER); + } +} + /*Check whether it may delete the cobj under the pobj*/ static int mdd_may_delete(const struct lu_context *ctxt, struct mdd_object *pobj, struct mdd_object *cobj, - int is_dir) + int is_dir, int need_check, struct md_ucred *uc) { struct mdd_device *mdd = mdo2mdd(&cobj->mod_obj); int rc = 0; @@ -248,14 +378,24 @@ static int mdd_may_delete(const struct lu_context *ctxt, if (lu_fid_eq(mdo2fid(cobj), &mdd->mdd_root_fid)) RETURN(-EBUSY); - } else if (S_ISDIR(mdd_object_type(cobj))) + } else if (S_ISDIR(mdd_object_type(cobj))) { RETURN(-EISDIR); + } - if (pobj && mdd_is_dead_obj(pobj)) - RETURN(-ENOENT); + if (pobj) { + if (mdd_is_dead_obj(pobj)) + RETURN(-ENOENT); + if (mdd_is_sticky(ctxt, pobj, cobj, uc)) + RETURN(-EPERM); + + if (need_check) + rc = mdd_permission_internal(ctxt, pobj, + MAY_WRITE | MAY_EXEC, uc); + } RETURN(rc); } + /* get only inode attributes */ static int __mdd_iattr_get(const struct lu_context *ctxt, struct mdd_object *mdd_obj, struct md_attr 
*ma) @@ -268,6 +408,7 @@ static int __mdd_iattr_get(const struct lu_context *ctxt, ma->ma_valid = MA_INODE; RETURN(rc); } + /* get lov EA only */ static int __mdd_lmm_get(const struct lu_context *ctxt, struct mdd_object *mdd_obj, struct md_attr *ma) @@ -335,8 +476,11 @@ static inline int mdd_attr_get_internal_locked(const struct lu_context *ctxt, return rc; } -static int mdd_attr_get(const struct lu_context *ctxt, - struct md_object *obj, struct md_attr *ma) +/* + * No permission check is needed. + */ +static int mdd_attr_get(const struct lu_context *ctxt, struct md_object *obj, + struct md_attr *ma, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(obj); int rc; @@ -346,8 +490,12 @@ static int mdd_attr_get(const struct lu_context *ctxt, RETURN(rc); } -static int mdd_xattr_get(const struct lu_context *ctxt, struct md_object *obj, - void *buf, int buf_len, const char *name) +/* + * No permission check is needed. + */ +static int mdd_xattr_get(const struct lu_context *ctxt, + struct md_object *obj, void *buf, int buf_len, + const char *name, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(obj); struct dt_object *next; @@ -365,8 +513,12 @@ static int mdd_xattr_get(const struct lu_context *ctxt, struct md_object *obj, RETURN(rc); } +/* + * Permission check is done when open, + * no need check again. 
+ */ static int mdd_readlink(const struct lu_context *ctxt, struct md_object *obj, - void *buf, int buf_len) + void *buf, int buf_len, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(obj); struct dt_object *next; @@ -380,8 +532,9 @@ static int mdd_readlink(const struct lu_context *ctxt, struct md_object *obj, rc = next->do_body_ops->dbo_read(ctxt, next, buf, buf_len, &pos); RETURN(rc); } + static int mdd_xattr_list(const struct lu_context *ctxt, struct md_object *obj, - void *buf, int buf_len) + void *buf, int buf_len, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(obj); struct dt_object *next; @@ -742,6 +895,7 @@ static int __mdd_xattr_set(const struct lu_context *ctxt, struct mdd_object *o, } RETURN(rc); } + /* this gives the same functionality as the code between * sys_chmod and inode_setattr * chown_common and inode_setattr @@ -750,64 +904,97 @@ static int __mdd_xattr_set(const struct lu_context *ctxt, struct mdd_object *o, * and port to */ int mdd_fix_attr(const struct lu_context *ctxt, struct mdd_object *obj, - const struct md_attr *ma, struct lu_attr *la) + struct lu_attr *la, struct md_ucred *uc) { - struct lu_attr *tmp_la = &mdd_ctx_info(ctxt)->mti_la; - time_t now = CURRENT_SECONDS; + struct lu_attr *tmp_la = &mdd_ctx_info(ctxt)->mti_la; + time_t now = CURRENT_SECONDS; int rc; ENTRY; + if (!la->la_valid) + RETURN(0); + + /* Do not permit change file type */ + if (la->la_valid & LA_TYPE) + RETURN(-EPERM); + + /* They should not be processed by setattr */ + if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE)) + RETURN(-EPERM); + rc = __mdd_la_get(ctxt, obj, tmp_la); if (rc) RETURN(rc); - /*XXX Check permission */ + if (mdd_is_immutable(obj) || mdd_is_append(obj)) { - /*If only change flags of the object, we should + /* + * If only change flags of the object, we should * let it pass, but also need capability check * here if (!capable(CAP_LINUX_IMMUTABLE)), - * fix it, when implement capable in mds*/ + * fix it, when implement 
capable in mds + */ if (la->la_valid & ~LA_FLAGS) RETURN(-EPERM); - /*According to Ext3 implementation on this, the - *Ctime will be changed, but not clear why?*/ + if (!mdd_capable(uc, CAP_LINUX_IMMUTABLE)) + RETURN(-EPERM); + + if ((uc->mu_fsuid != tmp_la->la_uid) && + !mdd_capable(uc, CAP_FOWNER)) + RETURN(-EPERM); + + /* + * According to Ext3 implementation on this, the + * Ctime will be changed, but not clear why? + */ la->la_ctime = now; la->la_valid |= LA_CTIME; RETURN(0); } - if (!(la->la_valid & LA_CTIME)) { - la->la_ctime = now; - la->la_valid |= LA_CTIME; - } -#if 0 - /* times */ - if ((ia_valid & (ATTR_MTIME|ATTR_ATIME)) == (ATTR_MTIME|ATTR_ATIME)) { - if (current->fsuid != inode->i_uid && - (error = ll_permission(inode, MAY_WRITE, NULL)) != 0) - RETURN(error); - } - if (ia_valid & ATTR_SIZE && - /* NFSD hack for open(O_CREAT|O_TRUNC)=mknod+truncate (bug 5781) */ - !(rec->ur_uc.luc_fsuid == inode->i_uid && - ia_valid & MDS_OPEN_OWNEROVERRIDE)) { - if ((error = ll_permission(inode, MAY_WRITE, NULL)) != 0) - RETURN(error); + /* Check for setting the obj time. */ + if (la->la_valid & (LA_MTIME | LA_ATIME | LA_CTIME)) { + if ((uc->mu_fsuid != tmp_la->la_uid) && + !mdd_capable(uc, CAP_FOWNER) && + !(la->la_valid & (LA_SIZE | LA_BLOCKS))) + RETURN(-EPERM); } -#endif - if (la->la_valid & (LA_UID | LA_GID)) { - /* chown */ - - if (mdd_is_immutable(obj) || mdd_is_append(obj)) + /* Make sure a caller can chmod. */ + if (la->la_valid & LA_MODE) { + /* + * Bypass la_valid == LA_MODE, + * this is for changing file with SUID or SGID. + */ + if ((la->la_valid & ~LA_MODE) && + (uc->mu_fsuid != tmp_la->la_uid) && + !mdd_capable(uc, CAP_FOWNER)) RETURN(-EPERM); + + if (la->la_mode == (umode_t) -1) + la->la_mode = tmp_la->la_mode; + else + la->la_mode = (la->la_mode & S_IALLUGO) | + (tmp_la->la_mode & ~S_IALLUGO); + + /* Also check the setgid bit! */ + if (!mdd_in_group_p(uc, (la->la_valid & LA_GID) ? 
la->la_gid : + tmp_la->la_gid) && !mdd_capable(uc, CAP_FSETID)) + la->la_mode &= ~S_ISGID; + } else { + la->la_mode = tmp_la->la_mode; + } + + /* Make sure a caller can chown. */ + if (la->la_valid & LA_UID) { if (la->la_uid == (uid_t) -1) la->la_uid = tmp_la->la_uid; - if (la->la_gid == (gid_t) -1) - la->la_gid = tmp_la->la_gid; - if (!(la->la_valid & LA_MODE)) - la->la_mode = tmp_la->la_mode; + if (((uc->mu_fsuid != tmp_la->la_uid) || + (la->la_uid != tmp_la->la_uid)) && + !mdd_capable(uc, CAP_CHOWN)) + RETURN(-EPERM); + /* * If the user or group of a non-directory has been * changed by a non-root user, remove the setuid bit. @@ -819,11 +1006,23 @@ int mdd_fix_attr(const struct lu_context *ctxt, struct mdd_object *obj, * for 2.2 anyway, as it should have been using * CAP_FSETID rather than fsuid -- 19990830 SD. */ - if ((tmp_la->la_mode & S_ISUID) == S_ISUID && + if (((tmp_la->la_mode & S_ISUID) == S_ISUID) && !S_ISDIR(tmp_la->la_mode)) { la->la_mode &= ~S_ISUID; la->la_valid |= LA_MODE; } + } + + /* Make sure caller can chgrp. */ + if (la->la_valid & LA_GID) { + if (la->la_gid == (gid_t) -1) + la->la_gid = tmp_la->la_gid; + if (((uc->mu_fsuid != tmp_la->la_uid) || + ((la->la_gid != tmp_la->la_gid) && + !mdd_in_group_p(uc, la->la_gid))) && + !mdd_capable(uc, CAP_CHOWN)) + RETURN(-EPERM); + /* * Likewise, if the user or group of a non-directory * has been changed by a non-root user, remove the @@ -839,35 +1038,44 @@ int mdd_fix_attr(const struct lu_context *ctxt, struct mdd_object *obj, la->la_mode &= ~S_ISGID; la->la_valid |= LA_MODE; } - } else if (la->la_valid & LA_MODE) { - int mode = la->la_mode; - /* chmod */ - if (la->la_mode == (umode_t)-1) - mode = tmp_la->la_mode; - la->la_mode = - (mode & S_IALLUGO) | (tmp_la->la_mode & ~S_IALLUGO); } - /* For the "Size-on-MDS" setattr update, merge coming attributes with - * the set in the inode. 
*/ - if (la->la_valid & LA_SIZE) { + /* For truncate (or setsize), we should have MAY_WRITE perm */ + if (la->la_valid & (LA_SIZE | LA_BLOCKS)) { + rc = mdd_permission_internal(ctxt, obj, MAY_WRITE, uc); + if (rc) + RETURN(rc); + + /* + * For the "Size-on-MDS" setattr update, merge coming + * attributes with the set in the inode. BUG 10641 + */ if ((la->la_valid & LA_ATIME) && (la->la_atime < tmp_la->la_atime)) la->la_valid &= ~LA_ATIME; - - if ((la->la_valid & LA_CTIME) && + + if ((la->la_valid & LA_CTIME) && (la->la_ctime < tmp_la->la_ctime)) la->la_valid &= ~(LA_MTIME | LA_CTIME); + + if (!(la->la_valid & LA_MTIME) && (now > tmp_la->la_mtime)) { + la->la_mtime = now; + la->la_valid |= LA_MTIME; + } } - RETURN(rc); -} + /* Finally, ctime must be fixed */ + if (!(la->la_valid & LA_CTIME) && (now > tmp_la->la_ctime)) { + la->la_ctime = now; + la->la_valid |= LA_CTIME; + } + RETURN(0); +} /* set attr and LOV EA at once, return updated attr */ -static int mdd_attr_set(const struct lu_context *ctxt, - struct md_object *obj, - const struct md_attr *ma) +static int mdd_attr_set(const struct lu_context *ctxt, struct md_object *obj, + const struct md_attr *ma, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(obj); struct mdd_device *mdd = mdo2mdd(obj); @@ -903,7 +1111,7 @@ static int mdd_attr_set(const struct lu_context *ctxt, *la_copy = ma->ma_attr; mdd_write_lock(ctxt, mdd_obj); - rc = mdd_fix_attr(ctxt, mdd_obj, ma, la_copy); + rc = mdd_fix_attr(ctxt, mdd_obj, la_copy, uc); mdd_write_unlock(ctxt, mdd_obj); if (rc) GOTO(cleanup, rc); @@ -960,15 +1168,43 @@ int mdd_xattr_set_txn(const struct lu_context *ctxt, struct mdd_object *obj, RETURN(rc); } +static int mdd_xattr_sanity_check(const struct lu_context *ctxt, + struct mdd_object *obj, + struct md_ucred *uc) +{ + struct lu_attr *tmp_la = &mdd_ctx_info(ctxt)->mti_la; + int rc; + ENTRY; + + if (mdd_is_immutable(obj) || mdd_is_append(obj)) + RETURN(-EPERM); + + mdd_read_lock(ctxt, obj); + rc =
__mdd_la_get(ctxt, obj, tmp_la); + mdd_read_unlock(ctxt, obj); + if (rc) + RETURN(rc); + + if ((uc->mu_fsuid != tmp_la->la_uid) && !mdd_capable(uc, CAP_FOWNER)) + RETURN(-EPERM); + + RETURN(rc); +} + static int mdd_xattr_set(const struct lu_context *ctxt, struct md_object *obj, - const void *buf, int buf_len, const char *name, - int fl) + const void *buf, int buf_len, const char *name, int fl, + struct md_ucred *uc) { + struct mdd_object *mdd_obj = md2mdd_obj(obj); struct mdd_device *mdd = mdo2mdd(obj); struct thandle *handle; int rc; ENTRY; + rc = mdd_xattr_sanity_check(ctxt, mdd_obj, uc); + if (rc) + RETURN(rc); + mdd_txn_param_build(ctxt, &MDD_TXN_XATTR_SET); handle = mdd_trans_start(ctxt, mdd); if (IS_ERR(handle)) @@ -1005,7 +1241,7 @@ static int __mdd_xattr_del(const struct lu_context *ctxt,struct mdd_device *mdd, } int mdd_xattr_del(const struct lu_context *ctxt, struct md_object *obj, - const char *name) + const char *name, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(obj); struct mdd_device *mdd = mdo2mdd(obj); @@ -1013,6 +1249,10 @@ int mdd_xattr_del(const struct lu_context *ctxt, struct md_object *obj, int rc; ENTRY; + rc = mdd_xattr_sanity_check(ctxt, mdd_obj, uc); + if (rc) + RETURN(rc); + mdd_txn_param_build(ctxt, &MDD_TXN_XATTR_SET); handle = mdd_trans_start(ctxt, mdd); if (IS_ERR(handle)) @@ -1096,13 +1336,16 @@ static int __mdd_index_delete(const struct lu_context *ctxt, static int mdd_link_sanity_check(const struct lu_context *ctxt, struct mdd_object *tgt_obj, - struct mdd_object *src_obj) + struct mdd_object *src_obj, + struct md_ucred *uc) { int rc; + ENTRY; - rc = mdd_may_create(ctxt, tgt_obj, NULL); + rc = mdd_may_create(ctxt, tgt_obj, NULL, 1, uc); if (rc) RETURN(rc); + if (S_ISDIR(mdd_object_type(src_obj))) RETURN(-EPERM); @@ -1114,7 +1357,7 @@ static int mdd_link_sanity_check(const struct lu_context *ctxt, static int mdd_link(const struct lu_context *ctxt, struct md_object *tgt_obj, struct md_object *src_obj, const char 
*name, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { struct mdd_object *mdd_tobj = md2mdd_obj(tgt_obj); struct mdd_object *mdd_sobj = md2mdd_obj(src_obj); @@ -1131,7 +1374,7 @@ static int mdd_link(const struct lu_context *ctxt, struct md_object *tgt_obj, mdd_lock2(ctxt, mdd_tobj, mdd_sobj); - rc = mdd_link_sanity_check(ctxt, mdd_tobj, mdd_sobj); + rc = mdd_link_sanity_check(ctxt, mdd_tobj, mdd_sobj, uc); if (rc) GOTO(out, rc); @@ -1211,12 +1454,13 @@ int __mdd_object_kill(const struct lu_context *ctxt, mdd_set_dead_obj(obj); if (S_ISREG(mdd_object_type(obj)) && ma->ma_need&MA_LOV) { rc = __mdd_lmm_get(ctxt, obj, ma); - if (ma->ma_valid&MA_LOV && ma->ma_need&MA_COOKIE) + if ((ma->ma_valid & MA_LOV) && (ma->ma_need & MA_COOKIE)) rc = mdd_unlink_log(ctxt, mdo2mdd(&obj->mod_obj), obj, ma); } RETURN(rc); } + /* caller should take a lock before calling */ static int __mdd_finish_unlink(const struct lu_context *ctxt, struct mdd_object *obj, struct md_attr *ma, @@ -1240,29 +1484,31 @@ static int __mdd_finish_unlink(const struct lu_context *ctxt, static int mdd_unlink_sanity_check(const struct lu_context *ctxt, struct mdd_object *pobj, struct mdd_object *cobj, - struct md_attr *ma) + struct md_attr *ma, + struct md_ucred *uc) { struct dt_object *dt_cobj = mdd_object_child(cobj); int rc = 0; ENTRY; - rc = mdd_may_delete(ctxt, pobj, cobj, S_ISDIR(ma->ma_attr.la_mode)); + rc = mdd_may_delete(ctxt, pobj, cobj, + S_ISDIR(ma->ma_attr.la_mode), 1, uc); if (rc) RETURN(rc); - if (S_ISDIR(mdd_object_type(cobj)) && - dt_try_as_dir(ctxt, dt_cobj)) { - rc = mdd_dir_is_empty(ctxt, cobj); - if (rc != 0) - RETURN(rc); + if (S_ISDIR(mdd_object_type(cobj))) { + if (dt_try_as_dir(ctxt, dt_cobj)) + rc = mdd_dir_is_empty(ctxt, cobj); + else + rc = -ENOTDIR; } RETURN(rc); } -static int mdd_unlink(const struct lu_context *ctxt, struct md_object *pobj, - struct md_object *cobj, const char *name, - struct md_attr *ma) +static int mdd_unlink(const struct lu_context *ctxt, + 
struct md_object *pobj, struct md_object *cobj, + const char *name, struct md_attr *ma, struct md_ucred *uc) { struct mdd_device *mdd = mdo2mdd(pobj); struct mdd_object *mdd_pobj = md2mdd_obj(pobj); @@ -1279,11 +1525,10 @@ static int mdd_unlink(const struct lu_context *ctxt, struct md_object *pobj, mdd_lock2(ctxt, mdd_pobj, mdd_cobj); - rc = mdd_unlink_sanity_check(ctxt, mdd_pobj, mdd_cobj, ma); + rc = mdd_unlink_sanity_check(ctxt, mdd_pobj, mdd_cobj, ma, uc); if (rc) GOTO(cleanup, rc); - rc = __mdd_index_delete(ctxt, mdd_pobj, name, handle); if (rc) GOTO(cleanup, rc); @@ -1314,9 +1559,10 @@ cleanup: mdd_trans_stop(ctxt, mdd, rc, handle); RETURN(rc); } + /* partial unlink */ static int mdd_ref_del(const struct lu_context *ctxt, struct md_object *obj, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(obj); struct mdd_device *mdd = mdo2mdd(obj); @@ -1331,7 +1577,7 @@ static int mdd_ref_del(const struct lu_context *ctxt, struct md_object *obj, mdd_write_lock(ctxt, mdd_obj); - rc = mdd_unlink_sanity_check(ctxt, NULL, mdd_obj, ma); + rc = mdd_unlink_sanity_check(ctxt, NULL, mdd_obj, ma, uc); if (rc) GOTO(cleanup, rc); @@ -1355,7 +1601,7 @@ static int mdd_parent_fid(const struct lu_context *ctxt, struct mdd_object *obj, struct lu_fid *fid) { - return mdd_lookup(ctxt, &obj->mod_obj, dotdot, fid); + return mdd_lookup_intent(ctxt, &obj->mod_obj, dotdot, fid, 0, NULL); } /* @@ -1463,39 +1709,54 @@ static int mdd_rename_sanity_check(const struct lu_context *ctxt, struct mdd_object *tgt_pobj, const struct lu_fid *sfid, int src_is_dir, - struct mdd_object *tobj) + struct mdd_object *sobj, + struct mdd_object *tobj, + struct md_ucred *uc) { - int rc = 0, tgt_is_dir; + struct mdd_device *mdd = mdo2mdd(&src_pobj->mod_obj); + int rc = 0, need_check = 1; ENTRY; - if (mdd_is_dead_obj(src_pobj)) - RETURN(-ENOENT); + mdd_read_lock(ctxt, src_pobj); + rc = mdd_may_delete(ctxt, src_pobj, sobj, src_is_dir, need_check, uc); + 
mdd_read_unlock(ctxt, src_pobj); + if (rc) + RETURN(rc); + + if (src_pobj == tgt_pobj) + need_check = 0; if (!tobj) { - rc = mdd_may_create(ctxt, tgt_pobj, NULL); + mdd_read_lock(ctxt, tgt_pobj); + rc = mdd_may_create(ctxt, tgt_pobj, NULL, need_check, uc); + mdd_read_unlock(ctxt, tgt_pobj); } else { - rc = mdd_may_delete(ctxt, tgt_pobj, tobj, src_is_dir); - if (rc == 0) { - tgt_is_dir = S_ISDIR(mdd_object_type(tobj)); - if (tgt_is_dir && mdd_dir_is_empty(ctxt, tobj)) - rc = -ENOTEMPTY; - } + mdd_read_lock(ctxt, tgt_pobj); + rc = mdd_may_delete(ctxt, tgt_pobj, tobj, src_is_dir, + need_check, uc); + mdd_read_unlock(ctxt, tgt_pobj); + if (!rc && S_ISDIR(mdd_object_type(tobj)) && + mdd_dir_is_empty(ctxt, tobj)) + RETURN(-ENOTEMPTY); } - if (rc) - RETURN(rc); + + /* source should not be ancestor of target dir */ + if (!rc && src_is_dir && mdd_is_parent(ctxt, mdd, tgt_pobj, sfid, NULL)) + RETURN(-EINVAL); RETURN(rc); } /* src object can be remote that is why we use only fid and type of object */ -static int mdd_rename(const struct lu_context *ctxt, struct md_object *src_pobj, - struct md_object *tgt_pobj, const struct lu_fid *lf, - const char *sname, struct md_object *tobj, - const char *tname, struct md_attr *ma) +static int mdd_rename(const struct lu_context *ctxt, + struct md_object *src_pobj, struct md_object *tgt_pobj, + const struct lu_fid *lf, const char *sname, + struct md_object *tobj, const char *tname, + struct md_attr *ma, struct md_ucred *uc) { struct mdd_device *mdd = mdo2mdd(src_pobj); struct mdd_object *mdd_spobj = md2mdd_obj(src_pobj); struct mdd_object *mdd_tpobj = md2mdd_obj(tgt_pobj); - struct mdd_object *mdd_sobj = NULL; + struct mdd_object *mdd_sobj = mdd_object_find(ctxt, mdd, lf); struct mdd_object *mdd_tobj = NULL; struct lu_attr *la_copy = &mdd_ctx_info(ctxt)->mti_la_for_fix; struct thandle *handle; @@ -1507,21 +1768,21 @@ static int mdd_rename(const struct lu_context *ctxt, struct md_object *src_pobj, is_dir = S_ISDIR(ma->ma_attr.la_mode); if 
(ma->ma_attr.la_valid & LA_FLAGS && ma->ma_attr.la_flags & (LUSTRE_APPEND_FL | LUSTRE_IMMUTABLE_FL)) - RETURN(-EPERM); + GOTO(out, rc = -EPERM); if (tobj) mdd_tobj = md2mdd_obj(tobj); /*XXX: shouldn't this check be done under lock below? */ rc = mdd_rename_sanity_check(ctxt, mdd_spobj, mdd_tpobj, - lf, is_dir, mdd_tobj); + lf, is_dir, mdd_sobj, mdd_tobj, uc); if (rc) - RETURN(rc); + GOTO(out, rc); mdd_txn_param_build(ctxt, &MDD_TXN_RENAME); handle = mdd_trans_start(ctxt, mdd); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(out, rc = PTR_ERR(handle)); /*FIXME: Should consider tobj and sobj too in rename_lock*/ rc = mdd_rename_lock(ctxt, mdd, mdd_spobj, mdd_tpobj); @@ -1546,7 +1807,6 @@ static int mdd_rename(const struct lu_context *ctxt, struct md_object *src_pobj, if (rc) GOTO(cleanup, rc); - mdd_sobj = mdd_object_find(ctxt, mdd, lf); *la_copy = ma->ma_attr; la_copy->la_valid = LA_CTIME; if (mdd_sobj) { @@ -1587,13 +1847,16 @@ cleanup: mdd_rename_unlock(ctxt, mdd_spobj, mdd_tpobj); cleanup_unlocked: mdd_trans_stop(ctxt, mdd, rc, handle); +out: if (mdd_sobj) mdd_object_put(ctxt, mdd_sobj); RETURN(rc); } -static int mdd_lookup(const struct lu_context *ctxt, struct md_object *pobj, - const char *name, struct lu_fid* fid) +static int mdd_lookup_intent(const struct lu_context *ctxt, + struct md_object *pobj, + const char *name, const struct lu_fid* fid, + int mask, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(pobj); struct dt_object *dir = mdd_object_child(mdd_obj); @@ -1604,15 +1867,35 @@ static int mdd_lookup(const struct lu_context *ctxt, struct md_object *pobj, if (mdd_is_dead_obj(mdd_obj)) RETURN(-ESTALE); + mdd_read_lock(ctxt, mdd_obj); + if (mask == MAY_EXEC) + rc = mdd_exec_permission_lite(ctxt, mdd_obj, uc); + else + rc = mdd_permission_internal(ctxt, mdd_obj, mask, uc); + if (rc) + GOTO(out_unlock, rc); + if (S_ISDIR(mdd_object_type(mdd_obj)) && dt_try_as_dir(ctxt, dir)) rc = dir->do_index_ops->dio_lookup(ctxt, dir, rec, key); else rc 
= -ENOTDIR; + +out_unlock: mdd_read_unlock(ctxt, mdd_obj); RETURN(rc); } +static int mdd_lookup(const struct lu_context *ctxt, + struct md_object *pobj, const char *name, + struct lu_fid* fid, struct md_ucred *uc) +{ + int rc; + ENTRY; + rc = mdd_lookup_intent(ctxt, pobj, name, fid, MAY_EXEC, uc); + RETURN(rc); +} + /* * returns 1: if fid is ancestor of @mo; * returns 0: if fid is not a ancestor of @mo; @@ -1622,8 +1905,9 @@ static int mdd_lookup(const struct lu_context *ctxt, struct md_object *pobj, * * returns < 0: if error */ -static int mdd_is_subdir(const struct lu_context *ctx, struct md_object *mo, - const struct lu_fid *fid, struct lu_fid *sfid) +static int mdd_is_subdir(const struct lu_context *ctx, + struct md_object *mo, const struct lu_fid *fid, + struct lu_fid *sfid, struct md_ucred *uc) { struct mdd_device *mdd = mdo2mdd(mo); int rc; @@ -1679,10 +1963,33 @@ static int __mdd_object_initialize(const struct lu_context *ctxt, RETURN(rc); } +/* + * XXX: Need MAY_WRITE to be checked? 
+ */ +static int mdd_cd_sanity_check(const struct lu_context *ctxt, + struct mdd_object *obj, struct md_ucred *uc) +{ + int rc = 0; + ENTRY; + + /* EEXIST check */ + if (!obj || mdd_is_dead_obj(obj)) + RETURN(-ENOENT); + +#if 0 + mdd_read_lock(ctxt, obj); + rc = mdd_permission_internal(ctxt, obj, MAY_WRITE, uc); + mdd_read_unlock(ctxt, obj); +#endif + + RETURN(rc); + +} + static int mdd_create_data(const struct lu_context *ctxt, struct md_object *pobj, struct md_object *cobj, const struct md_create_spec *spec, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { struct mdd_device *mdd = mdo2mdd(cobj); struct mdd_object *mdd_pobj = md2mdd_obj(pobj);/* XXX maybe NULL */ @@ -1694,6 +2001,10 @@ static int mdd_create_data(const struct lu_context *ctxt, int rc; ENTRY; + rc = mdd_cd_sanity_check(ctxt, son, uc); + if (rc) + RETURN(rc); + if (spec->sp_cr_flags & MDS_OPEN_DELAY_CREATE || !(spec->sp_cr_flags & FMODE_WRITE)) RETURN(0); @@ -1732,9 +2043,9 @@ static int mdd_create_data(const struct lu_context *ctxt, } static int mdd_create_sanity_check(const struct lu_context *ctxt, - struct mdd_device *mdd, struct md_object *pobj, - const char *name, struct md_attr *ma) + const char *name, struct md_attr *ma, + struct md_ucred *uc) { struct mdd_thread_info *info = mdd_ctx_info(ctxt); struct lu_attr *la = &info->mti_la; @@ -1746,7 +2057,8 @@ static int mdd_create_sanity_check(const struct lu_context *ctxt, /* EEXIST check */ if (mdd_is_dead_obj(obj)) RETURN(-ENOENT); - rc = mdd_lookup(ctxt, pobj, name, fid); + + rc = mdd_lookup_intent(ctxt, pobj, name, fid, MAY_WRITE | MAY_EXEC, uc); if (rc != -ENOENT) RETURN(rc ? 
: -EEXIST); @@ -1754,7 +2066,7 @@ static int mdd_create_sanity_check(const struct lu_context *ctxt, mdd_read_lock(ctxt, obj); rc = __mdd_la_get(ctxt, obj, la); mdd_read_unlock(ctxt, obj); - if (rc != 0) + if (rc) RETURN(rc); if (la->la_mode & S_ISGID) { @@ -1785,10 +2097,11 @@ static int mdd_create_sanity_check(const struct lu_context *ctxt, /* * Create object and insert it into namespace. */ -static int mdd_create(const struct lu_context *ctxt, struct md_object *pobj, - const char *name, struct md_object *child, +static int mdd_create(const struct lu_context *ctxt, + struct md_object *pobj, const char *name, + struct md_object *child, const struct md_create_spec *spec, - struct md_attr* ma) + struct md_attr* ma, struct md_ucred *uc) { struct mdd_device *mdd = mdo2mdd(pobj); struct mdd_object *mdd_pobj = md2mdd_obj(pobj); @@ -1801,9 +2114,10 @@ static int mdd_create(const struct lu_context *ctxt, struct md_object *pobj, ENTRY; /* sanity checks before big job */ - rc = mdd_create_sanity_check(ctxt, mdd, pobj, name, ma); + rc = mdd_create_sanity_check(ctxt, pobj, name, ma, uc); if (rc) RETURN(rc); + /* no RPC inside the transaction, so OST objects should be created at * first */ if (S_ISREG(attr->la_mode)) { @@ -1945,11 +2259,42 @@ cleanup: mdd_trans_stop(ctxt, mdd, rc, handle); RETURN(rc); } + /* partial operation */ +static int mdd_oc_sanity_check(const struct lu_context *ctxt, + struct mdd_object *obj, + struct md_attr *ma, + struct md_ucred *uc) +{ + int rc; + ENTRY; + + /* EEXIST check */ + if (lu_object_exists(&obj->mod_obj.mo_lu)) + RETURN(-EEXIST); + + switch (ma->ma_attr.la_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + rc = 0; + break; + default: + rc = -EINVAL; + break; + } + RETURN(rc); +} + static int mdd_object_create(const struct lu_context *ctxt, struct md_object *obj, const struct md_create_spec *spec, - struct md_attr *ma) + struct md_attr *ma, + struct md_ucred 
*uc) { struct mdd_device *mdd = mdo2mdd(obj); @@ -1959,6 +2304,10 @@ static int mdd_object_create(const struct lu_context *ctxt, int rc; ENTRY; + rc = mdd_oc_sanity_check(ctxt, mdd_obj, ma, uc); + if (rc) + RETURN(rc); + mdd_txn_param_build(ctxt, &MDD_TXN_OBJECT_CREATE); handle = mdd_trans_start(ctxt, mdd); if (IS_ERR(handle)) @@ -1982,13 +2331,37 @@ static int mdd_object_create(const struct lu_context *ctxt, if (rc == 0) rc = mdd_attr_get_internal_locked(ctxt, mdd_obj, ma); + mdd_trans_stop(ctxt, mdd, rc, handle); RETURN(rc); } + /* partial operation */ +static int mdd_ni_sanity_check(const struct lu_context *ctxt, + struct md_object *pobj, + const char *name, + const struct lu_fid *fid, + struct md_ucred *uc) +{ + struct mdd_object *obj = md2mdd_obj(pobj); + int rc; + ENTRY; + + /* EEXIST check */ + if (mdd_is_dead_obj(obj)) + RETURN(-ENOENT); + + rc = mdd_lookup_intent(ctxt, pobj, name, fid, MAY_WRITE | MAY_EXEC, uc); + if (rc != -ENOENT) + RETURN(rc ? : -EEXIST); + else + RETURN(0); +} + static int mdd_name_insert(const struct lu_context *ctxt, - struct md_object *pobj, const char *name, - const struct lu_fid *fid, int isdir) + struct md_object *pobj, + const char *name, const struct lu_fid *fid, + int isdir, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(pobj); struct thandle *handle; @@ -2001,16 +2374,42 @@ static int mdd_name_insert(const struct lu_context *ctxt, RETURN(PTR_ERR(handle)); mdd_write_lock(ctxt, mdd_obj); + rc = mdd_ni_sanity_check(ctxt, pobj, name, fid, uc); + if (rc) + GOTO(out_unlock, rc); + rc = __mdd_index_insert(ctxt, mdd_obj, fid, name, isdir, handle); + +out_unlock: mdd_write_unlock(ctxt, mdd_obj); mdd_trans_stop(ctxt, mdo2mdd(pobj), rc, handle); RETURN(rc); } +static int mdd_nr_sanity_check(const struct lu_context *ctxt, + struct md_object *pobj, + const char *name, + struct md_ucred *uc) +{ + struct mdd_thread_info *info = mdd_ctx_info(ctxt); + struct lu_fid *fid = &info->mti_fid; + struct mdd_object *obj = 
md2mdd_obj(pobj); + int rc; + ENTRY; + + /* EEXIST check */ + if (mdd_is_dead_obj(obj)) + RETURN(-ENOENT); + + rc = mdd_lookup_intent(ctxt, pobj, name, fid, MAY_WRITE | MAY_EXEC, uc); + RETURN(rc); +} + static int mdd_name_remove(const struct lu_context *ctxt, struct md_object *pobj, - const char *name) + const char *name, + struct md_ucred *uc) { struct mdd_device *mdd = mdo2mdd(pobj); struct mdd_object *mdd_obj = md2mdd_obj(pobj); @@ -2024,18 +2423,55 @@ static int mdd_name_remove(const struct lu_context *ctxt, RETURN(PTR_ERR(handle)); mdd_write_lock(ctxt, mdd_obj); + rc = mdd_nr_sanity_check(ctxt, pobj, name, uc); + if (rc) + GOTO(out_unlock, rc); rc = __mdd_index_delete(ctxt, mdd_obj, name, handle); +out_unlock: mdd_write_unlock(ctxt, mdd_obj); mdd_trans_stop(ctxt, mdd, rc, handle); RETURN(rc); } -static int mdd_rename_tgt(const struct lu_context *ctxt, struct md_object *pobj, - struct md_object *tobj, const struct lu_fid *lf, - const char *name, struct md_attr *ma) +static int mdd_rt_sanity_check(const struct lu_context *ctxt, + struct mdd_object *tgt_pobj, + struct mdd_object *tobj, + const struct lu_fid *sfid, + const char *name, struct md_attr *ma, + struct md_ucred *uc) +{ + struct mdd_device *mdd = mdo2mdd(&tgt_pobj->mod_obj); + int rc, src_is_dir; + ENTRY; + + /* EEXIST check */ + if (mdd_is_dead_obj(tgt_pobj)) + RETURN(-ENOENT); + + src_is_dir = S_ISDIR(ma->ma_attr.la_mode); + if (tobj) { + rc = mdd_may_delete(ctxt, tgt_pobj, tobj, src_is_dir, 1, uc); + if (!rc && S_ISDIR(mdd_object_type(tobj)) && + mdd_dir_is_empty(ctxt, tobj)) + RETURN(-ENOTEMPTY); + } else { + rc = mdd_may_create(ctxt, tgt_pobj, NULL, 1, uc); + } + + /* source should not be ancestor of target dir */ + if (!rc && src_is_dir && mdd_is_parent(ctxt, mdd, tgt_pobj, sfid, NULL)) + RETURN(-EINVAL); + + RETURN(rc); +} + +static int mdd_rename_tgt(const struct lu_context *ctxt, + struct md_object *pobj, struct md_object *tobj, + const struct lu_fid *lf, const char *name, + struct md_attr
*ma, struct md_ucred *uc) { struct mdd_device *mdd = mdo2mdd(pobj); struct mdd_object *mdd_tpobj = md2mdd_obj(pobj); @@ -2049,12 +2485,18 @@ static int mdd_rename_tgt(const struct lu_context *ctxt, struct md_object *pobj, if (IS_ERR(handle)) RETURN(PTR_ERR(handle)); - if (tobj) + if (tobj) { mdd_tobj = md2mdd_obj(tobj); - - mdd_lock2(ctxt, mdd_tpobj, mdd_tobj); + mdd_lock2(ctxt, mdd_tpobj, mdd_tobj); + } else { + mdd_write_lock(ctxt, mdd_tpobj); + } /*TODO rename sanity checking*/ + rc = mdd_rt_sanity_check(ctxt, mdd_tpobj, mdd_tobj, lf, name, ma, uc); + if (rc) + GOTO(cleanup, rc); + if (tobj) { rc = __mdd_index_delete(ctxt, mdd_tpobj, name, handle); if (rc) @@ -2068,13 +2510,20 @@ static int mdd_rename_tgt(const struct lu_context *ctxt, struct md_object *pobj, if (tobj && lu_object_exists(&tobj->mo_lu)) __mdd_ref_del(ctxt, mdd_tobj, handle); cleanup: - mdd_unlock2(ctxt, mdd_tpobj, mdd_tobj); + if (tobj) + mdd_unlock2(ctxt, mdd_tpobj, mdd_tobj); + else + mdd_write_unlock(ctxt, mdd_tpobj); mdd_trans_stop(ctxt, mdd, rc, handle); RETURN(rc); } +/* + * No permission check is needed. + */ static int mdd_root_get(const struct lu_context *ctx, - struct md_device *m, struct lu_fid *f) + struct md_device *m, struct lu_fid *f, + struct md_ucred *uc) { struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); @@ -2083,8 +2532,11 @@ static int mdd_root_get(const struct lu_context *ctx, RETURN(0); } -static int mdd_statfs(const struct lu_context *ctx, - struct md_device *m, struct kstatfs *sfs) +/* + * No permission check is needed. + */ +static int mdd_statfs(const struct lu_context *ctx, struct md_device *m, + struct kstatfs *sfs, struct md_ucred *uc) { struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); int rc; @@ -2096,9 +2548,11 @@ static int mdd_statfs(const struct lu_context *ctx, RETURN(rc); } -static int mdd_maxsize_get(const struct lu_context *ctx, - struct md_device *m, int *md_size, - int *cookie_size) +/* + * No permission check is needed. 
+ */ +static int mdd_maxsize_get(const struct lu_context *ctx, struct md_device *m, + int *md_size, int *cookie_size, struct md_ucred *uc) { struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); ENTRY; @@ -2119,7 +2573,11 @@ static void __mdd_ref_add(const struct lu_context *ctxt, struct mdd_object *obj, next->do_ops->do_ref_add(ctxt, next, handle); } -static int mdd_ref_add(const struct lu_context *ctxt, struct md_object *obj) +/* + * XXX: if permission check is needed here? + */ +static int mdd_ref_add(const struct lu_context *ctxt, + struct md_object *obj, struct md_ucred *uc) { struct mdd_object *mdd_obj = md2mdd_obj(obj); struct mdd_device *mdd = mdo2mdd(obj); @@ -2167,26 +2625,78 @@ static int accmode(struct mdd_object *mdd_obj, int flags) #endif if (flags & FMODE_READ) res = MAY_READ; - if (flags & (FMODE_WRITE|MDS_OPEN_TRUNC)) + if (flags & (FMODE_WRITE | MDS_OPEN_TRUNC | MDS_OPEN_APPEND)) res |= MAY_WRITE; if (flags & MDS_FMODE_EXEC) res = MAY_EXEC; return res; } +static int mdd_open_sanity_check(const struct lu_context *ctxt, + struct mdd_object *obj, int flag, + struct md_ucred *uc) +{ + struct lu_attr *tmp_la = &mdd_ctx_info(ctxt)->mti_la; + int mode = accmode(obj, flag); + int rc; + ENTRY; + + /* EEXIST check */ + if (mdd_is_dead_obj(obj)) + RETURN(-ENOENT); + + rc = __mdd_la_get(ctxt, obj, tmp_la); + if (rc) + RETURN(rc); + + if (S_ISLNK(tmp_la->la_mode)) + RETURN(-ELOOP); + + if (S_ISDIR(tmp_la->la_mode) && (mode & MAY_WRITE)) + RETURN(-EISDIR); + + if (!(flag & MDS_OPEN_CREATED)) { + rc = __mdd_permission_internal(ctxt, obj, mode, 0, uc); + if (rc) + RETURN(rc); + } + + /* + * FIFO's, sockets and device files are special: they don't + * actually live on the filesystem itself, and as such you + * can write to them even if the filesystem is read-only. 
+ */ + if (S_ISFIFO(tmp_la->la_mode) || S_ISSOCK(tmp_la->la_mode) || + S_ISBLK(tmp_la->la_mode) || S_ISCHR(tmp_la->la_mode)) + flag &= ~O_TRUNC; + + /* + * An append-only file must be opened in append mode for writing. + */ + if (mdd_is_append(obj)) { + if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) + RETURN(-EPERM); + if (flag & O_TRUNC) + RETURN(-EPERM); + } + + /* O_NOATIME can only be set by the owner or superuser */ + if (flag & O_NOATIME) + if (uc->mu_fsuid != tmp_la->la_uid && !mdd_capable(uc, CAP_FOWNER)) + RETURN(-EPERM); + + RETURN(0); +} + static int mdd_open(const struct lu_context *ctxt, struct md_object *obj, - int flags) + int flags, struct md_ucred *uc) { - int mode = accmode(md2mdd_obj(obj), flags); + struct mdd_object *mdd_obj = md2mdd_obj(obj); int rc = 0; mdd_write_lock(ctxt, md2mdd_obj(obj)); - if (mode & MAY_WRITE) { - if (mdd_is_immutable(md2mdd_obj(obj))) - rc = -EACCES; - } - + rc = mdd_open_sanity_check(ctxt, mdd_obj, flags, uc); if (rc == 0) md2mdd_obj(obj)->mod_count ++; @@ -2194,8 +2704,11 @@ static int mdd_open(const struct lu_context *ctxt, struct md_object *obj, return rc; } +/* + * No permission check is needed. 
+ */ static int mdd_close(const struct lu_context *ctxt, struct md_object *obj, - struct md_attr *ma) + struct md_attr *ma, struct md_ucred *uc) { int rc; struct mdd_object *mdd_obj; @@ -2223,26 +2736,306 @@ static int mdd_close(const struct lu_context *ctxt, struct md_object *obj, RETURN(rc); } +static int mdd_readpage_sanity_check(const struct lu_context *ctxt, + struct mdd_object *obj, + struct md_ucred *uc) +{ + struct dt_object *next = mdd_object_child(obj); + int rc; + ENTRY; + + if (S_ISDIR(mdd_object_type(obj)) && + dt_try_as_dir(ctxt, next)) + rc = mdd_permission_internal(ctxt, obj, MAY_READ, uc); + else + rc = -ENOTDIR; + + RETURN(rc); +} + static int mdd_readpage(const struct lu_context *ctxt, struct md_object *obj, - const struct lu_rdpg *rdpg) + const struct lu_rdpg *rdpg, struct md_ucred *uc) { struct dt_object *next; struct mdd_object *mdd_obj = md2mdd_obj(obj); int rc; + ENTRY; LASSERT(lu_object_exists(mdd2lu_obj(mdd_obj))); next = mdd_object_child(mdd_obj); mdd_read_lock(ctxt, mdd_obj); - if (S_ISDIR(mdd_object_type(mdd_obj)) && - dt_try_as_dir(ctxt, next)) - rc = next->do_ops->do_readpage(ctxt, next, rdpg); - else - rc = -ENOTDIR; + rc = mdd_readpage_sanity_check(ctxt, mdd_obj, uc); + if (rc) + GOTO(out_unlock, rc); + + rc = next->do_ops->do_readpage(ctxt, next, rdpg); + +out_unlock: mdd_read_unlock(ctxt, mdd_obj); + RETURN(rc); +} + +#ifdef CONFIG_FS_POSIX_ACL +#include +#include + +static int mdd_posix_acl_permission(struct md_ucred *uc, struct lu_attr *la, + int want, posix_acl_xattr_entry *entry, + int count) +{ + posix_acl_xattr_entry *pa, *pe, *mask_obj; + int found = 0; + ENTRY; + + if (count <= 0) + RETURN(-EACCES); + + pa = &entry[0]; + pe = &entry[count - 1]; + for (; pa <= pe; pa++) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + /* (May have been checked already) */ + if (la->la_uid == uc->mu_fsuid) + goto check_perm; + break; + case ACL_USER: + if (pa->e_id == uc->mu_fsuid) + goto mask; + break; + case ACL_GROUP_OBJ: + if 
(mdd_in_group_p(uc, la->la_gid)) { + found = 1; + if ((pa->e_perm & want) == want) + goto mask; + } + break; + case ACL_GROUP: + if (mdd_in_group_p(uc, pa->e_id)) { + found = 1; + if ((pa->e_perm & want) == want) + goto mask; + } + break; + case ACL_MASK: + break; + case ACL_OTHER: + if (found) + RETURN(-EACCES); + else + goto check_perm; + default: + RETURN(-EIO); + } + } + RETURN(-EIO); + +mask: + for (mask_obj = pa + 1; mask_obj <= pe; mask_obj++) { + if (mask_obj->e_tag == ACL_MASK) { + if ((pa->e_perm & mask_obj->e_perm & want) == want) + RETURN(0); + + RETURN(-EACCES); + } + } + +check_perm: + if ((pa->e_perm & want) == want) + RETURN(0); + + RETURN(-EACCES); +} +#endif + +static int mdd_check_acl(const struct lu_context *ctxt, struct mdd_object *obj, + struct lu_attr* la, int mask, struct md_ucred *uc) +{ +#ifdef CONFIG_FS_POSIX_ACL + struct dt_object *next; + void *buf; + int buf_len; + posix_acl_xattr_entry *entry; + int entry_count; + int rc; + ENTRY; + + next = mdd_object_child(obj); + buf_len = next->do_ops->do_xattr_get(ctxt, next, NULL, 0, ""); + if (buf_len <= 0) + RETURN(buf_len ? : -EACCES); + + OBD_ALLOC(buf, buf_len); + if (buf == NULL) + RETURN(-ENOMEM); + + rc = next->do_ops->do_xattr_get(ctxt, next, buf, buf_len, ""); + if (rc <= 0) + GOTO(out, rc = rc ? 
: -EACCES); + + entry = ((posix_acl_xattr_header *)buf)->a_entries; + entry_count = (rc - 4) / sizeof(posix_acl_xattr_entry); + + rc = mdd_posix_acl_permission(uc, la, mask, entry, entry_count); + +out: + OBD_FREE(buf, buf_len); + RETURN(rc); +#else + ENTRY; + RETURN(-EAGAIN); +#endif +} + +static int mdd_exec_permission_lite(const struct lu_context *ctxt, + struct mdd_object *obj, + struct md_ucred *uc) +{ + struct lu_attr *la = &mdd_ctx_info(ctxt)->mti_la; + umode_t mode; + int rc; + ENTRY; + + /* These means unnecessary for permission check */ + if ((uc == NULL) || (uc->mu_valid == UCRED_INIT)) + RETURN(0); + + /* Invalid user credit */ + if (uc->mu_valid == UCRED_INVALID) + RETURN(-EACCES); + + rc = __mdd_la_get(ctxt, obj, la); + if (rc) + RETURN(rc); + + mode = la->la_mode; + if (uc->mu_fsuid == la->la_uid) + mode >>= 6; + else if (mdd_in_group_p(uc, la->la_gid)) + mode >>= 3; + + if (mode & MAY_EXEC) + RETURN(0); + + if (((la->la_mode & S_IXUGO) || S_ISDIR(la->la_mode)) && + mdd_capable(uc, CAP_DAC_OVERRIDE)) + RETURN(0); + + if (S_ISDIR(la->la_mode) && mdd_capable(uc, CAP_DAC_READ_SEARCH)) + RETURN(0); + + RETURN(-EACCES); +} + +static int __mdd_permission_internal(const struct lu_context *ctxt, + struct mdd_object *obj, + int mask, int getattr, + struct md_ucred *uc) +{ + struct lu_attr *la = &mdd_ctx_info(ctxt)->mti_la; + __u32 mode; + int rc; + + ENTRY; + + if (mask == 0) + RETURN(0); + + /* These means unnecessary for permission check */ + if ((uc == NULL) || (uc->mu_valid == UCRED_INIT)) + RETURN(0); + + /* Invalid user credit */ + if (uc->mu_valid == UCRED_INVALID) + RETURN(-EACCES); + + /* + * Nobody gets write access to an immutable file. 
+ */ + if ((mask & MAY_WRITE) && mdd_is_immutable(obj)) + RETURN(-EACCES); + + if (getattr) { + rc = __mdd_la_get(ctxt, obj, la); + if (rc) + RETURN(rc); + } + + mode = la->la_mode; + if (uc->mu_fsuid == la->la_uid) { + mode >>= 6; + } else { + if (mode & S_IRWXG) { + if (((mode >> 3) & mask & S_IRWXO) != mask) + goto check_groups; + + rc = mdd_check_acl(ctxt, obj, la, mask, uc); + if (rc == -EACCES) + goto check_capabilities; + else if ((rc != -EAGAIN) && (rc != -EOPNOTSUPP)) + RETURN(rc); + } + +check_groups: + if (mdd_in_group_p(uc, la->la_gid)) + mode >>= 3; + } + + /* + * If the DACs are ok we don't need any capability check. + */ + if (((mode & mask & S_IRWXO) == mask)) + RETURN(0); + +check_capabilities: + + /* + * Read/write DACs are always overridable. + * Executable DACs are overridable if at least one exec bit is set. + * Dir's DACs are always overridable. + */ + if (!(mask & MAY_EXEC) || + (la->la_mode & S_IXUGO) || S_ISDIR(la->la_mode)) + if (mdd_capable(uc, CAP_DAC_OVERRIDE)) + RETURN(0); + + /* + * Searching includes executable on directories, else just read. 
+ */ + if ((mask == MAY_READ) || + (S_ISDIR(la->la_mode) && !(mask & MAY_WRITE))) + if (mdd_capable(uc, CAP_DAC_READ_SEARCH)) + RETURN(0); + + RETURN(-EACCES); +} + +static inline int mdd_permission_internal_locked(const struct lu_context *ctxt, + struct mdd_object *obj, + int mask, struct md_ucred *uc) +{ + int rc; + + mdd_read_lock(ctxt, obj); + rc = mdd_permission_internal(ctxt, obj, mask, uc); + mdd_read_unlock(ctxt, obj); + return rc; } +static int mdd_permission(const struct lu_context *ctxt, struct md_object *obj, + int mask, struct md_ucred *uc) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + int rc; + ENTRY; + + rc = mdd_permission_internal_locked(ctxt, mdd_obj, mask, uc); + + RETURN(rc); +} + struct md_device_operations mdd_ops = { .mdo_statfs = mdd_statfs, .mdo_root_get = mdd_root_get, @@ -2262,8 +3055,8 @@ static struct md_dir_operations mdd_dir_ops = { .mdo_create_data = mdd_create_data }; - static struct md_object_operations mdd_obj_ops = { + .moo_permission = mdd_permission, .moo_attr_get = mdd_attr_get, .moo_attr_set = mdd_attr_set, .moo_xattr_get = mdd_xattr_get, diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 6483ee3..e92dd445 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -31,6 +31,8 @@ #include #include +#include +#include struct dt_device; diff --git a/lustre/mdd/mdd_lov.c b/lustre/mdd/mdd_lov.c index a0b4002..3f1bbca 100644 --- a/lustre/mdd/mdd_lov.c +++ b/lustre/mdd/mdd_lov.c @@ -533,8 +533,9 @@ int mdd_unlink_log(const struct lu_context *ctxt, struct mdd_device *mdd, { struct obd_device *obd = mdd2obd_dev(mdd); - if (mds_log_op_unlink(obd, ma->ma_lmm, ma->ma_lmm_size, - ma->ma_cookie, ma->ma_cookie_size) > 0) { + if ((ma->ma_cookie_size > 0) && + (mds_log_op_unlink(obd, ma->ma_lmm, ma->ma_lmm_size, + ma->ma_cookie, ma->ma_cookie_size) > 0)) { ma->ma_valid |= MA_COOKIE; } return 0; diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 1bd28c3..b93dbb5 100644 --- 
a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -812,7 +812,7 @@ static int mds_getattr_lock(struct ptlrpc_request *req, int offset, struct lvfs_run_ctxt saved; struct mds_body *body; struct dentry *dparent = NULL, *dchild = NULL; - struct lvfs_ucred uc = {NULL,}; + struct lvfs_ucred uc = {0,}; struct lustre_handle parent_lockh; int namesize; int rc = 0, cleanup_phase = 0, resent_req = 0; @@ -978,7 +978,7 @@ static int mds_getattr(struct ptlrpc_request *req, int offset) struct lvfs_run_ctxt saved; struct dentry *de; struct mds_body *body; - struct lvfs_ucred uc = { NULL, }; + struct lvfs_ucred uc = {0,}; int rc = 0; ENTRY; @@ -1131,7 +1131,7 @@ static int mds_readpage(struct ptlrpc_request *req, int offset) struct mds_body *body, *repbody; struct lvfs_run_ctxt saved; int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*repbody) }; - struct lvfs_ucred uc = {NULL,}; + struct lvfs_ucred uc = {0,}; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_MDS_READPAGE_PACK)) @@ -1970,12 +1970,14 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg) if (rc) GOTO(err_fs, rc); +#if 0 mds->mds_group_hash = upcall_cache_init(obd->obd_name); if (IS_ERR(mds->mds_group_hash)) { rc = PTR_ERR(mds->mds_group_hash); mds->mds_group_hash = NULL; GOTO(err_qctxt, rc); } +#endif /* Don't wait for mds_postrecov trying to clear orphans */ obd->obd_async_recov = 1; @@ -2051,8 +2053,10 @@ err_qctxt: err_fs: /* No extra cleanup needed for llog_init_commit_thread() */ mds_fs_cleanup(obd); +#if 0 upcall_cache_cleanup(mds->mds_group_hash); mds->mds_group_hash = NULL; +#endif err_ns: ldlm_namespace_free(obd->obd_namespace, 0); obd->obd_namespace = NULL; @@ -2255,8 +2259,10 @@ static int mds_cleanup(struct obd_device *obd) OBD_FREE(mds->mds_lov_objids, mds->mds_lov_objids_size); mds_fs_cleanup(obd); +#if 0 upcall_cache_cleanup(mds->mds_group_hash); mds->mds_group_hash = NULL; +#endif must_put = server_put_mount(obd->obd_name, mds->mds_vfsmnt); /* must_put is for old method (l_p_m returns 
non-0 on err) */ diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 1140a61..8ca1319 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -82,6 +82,7 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer, return count; } +#if 0 static int lprocfs_wr_group_info(struct file *file, const char *buffer, unsigned long count, void *data) { @@ -235,6 +236,7 @@ static int lprocfs_wr_group_flush(struct file *file, const char *buffer, upcall_cache_flush_idle(obd->u.mds.mds_group_hash); return count; } +#endif #ifdef HAVE_QUOTA_SUPPORT static int lprocfs_mds_rd_bunit(char *page, char **start, off_t off, int count, @@ -406,6 +408,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "quota_iunit_sz", lprocfs_mds_rd_iunit, lprocfs_mds_wr_iunit, 0 }, { "quota_itune_sz", lprocfs_mds_rd_itune, lprocfs_mds_wr_itune, 0 }, #endif +#if 0 { "group_expire_interval", lprocfs_rd_group_expire, lprocfs_wr_group_expire, 0}, { "group_acquire_expire", lprocfs_rd_group_acquire_expire, @@ -414,6 +417,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { lprocfs_wr_group_upcall, 0}, { "group_flush", 0, lprocfs_wr_group_flush, 0}, { "group_info", 0, lprocfs_wr_group_info, 0 }, +#endif { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 }, { 0 } }; diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index c9d33f4..e46c3e2 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -104,8 +104,10 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->sa_fsuid; r->ur_uc.luc_fsgid = rec->sa_fsgid; r->ur_uc.luc_cap = rec->sa_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->sa_suppgid; r->ur_uc.luc_suppgid2 = -1; +#endif r->ur_fid1 = &rec->sa_fid; attr->ia_valid = rec->sa_valid; attr->ia_mode = rec->sa_mode; @@ -150,8 +152,10 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->cr_fsuid; r->ur_uc.luc_fsgid = rec->cr_fsgid; r->ur_uc.luc_cap = 
rec->cr_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->cr_suppgid; r->ur_uc.luc_suppgid2 = -1; +#endif r->ur_fid1 = &rec->cr_fid; r->ur_fid2 = &rec->cr_replayfid; r->ur_mode = rec->cr_mode; @@ -196,8 +200,10 @@ static int mds_link_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->lk_fsuid; r->ur_uc.luc_fsgid = rec->lk_fsgid; r->ur_uc.luc_cap = rec->lk_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->lk_suppgid1; r->ur_uc.luc_suppgid2 = rec->lk_suppgid2; +#endif r->ur_fid1 = &rec->lk_fid1; r->ur_fid2 = &rec->lk_fid2; r->ur_time = rec->lk_time; @@ -224,8 +230,10 @@ static int mds_unlink_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->ul_fsuid; r->ur_uc.luc_fsgid = rec->ul_fsgid; r->ur_uc.luc_cap = rec->ul_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->ul_suppgid; r->ur_uc.luc_suppgid2 = -1; +#endif r->ur_mode = rec->ul_mode; r->ur_fid1 = &rec->ul_fid1; r->ur_fid2 = &rec->ul_fid2; @@ -253,8 +261,10 @@ static int mds_rename_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->rn_fsuid; r->ur_uc.luc_fsgid = rec->rn_fsgid; r->ur_uc.luc_cap = rec->rn_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->rn_suppgid1; r->ur_uc.luc_suppgid2 = rec->rn_suppgid2; +#endif r->ur_fid1 = &rec->rn_fid1; r->ur_fid2 = &rec->rn_fid2; r->ur_time = rec->rn_time; @@ -287,8 +297,10 @@ static int mds_open_unpack(struct ptlrpc_request *req, int offset, r->ur_uc.luc_fsuid = rec->cr_fsuid; r->ur_uc.luc_fsgid = rec->cr_fsgid; r->ur_uc.luc_cap = rec->cr_cap; +#if 0 r->ur_uc.luc_suppgid1 = rec->cr_suppgid; r->ur_uc.luc_suppgid2 = -1; +#endif r->ur_fid1 = &rec->cr_fid; r->ur_fid2 = &rec->cr_replayfid; r->ur_mode = rec->cr_mode; @@ -358,8 +370,10 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, { struct mds_body *body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body)); +#if 0 struct mds_obd *mds = mds_req2mds(req); int rc; +#endif LASSERT(body != NULL); /* previously verified & swabbed by caller */ @@ -376,6 +390,7 @@ int 
mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, ucred->luc_cap = body->capability; } +#if 0 ucred->luc_uce = upcall_cache_get_entry(mds->mds_group_hash, ucred->luc_fsuid, ucred->luc_fsgid, 1, @@ -390,11 +405,14 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, if (ucred->luc_uce) ucred->luc_fsgid = ucred->luc_uce->ue_primary; #endif +#endif return 0; } void mds_exit_ucred(struct lvfs_ucred *ucred, struct mds_obd *mds) { +#if 0 upcall_cache_put_entry(mds->mds_group_hash, ucred->luc_uce); +#endif } diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index e8574054..82f3a9f 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -2306,7 +2306,9 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, struct ptlrpc_request *req, struct lustre_handle *lockh) { struct obd_device *obd = req->rq_export->exp_obd; +#if 0 struct mds_obd *mds = &obd->u.mds; +#endif struct lvfs_run_ctxt saved; int rc; ENTRY; @@ -2321,6 +2323,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, } #endif +#if 0 /* get group info of this user */ rec->ur_uc.luc_uce = upcall_cache_get_entry(mds->mds_group_hash, rec->ur_uc.luc_fsuid, @@ -2340,11 +2343,14 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, if (rec->ur_uc.luc_uce) rec->ur_uc.luc_fsgid = rec->ur_uc.luc_uce->ue_primary; #endif +#endif push_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc); rc = reinters[rec->ur_opcode] (rec, offset, req, lockh); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc); +#if 0 upcall_cache_put_entry(mds->mds_group_hash, rec->ur_uc.luc_uce); +#endif RETURN(rc); } diff --git a/lustre/mds/mds_xattr.c b/lustre/mds/mds_xattr.c index ca46092..33695d0 100644 --- a/lustre/mds/mds_xattr.c +++ b/lustre/mds/mds_xattr.c @@ -170,7 +170,7 @@ int mds_getxattr(struct ptlrpc_request *req) struct lvfs_run_ctxt saved; struct dentry *de; struct mds_body *body; - struct lvfs_ucred uc = { NULL, }; + struct lvfs_ucred uc = {0,}; int rc = 
0; ENTRY; @@ -332,7 +332,7 @@ int mds_setxattr(struct ptlrpc_request *req) struct obd_device *obd = req->rq_export->exp_obd; struct lvfs_run_ctxt saved; struct mds_body *body; - struct lvfs_ucred uc = { NULL, }; + struct lvfs_ucred uc = {0,}; int rc; ENTRY; diff --git a/lustre/mdt/Makefile.in b/lustre/mdt/Makefile.in index b1d1c39..3dcf8bf 100644 --- a/lustre/mdt/Makefile.in +++ b/lustre/mdt/Makefile.in @@ -1,4 +1,5 @@ MODULES := mdt mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o mdt_open.o +mdt-objs += mdt_idmap.o mdt_identity.o mdt_rmtacl.o @INCLUDE_RULES@ diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index d6503ce..34e3695 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -57,6 +57,7 @@ #include #include "mdt_internal.h" #include +#include /* * Initialized in mdt_mod_init(). */ @@ -168,12 +169,12 @@ static int mdt_getstatus(struct mdt_thread_info *info) ENTRY; - if (MDT_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) { rc = -ENOMEM; - else { + } else { body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); - rc = next->md_ops->mdo_root_get(info->mti_ctxt, - next, &body->fid1); + rc = next->md_ops->mdo_root_get(info->mti_ctxt, next, + &body->fid1, NULL); if (rc == 0) body->valid |= OBD_MD_FLID; } @@ -199,8 +200,8 @@ static int mdt_statfs(struct mdt_thread_info *info) } else { osfs = req_capsule_server_get(&info->mti_pill,&RMF_OBD_STATFS); /* XXX max_age optimisation is needed here. 
See mds_statfs */ - rc = next->md_ops->mdo_statfs(info->mti_ctxt, - next, &info->mti_u.ksfs); + rc = next->md_ops->mdo_statfs(info->mti_ctxt, next, + &info->mti_u.ksfs, NULL); statfs_pack(osfs, &info->mti_u.ksfs); } @@ -292,7 +293,7 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, ma->ma_lmm_size = req_capsule_get_size(pill, &RMF_MDT_MD, RCL_SERVER); } - rc = mo_attr_get(ctxt, next, ma); + rc = mo_attr_get(ctxt, next, ma, &info->mti_uc); if (rc == -EREMOTE) { /* This object is located on remote node.*/ repbody->fid1 = *mdt_object_fid(o); @@ -304,10 +305,12 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, RETURN(rc); } - if (ma->ma_valid & MA_INODE) + if (ma->ma_valid & MA_INODE) { mdt_pack_attr2body(repbody, la, mdt_object_fid(o)); - else + mdt_body_reverse_idmap(info, repbody); + } else { RETURN(-EFAULT); + } if (mdt_body_has_lov(la, reqbody)) { if (ma->ma_valid & MA_LOV) { @@ -327,7 +330,8 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, } } else if (S_ISLNK(la->la_mode) && reqbody->valid & OBD_MD_LINKNAME) { - rc = mo_readlink(ctxt, next, ma->ma_lmm, ma->ma_lmm_size); + rc = mo_readlink(ctxt, next, ma->ma_lmm, ma->ma_lmm_size, + &info->mti_uc); if (rc <= 0) { CERROR("readlink failed: %d\n", rc); rc = -EFAULT; @@ -351,14 +355,26 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, repbody->max_cookiesize); } + if (reqbody->valid & OBD_MD_FLRMTPERM) { + buffer = req_capsule_server_get(pill, &RMF_ACL); + /* mdt_getattr_lock only */ + rc = mdt_pack_remote_perm(info, o, buffer); + if (rc) { + RETURN(rc); + } else { + repbody->valid |= OBD_MD_FLRMTPERM; + repbody->aclsize = sizeof(struct mdt_remote_perm); + } + } + #ifdef CONFIG_FS_POSIX_ACL - if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) && - (reqbody->valid & OBD_MD_FLACL)) { + else if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) && + (reqbody->valid & OBD_MD_FLACL)) { buffer = req_capsule_server_get(pill, &RMF_ACL); length = 
req_capsule_get_size(pill, &RMF_ACL, RCL_SERVER); if (length > 0) { - rc = mo_xattr_get(ctxt, next, buffer, - length, XATTR_NAME_ACL_ACCESS); + rc = mo_xattr_get(ctxt, next, buffer, length, + XATTR_NAME_ACL_ACCESS, &info->mti_uc); if (rc < 0) { if (rc == -ENODATA || rc == -EOPNOTSUPP) rc = 0; @@ -379,14 +395,24 @@ static int mdt_getattr(struct mdt_thread_info *info) { int rc; struct mdt_object *obj; + struct mdt_body *reqbody; obj = info->mti_object; LASSERT(obj != NULL); LASSERT(lu_object_assert_exists(&obj->mot_obj.mo_lu)); ENTRY; + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + if (reqbody == NULL) + RETURN(-EFAULT); + + rc = mdt_init_ucred(info, reqbody); + if (rc) + RETURN(rc); + rc = mdt_getattr_internal(info, obj); mdt_shrink_reply(info, REPLY_REC_OFF + 1); + mdt_exit_ucred(info); RETURN(rc); } @@ -410,7 +436,7 @@ static int mdt_is_subdir(struct mdt_thread_info *info) */ LASSERT(fid_is_sane(&info->mti_body->fid2)); rc = mdo_is_subdir(info->mti_ctxt, mdt_object_child(obj), - &info->mti_body->fid2, &repbody->fid1); + &info->mti_body->fid2, &repbody->fid1, NULL); if (rc < 0) RETURN(rc); @@ -511,7 +537,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, RETURN(rc); /*step 2: lookup child's fid by name */ - rc = mdo_lookup(info->mti_ctxt, next, name, child_fid); + rc = mdo_lookup(info->mti_ctxt, next, name, child_fid, &info->mti_uc); if (rc != 0) { if (rc == -ENOENT) mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG); @@ -592,16 +618,26 @@ out: static int mdt_getattr_name(struct mdt_thread_info *info) { struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_CHILD]; + struct mdt_body *reqbody; int rc; ENTRY; + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + if (reqbody == NULL) + RETURN(-EFAULT); + + rc = mdt_init_ucred(info, reqbody); + if (rc) + RETURN(rc); + rc = mdt_getattr_name_lock(info, lhc, MDS_INODELOCK_UPDATE, NULL); if (lustre_handle_is_used(&lhc->mlh_lh)) { ldlm_lock_decref(&lhc->mlh_lh, 
lhc->mlh_mode); lhc->mlh_lh.cookie = 0; } mdt_shrink_reply(info, REPLY_REC_OFF + 1); + mdt_exit_ucred(info); RETURN(rc); } @@ -628,6 +664,7 @@ static int mdt_connect(struct mdt_thread_info *info) if (rc == 0) { LASSERT(req->rq_export != NULL); info->mti_mdt = mdt_dev(req->rq_export->exp_obd->obd_lu_dev); + rc = mdt_init_idmap(info); } return rc; } @@ -712,6 +749,7 @@ static int mdt_write_dir_page(struct mdt_thread_info *info, struct page *page) struct lu_dirent *ent; int rc = 0; + ENTRY; /* Disable trans for this name insert, since it will * include many trans for this */ @@ -733,7 +771,7 @@ static int mdt_write_dir_page(struct mdt_thread_info *info, struct page *page) memcpy(name, ent->lde_name, ent->lde_namelen); rc = mdo_name_insert(info->mti_ctxt, md_object_next(&object->mot_obj), - name, lf, 0); + name, lf, 0, &info->mti_uc); OBD_FREE(name, ent->lde_namelen + 1); if (rc) GOTO(out, rc); @@ -852,6 +890,10 @@ static int mdt_readpage(struct mdt_thread_info *info) if (reqbody == NULL || repbody == NULL) RETURN(-EFAULT); + rc = mdt_init_ucred(info, reqbody); + if (rc) + RETURN(rc); + /* * prepare @rdpg before calling lower layers and transfer itself. 
Here * reqbody->size contains offset of where to start to read and @@ -861,13 +903,13 @@ static int mdt_readpage(struct mdt_thread_info *info) if ((__u64)rdpg->rp_hash != reqbody->size) { CERROR("Invalid hash: %#llx != %#llx\n", (__u64)rdpg->rp_hash, reqbody->size); - RETURN(-EFAULT); + GOTO(out, rc = -EFAULT); } rdpg->rp_count = reqbody->nlink; rdpg->rp_npages = (rdpg->rp_count + CFS_PAGE_SIZE - 1)>>CFS_PAGE_SHIFT; OBD_ALLOC(rdpg->rp_pages, rdpg->rp_npages * sizeof rdpg->rp_pages[0]); if (rdpg->rp_pages == NULL) - RETURN(-ENOMEM); + GOTO(out, rc = -ENOMEM); for (i = 0; i < rdpg->rp_npages; ++i) { rdpg->rp_pages[i] = alloc_pages(GFP_KERNEL, 0); @@ -876,11 +918,12 @@ static int mdt_readpage(struct mdt_thread_info *info) } /* call lower layers to fill allocated pages with directory data */ - rc = mo_readpage(info->mti_ctxt, mdt_object_child(object), rdpg); + rc = mo_readpage(info->mti_ctxt, mdt_object_child(object), rdpg, + &info->mti_uc); if (rc) { if (rc == -ERANGE) rc1 = rc; - else + else GOTO(free_rdpg, rc); } @@ -895,8 +938,11 @@ free_rdpg: __free_pages(rdpg->rp_pages[i], 0); OBD_FREE(rdpg->rp_pages, rdpg->rp_npages * sizeof rdpg->rp_pages[0]); + mdt_exit_ucred(info); MDT_FAIL_RETURN(OBD_FAIL_MDS_SENDPAGE, 0); +out: + mdt_exit_ucred(info); return rc ? 
rc : rc1; } @@ -937,6 +983,14 @@ static int mdt_reint_internal(struct mdt_thread_info *info, RETURN(rc); } + rc = mdt_init_ucred_reint(info); + if (rc != 0) + RETURN(rc); + + rc = mdt_fix_attr_ucred(info, op); + if (rc != 0) + GOTO(out, rc); + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { struct mdt_client_data *mcd; @@ -944,13 +998,16 @@ static int mdt_reint_internal(struct mdt_thread_info *info, if (mcd->mcd_last_xid == req->rq_xid || mcd->mcd_last_close_xid == req->rq_xid) { mdt_reconstruct(info, lhc); - RETURN(lustre_msg_get_status(req->rq_repmsg)); + rc = lustre_msg_get_status(req->rq_repmsg); + GOTO(out, rc); } DEBUG_REQ(D_HA, req, "no reply for RESENT (xid "LPD64")", mcd->mcd_last_xid); } rc = mdt_reint_rec(info, lhc); +out: + mdt_exit_ucred(info); RETURN(rc); } @@ -996,8 +1053,9 @@ static int mdt_reint(struct mdt_thread_info *info) * path. */ rc = mdt_reint_internal(info, NULL, opc); - } else + } else { rc = opc; + } info->mti_fail_id = OBD_FAIL_MDS_REINT_NET_REP; RETURN(rc); @@ -1031,8 +1089,12 @@ static int mdt_sync(struct mdt_thread_info *info) if (body == NULL) RETURN(-EINVAL); + rc = mdt_init_ucred(info, body); + if (rc) + RETURN(rc); + if (MDT_FAIL_CHECK(OBD_FAIL_MDS_SYNC_PACK)) - RETURN(-ENOMEM); + GOTO(out, rc = -ENOMEM); if (fid_seq(&body->fid1) == 0) { /* sync the whole device */ @@ -1052,17 +1114,22 @@ static int mdt_sync(struct mdt_thread_info *info) next = mdt_object_child(info->mti_object); info->mti_attr.ma_need = MA_INODE; rc = mo_attr_get(info->mti_ctxt, next, - &info->mti_attr); + &info->mti_attr, + &info->mti_uc); if (rc == 0) { body = req_capsule_server_get(pill, &RMF_MDT_BODY); fid = mdt_object_fid(info->mti_object); mdt_pack_attr2body(body, la, fid); + mdt_body_reverse_idmap(info, body); } } } } - RETURN(rc); + EXIT; +out: + mdt_exit_ucred(info); + return rc; } static int mdt_quotacheck_handle(struct mdt_thread_info *info) @@ -1160,7 +1227,7 @@ static int mdt_cp_callback(struct mdt_thread_info *info) */ static int 
mdt_sec_ctx_handle(struct mdt_thread_info *info) { - return 0; + return mdt_handle_idmap(info); } static struct mdt_object *mdt_obj(struct lu_object *o) @@ -1518,7 +1585,7 @@ static int mdt_req_handle(struct mdt_thread_info *info, /* If we're DISCONNECTing, the mdt_export_data is already freed */ if (rc == 0 && h->mh_opc != MDS_DISCONNECT) target_committed_to_req(req); - + RETURN(rc); } @@ -1579,8 +1646,9 @@ extern int mds_filter_recovery_request(struct ptlrpc_request *req, * -ve: abort immediately with the given error code; * 0: send reply with error code in req->rq_status; */ -static int mdt_recovery(struct ptlrpc_request *req) +static int mdt_recovery(struct mdt_thread_info *info) { + struct ptlrpc_request *req = mdt_info_req(info); int recovering; int abort_recovery; struct obd_device *obd; @@ -1592,6 +1660,7 @@ static int mdt_recovery(struct ptlrpc_request *req) case SEC_CTX_INIT: case SEC_CTX_INIT_CONT: case SEC_CTX_FINI: + mdt_handle_idmap(info); RETURN(+1); } @@ -1687,7 +1756,7 @@ static int mdt_handle0(struct ptlrpc_request *req, msg = req->rq_reqmsg; rc = mds_msg_check_version(msg); if (rc == 0) { - rc = mdt_recovery(req); + rc = mdt_recovery(info); switch (rc) { case +1: h = mdt_handler_find(lustre_msg_get_opc(msg), @@ -2010,6 +2079,8 @@ static int mdt_intent_getattr(enum mdt_it_code opcode, __u64 child_bits; struct ldlm_reply *ldlm_rep; struct ptlrpc_request *req; + struct mdt_body *reqbody; + int rc; ENTRY; @@ -2025,6 +2096,14 @@ static int mdt_intent_getattr(enum mdt_it_code opcode, RETURN(-EINVAL); } + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + if (reqbody == NULL) + RETURN(-EFAULT); + + rc = mdt_init_ucred(info, reqbody); + if (rc) + RETURN(rc); + req = info->mti_pill.rc_req; ldlm_rep = req_capsule_server_get(&info->mti_pill, &RMF_DLM_REP); mdt_set_disposition(info, ldlm_rep, DISP_IT_EXECD); @@ -2040,10 +2119,14 @@ static int mdt_intent_getattr(enum mdt_it_code opcode, ldlm_rep->lock_policy_res2 = 0; if 
(!mdt_get_disposition(ldlm_rep, DISP_LOOKUP_POS) || ldlm_rep->lock_policy_res2) { - RETURN(ELDLM_LOCK_ABORTED); + GOTO(out, rc = ELDLM_LOCK_ABORTED); } - return mdt_intent_lock_replace(info, lockp, new_lock, lhc, flags); + rc = mdt_intent_lock_replace(info, lockp, new_lock, lhc, flags); + EXIT; +out: + mdt_exit_ucred(info); + return rc; } static int mdt_intent_reint(enum mdt_it_code opcode, @@ -2081,7 +2164,7 @@ static int mdt_intent_reint(enum mdt_it_code opcode, rep = req_capsule_server_get(&info->mti_pill, &RMF_DLM_REP); if (rep == NULL) RETURN(-EFAULT); - + /* MDC expects this in any case */ if (rc != 0) mdt_set_disposition(info, rep, DISP_LOOKUP_EXECD); @@ -2092,7 +2175,7 @@ static int mdt_intent_reint(enum mdt_it_code opcode, if (rc == -EREMOTE) { LASSERT(lustre_handle_is_used(&lhc->mlh_lh)); rep->lock_policy_res2 = 0; - return mdt_intent_lock_replace(info, lockp, NULL, lhc, flags); + RETURN(mdt_intent_lock_replace(info, lockp, NULL, lhc, flags)); } rep->lock_policy_res2 = rc; @@ -2935,6 +3018,17 @@ static void mdt_fini(const struct lu_context *ctx, struct mdt_device *m) ENTRY; target_cleanup_recovery(m->mdt_md_dev.md_lu_dev.ld_obd); + + upcall_cache_cleanup(m->mdt_rmtacl_cache); + m->mdt_rmtacl_cache = NULL; + + upcall_cache_cleanup(m->mdt_identity_cache); + m->mdt_identity_cache = NULL; + + if (m->mdt_rootsquash_info) + OBD_FREE_PTR(m->mdt_rootsquash_info); + + mdt_fs_cleanup(ctx, m); ping_evictor_stop(); mdt_stop_ptlrpc_service(m); @@ -3052,13 +3146,41 @@ static int mdt_init0(const struct lu_context *ctx, struct mdt_device *m, GOTO(err_free_ns, rc); ping_evictor_start(); - rc = mdt_fs_setup(ctx, m); + rc = mdt_fs_setup(ctx, m, obd); if (rc) GOTO(err_stop_service, rc); + + m->mdt_identity_cache = upcall_cache_init(obd->obd_name, + MDT_IDENTITY_UPCALL_PATH, + &mdt_identity_upcall_cache_ops); + if (IS_ERR(m->mdt_identity_cache)) { + rc = PTR_ERR(m->mdt_identity_cache); + m->mdt_identity_cache = NULL; + GOTO(err_fs, rc); + } + + m->mdt_rmtacl_cache = 
upcall_cache_init(obd->obd_name, + MDT_RMTACL_UPCALL_PATH, + &mdt_rmtacl_upcall_cache_ops); + if (IS_ERR(m->mdt_rmtacl_cache)) { + rc = PTR_ERR(m->mdt_rmtacl_cache); + m->mdt_rmtacl_cache = NULL; + GOTO(err_fs, rc); + } + if(obd->obd_recovering == 0) mdt_postrecov(ctx, m); + + m->no_gss_support = 1; + RETURN(0); +err_fs: + upcall_cache_cleanup(m->mdt_rmtacl_cache); + m->mdt_rmtacl_cache = NULL; + upcall_cache_cleanup(m->mdt_identity_cache); + m->mdt_identity_cache = NULL; + mdt_fs_cleanup(ctx, m); err_stop_service: mdt_stop_ptlrpc_service(m); err_free_ns: @@ -3079,6 +3201,39 @@ err_free_site: return (rc); } +/* FIXME: this macro is copied from lnet/libcfs/nidstring.c */ +#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ +static void do_process_nosquash_nids(struct mdt_device *m, char *buf) +{ + struct rootsquash_info *rsi = m->mdt_rootsquash_info; + char str[LNET_NIDSTR_SIZE], *end; + lnet_nid_t nid; + /* buf: comma-separated NIDs, "*" = LNET_NID_ANY; fills rsi_nosquash_nids */ + LASSERT(rsi); + rsi->rsi_n_nosquash_nids = 0; + while (rsi->rsi_n_nosquash_nids < N_NOSQUASH_NIDS) { + end = strchr(buf, ','); + memset(str, 0, sizeof(str)); /* copies below stop at size - 1, so str stays NUL-terminated */ + if (end) + strncpy(str, buf, min_t(int, sizeof(str) - 1, end - buf)); + else + strncpy(str, buf, min_t(int, sizeof(str) - 1, strlen(buf))); + + if (!strcmp(str, "*")) { + nid = LNET_NID_ANY; + } else { + nid = libcfs_str2nid(str); + if (nid == LNET_NID_ANY) + goto ignore; + } + rsi->rsi_nosquash_nids[rsi->rsi_n_nosquash_nids++] = nid; +ignore: + if (!end || (*(end + 1) == 0)) + return; + buf = end + 1; + } +} + /* used by MGS to process specific configurations */ static int mdt_process_config(const struct lu_context *ctx, struct lu_device *d, struct lustre_cfg *cfg) @@ -3086,26 +3241,90 @@ static int mdt_process_config(const struct lu_context *ctx, struct mdt_device *m = mdt_dev(d); struct md_device *md_next = m->mdt_child; struct lu_device *next = md2lu_dev(md_next); - int err; + int rc = 0; ENTRY; switch (cfg->lcfg_command) { + case LCFG_PARAM: { + int i; + + for (i = 1; i < 
cfg->lcfg_bufcount; i++) { + char *key, *val; + + key = lustre_cfg_buf(cfg, i); + val = strchr(key, '='); + if (!val || (*(val + 1) == 0)) { + CERROR("Can't parse param %s\n", key); + rc = -EINVAL; + /* continue parsing other params */ + continue; + } + + val++; + if (class_match_param(key, + PARAM_GSS_SUPPORT, 0) == 0) { + if (memcmp(val, "no", 2) == 0) { + m->no_gss_support = 1; + } else if (memcmp(val, "yes", 3) == 0) { + m->no_gss_support = 0; + } else { + CERROR("Can't parse param %s\n", key); + rc = -EINVAL; + /* continue parsing other params */ + continue; + } + } else if (class_match_param(key, + PARAM_ROOTSQUASH_UID, 0) == 0) { + if (!m->mdt_rootsquash_info) + OBD_ALLOC_PTR(m->mdt_rootsquash_info); + if (!m->mdt_rootsquash_info) + RETURN(-ENOMEM); + + m->mdt_rootsquash_info->rsi_uid = + simple_strtoul(val, NULL, 0); + } else if (class_match_param(key, + PARAM_ROOTSQUASH_GID, 0) == 0) { + if (!m->mdt_rootsquash_info) + OBD_ALLOC_PTR(m->mdt_rootsquash_info); + if (!m->mdt_rootsquash_info) + RETURN(-ENOMEM); + + m->mdt_rootsquash_info->rsi_gid = + simple_strtoul(val, NULL, 0); + } else if (class_match_param(key, + PARAM_ROOTSQUASH_SKIPS, 0) == 0) { + if (!m->mdt_rootsquash_info) + OBD_ALLOC_PTR(m->mdt_rootsquash_info); + if (!m->mdt_rootsquash_info) + RETURN(-ENOMEM); + + do_process_nosquash_nids(m, val); + } else { + rc = -EINVAL; + } + } + + if (rc) + /* others are passed further */ + rc = next->ld_ops->ldo_process_config(ctx, next, cfg); + break; + } case LCFG_ADD_MDC: /* * Add mdc hook to get first MDT uuid and connect it to * ls->controller to use for seq manager. 
*/ - err = mdt_seq_init_cli(ctx, mdt_dev(d), cfg); - if (err) { + rc = mdt_seq_init_cli(ctx, mdt_dev(d), cfg); + if (rc) { CERROR("can't initialize controller export, " - "rc %d\n", err); + "rc %d\n", rc); } default: /* others are passed further */ - err = next->ld_ops->ldo_process_config(ctx, next, cfg); + rc = next->ld_ops->ldo_process_config(ctx, next, cfg); break; } - RETURN(err); + RETURN(rc); } static struct lu_object *mdt_object_alloc(const struct lu_context *ctxt, @@ -3190,6 +3409,8 @@ static int mdt_connect_internal(struct obd_export *exp, struct mdt_device *mdt, struct obd_connect_data *data) { + __u64 flags; + if (data != NULL) { data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED; data->ocd_ibits_known &= MDS_INODELOCK_FULL; @@ -3218,6 +3439,14 @@ static int mdt_connect_internal(struct obd_export *exp, mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); return -EBADE; } + + flags = OBD_CONNECT_LCL_CLIENT | OBD_CONNECT_RMT_CLIENT; + if ((exp->exp_connect_flags & flags) == flags) { + CWARN("%s: both local and remote client flags are set\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); + return -EBADE; + } + return 0; } @@ -3341,6 +3570,8 @@ static int mdt_destroy_export(struct obd_export *export) ENTRY; med = &export->exp_mdt_data; + if (med->med_rmtclient) + mdt_cleanup_idmap(med); target_destroy_export(export); @@ -3419,7 +3650,7 @@ static int mdt_upcall(const struct lu_context *ctx, struct md_device *md, case MD_LOV_SYNC: rc = next->md_ops->mdo_maxsize_get(ctx, next, &m->mdt_max_mdsize, - &m->mdt_max_cookiesize); + &m->mdt_max_cookiesize, NULL); CDEBUG(D_INFO, "get max mdsize %d max cookiesize %d\n", m->mdt_max_mdsize, m->mdt_max_cookiesize); break; diff --git a/lustre/mdt/mdt_identity.c b/lustre/mdt/mdt_identity.c new file mode 100644 index 0000000..17f78b8 --- /dev/null +++ b/lustre/mdt/mdt_identity.c @@ -0,0 +1,307 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 
Cluster File Systems, Inc. + * Author: Lai Siyao + * Author: Fan Yong + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mdt_internal.h" + +static void mdt_identity_entry_init(struct upcall_cache_entry *entry, + void *unused) +{ + entry->u.identity.mi_uc_entry = entry; +} + +static void mdt_identity_entry_free(struct upcall_cache *cache, + struct upcall_cache_entry *entry) +{ + struct mdt_identity *identity = &entry->u.identity; + + if (identity->mi_ginfo) + groups_free(identity->mi_ginfo); + + if (identity->mi_nperms) { + LASSERT(identity->mi_perms); + OBD_FREE(identity->mi_perms, + identity->mi_nperms * sizeof(struct mdt_setxid_perm)); + } +} + +static int mdt_identity_do_upcall(struct upcall_cache *cache, + struct upcall_cache_entry *entry) +{ + char keystr[16]; + char *argv[] = { + [0] = cache->uc_upcall, + [1] = cache->uc_name, + [2] = keystr, + [3] = NULL + }; + char *envp[] = { + [0] = "HOME=/", + [1] = "PATH=/sbin:/usr/sbin", + [2] = NULL + }; + int rc; + ENTRY; + + 
snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key); + + LASSERTF(strcmp(cache->uc_upcall, "NONE"), "no upcall set!"); + CDEBUG(D_INFO, "The upcall is: %s \n", cache->uc_upcall); + + rc = USERMODEHELPER(argv[0], argv, envp); + if (rc < 0) { + CERROR("%s: error invoking upcall %s %s %s: rc %d; " + "check /proc/fs/lustre/mdt/%s/identity_upcall\n", + cache->uc_name, argv[0], argv[1], argv[2], rc, + cache->uc_name); + } else { + CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", cache->uc_name, + argv[0], argv[1], argv[2]); + rc = 0; + } + RETURN(rc); +} + +static int mdt_identity_parse_downcall(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + void *args) +{ + struct mdt_identity *identity = &entry->u.identity; + struct identity_downcall_data *data = args; + struct group_info *ginfo; + struct mdt_setxid_perm *perms = NULL; + int size, i; + ENTRY; + + LASSERT(data); + if (data->idd_ngroups > NGROUPS_MAX) + return -E2BIG; + + ginfo = groups_alloc(data->idd_ngroups); + if (!ginfo) { + CERROR("failed to alloc %d groups\n", data->idd_ngroups); + RETURN(-ENOMEM); + } + + groups_from_list(ginfo, data->idd_groups); + groups_sort(ginfo); + identity->mi_ginfo = ginfo; + + if (data->idd_nperms) { + size = data->idd_nperms * sizeof(*perms); + OBD_ALLOC(perms, size); + if (!perms) { + CERROR("failed to alloc %d permissions\n", + data->idd_nperms); + put_group_info(ginfo); + RETURN(-ENOMEM); + } + for (i = 0; i < data->idd_nperms; i++) { + perms[i].mp_nid = data->idd_perms[i].pdd_nid; + perms[i].mp_perm = data->idd_perms[i].pdd_perm; + } + } + + identity->mi_uid = data->idd_uid; + identity->mi_gid = data->idd_gid; + identity->mi_ginfo = ginfo; + identity->mi_nperms = data->idd_nperms; + identity->mi_perms = perms; + + CDEBUG(D_OTHER, "parse mdt identity@%p: %d:%d, ngroups %u, nperms %u\n", + identity, identity->mi_uid, identity->mi_gid, + identity->mi_ginfo->ngroups, identity->mi_nperms); + + RETURN(0); +} + +struct mdt_identity *mdt_identity_get(struct upcall_cache 
*cache, __u32 uid) +{ + struct upcall_cache_entry *entry; + + entry = upcall_cache_get_entry(cache, (__u64)uid, NULL); + if (IS_ERR(entry)) { + CERROR("upcall_cache_get_entry failed: %ld\n", PTR_ERR(entry)); + return NULL; + } + + return &entry->u.identity; +} + +#if 0 +struct mdt_identity *mdt_identity_get(struct mdt_thread_info *info, + struct upcall_cache *cache, __u32 uid) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct lvfs_run_ctxt saved; + struct obd_device *obd = req->rq_export->exp_obd; + struct upcall_cache_entry *entry; + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &info->mti_uc); + entry = upcall_cache_get_entry(cache, (__u64)uid, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &info->mti_uc); + if (IS_ERR(entry)) { + CERROR("upcall_cache_get_entry failed: %ld\n", PTR_ERR(entry)); + return NULL; + } + + return &entry->u.identity; +} +#endif + +void mdt_identity_put(struct upcall_cache *cache, struct mdt_identity *identity) +{ + LASSERT(identity); + upcall_cache_put_entry(cache, identity->mi_uc_entry); +} + +struct upcall_cache_ops mdt_identity_upcall_cache_ops = { + .init_entry = mdt_identity_entry_init, + .free_entry = mdt_identity_entry_free, + .do_upcall = mdt_identity_do_upcall, + .parse_downcall = mdt_identity_parse_downcall, +}; + +void mdt_flush_identity(struct upcall_cache *cache, __u32 uid) +{ + if (uid == -1) + upcall_cache_flush_idle(cache); + else + upcall_cache_flush_one(cache, (__u64)uid, NULL); +} + +__u32 mdt_identity_get_setxid_perm(struct mdt_identity *identity, + __u32 is_rmtclient, lnet_nid_t nid) +{ + struct mdt_setxid_perm *perm = identity->mi_perms; + int i; + + for (i = 0; i < identity->mi_nperms; i++) { + if ((perm[i].mp_nid != LNET_NID_ANY) && (perm[i].mp_nid != nid)) + continue; + return perm[i].mp_perm; + } + + /* default */ + return is_rmtclient ? 
0 : LUSTRE_SETGRP_PERM; +} + +int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o, + void *buf) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = &info->mti_uc; + struct md_object *next = mdt_object_child(o); + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_remote_perm *perm = buf; + + ENTRY; + + /* remote client request always pack ptlrpc_user_desc! */ + LASSERT(perm); + + if (!med->med_rmtclient) + RETURN(-EBADE); + + perm->rp_uid = uc->mu_o_uid; + perm->rp_gid = uc->mu_o_gid; + perm->rp_fsuid = uc->mu_o_fsuid; + perm->rp_fsgid = uc->mu_o_fsgid; + + perm->rp_access_perm = 0; + if (mo_permission(info->mti_ctxt, next, MAY_READ, &info->mti_uc) == 0) + perm->rp_access_perm |= MAY_READ; + if (mo_permission(info->mti_ctxt, next, MAY_WRITE, &info->mti_uc) == 0) + perm->rp_access_perm |= MAY_WRITE; + if (mo_permission(info->mti_ctxt, next, MAY_EXEC, &info->mti_uc) == 0) + perm->rp_access_perm |= MAY_EXEC; + + RETURN(0); +} + +#if 0 +int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o, + void *buf) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct lvfs_ucred *uc = &info->mti_uc; + struct md_object *next = mdt_object_child(o); + struct mdt_export_data *med = mdt_req2med(req); + struct ptlrpc_user_desc *pud = req->rq_user_desc; + struct mdt_remote_perm *perm = buf; + int rc; + ENTRY; + + /* remote client request always pack ptlrpc_user_desc! 
*/ + LASSERT(pud); + LASSERT(perm); + + if (!med->med_rmtclient) + RETURN(-EBADE); + + perm->rp_uid = pud->pud_uid; + perm->rp_gid = pud->pud_gid; + perm->rp_fsuid = pud->pud_fsuid; + perm->rp_fsgid = pud->pud_fsgid; + + rc = mdt_remote_perm_reverse_idmap(req, perm); + if (rc) + RETURN(rc); + + return mo_permission(ctxt, &info->mti_uc, next, + (MAY_EXEC | MAY_WRITE | MAY_READ), + &perm->rp_access_perm); +} +#endif diff --git a/lustre/mdt/mdt_idmap.c b/lustre/mdt/mdt_idmap.c new file mode 100644 index 0000000..88f67fe --- /dev/null +++ b/lustre/mdt/mdt_idmap.c @@ -0,0 +1,739 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * Author: Lai Siyao + * Author: Fan Yong + + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mdt_internal.h" + +enum { + MDT_IDMAP_NOTFOUND = -1, +}; + +struct mdt_idmap_entry { + struct list_head mie_rmt_hash; /* hashed as mie_rmt_id; */ + struct list_head mie_lcl_hash; /* hashed as mie_lcl_id; */ + int mie_refcount; + uid_t mie_rmt_id; /* remote uid/gid */ + uid_t mie_lcl_id; /* local uid/gid */ +}; + +/* uid/gid mapping */ +static struct mdt_idmap_table *mdt_idmap_alloc(void) +{ + struct mdt_idmap_table *tbl; + int i, j; + + OBD_ALLOC_PTR(tbl); + if (!tbl) + return NULL; + + spin_lock_init(&tbl->mit_lock); + for (i = 0; i < ARRAY_SIZE(tbl->mit_idmaps); i++) + for (j = 0; j < ARRAY_SIZE(tbl->mit_idmaps[i]); j++) + INIT_LIST_HEAD(&tbl->mit_idmaps[i][j]); + + return tbl; +} + +static struct mdt_idmap_entry *idmap_entry_alloc(__u32 mie_rmt_id, + __u32 mie_lcl_id) +{ + struct mdt_idmap_entry *e; + + OBD_ALLOC_PTR(e); + if (!e) + return NULL; + + INIT_LIST_HEAD(&e->mie_rmt_hash); + INIT_LIST_HEAD(&e->mie_lcl_hash); + e->mie_refcount = 1; + e->mie_rmt_id = mie_rmt_id; + e->mie_lcl_id = mie_lcl_id; + + return e; +} + +static void idmap_entry_free(struct mdt_idmap_entry *e) +{ + if (!list_empty(&e->mie_rmt_hash)) + list_del(&e->mie_rmt_hash); + if (!list_empty(&e->mie_lcl_hash)) + list_del(&e->mie_lcl_hash); + OBD_FREE_PTR(e); +} + +int mdt_init_idmap(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + char *client = libcfs_nid2str(req->rq_peer.nid); + struct mdt_export_data *med = mdt_req2med(req); + struct obd_device *obd = req->rq_export->exp_obd; + struct obd_connect_data *data, *reply; + int rc = 0, remote; + ENTRY; + + data = 
req_capsule_client_get(&info->mti_pill, &RMF_CONNECT_DATA); + reply = req_capsule_server_get(&info->mti_pill, &RMF_CONNECT_DATA); + if (data == NULL || reply == NULL) + RETURN(-EFAULT); + + remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT; + + if (req->rq_auth_uid == INVALID_UID) { + if (remote) + CWARN("client %s -> target %s null sec is used, force " + "to be local!\n", client, obd->obd_name); + } else { + if (remote) { + if (!req->rq_auth_remote) + CWARN("client %s -> target %s local realm asked" + " to be remote!\n", + client, obd->obd_name); + med->med_rmtclient = 1; + med->med_nllu = data->ocd_nllu; + med->med_nllg = data->ocd_nllg; + } else if (req->rq_auth_remote) { + CWARN("client %s -> target %s remote realm asked to be " + "local!\n", client, obd->obd_name); + } + } + + if (med->med_rmtclient) { + if (!med->med_idmap) + med->med_idmap = mdt_idmap_alloc(); + if (!med->med_idmap) { + CERROR("client %s -> target %s failed to alloc idmap!\n" + , client, obd->obd_name); + RETURN(-ENOMEM); + } + + reply->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT; + reply->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT; + CDEBUG(D_SEC, "client %s -> target %s is remote.\n", + client, obd->obd_name); + + /* NB, MDT_CONNECT establish root idmap too! 
*/ + rc = mdt_handle_idmap(info); + } else { + reply->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT; + reply->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT; + } + + RETURN(rc); +} + +static void idmap_clear_mie_rmt_hash(struct list_head *list) +{ + struct mdt_idmap_entry *e; + int i; + + for (i = 0; i < MDT_IDMAP_HASHSIZE; i++) { + while (!list_empty(&list[i])) { + e = list_entry(list[i].next, struct mdt_idmap_entry, + mie_rmt_hash); + idmap_entry_free(e); + } + } +} + +void mdt_cleanup_idmap(struct mdt_export_data *med) +{ + struct mdt_idmap_table *tbl = med->med_idmap; + int i; + + LASSERT(med->med_rmtclient); + LASSERT(tbl); + + spin_lock(&tbl->mit_lock); + idmap_clear_mie_rmt_hash(tbl->mit_idmaps[RMT_UIDMAP_IDX]); + idmap_clear_mie_rmt_hash(tbl->mit_idmaps[RMT_GIDMAP_IDX]); + + /* paranoid checking */ + for (i = 0; i < MDT_IDMAP_HASHSIZE; i++) { + LASSERT(list_empty(&tbl->mit_idmaps[LCL_UIDMAP_IDX][i])); + LASSERT(list_empty(&tbl->mit_idmaps[LCL_GIDMAP_IDX][i])); + } + spin_unlock(&tbl->mit_lock); + + OBD_FREE_PTR(tbl); + med->med_idmap = NULL; +} + +static inline void mdd_revoke_export_locks(struct obd_export *exp) +{ + if (!exp->exp_mdt_data.med_rmtclient) + return; + + /* don't revoke locks during recovery */ + if (exp->exp_obd->obd_recovering) + return; + + ldlm_revoke_export_locks(exp); +} + +static +struct mdt_idmap_entry *idmap_search_entry(struct list_head *mie_rmt_hash, + uid_t mie_rmt_id, uid_t mie_lcl_id, + const char *warn_msg) +{ + struct list_head *rmt_head = + &mie_rmt_hash[MDT_IDMAP_HASHFUNC(mie_rmt_id)]; + struct mdt_idmap_entry *e; + + list_for_each_entry(e, rmt_head, mie_rmt_hash) { + if ((e->mie_rmt_id == mie_rmt_id) && + (e->mie_lcl_id == mie_lcl_id)) { + e->mie_refcount++; + return e; + } + if ((e->mie_rmt_id == mie_rmt_id) && warn_msg) + CWARN("%s: rmt id %u already map to %u (new %u)\n", + warn_msg, e->mie_rmt_id, e->mie_lcl_id, + mie_lcl_id); + if ((e->mie_lcl_id == mie_lcl_id) && warn_msg) + CWARN("%s: lcl id %u already be mapped from %u 
" + "(new %u)\n", warn_msg, + e->mie_lcl_id, e->mie_rmt_id, mie_rmt_id); + } + return NULL; +} + +static int idmap_insert_entry(struct list_head *mie_rmt_hash, + struct list_head *mie_lcl_hash, + struct mdt_idmap_entry *new, + const char *warn_msg) +{ + struct list_head *rmt_head = + &mie_rmt_hash[MDT_IDMAP_HASHFUNC(new->mie_rmt_id)]; + struct list_head *lcl_head = + &mie_lcl_hash[MDT_IDMAP_HASHFUNC(new->mie_lcl_id)]; + struct mdt_idmap_entry *e; + + e = idmap_search_entry(mie_rmt_hash, + new->mie_rmt_id, new->mie_lcl_id, warn_msg); + if (!e) { + list_add_tail(&new->mie_rmt_hash, rmt_head); + list_add_tail(&new->mie_lcl_hash, lcl_head); + return 0; + } else { + return 1; + } +} + +static int idmap_remove_entry(struct list_head *mie_rmt_hash, + struct list_head *mie_lcl_hash, + __u32 mie_rmt_id, __u32 mie_lcl_id) +{ + struct mdt_idmap_entry *e; + + e = idmap_search_entry(mie_rmt_hash, + mie_rmt_id, mie_lcl_id, NULL); + if (e) { + e->mie_refcount -= 2; + if (e->mie_refcount <= 0) { + list_del(&e->mie_rmt_hash); + list_del(&e->mie_lcl_hash); + OBD_FREE_PTR(e); + return 0; + } else { + return 1; + } + } else { + return -ENOENT; + } +} + +static int mdt_idmap_add(struct mdt_idmap_table *tbl, + uid_t ruid, uid_t luid, + gid_t rgid, gid_t lgid) +{ + struct mdt_idmap_entry *ue, *ge; + ENTRY; + + LASSERT(tbl); + + spin_lock(&tbl->mit_lock); + ue = idmap_search_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX], + ruid, luid, "UID mapping"); + spin_unlock(&tbl->mit_lock); + if (!ue) { + ue = idmap_entry_alloc(ruid, luid); + if (!ue) + RETURN(-ENOMEM); + + spin_lock(&tbl->mit_lock); + if (idmap_insert_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX], + tbl->mit_idmaps[LCL_UIDMAP_IDX], + ue, "UID mapping")) + idmap_entry_free(ue); + spin_unlock(&tbl->mit_lock); + } + + spin_lock(&tbl->mit_lock); + ge = idmap_search_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX], + rgid, lgid, "GID mapping"); + spin_unlock(&tbl->mit_lock); + if (!ge) { + ge = idmap_entry_alloc(rgid, lgid); + spin_lock(&tbl->mit_lock); + if 
(!ge) { + ue->mie_refcount--; + if (ue->mie_refcount <= 0) { + list_del(&ue->mie_rmt_hash); + list_del(&ue->mie_lcl_hash); + OBD_FREE_PTR(ue); + } + spin_unlock(&tbl->mit_lock); + RETURN(-ENOMEM); + } + + if (idmap_insert_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX], + tbl->mit_idmaps[LCL_GIDMAP_IDX], + ge, "GID mapping")) + idmap_entry_free(ge); + spin_unlock(&tbl->mit_lock); + } + + RETURN(0); +} + +static int mdt_idmap_del(struct mdt_idmap_table *tbl, + uid_t ruid, uid_t luid, + gid_t rgid, gid_t lgid) +{ + ENTRY; + + if (!tbl) + RETURN(0); + + spin_lock(&tbl->mit_lock); + idmap_remove_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX], + tbl->mit_idmaps[LCL_UIDMAP_IDX], + ruid, luid); + idmap_remove_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX], + tbl->mit_idmaps[LCL_GIDMAP_IDX], + rgid, lgid); + spin_unlock(&tbl->mit_lock); + + RETURN(0); +} + +int mdt_handle_idmap(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_device *mdt = info->mti_mdt; + struct mdt_export_data *med; + struct ptlrpc_user_desc *pud = req->rq_user_desc; + struct mdt_identity *identity; + __u32 opc; + int rc = 0; + + ENTRY; + + if (!req->rq_export) + RETURN(0); + + med = mdt_req2med(req); + if (!med->med_rmtclient) + RETURN(0); + + if (req->rq_auth_usr_mdt) + RETURN(0); + + opc = lustre_msg_get_opc(req->rq_reqmsg); + /* Bypass other opc */ + if ((opc != SEC_CTX_INIT) && (opc != SEC_CTX_INIT_CONT) && + (opc != SEC_CTX_FINI) && (opc != MDS_CONNECT)) + RETURN(0); + + LASSERT(pud); + LASSERT(med->med_idmap); + + if (mdt->no_gss_support) { + CWARN("The server is running with no GSS support now! 
" + "and don't permit remote client to access!\n"); + RETURN(-EACCES); + } + + if (req->rq_auth_mapped_uid == INVALID_UID) { + CERROR("invalid authorized mapped uid, please check " + "/etc/lustre/idmap.conf!\n"); + RETURN(-EACCES); + } + + identity = mdt_identity_get(mdt->mdt_identity_cache, + req->rq_auth_mapped_uid); + if (!identity) { + CERROR("can't get mdt identity(%u), no mapping added\n", + req->rq_auth_mapped_uid); + RETURN(-EACCES); + } + + switch (opc) { + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case MDS_CONNECT: + rc = mdt_idmap_add(med->med_idmap, + pud->pud_uid, identity->mi_uid, + pud->pud_gid, identity->mi_gid); + break; + case SEC_CTX_FINI: + rc = mdt_idmap_del(med->med_idmap, + pud->pud_uid, identity->mi_uid, + pud->pud_gid, identity->mi_gid); + break; + } + + mdt_identity_put(mdt->mdt_identity_cache, identity); + + if (rc) + RETURN(rc); + + switch (opc) { + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: + mdd_revoke_export_locks(req->rq_export); + break; + } + RETURN(0); +} + +static __u32 idmap_lookup_id(struct list_head *hash, int reverse, __u32 id) +{ + struct list_head *head = &hash[MDT_IDMAP_HASHFUNC(id)]; + struct mdt_idmap_entry *e; + + if (!reverse) { + list_for_each_entry(e, head, mie_rmt_hash) { + if (e->mie_rmt_id == id) + return e->mie_lcl_id; + } + } else { + list_for_each_entry(e, head, mie_lcl_hash) { + if (e->mie_lcl_id == id) + return e->mie_rmt_id; + } + } + return MDT_IDMAP_NOTFOUND; +} + +static int mdt_idmap_lookup_uid(struct mdt_idmap_table *tbl, int reverse, + uid_t uid) +{ + struct list_head *hash; + + if (!tbl) + return MDT_IDMAP_NOTFOUND; + + hash = tbl->mit_idmaps[reverse ? 
LCL_UIDMAP_IDX : RMT_UIDMAP_IDX]; + + spin_lock(&tbl->mit_lock); + uid = idmap_lookup_id(hash, reverse, uid); + spin_unlock(&tbl->mit_lock); + + return uid; +} + +static int mdt_idmap_lookup_gid(struct mdt_idmap_table *tbl, int reverse, + gid_t gid) +{ + struct list_head *hash; + + if (!tbl) + return MDT_IDMAP_NOTFOUND; + + hash = tbl->mit_idmaps[reverse ? LCL_GIDMAP_IDX : RMT_GIDMAP_IDX]; + + spin_lock(&tbl->mit_lock); + gid = idmap_lookup_id(hash, reverse, gid); + spin_unlock(&tbl->mit_lock); + + return gid; +} + +int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *req, + struct ptlrpc_user_desc *pud) +{ + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_idmap_table *idmap = med->med_idmap; + uid_t uid, fsuid; + gid_t gid, fsgid; + + /* Only remote client need desc_to_idmap. */ + if (!med->med_rmtclient) + return 0; + + if (req->rq_auth_usr_mdt) + return 0; + + uid = mdt_idmap_lookup_uid(idmap, 0, pud->pud_uid); + if (uid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for uid %u\n", pud->pud_uid); + return -EACCES; + } + + if (pud->pud_uid == pud->pud_fsuid) { + fsuid = uid; + } else { + fsuid = mdt_idmap_lookup_uid(idmap, 0, pud->pud_fsuid); + if (fsuid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for fsuid %u\n", pud->pud_fsuid); + return -EACCES; + } + } + + gid = mdt_idmap_lookup_gid(idmap, 0, pud->pud_gid); + if (gid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for gid %u\n", pud->pud_gid); + return -EACCES; + } + + if (pud->pud_gid == pud->pud_fsgid) { + fsgid = gid; + } else { + fsgid = mdt_idmap_lookup_gid(idmap, 0, pud->pud_fsgid); + if (fsgid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for fsgid %u\n", pud->pud_fsgid); + return -EACCES; + } + } + + pud->pud_uid = uid; + pud->pud_gid = gid; + pud->pud_fsuid = fsuid; + pud->pud_fsgid = fsgid; + +#if 0 + /* remote client doesn't support setgroups */ + if (med->med_rmtclient) + return 0; + + for (i = 0; i < pud->pud_ngroups; i++) { + gid = mdt_idmap_lookup_gid(idmap, 0, pud->pud_groups[i]); 
+ if (gid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for gid %u\n", pud->pud_gid); + return -EACCES; + } + pud->pud_groups[i] = gid; + } +#endif + + return 0; +} + +/* reverse map */ +void mdt_body_reverse_idmap(struct mdt_thread_info *info, struct mdt_body *body) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = &info->mti_uc; + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_idmap_table *idmap = med->med_idmap; + uid_t uid; + gid_t gid; + + if (!med->med_rmtclient) + return; + + if (req->rq_auth_usr_mdt) + return; + + if (body->valid & OBD_MD_FLUID) { + if (body->uid == uc->mu_uid) + uid = uc->mu_o_uid; + else if (body->uid == uc->mu_fsuid) + uid = uc->mu_o_fsuid; + else + uid = mdt_idmap_lookup_uid(idmap, 1, body->uid); + + if (uid == MDT_IDMAP_NOTFOUND) { + uid = med->med_nllu; + if (body->valid & OBD_MD_FLMODE) + body->mode = (body->mode & ~S_IRWXU) | + ((body->mode & S_IRWXO) << 6); + } + + body->uid = uid; + } + + if (body->valid & OBD_MD_FLGID) { + if (body->gid == uc->mu_gid) + gid = uc->mu_o_gid; + else if (body->gid == uc->mu_fsgid) + gid = uc->mu_o_fsgid; + else + gid = mdt_idmap_lookup_gid(idmap, 1, body->gid); + + if (gid == MDT_IDMAP_NOTFOUND) { + gid = med->med_nllg; + if (body->valid & OBD_MD_FLMODE) + body->mode = (body->mode & ~S_IRWXG) | + ((body->mode & S_IRWXO) << 3); + } + + body->gid = gid; + } +} + +/* NB: return error if no mapping, so this will look strange: + * if client hasn't kinit the to map xid for the mapped xid, client + * will always get -EPERM, and the same for rootsquash case. 
*/ +int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *req, + struct mdt_remote_perm *perm) +{ + struct mdt_export_data *med = mdt_req2med(req); + uid_t uid, fsuid; + gid_t gid, fsgid; + + LASSERT(med->med_rmtclient); + + if (req->rq_auth_usr_mdt) + return 0; + + uid = mdt_idmap_lookup_uid(med->med_idmap, 1, perm->rp_uid); + if (uid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for uid %u\n", perm->rp_uid); + return -EPERM; + } + + gid = mdt_idmap_lookup_gid(med->med_idmap, 1, perm->rp_gid); + if (gid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for gid %u\n", perm->rp_gid); + return -EPERM; + } + + fsuid = mdt_idmap_lookup_uid(med->med_idmap, 1, perm->rp_fsuid); + if (fsuid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for fsuid %u\n", perm->rp_fsuid); + return -EPERM; + } + + fsgid = mdt_idmap_lookup_gid(med->med_idmap, 1, perm->rp_fsgid); + if (fsgid == MDT_IDMAP_NOTFOUND) { + CERROR("no mapping for fsgid %u\n", perm->rp_fsgid); + return -EPERM; + } + + perm->rp_uid = uid; + perm->rp_gid = gid; + perm->rp_fsuid = fsuid; + perm->rp_fsgid = fsgid; + return 0; +} + +int mdt_fix_attr_ucred(struct mdt_thread_info *info, __u32 op) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = &info->mti_uc; + struct lu_attr *attr = &info->mti_attr.ma_attr; + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_idmap_table *idmap = med->med_idmap; + + ENTRY; + + if (!med->med_rmtclient) + RETURN(0); + + if (req->rq_auth_usr_mdt) + RETURN(0); + + if (op != REINT_SETATTR) { + if ((attr->la_valid & LA_UID) && (attr->la_uid != -1)) + attr->la_uid = uc->mu_fsuid; + if ((attr->la_valid & LA_GID) && (attr->la_gid != -1)) + attr->la_gid = uc->mu_fsgid; + } else { + /* NB: -1 case will be handled by mdt_fix_attr() later. 
*/ + if ((attr->la_valid & LA_UID) && (attr->la_uid != -1)) { + uid_t uid; + + if (attr->la_uid == uc->mu_o_uid) + uid = uc->mu_uid; + else if (attr->la_uid == uc->mu_o_fsuid) + uid = uc->mu_fsuid; + else + uid = mdt_idmap_lookup_uid(idmap, 0, + attr->la_uid); + + if (uid == MDT_IDMAP_NOTFOUND) { + CWARN("Deny chown to uid %u\n", attr->la_uid); + RETURN(-EPERM); + } + + attr->la_uid = uid; + } + if ((attr->la_valid & LA_GID) && (attr->la_gid != -1)) { + gid_t gid; + + if (attr->la_gid == uc->mu_o_gid) + gid = uc->mu_gid; + else if (attr->la_gid == uc->mu_o_fsgid) + gid = uc->mu_fsgid; + else + gid = mdt_idmap_lookup_gid(idmap, 0, + attr->la_gid); + + if (gid == MDT_IDMAP_NOTFOUND) { + CWARN("Deny chown to gid %u\n", attr->la_gid); + RETURN(-EPERM); + } + + attr->la_gid = gid; + } + } + + RETURN(0); +} diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 627d838..d37743a 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -53,6 +53,8 @@ #include /* LR_CLIENT_SIZE, etc. */ #include +#include +#include /* Data stored per client in the last_rcvd file. In le32 order. */ @@ -159,6 +161,13 @@ struct mdt_device { struct mdt_server_data mdt_msd; spinlock_t mdt_client_bitmap_lock; unsigned long mdt_client_bitmap[(LR_MAX_CLIENTS >> 3) / sizeof(long)]; + + struct upcall_cache *mdt_identity_cache; + struct upcall_cache *mdt_rmtacl_cache; + + /* root squash */ + struct rootsquash_info *mdt_rootsquash_info; + int no_gss_support; }; /*XXX copied from mds_internal.h */ @@ -243,6 +252,10 @@ struct mdt_thread_info { */ struct mdt_object *mti_object; /* + * User credential. + */ + struct md_ucred mti_uc; + /* * Object attributes. 
*/ struct md_attr mti_attr; @@ -393,7 +406,8 @@ void mdt_lock_handle_fini(struct mdt_lock_handle *lh); void mdt_reconstruct(struct mdt_thread_info *, struct mdt_lock_handle *); -int mdt_fs_setup(const struct lu_context *, struct mdt_device *); +int mdt_fs_setup(const struct lu_context *, struct mdt_device *, + struct obd_device *); void mdt_fs_cleanup(const struct lu_context *, struct mdt_device *); int mdt_client_del(const struct lu_context *ctxt, @@ -436,6 +450,57 @@ void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *); void mdt_dump_lmm(int level, const struct lov_mds_md *lmm); +int mdt_init_ucred(struct mdt_thread_info *, struct mdt_body *); + +int mdt_init_ucred_reint(struct mdt_thread_info *); + +void mdt_exit_ucred(struct mdt_thread_info *); + +int groups_from_list(struct group_info *, gid_t *); + +void groups_sort(struct group_info *); + +/* mdt_idmap.c */ +int mdt_init_idmap(struct mdt_thread_info *); + +void mdt_cleanup_idmap(struct mdt_export_data *); + +int mdt_handle_idmap(struct mdt_thread_info *); + +int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *, + struct ptlrpc_user_desc *); + +void mdt_body_reverse_idmap(struct mdt_thread_info *, + struct mdt_body *); + +int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *, + struct mdt_remote_perm *); + +int mdt_fix_attr_ucred(struct mdt_thread_info *, __u32); + +/* mdt/mdt_identity.c */ +#define MDT_IDENTITY_UPCALL_PATH "/usr/sbin/l_getidentity" + +extern struct upcall_cache_ops mdt_identity_upcall_cache_ops; + +struct mdt_identity *mdt_identity_get(struct upcall_cache *, __u32); + +void mdt_identity_put(struct upcall_cache *, struct mdt_identity *); + +void mdt_flush_identity(struct upcall_cache *, __u32); + +__u32 mdt_identity_get_setxid_perm(struct mdt_identity *, __u32, lnet_nid_t); + +int mdt_pack_remote_perm(struct mdt_thread_info *, struct mdt_object *, void *); + +/* mdt/mdt_rmtacl.c */ +#define MDT_RMTACL_UPCALL_PATH "/usr/sbin/l_facl" + +extern struct 
upcall_cache_ops mdt_rmtacl_upcall_cache_ops; + +int mdt_rmtacl_upcall(struct mdt_thread_info *, unsigned long, + char *, char *, int); + extern struct lu_context_key mdt_thread_key; /* debug issues helper starts here*/ static inline void mdt_fail_write(const struct lu_context *ctx, @@ -450,6 +515,11 @@ static inline void mdt_fail_write(const struct lu_context *ctx, } } +static inline struct mdt_export_data *mdt_req2med(struct ptlrpc_request *req) +{ + return &req->rq_export->exp_mdt_data; +} + #define MDT_FAIL_CHECK(id) \ ({ \ if (OBD_FAIL_CHECK(id)) \ diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 0b5fd93..9de4efe 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -41,6 +41,391 @@ #include "mdt_internal.h" +int groups_from_list(struct group_info *ginfo, gid_t *glist) +{ + int i; + int count = ginfo->ngroups; + + /* fill group_info from gid array */ + for (i = 0; i < ginfo->nblocks; i++) { + int cp_count = min(NGROUPS_PER_BLOCK, count); + int off = i * NGROUPS_PER_BLOCK; + int len = cp_count * sizeof(*glist); + + if (memcpy(ginfo->blocks[i], glist + off, len)) + return -EFAULT; + + count -= cp_count; + } + return 0; +} + +/* groups_sort() is copied from linux kernel! 
*/ +/* a simple shell-metzner sort */ +void groups_sort(struct group_info *group_info) +{ + int base, max, stride; + int gidsetsize = group_info->ngroups; + + for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1) + ; /* nothing */ + stride /= 3; + + while (stride) { + max = gidsetsize - stride; + for (base = 0; base < max; base++) { + int left = base; + int right = left + stride; + gid_t tmp = GROUP_AT(group_info, right); + + while (left >= 0 && GROUP_AT(group_info, left) > tmp) { + GROUP_AT(group_info, right) = + GROUP_AT(group_info, left); + right = left; + left -= stride; + } + GROUP_AT(group_info, right) = tmp; + } + stride /= 3; + } +} + +void mdt_exit_ucred(struct mdt_thread_info *info) +{ + struct md_ucred *uc = &info->mti_uc; + struct mdt_device *mdt = info->mti_mdt; + + if (uc->mu_valid != UCRED_INIT) { + if (uc->mu_ginfo && (uc->mu_valid != UCRED_OLD)) { + groups_free(uc->mu_ginfo); + uc->mu_ginfo = NULL; + } + if (uc->mu_identity) { + mdt_identity_put(mdt->mdt_identity_cache, + uc->mu_identity); + uc->mu_identity = NULL; + } + uc->mu_valid = UCRED_INIT; + } +} + +static int nid_nosquash(struct mdt_device *mdt, lnet_nid_t nid) +{ + struct rootsquash_info *rsi = mdt->mdt_rootsquash_info; + int i; + + for (i = 0; i < rsi->rsi_n_nosquash_nids; i++) + if (rsi->rsi_nosquash_nids[i] == nid || + rsi->rsi_nosquash_nids[i] == LNET_NID_ANY) + return 1; + + return 0; +} + +/* + * FIXME: here we follow simple rule: once uid/fsuid is root, we also squash + * the gid/fsgid, don't care setuid/setgid attributes. + * + * NB: don't change pud fields in root squash, because xid in pud will be + * packed in remote perm reply. 
+ */ +static int mdt_squash_root(struct mdt_device *mdt, struct md_ucred *ucred, + struct ptlrpc_user_desc *pud, lnet_nid_t peernid) +{ + struct rootsquash_info *rsi = mdt->mdt_rootsquash_info; + + if (pud->pud_uid && pud->pud_fsuid) + return 0; + + if (!rsi || !rsi->rsi_uid || nid_nosquash(mdt, peernid)) + return 0; + + CDEBUG(D_SEC, "squash req from "LPX64":" + "(%u:%u-%u:%u/%x)=>(%u:%u-%u:%u/%x)\n", peernid, + pud->pud_uid, pud->pud_gid, + pud->pud_fsuid, pud->pud_fsgid, pud->pud_cap, + pud->pud_uid ? pud->pud_uid : rsi->rsi_uid, + pud->pud_uid ? pud->pud_gid : rsi->rsi_gid, + pud->pud_fsuid ? pud->pud_fsuid : rsi->rsi_uid, + pud->pud_fsuid ? pud->pud_fsgid : rsi->rsi_gid, + pud->pud_cap & ~CAP_FS_MASK); + + if (pud->pud_uid == 0) { + ucred->mu_uid = rsi->rsi_uid; + ucred->mu_gid = rsi->rsi_gid; + } else { + ucred->mu_uid = pud->pud_uid; + ucred->mu_gid = pud->pud_gid; + } + + if (pud->pud_fsuid == 0) { + ucred->mu_fsuid = rsi->rsi_uid; + ucred->mu_fsgid = rsi->rsi_gid; + } else { + ucred->mu_fsuid = pud->pud_fsuid; + ucred->mu_fsgid = pud->pud_fsgid; + } + + ucred->mu_cap &= (pud->pud_cap & ~CAP_FS_MASK); + + return 1; +} + +static int new_init_ucred(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = mdt_req2med(req); + struct mdt_device *mdt = info->mti_mdt; + struct ptlrpc_user_desc *pud = req->rq_user_desc; + struct md_ucred *ucred = &info->mti_uc; + struct mdt_identity *identity = NULL; + lnet_nid_t peernid = req->rq_peer.nid; + __u32 setxid_perm = 0; + int root_squashed = 0; + int rc = 0; + + ENTRY; + + ucred->mu_valid = UCRED_INVALID; + + if (mdt->no_gss_support && med->med_rmtclient) { + CWARN("The server is running with no GSS support now! 
" + "and don't permit remote client to access!\n"); + RETURN(-EACCES); + } + + if (req->rq_auth_gss && req->rq_auth_uid == INVALID_UID) { + CWARN("user not authenticated, deny access!\n"); + RETURN(-EACCES); + } + + ucred->mu_o_uid = pud->pud_uid; + ucred->mu_o_gid = pud->pud_gid; + ucred->mu_o_fsuid = pud->pud_fsuid; + ucred->mu_o_fsgid = pud->pud_fsgid; + + /* sanity check: if we use strong authentication, we expect the + * uid which client claimed is true */ + if (req->rq_auth_gss) { + if (med->med_rmtclient) { + if (ptlrpc_user_desc_do_idmap(req, pud)) + RETURN(-EACCES); + + if (req->rq_auth_mapped_uid != pud->pud_uid) { + CERROR("remote client "LPU64": auth uid %u " + "while client claim %u:%u/%u:%u\n", + peernid, req->rq_auth_uid, pud->pud_uid, + pud->pud_gid, pud->pud_fsuid, + pud->pud_fsgid); + RETURN(-EACCES); + } + } else { + if (req->rq_auth_uid != pud->pud_uid) { + CERROR("local client "LPU64": auth uid %u " + "while client claim %u:%u/%u:%u\n", + peernid, req->rq_auth_uid, pud->pud_uid, + pud->pud_gid, pud->pud_fsuid, + pud->pud_fsgid); + RETURN(-EACCES); + } + } + } + + if (mdt->no_gss_support) + goto check_squash; + + identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid); + if (!identity) { + CERROR("Deny access without identity: uid %d\n", + ucred->mu_fsuid); + RETURN(-EACCES); + } + + /* check setuid/setgid permissions */ + if (!req->rq_auth_usr_mdt) { + int setuid, setgid; + + /* find out the setuid/setgid attempt */ + setuid = (pud->pud_uid != pud->pud_fsuid); + setgid = (pud->pud_gid != pud->pud_fsgid || + pud->pud_gid != identity->mi_gid); + + setxid_perm = mdt_identity_get_setxid_perm(identity, + med->med_rmtclient, + peernid); + + /* check permission of setuid */ + if (setuid && !(setxid_perm & LUSTRE_SETUID_PERM)) { + CWARN("mdt blocked setuid attempt (%u -> %u) from " + LPX64"\n", pud->pud_uid, pud->pud_fsuid, peernid); + GOTO(out, rc = -EACCES); + } + + /* check permission of setgid */ + if (setgid && !(setxid_perm & 
LUSTRE_SETGID_PERM)) { + CWARN("mdt blocked setgid attempt (%u:%u/%u:%u -> %u) " + "from "LPX64"\n", pud->pud_uid, pud->pud_gid, + pud->pud_fsuid, pud->pud_fsgid, identity->mi_gid, + peernid); + GOTO(out, rc = -EACCES); + } + } + +check_squash: + /* FIXME: The exact behavior of root_squash is not defined. */ + if (!req->rq_auth_usr_mdt) + root_squashed = mdt_squash_root(mdt, ucred, pud, peernid); + if (!root_squashed) { + ucred->mu_uid = pud->pud_uid; + ucred->mu_gid = pud->pud_gid; + ucred->mu_fsuid = pud->pud_fsuid; + ucred->mu_fsgid = pud->pud_fsgid; + ucred->mu_cap = pud->pud_cap; + /* remove fs privilege for non-root user */ + if (pud->pud_fsuid) + ucred->mu_cap &= ~CAP_FS_MASK; + } + + /* by now every fields other than groups have been granted */ + ucred->mu_identity = identity; + + /* setgroups for local client with LUSTRE_SETGRP_PERM, and no_squash_ + * root, otherwise install groups from local user supplementary groups. + * + * NB: remote client not allowed to setgroups anyway. 
+ */ + if (req->rq_auth_usr_mdt || + (pud->pud_ngroups && !med->med_rmtclient && !root_squashed && + (setxid_perm & LUSTRE_SETGRP_PERM))) { + struct group_info *ginfo; + + /* setgroups for local client */ + ginfo = groups_alloc(pud->pud_ngroups); + if (!ginfo) { + CERROR("failed to alloc %d groups\n", + pud->pud_ngroups); + GOTO(out, rc = -ENOMEM); + } + groups_from_list(ginfo, pud->pud_groups); + groups_sort(ginfo); + ucred->mu_ginfo = ginfo; + } else { + ucred->mu_ginfo = NULL; + } + + ucred->mu_valid = UCRED_NEW; + +out: + if (rc) + mdt_identity_put(mdt->mdt_identity_cache, identity); + + RETURN(rc); +} + +static int old_init_ucred(struct mdt_thread_info *info, + struct mdt_body *body) +{ + struct md_ucred *uc = &info->mti_uc; + struct mdt_device *mdt = info->mti_mdt; + struct mdt_identity *identity = NULL; + + ENTRY; + + uc->mu_valid = UCRED_INVALID; + + if (!mdt->no_gss_support) { + /* get identity info of this user */ + identity = mdt_identity_get(mdt->mdt_identity_cache, + body->fsuid); + if (!identity) { + CERROR("Deny access without identity: uid %d\n", + body->fsuid); + RETURN(-EACCES); + } + } + + uc->mu_valid = UCRED_OLD; + uc->mu_o_uid = body->uid; + uc->mu_o_gid = body->gid; + uc->mu_o_fsuid = body->fsuid; + uc->mu_o_fsgid = body->fsgid; + uc->mu_uid = body->uid; + uc->mu_gid = body->gid; + uc->mu_fsuid = body->fsuid; + uc->mu_fsgid = body->fsgid; + uc->mu_cap = body->capability; + if (identity) + uc->mu_ginfo = identity->mi_ginfo; + else + uc->mu_ginfo = NULL; + uc->mu_identity = identity; + + RETURN(0); +} + +int mdt_init_ucred(struct mdt_thread_info *info, struct mdt_body *body) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = &info->mti_uc; + + if ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW)) + return 0; + + mdt_exit_ucred(info); + + /* !rq_user_desc means null security */ + return req->rq_user_desc ? 
new_init_ucred(info) : + old_init_ucred(info, body); +} + +static int old_init_ucred_reint(struct mdt_thread_info *info) +{ + struct md_ucred *uc = &info->mti_uc; + struct mdt_device *mdt = info->mti_mdt; + struct mdt_identity *identity = NULL; + + ENTRY; + + uc->mu_valid = UCRED_INVALID; + + if (!mdt->no_gss_support) { + /* get identity info of this user */ + identity = mdt_identity_get(mdt->mdt_identity_cache, + uc->mu_fsuid); + if (!identity) { + CERROR("Deny access without identity: uid %d\n", + uc->mu_fsuid); + RETURN(-EACCES); + } + } + + uc->mu_valid = UCRED_OLD; + uc->mu_o_uid = uc->mu_o_fsuid = uc->mu_uid = uc->mu_fsuid; + uc->mu_o_gid = uc->mu_o_fsgid = uc->mu_gid = uc->mu_fsgid; + if (identity) + uc->mu_ginfo = identity->mi_ginfo; + else + uc->mu_ginfo = NULL; + uc->mu_identity = identity; + + RETURN(0); +} + +int mdt_init_ucred_reint(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct md_ucred *uc = &info->mti_uc; + + if ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW)) + return 0; + + mdt_exit_ucred(info); + + /* !rq_user_desc means null security */ + return req->rq_user_desc ? 
new_init_ucred(info) : + old_init_ucred_reint(info); +} + /* copied from lov/lov_ea.c, just for debugging, will be removed later */ void mdt_dump_lmm(int level, const struct lov_mds_md *lmm) { @@ -99,6 +484,7 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo, if (ma->ma_valid & MA_INODE) mdt_pack_attr2body(repbody, la, mdt_object_fid(mo)); + mdt_body_reverse_idmap(info, repbody); if (ma->ma_valid & MA_LOV) { __u32 mode; @@ -168,6 +554,7 @@ static __u64 mdt_attr_valid_xlate(__u64 in, struct mdt_reint_record *rr, static int mdt_setattr_unpack_rec(struct mdt_thread_info *info) { + struct md_ucred *uc = &info->mti_uc; struct md_attr *ma = &info->mti_attr; struct lu_attr *la = &ma->ma_attr; struct req_capsule *pill = &info->mti_pill; @@ -179,6 +566,12 @@ static int mdt_setattr_unpack_rec(struct mdt_thread_info *info) if (rec == NULL) RETURN(-EFAULT); + uc->mu_fsuid = rec->sa_fsuid; + uc->mu_fsgid = rec->sa_fsgid; + uc->mu_uid = rec->sa_uid; + uc->mu_gid = rec->sa_gid; + uc->mu_cap = rec->sa_cap; + rr->rr_fid1 = &rec->sa_fid; la->la_valid = mdt_attr_valid_xlate(rec->sa_valid, rr, ma); la->la_mode = rec->sa_mode; @@ -255,6 +648,7 @@ int mdt_close_unpack(struct mdt_thread_info *info) static int mdt_create_unpack(struct mdt_thread_info *info) { + struct md_ucred *uc = &info->mti_uc; struct mdt_rec_create *rec; struct lu_attr *attr = &info->mti_attr.ma_attr; struct mdt_reint_record *rr = &info->mti_rr; @@ -264,6 +658,10 @@ static int mdt_create_unpack(struct mdt_thread_info *info) rec = req_capsule_client_get(pill, &RMF_REC_CREATE); if (rec != NULL) { + uc->mu_fsuid = rec->cr_fsuid; + uc->mu_fsgid = rec->cr_fsgid; + uc->mu_cap = rec->cr_cap; + rr->rr_fid1 = &rec->cr_fid1; rr->rr_fid2 = &rec->cr_fid2; attr->la_mode = rec->cr_mode; @@ -315,6 +713,7 @@ static int mdt_create_unpack(struct mdt_thread_info *info) static int mdt_link_unpack(struct mdt_thread_info *info) { + struct md_ucred *uc = &info->mti_uc; struct mdt_rec_link *rec; struct lu_attr 
*attr = &info->mti_attr.ma_attr; struct mdt_reint_record *rr = &info->mti_rr; @@ -324,6 +723,10 @@ static int mdt_link_unpack(struct mdt_thread_info *info) rec = req_capsule_client_get(pill, &RMF_REC_LINK); if (rec != NULL) { + uc->mu_fsuid = rec->lk_fsuid; + uc->mu_fsgid = rec->lk_fsgid; + uc->mu_cap = rec->lk_cap; + attr->la_uid = rec->lk_fsuid; attr->la_gid = rec->lk_fsgid; rr->rr_fid1 = &rec->lk_fid1; @@ -341,6 +744,7 @@ static int mdt_link_unpack(struct mdt_thread_info *info) static int mdt_unlink_unpack(struct mdt_thread_info *info) { + struct md_ucred *uc = &info->mti_uc; struct mdt_rec_unlink *rec; struct lu_attr *attr = &info->mti_attr.ma_attr; struct mdt_reint_record *rr = &info->mti_rr; @@ -350,6 +754,10 @@ static int mdt_unlink_unpack(struct mdt_thread_info *info) rec = req_capsule_client_get(pill, &RMF_REC_UNLINK); if (rec != NULL) { + uc->mu_fsuid = rec->ul_fsuid; + uc->mu_fsgid = rec->ul_fsgid; + uc->mu_cap = rec->ul_cap; + attr->la_uid = rec->ul_fsuid; attr->la_gid = rec->ul_fsgid; rr->rr_fid1 = &rec->ul_fid1; @@ -370,6 +778,7 @@ static int mdt_unlink_unpack(struct mdt_thread_info *info) static int mdt_rename_unpack(struct mdt_thread_info *info) { + struct md_ucred *uc = &info->mti_uc; struct mdt_rec_rename *rec; struct lu_attr *attr = &info->mti_attr.ma_attr; struct mdt_reint_record *rr = &info->mti_rr; @@ -379,6 +788,10 @@ static int mdt_rename_unpack(struct mdt_thread_info *info) rec = req_capsule_client_get(pill, &RMF_REC_RENAME); if (rec != NULL) { + uc->mu_fsuid = rec->rn_fsuid; + uc->mu_fsgid = rec->rn_fsgid; + uc->mu_cap = rec->rn_cap; + attr->la_uid = rec->rn_fsuid; attr->la_gid = rec->rn_fsgid; rr->rr_fid1 = &rec->rn_fid1; @@ -400,6 +813,7 @@ static int mdt_rename_unpack(struct mdt_thread_info *info) static int mdt_open_unpack(struct mdt_thread_info *info) { + struct md_ucred *uc = &info->mti_uc; struct mdt_rec_create *rec; struct lu_attr *attr = &info->mti_attr.ma_attr; struct req_capsule *pill = &info->mti_pill; @@ -409,6 +823,10 @@ 
static int mdt_open_unpack(struct mdt_thread_info *info) rec = req_capsule_client_get(pill, &RMF_REC_CREATE); if (rec != NULL) { + uc->mu_fsuid = rec->cr_fsuid; + uc->mu_fsgid = rec->cr_fsgid; + uc->mu_cap = rec->cr_cap; + rr->rr_fid1 = &rec->cr_fid1; rr->rr_fid2 = &rec->cr_fid2; attr->la_mode = rec->cr_mode; diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index 1fa7628..41ca490 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -85,8 +85,9 @@ static int mdt_create_data(struct mdt_thread_info *info, RETURN(0); ma->ma_need = MA_INODE | MA_LOV; - rc = mdo_create_data(info->mti_ctxt, p ? mdt_object_child(p) : NULL, - mdt_object_child(o), spec, ma); + rc = mdo_create_data(info->mti_ctxt, + p ? mdt_object_child(p) : NULL, + mdt_object_child(o), spec, ma, &info->mti_uc); RETURN(rc); } @@ -332,6 +333,7 @@ static int mdt_mfd_open(struct mdt_thread_info *info, isdir = S_ISDIR(la->la_mode); islnk = S_ISLNK(la->la_mode); mdt_pack_attr2body(repbody, la, mdt_object_fid(o)); + mdt_body_reverse_idmap(info, repbody); /* if we are following a symlink, don't open; and * do not return open handle for special nodes as client required @@ -339,7 +341,7 @@ static int mdt_mfd_open(struct mdt_thread_info *info, if (islnk || (!isreg && !isdir && (req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH))) { lustre_msg_set_transno(req->rq_repmsg, 0); - RETURN(0); + GOTO(out, rc = 0); } mdt_set_disposition(info, rep, DISP_OPEN_OPEN); @@ -402,7 +404,9 @@ static int mdt_mfd_open(struct mdt_thread_info *info, if (rc) RETURN(rc); - rc = mo_open(info->mti_ctxt, mdt_object_child(o), flags); + rc = mo_open(info->mti_ctxt, mdt_object_child(o), + created ? 
flags | MDS_OPEN_CREATED : flags, + &info->mti_uc); if (rc) RETURN(rc); @@ -427,6 +431,24 @@ static int mdt_mfd_open(struct mdt_thread_info *info, mdt_open_transno(info); } else rc = -ENOMEM; + +out: + if (!rc) { + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + + if (med->med_rmtclient) { + void *buf = req_capsule_server_get(&info->mti_pill, + &RMF_ACL); + + rc = mdt_pack_remote_perm(info, o, buf); + if (!rc) { + repbody->valid |= OBD_MD_FLRMTPERM; + repbody->aclsize = + sizeof(struct mdt_remote_perm); + } + } + } + RETURN(rc); } @@ -462,7 +484,7 @@ void mdt_reconstruct_open(struct mdt_thread_info *info, mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1); GOTO(out, 0); } - + /* * XXX: is this correct to set here transno and status to zero? This is * especially suspicious after calling mdt_req_from_mcd() above, which @@ -470,7 +492,7 @@ void mdt_reconstruct_open(struct mdt_thread_info *info, */ lustre_msg_set_transno(req->rq_repmsg, 0); lustre_msg_set_status(req->rq_repmsg, 0); - + ldlm_rep->lock_policy_res1 = 0; ldlm_rep->lock_policy_res2 = 0; result = mdt_reint_open(info, lhc); @@ -502,7 +524,7 @@ static int mdt_open_by_fid(struct mdt_thread_info* info, mdt_set_disposition(info, rep, DISP_IT_EXECD); mdt_set_disposition(info, rep, DISP_LOOKUP_EXECD); mdt_set_disposition(info, rep, DISP_LOOKUP_POS); - rc = mo_attr_get(ctxt, mdt_object_child(o), ma); + rc = mo_attr_get(ctxt, mdt_object_child(o), ma, &info->mti_uc); if (rc == 0) rc = mdt_mfd_open(info, NULL, o, flags, 0, rep); } else if (rc == 0) { @@ -541,7 +563,8 @@ static int mdt_cross_open(struct mdt_thread_info* info, rc = lu_object_exists(&o->mot_obj.mo_lu); if (rc > 0) { - rc = mo_attr_get(info->mti_ctxt, mdt_object_child(o), ma); + rc = mo_attr_get(info->mti_ctxt, mdt_object_child(o), ma, + &info->mti_uc); if (rc == 0) rc = mdt_mfd_open(info, NULL, o, flags, 0, rep); } else if (rc == 0) { @@ -648,7 +671,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) GOTO(out, 
result = PTR_ERR(parent)); result = mdo_lookup(info->mti_ctxt, mdt_object_child(parent), - rr->rr_name, child_fid); + rr->rr_name, child_fid, &info->mti_uc); if (result != 0 && result != -ENOENT && result != -ESTALE) GOTO(out_parent, result); @@ -686,7 +709,8 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) rr->rr_name, mdt_object_child(child), &info->mti_spec, - &info->mti_attr); + &info->mti_attr, + &info->mti_uc); if (result == -ERESTART) { mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_CREATE); GOTO(out_child, result); @@ -698,8 +722,8 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) created = 1; } else { /* We have to get attr & lov ea for this object */ - result = mo_attr_get(info->mti_ctxt, - mdt_object_child(child), ma); + result = mo_attr_get(info->mti_ctxt, mdt_object_child(child), + ma, &info->mti_uc); /* * The object is on remote node, return its FID for remote open. */ @@ -743,7 +767,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) GOTO(out_child, result); } } - + /* Try to open it now. 
*/ result = mdt_mfd_open(info, parent, child, create_flags, created, ldlm_rep); @@ -753,9 +777,13 @@ finish_open: if (result != 0 && created) { int rc2; ma->ma_need = 0; - rc2 = mdo_unlink(info->mti_ctxt, mdt_object_child(parent), - mdt_object_child(child), rr->rr_name, - &info->mti_attr); + ma->ma_cookie_size = 0; + rc2 = mdo_unlink(info->mti_ctxt, + mdt_object_child(parent), + mdt_object_child(child), + rr->rr_name, + &info->mti_attr, + &info->mti_uc); if (rc2 != 0) CERROR("error in cleanup of open"); } @@ -765,6 +793,8 @@ out_parent: mdt_object_unlock_put(info, parent, lh, result); out: mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1); + if (result) + lustre_msg_set_transno(req->rq_repmsg, 0); return result; } @@ -800,9 +830,9 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd) ma->ma_need |= MA_INODE; if (!MFD_CLOSED(mode)) - rc = mo_close(info->mti_ctxt, next, ma); + rc = mo_close(info->mti_ctxt, next, ma, &info->mti_uc); else if (ret == -EAGAIN) - rc = mo_attr_get(info->mti_ctxt, next, ma); + rc = mo_attr_get(info->mti_ctxt, next, ma, &info->mti_uc); /* If the object is unlinked, do not try to re-enable SIZEONMDS */ if ((ret == -EAGAIN) && (ma->ma_valid & MA_INODE) && diff --git a/lustre/mdt/mdt_recovery.c b/lustre/mdt/mdt_recovery.c index c879799..54e7e9a 100644 --- a/lustre/mdt/mdt_recovery.c +++ b/lustre/mdt/mdt_recovery.c @@ -880,7 +880,8 @@ static int mdt_txn_commit_cb(const struct lu_context *ctx, return 0; } -int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt) +int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt, + struct obd_device *obd) { struct lu_fid last_fid; struct dt_object *last; @@ -909,6 +910,11 @@ int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt) CERROR("cannot open %s: rc = %d\n", LAST_RCVD, rc); } + OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); + obd->obd_lvfs_ctxt.pwdmnt = current->fs->pwdmnt; + obd->obd_lvfs_ctxt.pwd = current->fs->pwd; + obd->obd_lvfs_ctxt.fs = 
get_ds(); + RETURN (rc); } @@ -976,13 +982,14 @@ static void mdt_reconstruct_create(struct mdt_thread_info *mti, body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY); rc = mo_attr_get(mti->mti_ctxt, mdt_object_child(child), - &mti->mti_attr); + &mti->mti_attr, &mti->mti_uc); if (rc == -EREMOTE) { /* object was created on remote server */ req->rq_status = rc; body->valid |= OBD_MD_MDS; } mdt_pack_attr2body(body, &mti->mti_attr.ma_attr, mdt_object_fid(child)); + mdt_body_reverse_idmap(mti, body); mdt_object_put(mti->mti_ctxt, child); } @@ -1002,8 +1009,10 @@ static void mdt_reconstruct_setattr(struct mdt_thread_info *mti, body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY); obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1); LASSERT(!IS_ERR(obj)); - mo_attr_get(mti->mti_ctxt, mdt_object_child(obj), &mti->mti_attr); + mo_attr_get(mti->mti_ctxt, mdt_object_child(obj), + &mti->mti_attr, &mti->mti_uc); mdt_pack_attr2body(body, &mti->mti_attr.ma_attr, mdt_object_fid(obj)); + mdt_body_reverse_idmap(mti, body); /* Don't return OST-specific attributes if we didn't just set them */ /* diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 7838193..f32fca3 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -67,13 +67,14 @@ static int mdt_md_create(struct mdt_thread_info *info) OBD_FAIL_MDS_REINT_CREATE_WRITE); rc = mdo_create(info->mti_ctxt, next, rr->rr_name, - mdt_object_child(child), &info->mti_spec, - ma); + mdt_object_child(child), + &info->mti_spec, ma, &info->mti_uc); if (rc == 0) { /* return fid & attr to client. 
*/ if (ma->ma_valid & MA_INODE) - mdt_pack_attr2body(repbody, &ma->ma_attr, + mdt_pack_attr2body(repbody, &ma->ma_attr, mdt_object_fid(child)); + mdt_body_reverse_idmap(info, repbody); } mdt_object_put(info->mti_ctxt, child); } else @@ -99,13 +100,14 @@ static int mdt_md_mkobj(struct mdt_thread_info *info) struct md_object *next = mdt_object_child(o); ma->ma_need = MA_INODE; - rc = mo_object_create(info->mti_ctxt, next, - &info->mti_spec, ma); + rc = mo_object_create(info->mti_ctxt, next, &info->mti_spec, + ma, &info->mti_uc); if (rc == 0) { /* return fid & attr to client. */ if (ma->ma_valid & MA_INODE) mdt_pack_attr2body(repbody, &ma->ma_attr, mdt_object_fid(o)); + mdt_body_reverse_idmap(info, repbody); } mdt_object_put(info->mti_ctxt, o); } else @@ -162,7 +164,8 @@ int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, int flags) OBD_FAIL_MDS_REINT_SETATTR_WRITE); /* all attrs are packed into mti_attr in unpack_setattr */ - rc = mo_attr_set(info->mti_ctxt, mdt_object_child(mo), ma); + rc = mo_attr_set(info->mti_ctxt, mdt_object_child(mo), ma, + &info->mti_uc); if (rc != 0) GOTO(out, rc); @@ -260,11 +263,12 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, ma->ma_need = MA_INODE; next = mdt_object_child(mo); - rc = mo_attr_get(info->mti_ctxt, next, ma); + rc = mo_attr_get(info->mti_ctxt, next, ma, &info->mti_uc); if (rc != 0) GOTO(out, rc); mdt_pack_attr2body(repbody, &ma->ma_attr, mdt_object_fid(mo)); + mdt_body_reverse_idmap(info, repbody); EXIT; out: mdt_object_put(info->mti_ctxt, mo); @@ -348,13 +352,14 @@ static int mdt_reint_unlink(struct mdt_thread_info *info, if (strlen(rr->rr_name) == 0) { /* remote partial operation */ - rc = mo_ref_del(info->mti_ctxt, mdt_object_child(mp), ma); + rc = mo_ref_del(info->mti_ctxt, mdt_object_child(mp), ma, + &info->mti_uc); GOTO(out_unlock_parent, rc); } /* step 2: find & lock the child */ rc = mdo_lookup(info->mti_ctxt, mdt_object_child(mp), - rr->rr_name, child_fid); + rr->rr_name, child_fid, 
&info->mti_uc); if (rc != 0) GOTO(out_unlock_parent, rc); @@ -377,7 +382,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info, */ ma->ma_need = MA_INODE; rc = mdo_unlink(info->mti_ctxt, mdt_object_child(mp), - mdt_object_child(mc), rr->rr_name, ma); + mdt_object_child(mc), rr->rr_name, ma, &info->mti_uc); if (rc) GOTO(out_unlock_child, rc); @@ -425,7 +430,8 @@ static int mdt_reint_link(struct mdt_thread_info *info, if (strlen(rr->rr_name) == 0) { /* remote partial operation */ - rc = mo_ref_add(info->mti_ctxt, mdt_object_child(ms)); + rc = mo_ref_add(info->mti_ctxt, mdt_object_child(ms), + &info->mti_uc); GOTO(out_unlock_source, rc); } /*step 2: find & lock the target parent dir*/ @@ -442,7 +448,7 @@ static int mdt_reint_link(struct mdt_thread_info *info, OBD_FAIL_MDS_REINT_LINK_WRITE); rc = mdo_link(info->mti_ctxt, mdt_object_child(mp), - mdt_object_child(ms), rr->rr_name, ma); + mdt_object_child(ms), rr->rr_name, ma, &info->mti_uc); GOTO(out_unlock_target, rc); out_unlock_target: @@ -482,7 +488,7 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info) /*step 2: find & lock the target object if exists*/ rc = mdo_lookup(info->mti_ctxt, mdt_object_child(mtgtdir), - rr->rr_tgt, tgt_fid); + rr->rr_tgt, tgt_fid, &info->mti_uc); if (rc != 0 && rc != -ENOENT) { GOTO(out_unlock_tgtdir, rc); } else if (rc == 0) { @@ -494,12 +500,13 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info) GOTO(out_unlock_tgtdir, rc = PTR_ERR(mtgt)); rc = mdo_rename_tgt(info->mti_ctxt, mdt_object_child(mtgtdir), - mdt_object_child(mtgt), - rr->rr_fid2, rr->rr_tgt, ma); + mdt_object_child(mtgt), rr->rr_fid2, + rr->rr_tgt, ma, &info->mti_uc); } else /* -ENOENT */ { rc = mdo_name_insert(info->mti_ctxt, mdt_object_child(mtgtdir), rr->rr_tgt, rr->rr_fid2, - S_ISDIR(ma->ma_attr.la_mode)); + S_ISDIR(ma->ma_attr.la_mode), + &info->mti_uc); } /* handle last link of tgt object */ @@ -578,8 +585,9 @@ static int mdt_rename_check(struct mdt_thread_info *info, struct lu_fid *fid) 
do { dst = mdt_object_find(info->mti_ctxt, info->mti_mdt, &dst_fid); if (!IS_ERR(dst)) { - rc = mdo_is_subdir(info->mti_ctxt, mdt_object_child(dst), - fid, &dst_fid); + rc = mdo_is_subdir(info->mti_ctxt, + mdt_object_child(dst), + fid, &dst_fid, NULL); mdt_object_put(info->mti_ctxt, dst); if (rc < 0) { CERROR("Error while doing mdo_is_subdir(), rc %d\n", @@ -667,7 +675,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info, /*step 3: find & lock the old object*/ rc = mdo_lookup(info->mti_ctxt, mdt_object_child(msrcdir), - rr->rr_name, old_fid); + rr->rr_name, old_fid, &info->mti_uc); if (rc != 0) GOTO(out_unlock_target, rc); @@ -684,7 +692,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info, /*step 4: find & lock the new object*/ /* new target object may not exist now */ rc = mdo_lookup(info->mti_ctxt, mdt_object_child(mtgtdir), - rr->rr_tgt, new_fid); + rr->rr_tgt, new_fid, &info->mti_uc); if (rc == 0) { /* the new_fid should have been filled at this moment*/ if (lu_fid_eq(old_fid, new_fid)) @@ -734,7 +742,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info, rc = mdo_rename(info->mti_ctxt, mdt_object_child(msrcdir), mdt_object_child(mtgtdir), old_fid, rr->rr_name, - (mnew ? mdt_object_child(mnew) : NULL), rr->rr_tgt, ma); + (mnew ? mdt_object_child(mnew) : NULL), + rr->rr_tgt, ma, &info->mti_uc); /* handle last link of tgt object */ if (rc == 0 && mnew) diff --git a/lustre/mdt/mdt_rmtacl.c b/lustre/mdt/mdt_rmtacl.c new file mode 100644 index 0000000..9e9f082 --- /dev/null +++ b/lustre/mdt/mdt_rmtacl.c @@ -0,0 +1,248 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * Author: Lai Siyao + * Author: Fan Yong + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mdt_internal.h" + +#define MAX_CMD_LEN 256 + +static void mdt_rmtacl_entry_init(struct upcall_cache_entry *entry, void *args) +{ + struct rmtacl_upcall_data *data = args; + struct mdt_rmtacl *acl = &entry->u.acl; + char *cmd; + + /* we use address of this cache entry as handle */ + acl->ra_handle = (__u32)entry; + OBD_ALLOC(cmd, strlen(data->aud_cmd) + 1); + if (!cmd) + return; /* upcall will fail later! 
*/ + + strcpy(cmd, data->aud_cmd); + entry->u.acl.ra_cmd = cmd; +} + +static void mdt_rmtacl_entry_free(struct upcall_cache *cache, + struct upcall_cache_entry *entry) +{ + struct mdt_rmtacl *acl = &entry->u.acl; + int len; + + if (acl->ra_cmd) { + len = strlen(acl->ra_cmd) + 1; + OBD_FREE(acl->ra_cmd, len); + } + + if (acl->ra_buf) { + len = strlen(acl->ra_buf) + 1; + OBD_FREE(acl->ra_buf, len); + } +} + +static int mdt_rmtacl_upcall_compare(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + __u64 key, void *args) +{ + struct rmtacl_upcall_data *data = args; + + LASSERT(entry && data); + LASSERT(entry->u.acl.ra_cmd && data->aud_cmd); + return strncmp(entry->u.acl.ra_cmd, data->aud_cmd, MAX_CMD_LEN); +} + +static int mdt_rmtacl_downcall_compare(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + __u64 key, void *args) +{ + struct rmtacl_downcall_data *data = args; + + return entry->u.acl.ra_handle - data->add_handle; +} + +static int mdt_rmtacl_do_upcall(struct upcall_cache *cache, + struct upcall_cache_entry *entry) +{ + struct mdt_rmtacl *acl = &entry->u.acl; + char handle[20] = ""; + char keystr[20] = ""; + char *argv[] = { + [0] = cache->uc_upcall, + [1] = cache->uc_name, + [2] = keystr, + [3] = handle, + [4] = acl->ra_cmd, + [5] = NULL + }; + char *envp[] = { + [0] = "HOME=/", + [1] = "PATH=/bin:/usr/bin:/sbin:/usr/sbin", + [2] = NULL + }; + int rc; + ENTRY; + + if (!acl->ra_cmd) + RETURN(-ENOMEM); + + snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key); + snprintf(handle, sizeof(handle), "%u", acl->ra_handle); + + LASSERTF(strcmp(cache->uc_upcall, "NONE"), "no upcall set!"); + + CDEBUG(D_INFO, "%s: remote acl upcall %s %s %s %s %s\n", + cache->uc_name, argv[0], argv[1], argv[2], argv[3], argv[4]); + + rc = USERMODEHELPER(argv[0], argv, envp); + if (rc < 0) { + CERROR("%s: error invoking upcall %s %s %s %s %s: rc %d; " + "check /proc/fs/lustre/mdt/%s/rmtacl_upcall\n", + cache->uc_name, argv[0], argv[1], argv[2], argv[3], + 
argv[4], rc, cache->uc_name); + } else { + CDEBUG(D_HA, "%s: invoked upcall %s %s %s %s %s\n", + cache->uc_name, argv[0], argv[1], argv[2], argv[3], + argv[4]); + rc = 0; + } + RETURN(rc); +} + +static int mdt_rmtacl_parse_downcall(struct upcall_cache *cache, + struct upcall_cache_entry *entry, + void *args) +{ + struct mdt_rmtacl *acl = &entry->u.acl; + struct rmtacl_downcall_data *data; + char *buf; + int len; + ENTRY; + + data = (struct rmtacl_downcall_data *)args; + LASSERT(data); + + len = strlen(data->add_buf) + 1; + OBD_ALLOC(buf, len); + if (!buf) + RETURN(-ENOMEM); + + memcpy(buf, data->add_buf, len); + acl->ra_buf = buf; + + CDEBUG(D_OTHER, "parse mdt acl@%p: %s %s\n", + acl, acl->ra_cmd, acl->ra_buf); + + RETURN(0); +} + +struct upcall_cache_ops mdt_rmtacl_upcall_cache_ops = { + .init_entry = mdt_rmtacl_entry_init, + .free_entry = mdt_rmtacl_entry_free, + .upcall_compare = mdt_rmtacl_upcall_compare, + .downcall_compare = mdt_rmtacl_downcall_compare, + .do_upcall = mdt_rmtacl_do_upcall, + .parse_downcall = mdt_rmtacl_parse_downcall, +}; + +int mdt_rmtacl_upcall(struct mdt_thread_info *info, unsigned long key, + char *cmd, char *buf, int buflen) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct obd_device *obd = req->rq_export->exp_obd; + struct mdt_device *mdt = info->mti_mdt; + struct lvfs_ucred uc; + struct lvfs_run_ctxt saved; + struct rmtacl_upcall_data data; + struct upcall_cache_entry *entry; + char *tmp = NULL; + int rc = 0; + ENTRY; + + OBD_ALLOC(tmp, PAGE_SIZE); + if (!tmp) + RETURN(-ENOMEM); + + data.aud_cmd = cmd; + + uc.luc_uid = info->mti_uc.mu_uid; + uc.luc_gid = info->mti_uc.mu_gid; + uc.luc_fsuid = info->mti_uc.mu_fsuid; + uc.luc_fsgid = info->mti_uc.mu_fsgid; + uc.luc_cap = info->mti_uc.mu_cap; + uc.luc_umask = info->mti_uc.mu_umask; + uc.luc_ginfo = info->mti_uc.mu_ginfo; + uc.luc_identity = info->mti_uc.mu_identity; + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); + entry = upcall_cache_get_entry(mdt->mdt_rmtacl_cache, 
(__u64)key, + &data); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); + + if (IS_ERR(entry)) + GOTO(out, rc = PTR_ERR(entry)); + + if (buflen <= strlen(entry->u.acl.ra_buf)) + GOTO(out, rc = -EFAULT); + + memcpy(buf, entry->u.acl.ra_buf, strlen(entry->u.acl.ra_buf)); + /* remote acl operation expire at once! */ + UC_CACHE_SET_EXPIRED(entry); + upcall_cache_put_entry(mdt->mdt_rmtacl_cache, entry); + +out: + if (rc) + sprintf(buf, "server processing error: %d\n", rc); + OBD_FREE(tmp, PAGE_SIZE); + RETURN(0); +} diff --git a/lustre/mdt/mdt_xattr.c b/lustre/mdt/mdt_xattr.c index 694042d..190ea74 100644 --- a/lustre/mdt/mdt_xattr.c +++ b/lustre/mdt/mdt_xattr.c @@ -68,13 +68,16 @@ static int mdt_getxattr_pack_reply(struct mdt_thread_info * info) sizeof(user_string) - 1) == 0) return -EOPNOTSUPP; - rc = mo_xattr_get(info->mti_ctxt, - mdt_object_child(info->mti_object), - NULL, 0, xattr_name); + if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL)) + rc = RMTACL_SIZE_MAX; + else + rc = mo_xattr_get(info->mti_ctxt, + mdt_object_child(info->mti_object), + NULL, 0, xattr_name, &info->mti_uc); } else if ((valid & OBD_MD_FLXATTRLS) == OBD_MD_FLXATTRLS) { rc = mo_xattr_list(info->mti_ctxt, mdt_object_child(info->mti_object), - NULL, 0); + NULL, 0, &info->mti_uc); } else { CERROR("valid bits: "LPX64"\n", info->mti_body->valid); return -EINVAL; @@ -96,9 +99,33 @@ static int mdt_getxattr_pack_reply(struct mdt_thread_info * info) return rc = !rc1? 
rc1 : rc; } +static int do_remote_getfacl(struct mdt_thread_info *info, + struct lu_fid *fid, int offset, + void *buf, int buflen) +{ + struct ptlrpc_request *req = mdt_info_req(info); + char *cmd; + int rc; + ENTRY; + + if (!buf || (buflen != RMTACL_SIZE_MAX)) + RETURN(-EINVAL); + + cmd = lustre_msg_string(req->rq_reqmsg, offset, 0); + if (!cmd) { + CERROR("missing getfacl command!\n"); + RETURN(-EFAULT); + } + + rc = mdt_rmtacl_upcall(info, fid_oid(fid), cmd, buf, buflen); + lustre_shrink_reply(req, REPLY_REC_OFF + 1, strlen(buf) + 1, 0); + RETURN(rc ?: strlen(buf) + 1); +} int mdt_getxattr(struct mdt_thread_info *info) { + struct mdt_body *body = (struct mdt_body *)info->mti_body; + struct mdt_body *reqbody; int rc; struct md_object *next; char *buf; @@ -113,11 +140,19 @@ int mdt_getxattr(struct mdt_thread_info *info) CDEBUG(D_INODE, "getxattr "DFID"\n", PFID(&info->mti_body->fid1)); + reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY); + if (reqbody == NULL) + RETURN(-EFAULT); + + rc = mdt_init_ucred(info, reqbody); + if (rc) + RETURN(rc); + next = mdt_object_child(info->mti_object); rc = mdt_getxattr_pack_reply(info); if (rc < 0) - RETURN(rc); + GOTO(out, rc); rep_body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); /*No EA, just go back*/ @@ -132,8 +167,13 @@ int mdt_getxattr(struct mdt_thread_info *info) &RMF_NAME); CDEBUG(D_INODE, "getxattr %s\n", xattr_name); - rc = mo_xattr_get(info->mti_ctxt, next, - buf, buflen, xattr_name); + if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL)) { + rc = do_remote_getfacl(info, &body->fid1, + REQ_REC_OFF + 2, buf, buflen); + } else { + rc = mo_xattr_get(info->mti_ctxt, next, buf, buflen, + xattr_name, &info->mti_uc); + } if (rc < 0 && rc != -ENODATA && rc != -EOPNOTSUPP && rc != -ERANGE) @@ -141,7 +181,8 @@ int mdt_getxattr(struct mdt_thread_info *info) } else if (info->mti_body->valid & OBD_MD_FLXATTRLS) { CDEBUG(D_INODE, "listxattr\n"); - rc = mo_xattr_list(info->mti_ctxt, next, buf, buflen); + rc = 
mo_xattr_list(info->mti_ctxt, next, buf, buflen, + &info->mti_uc); if (rc < 0) CDEBUG(D_OTHER, "listxattr failed: %d\n", rc); } else @@ -152,20 +193,48 @@ no_xattr: rep_body->eadatasize = rc; rc = 0; } - +out: + mdt_exit_ucred(info); RETURN(rc); } +static int do_remote_setfacl(struct mdt_thread_info *info, struct lu_fid *fid, + int offset) +{ + struct ptlrpc_request *req = mdt_info_req(info); + char *cmd, *buf; + int rc, buflen; + ENTRY; + + cmd = lustre_msg_string(req->rq_reqmsg, offset, 0); + if (!cmd) { + CERROR("missing setfacl command!\n"); + RETURN(-EFAULT); + } + + buflen = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF); + buf = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, buflen); + if (!buf || (buflen != RMTACL_SIZE_MAX)) + RETURN(-EINVAL); + + rc = mdt_rmtacl_upcall(info, fid_oid(fid), cmd, buf, buflen); + if (rc) + CERROR("remote acl upcall failed: %d\n", rc); + + lustre_shrink_reply(req, REPLY_REC_OFF, strlen(buf) + 1, 0); + RETURN(rc); +} int mdt_setxattr(struct mdt_thread_info *info) { - struct ptlrpc_request *req = mdt_info_req(info); + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_body *reqbody; const char user_string[] = "user."; const char trust_string[] = "trusted."; struct mdt_lock_handle *lh; struct req_capsule *pill = &info->mti_pill; struct mdt_object *obj = info->mti_object; - const struct mdt_body *body = info->mti_body; + struct mdt_body *body = (struct mdt_body *)info->mti_body; const struct lu_context *ctx = info->mti_ctxt; struct md_object *child = mdt_object_child(obj); __u64 valid = body->valid; @@ -180,6 +249,14 @@ int mdt_setxattr(struct mdt_thread_info *info) if (MDT_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR)) RETURN(-ENOMEM); + reqbody = req_capsule_client_get(pill, &RMF_MDT_BODY); + if (reqbody == NULL) + RETURN(-EFAULT); + + rc = mdt_init_ucred(info, reqbody); + if (rc) + RETURN(rc); + /* various sanity check for xattr name */ xattr_name = req_capsule_client_get(pill, &RMF_NAME); if (!xattr_name) @@ -188,6 +265,12 @@ 
int mdt_setxattr(struct mdt_thread_info *info) CDEBUG(D_INODE, "%s xattr %s\n", body->valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name); + if (((valid & OBD_MD_FLXATTR) == OBD_MD_FLXATTR) && + (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL))) { + rc = do_remote_setfacl(info, &body->fid1, REQ_REC_OFF + 2); + GOTO(out, rc); + } + if (strncmp(xattr_name, trust_string, sizeof(trust_string) - 1) == 0) { if (strcmp(xattr_name + 8, XATTR_NAME_LOV) == 0) GOTO(out, rc = -EACCES); @@ -225,14 +308,15 @@ int mdt_setxattr(struct mdt_thread_info *info) if (body->flags & XATTR_CREATE) flags |= LU_XATTR_CREATE; + mdt_fail_write(ctx, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_SETXATTR_WRITE); - rc = mo_xattr_set(ctx, child, xattr, - xattr_len, xattr_name, flags); + rc = mo_xattr_set(ctx, child, xattr, xattr_len, + xattr_name, flags, &info->mti_uc); } } else if ((valid & OBD_MD_FLXATTRRM) == OBD_MD_FLXATTRRM) { - rc = mo_xattr_del(ctx, child, xattr_name); + rc = mo_xattr_del(ctx, child, xattr_name, &info->mti_uc); } else { CERROR("valid bits: "LPX64"\n", body->valid); rc = -EINVAL; @@ -241,5 +325,6 @@ int mdt_setxattr(struct mdt_thread_info *info) out_unlock: mdt_object_unlock(info, obj, lh, rc); out: + mdt_exit_ucred(info); return rc; } diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index fe20569..d87da4c 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -1892,6 +1892,28 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, GOTO(end_while, rc); } + if (!class_match_param(ptr, PARAM_ROOTSQUASH, NULL)) { + /* Change mds root_squash params */ + lustre_cfg_bufs_reset(&bufs, mti->mti_svname); + lustre_cfg_bufs_set(&bufs, 1, ptr, strlen(ptr)); + lcfg = lustre_cfg_new(LCFG_PARAM, &bufs); + rc = mgs_write_log_direct(obd, fsdb, mti->mti_svname, + mti->mti_svname, lcfg); + lustre_cfg_free(lcfg); + GOTO(end_while, rc); + } + + if (!class_match_param(ptr, PARAM_GSS_SUPPORT, NULL)) { + /* Change mdt gss_support params */ + 
lustre_cfg_bufs_reset(&bufs, mti->mti_svname); + lustre_cfg_bufs_set(&bufs, 1, ptr, strlen(ptr)); + lcfg = lustre_cfg_new(LCFG_PARAM, &bufs); + rc = mgs_write_log_direct(obd, fsdb, mti->mti_svname, + mti->mti_svname, lcfg); + lustre_cfg_free(lcfg); + GOTO(end_while, rc); + } + LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr); end_while: diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 3077228..f079d15 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -384,8 +384,8 @@ static const char *obd_connect_names[] = { "join_file", "getattr_by_fid", "no_oh_for_devices", - "local_1.8_client", - "remote_1.8_client", + "local_client", + "remote_client", "max_byte_per_rpc", "64bit_qdata", "fid_capability", @@ -846,6 +846,7 @@ int lprocfs_alloc_md_stats(struct obd_device *obd, LPROCFS_MD_OP_INIT(num_private_stats, stats, set_open_replay_data); LPROCFS_MD_OP_INIT(num_private_stats, stats, clear_open_replay_data); LPROCFS_MD_OP_INIT(num_private_stats, stats, set_lock_data); + LPROCFS_MD_OP_INIT(num_private_stats, stats, get_remote_perm); for (i = num_private_stats; i < num_stats; i++) { if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) { diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 4f24d10..b0ce61d 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1207,6 +1207,9 @@ struct lustre_sb_info *lustre_init_lsi(struct super_block *sb) /* Default umount style */ lsi->lsi_flags = LSI_UMOUNT_FAILOVER; + + lsi->lsi_lmd->lmd_nllu = NOBODY_UID; + lsi->lsi_lmd->lmd_nllg = NOBODY_GID; RETURN(lsi); } @@ -1892,6 +1895,12 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) if (rc) goto invalid; clear++; + } else if (strncmp(s1, "nllu=", 5) == 0) { + lmd->lmd_nllu = simple_strtoul(s1 + 5, NULL, 10); + clear++; + } else if (strncmp(s1, "nllg=", 5) == 0) { + lmd->lmd_nllg = simple_strtoul(s1 + 5, NULL, 10); + clear++; } else if (strncmp(s1, 
"sec", 3) == 0) { rc = lmd_parse_sec_opts(lmd, s1); if (rc) diff --git a/lustre/ptlrpc/gss/sec_gss.c b/lustre/ptlrpc/gss/sec_gss.c index f5f7018..4cbf891 100644 --- a/lustre/ptlrpc/gss/sec_gss.c +++ b/lustre/ptlrpc/gss/sec_gss.c @@ -1903,7 +1903,7 @@ int gss_svc_accept(struct ptlrpc_request *req) req->rq_auth_gss = 1; req->rq_auth_remote = grctx->src_ctx->gsc_remote; - req->rq_auth_usr_mds = grctx->src_ctx->gsc_usr_mds; + req->rq_auth_usr_mdt = grctx->src_ctx->gsc_usr_mds; req->rq_auth_usr_root = grctx->src_ctx->gsc_usr_root; req->rq_auth_uid = grctx->src_ctx->gsc_uid; req->rq_auth_mapped_uid = grctx->src_ctx->gsc_mapped_uid; diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 5869fcf..e17610a 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -1713,6 +1713,24 @@ void lustre_swab_obd_quotactl (struct obd_quotactl *q) lustre_swab_obd_dqblk (&q->qc_dqblk); } +void lustre_swab_mds_remote_perm (struct mds_remote_perm *p) +{ + __swab32s (&p->rp_uid); + __swab32s (&p->rp_gid); + __swab32s (&p->rp_fsuid); + __swab32s (&p->rp_fsgid); + __swab32s (&p->rp_access_perm); +}; + +void lustre_swab_mdt_remote_perm (struct mdt_remote_perm *p) +{ + __swab32s (&p->rp_uid); + __swab32s (&p->rp_gid); + __swab32s (&p->rp_fsuid); + __swab32s (&p->rp_fsgid); + __swab32s (&p->rp_access_perm); +}; + void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa) { __swab32s (&sa->sa_opcode); diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 882bdae..018bdfb 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -219,6 +219,8 @@ EXPORT_SYMBOL(lustre_swab_mds_body); EXPORT_SYMBOL(lustre_swab_mdt_body); EXPORT_SYMBOL(lustre_swab_mdt_epoch); EXPORT_SYMBOL(lustre_swab_obd_quotactl); +EXPORT_SYMBOL(lustre_swab_mds_remote_perm); +EXPORT_SYMBOL(lustre_swab_mdt_remote_perm); EXPORT_SYMBOL(lustre_swab_mds_rec_setattr); EXPORT_SYMBOL(lustre_swab_mdt_rec_setattr); 
EXPORT_SYMBOL(lustre_swab_mds_rec_create); diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh index adf0418..59eb56c 100644 --- a/lustre/tests/cfg/local.sh +++ b/lustre/tests/cfg/local.sh @@ -65,6 +65,8 @@ MOUNT=${MOUNT:-/mnt/${FSNAME}} MOUNT1=${MOUNT1:-$MOUNT} MOUNT2=${MOUNT2:-${MOUNT}2} MOUNTOPT=${MOUNTOPT:-"user_xattr,acl"} +[ "x$RMTCLIENT" != "x" ] && + MOUNTOPT=$MOUNTOPT",remote_client" DIR=${DIR:-$MOUNT} DIR1=${DIR:-$MOUNT1} DIR2=${DIR2:-$MOUNT2} diff --git a/lustre/tests/cfg/lov.sh b/lustre/tests/cfg/lov.sh index 56ca580..68d9ae1 100644 --- a/lustre/tests/cfg/lov.sh +++ b/lustre/tests/cfg/lov.sh @@ -59,6 +59,8 @@ MOUNT=${MOUNT:-/mnt/${FSNAME}} MOUNT1=${MOUNT1:-$MOUNT} MOUNT2=${MOUNT2:-${MOUNT}2} MOUNTOPT=${MOUNTOPT:-"user_xattr,"} +[ "x$RMTCLIENT" != "x" ] && + MOUNTOPT=$MOUNTOPT",remote_client" DIR=${DIR:-$MOUNT} DIR1=${DIR:-$MOUNT1} DIR2=${DIR2:-$MOUNT2} diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c index 4db7617..416d251 100644 --- a/lustre/tests/runas.c +++ b/lustre/tests/runas.c @@ -19,9 +19,11 @@ #endif static const char usage[] = -"Usage: %s -u user_id [-g grp_id] [-G[gid0,gid1,...]] command\n" +"Usage: %s -u user_id [-g grp_id] [-v euid] [-j egid] [-G[gid0,gid1,...]] command\n" " -u user_id switch to UID user_id\n" " -g grp_id switch to GID grp_id\n" +" -v euid switch euid to UID\n" +" -j egid switch egid to GID\n" " -G[gid0,gid1,...] 
set supplementary groups\n"; void Usage_and_abort(const char *name) @@ -37,6 +39,9 @@ int main(int argc, char **argv) int gid_is_set = 0, uid_is_set = 0, num_supp = -1; uid_t user_id = 0; gid_t grp_id = 0, supp_groups[NGROUPS_MAX] = { 0 }; + int euid_is_set = 0, egid_is_set = 0; + uid_t euid = 0; + gid_t egid = 0; if (argc == 1) { fprintf(stderr, "No parameter count\n"); @@ -44,7 +49,7 @@ int main(int argc, char **argv) } // get UID and GID - while ((c = getopt(argc, argv, "+u:g:hG::")) != -1) { + while ((c = getopt(argc, argv, "+u:g:v:j:hG::")) != -1) { switch (c) { case 'u': if (!isdigit(optarg[0])) { @@ -78,6 +83,36 @@ int main(int argc, char **argv) gid_is_set = 1; break; + case 'v': + if (!isdigit(optarg[0])) { + struct passwd *pw = getpwnam(optarg); + if (pw == NULL) { + fprintf(stderr, "parameter '%s' bad\n", + optarg); + Usage_and_abort(name); + } + euid = pw->pw_uid; + } else { + euid = (uid_t)atoi(optarg); + } + euid_is_set = 1; + break; + + case 'j': + if (!isdigit(optarg[0])) { + struct group *gr = getgrnam(optarg); + if (gr == NULL) { + fprintf(stderr, "getgrname %s failed\n", + optarg); + Usage_and_abort(name); + } + egid = gr->gr_gid; + } else { + egid = (gid_t)atoi(optarg); + } + egid_is_set = 1; + break; + case 'G': num_supp = 0; if (optarg == NULL || !isdigit(optarg[0])) @@ -126,10 +161,12 @@ int main(int argc, char **argv) #endif // set GID - status = setregid(grp_id, grp_id); + if (!egid_is_set) + egid = grp_id; + status = setregid(grp_id, egid); if (status == -1) { - fprintf(stderr, "Cannot change grp_ID to %d, errno=%d (%s)\n", - grp_id, errno, strerror(errno) ); + fprintf(stderr, "Cannot change gid to %d/%d, errno=%d (%s)\n", + grp_id, egid, errno, strerror(errno) ); exit(-1); } @@ -142,16 +179,19 @@ int main(int argc, char **argv) } // set UID - status = setreuid(user_id, user_id ); + if (!euid_is_set) + euid = user_id; + status = setreuid(user_id, euid); if(status == -1) { - fprintf(stderr,"Cannot change user_ID to %d, errno=%d (%s)\n", - 
user_id, errno, strerror(errno) ); + fprintf(stderr,"Cannot change uid to %d/%d, errno=%d (%s)\n", + user_id, euid, errno, strerror(errno) ); exit(-1); } - fprintf(stderr, "running as UID %d, GID %d", user_id, grp_id); + fprintf(stderr, "running as uid/gid/euid/egid %d/%d/%d/%d, groups:", + user_id, grp_id, euid, egid); for (i = 0; i < num_supp; i++) - fprintf(stderr, ":%d", supp_groups[i]); + fprintf(stderr, " %d", supp_groups[i]); fprintf(stderr, "\n"); for (i = 0; i < argc - optind; i++) diff --git a/lustre/tests/sanity-sec.sh b/lustre/tests/sanity-sec.sh new file mode 100644 index 0000000..27478d7 --- /dev/null +++ b/lustre/tests/sanity-sec.sh @@ -0,0 +1,285 @@ +#!/bin/bash +# +# Run select tests by setting ONLY, or as arguments to the script. +# Skip specific tests by setting EXCEPT. +# +# TODO: support rootsquash test +set -e + +SRCDIR=`dirname $0` +export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin + +ONLY=${ONLY:-"$*"} +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-""} +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +[ "$ALWAYS_EXCEPT$EXCEPT" ] && \ + echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT`" + +TMP=${TMP:-/tmp} + +LFS=${LFS:-lfs} +LCTL=${LCTL:-lctl} +RUNAS=${RUNAS:-runas} + +log() { + echo "$*" + $LCTL mark "$*" 2> /dev/null || true +} + +run_one() { + BEFORE=`date +%s` + log "== test $2= `date +%H:%M:%S` ($BEFORE)" + export TESTNAME=test_$1 + test_$1 || error "exit with rc=$?" + unset TESTNAME + pass "($((`date +%s` - $BEFORE))s)" +} + +build_test_filter() { + for O in $ONLY; do + eval ONLY_${O}=true + done + for E in $EXCEPT $ALWAYS_EXCEPT; do + eval EXCEPT_${E}=true + done +} + +_basetest() { + echo $* +} + +basetest() { + IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 +} + +run_test() { + base=`basetest $1` + if [ "$ONLY" ]; then + testname=ONLY_$1 + if [ ${!testname}x != x ]; then + run_one $1 "$2" + return $? + fi + testname=ONLY_$base + if [ ${!testname}x != x ]; then + run_one $1 "$2" + return $? 
+ fi + echo -n "." + return 0 + fi + testname=EXCEPT_$1 + if [ ${!testname}x != x ]; then + echo "skipping excluded test $1" + return 0 + fi + testname=EXCEPT_$base + if [ ${!testname}x != x ]; then + echo "skipping excluded test $1 (base $base)" + return 0 + fi + run_one $1 "$2" + return $? +} + +error() { + sysctl -w lustre.fail_loc=0 + log "FAIL: $TESTNAME $@" + exit 1 +} + +pass() { + echo PASS $@ +} + +mounted_lustre_filesystems() { + awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts +} +MOUNT="`mounted_lustre_filesystems`" +if [ -z "$MOUNT" ]; then + sh llmount.sh + MOUNT="`mounted_lustre_filesystems`" + [ -z "$MOUNT" ] && error "NAME=$NAME not mounted" + I_MOUNTED=yes +fi + +[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once" + +DIR=${DIR:-$MOUNT} +[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99 + +if [ -z "`lsmod|grep mds`" ]; then + echo "skipping $TESTNAME (remote MDS)" + exit 0 +fi + +LPROC=/proc/fs/lustre +LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1` +MDS=$(\ls $LPROC/mds 2> /dev/null | grep -v num_refs | tail -n 1) +TSTDIR="$MOUNT/remote_user_dir" +LUSTRE_CONF_DIR=/etc/lustre +SETXID_CONF=$LUSTRE_CONF_DIR/setxid.conf +IDENTITY_FLUSH=$LPROC/mds/$MDS/identity_flush +ROOTSQUASH_UID=$LPROC/mds/$MDS/rootsquash_uid +ROOTSQUASH_GID=$LPROC/mds/$MDS/rootsquash_gid +ROOTSQUASH_SKIPS=$LPROC/mds/$MDS/rootsquash_skips +KRB5_REALM=`cat /etc/krb5.conf |grep default_realm| awk '{ print $3 }'` +USER1=`cat /etc/passwd|grep :500:|cut -d: -f1` +USER2=`cat /etc/passwd|grep :501:|cut -d: -f1` + +build_test_filter + +setup() { + rm -f $SETXID_CONF + echo 1 > $IDENTITY_FLUSH + $RUNAS -u 500 ls $DIR + $RUNAS -u 501 ls $DIR +} +setup + +# run as different user +test_0() { + rm -rf $DIR/d0 + mkdir $DIR/d0 + + chown $USER1 $DIR/d0 || error + $RUNAS -u 500 ls $DIR || error + $RUNAS -u 500 touch $DIR/f0 && error + $RUNAS -u 500 touch $DIR/d0/f1 || error + $RUNAS -u 501 touch $DIR/d0/f2 && error + touch 
$DIR/d0/f3 || error + chown root $DIR/d0 + chgrp $USER1 $DIR/d0 + chmod 775 $DIR/d0 + $RUNAS -u 500 touch $DIR/d0/f4 || error + $RUNAS -u 501 touch $DIR/d0/f5 && error + touch $DIR/d0/f6 || error + + rm -rf $DIR/d0 +} +run_test 0 "uid permission =============================" + +# setuid/gid +test_1() { + rm -rf $DIR/d1 + mkdir $DIR/d1 + + chown $USER1 $DIR/d1 || error + $RUNAS -u 501 -v 500 touch $DIR/d1/f0 && error + echo "* 501 setuid" > $SETXID_CONF + echo "enable uid 501 setuid" + echo 1 > $IDENTITY_FLUSH + $RUNAS -u 501 -v 500 touch $DIR/d1/f1 || error + + chown root $DIR/d1 + chgrp $USER1 $DIR/d1 + chmod 770 $DIR/d1 + $RUNAS -u 501 -g 501 touch $DIR/d1/f2 && error + echo "* 501 setuid,setgid" > $SETXID_CONF + echo "enable uid 501 setuid,setgid" + echo 1 > $IDENTITY_FLUSH + $RUNAS -u 501 -g 501 -j 500 touch $DIR/d1/f3 || error + $RUNAS -u 501 -v 500 -g 501 -j 500 touch $DIR/d1/f4 || error + + rm -f $SETXID_CONF + rm -rf $DIR/d1 + echo 1 > $IDENTITY_FLUSH +} +run_test 1 "setuid/gid =============================" + +# lfs getfacl/setfacl +test_2() { + rm -rf $DIR/d2 + mkdir $DIR/d2 + chmod 755 $DIR/d2 + echo xxx > $DIR/d2/f0 + chmod 644 $DIR/d2/f0 + + $LFS getfacl $DIR/d2/f0 || error + $RUNAS -u 500 cat $DIR/d2/f0 || error + $RUNAS -u 500 touch $DIR/d2/f0 && error + + $LFS setfacl -m u:$USER1:w $DIR/d2/f0 || error + $LFS getfacl $DIR/d2/f0 || error + echo "set user $USER1 write permission on file $DIR/d2/fo" + $RUNAS -u 500 touch $DIR/d2/f0 || error + $RUNAS -u 500 cat $DIR/d2/f0 && error + + rm -rf $DIR/d2 +} +run_test 2 "lfs getfacl/setfacl =============================" + +# rootsquash +test_3() { + [ -n "$SEC" ] && echo "ignore rootsquash test for single node" && return + + $LCTL conf_param $MDS security.rootsquash.skips=none + while grep LNET_NID_ANY $ROOTSQUASH_SKIPS > /dev/null; do sleep 1; done + $LCTL conf_param $MDS security.rootsquash.uid=0 + while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done + $LCTL conf_param $MDS security.rootsquash.gid=0 + 
while [ "`cat $ROOTSQUASH_GID`" -ne 0 ]; do sleep 1; done + + rm -rf $DIR/d3 + mkdir $DIR/d3 + chown $USER1 $DIR/d3 + chmod 700 $DIR/d3 + $LCTL conf_param $MDS security.rootsquash.uid=500 + echo "set rootsquash uid = 500" + while [ "`cat $ROOTSQUASH_UID`" -ne 500 ]; do sleep 1; done + touch $DIR/f3_0 && error + touch $DIR/d3/f3_1 || error + + $LCTL conf_param $MDS security.rootsquash.uid=0 + echo "disable rootsquash" + while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done + chown root $DIR/d3 + chgrp $USER2 $DIR/d3 + chmod 770 $DIR/d3 + + $LCTL conf_param $MDS security.rootsquash.uid=500 + echo "set rootsquash uid = 500" + while [ "`cat $ROOTSQUASH_UID`" -ne 500 ]; do sleep 1; done + touch $DIR/d3/f3_2 && error + $LCTL conf_param $MDS security.rootsquash.gid=501 + echo "set rootsquash gid = 501" + while [ "`cat $ROOTSQUASH_GID`" -ne 501 ]; do sleep 1; done + touch $DIR/d3/f3_3 || error + + $LCTL conf_param $MDS security.rootsquash.skips=* + echo "add host in rootsquash skip list" + while ! grep LNET_NID_ANY $ROOTSQUASH_SKIPS > /dev/null; + do sleep 1; + done + touch $DIR/f3_4 || error + + $LCTL conf_param $MDS security.rootsquash.uid=0 + while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done + $LCTL conf_param $MDS security.rootsquash.gid=0 + while [ "`cat $ROOTSQUASH_GID`" -ne 0 ]; do sleep 1; done + $LCTL conf_param $MDS security.rootsquash.skips=none + rm -rf $DIR/d3 + rm -f $DIR/f3_? +} +run_test 3 "rootsquash =============================" + +# bug 3285 - supplementary group should always succeed (see do_init_ucred), +# NB: the supplementary groups are set for local client only, as for remote +# client, the groups of the specified uid on MDS will be obtained by +# upcall /sbin/l_getidentity and used. 
# Supplementary groups: uid 500 is not the owner of d4 and its primary gid is
# not 500 here, so listing the 771 directory only works if the supplementary
# group list passed via -G is honoured.
test_4() {
	# start from a clean slate like tests 0-3 do; a stale d4 would make
	# mkdir fail and, under "set -e", kill the script without a FAIL record
	rm -rf $DIR/d4
	mkdir $DIR/d4 || error "mkdir $DIR/d4 failed"

	chmod 771 $DIR/d4
	chgrp 500 $DIR/d4
	$RUNAS -u 500 -G1,2,500 ls $DIR/d4 || error "setgroups failed"

	rm -rf $DIR/d4
}
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "obdctl.h" + +static char *progname; + +static void usage(void) +{ + fprintf(stderr, + "\nusage: %s {mdsname} {ino} {handle} {cmd}\n" + "Normally invoked as an upcall from Lustre, set via:\n" + " /proc/fs/lustre/mds/{mdsname}/rmtacl_upcall\n", + progname); +} + +static inline void show_result(struct rmtacl_downcall_data *data) +{ + fprintf(stdout, "buflen %d\n\n%s\n", data->add_buflen, data->add_buf); +} + +#define MDS_ERR "server processing error" + +static void errlog(char *buf, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); +} + +static char *get_lustre_mount(void) +{ + FILE *fp; + struct mntent *mnt; + static char mntpath[PATH_MAX] = ""; + + fp = setmntent(MOUNTED, "r"); + if (fp == NULL) { + fprintf(stderr, "setmntent %s failed: %s\n", + MOUNTED, strerror(errno)); + return NULL; + } + + while (1) { + mnt = getmntent(fp); + if (!mnt) + break; + + if (!llapi_is_lustre_mnttype(mnt)) + continue; + + if (strstr(mnt->mnt_fsname, ":/lustre")) { + /* save the mountpoint dir part */ + strncpy(mntpath, mnt->mnt_dir, sizeof(mntpath)); + endmntent(fp); + return mntpath; + } + } + endmntent(fp); + + return NULL; +} + +int main(int argc, char **argv) +{ + struct rmtacl_downcall_data *data; + char procname[1024], *buf, *mntpath; + int out_pipe[2], err_pipe[2], pid, size, buflen, fd, rc; + + progname = basename(argv[0]); + + if (argc != 5) { + usage(); + return 1; + } + + size = offsetof(struct rmtacl_downcall_data, add_buf[RMTACL_SIZE_MAX]); + data = malloc(size); + if (!data) { + fprintf(stderr, "malloc %d failed\n", size); + return 1; + } + memset(data, 0, size); + data->add_magic = RMTACL_DOWNCALL_MAGIC; + data->add_ino = strtoll(argv[2], NULL, 10); + data->add_handle = strtoul(argv[3], NULL, 10); + buf = data->add_buf; + + mntpath = get_lustre_mount(); + if (!mntpath) { + errlog(buf, MDS_ERR"(no lustre mounted on MDS)\n"); + goto downcall; + } + + /* create pipe */ + if (pipe(out_pipe) < 0 || pipe(err_pipe) < 0) { + errlog(buf, MDS_ERR"(pipe failed): %s\n", strerror(errno)); + goto downcall; + } + + if ((pid = fork()) < 0) { + errlog(buf, MDS_ERR"(fork failed): %s\n", strerror(errno)); + goto downcall; + } else if (pid == 0) { + close(out_pipe[0]); + if (out_pipe[1] != STDOUT_FILENO) { + dup2(out_pipe[1], STDOUT_FILENO); + close(out_pipe[1]); + } + close(err_pipe[0]); + if (err_pipe[1] != STDERR_FILENO) { + dup2(err_pipe[1], STDERR_FILENO); + close(err_pipe[1]); + } + close(STDIN_FILENO); + + if 
(chdir(mntpath) < 0) { + fprintf(stderr, "chdir %s failed: %s\n", + mntpath, strerror(errno)); + return 1; + } + + execl("/bin/sh", "sh", "-c", argv[4], NULL); + fprintf(stderr, "execl %s failed: %s\n", + argv[4], strerror(errno)); + + return 1; + } + + /* parent process handling */ + close(out_pipe[1]); + close(err_pipe[1]); + + buflen = 0; + while (1) { + rc = read(out_pipe[0], buf + buflen, RMTACL_SIZE_MAX - buflen); + if (rc < 0) { + errlog(buf, MDS_ERR"(read failed): %s\n", + strerror(errno)); + break; + } + if (rc == 0) + break; + buflen += rc; + if (buflen >= RMTACL_SIZE_MAX) + break; + } + + if (buflen != 0) { + wait(&rc); + goto downcall; + } + + while (1) { + rc = read(err_pipe[0], buf + buflen, RMTACL_SIZE_MAX - buflen); + if (rc < 0) { + errlog(buf, MDS_ERR"(read failed): %s\n", + strerror(errno)); + break; + } + if (rc == 0) + break; + buflen += rc; + if (buflen >= RMTACL_SIZE_MAX) + break; + } + + wait(&rc); + +downcall: + buf[RMTACL_SIZE_MAX - 1] = 0; + data->add_buflen = strlen(buf) + 1; + if (getenv("L_FACL_TEST")) { + show_result(data); + free(data); + return 0; + } + + snprintf(procname, sizeof(procname), + "/proc/fs/lustre/mds/%s/rmtacl_info", argv[1]); + fd = open(procname, O_WRONLY); + if (fd < 0) { + fprintf(stderr, "open %s failed: %s\n", + procname, strerror(errno)); + free(data); + return 1; + } + + buflen = offsetof(struct rmtacl_downcall_data, + add_buf[data->add_buflen]); + rc = write(fd, data, buflen); + close(fd); + if (rc != buflen) { + fprintf(stderr, "write %s len %d return %d: %s\n", + procname, buflen, rc, strerror(errno)); + free(data); + return 1; + } + + free(data); + return 0; +} diff --git a/lustre/utils/l_getidentity.c b/lustre/utils/l_getidentity.c new file mode 100644 index 0000000..b0cf680 --- /dev/null +++ b/lustre/utils/l_getidentity.c @@ -0,0 +1,356 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. 
#define SETXID_PATHNAME "/etc/lustre/setxid.conf"

/* setxid permission file format is like this:
 * {nid} {uid} {perms}
 * the valid values for perms are setuid/setgid/setgrp, and they can be listed
 * together, separated by ','.
 */

static char *progname;

/* Print the command-line synopsis to stderr. */
static void usage(void)
{
        fprintf(stderr,
                "\nusage: %s {mdsname} {uid}\n"
                "Normally invoked as an upcall from Lustre, set via:\n"
                " /proc/fs/lustre/mds/{mdsname}/identity_upcall\n",
                progname);
}

/*
 * qsort() comparator for arrays of __u32.
 *
 * Returns <0, 0 or >0.  The values are compared rather than subtracted:
 * an unsigned difference converted to int has the wrong sign whenever the
 * operands are 2^31 or more apart (e.g. gid 0 vs. gid 4294967294).
 */
static int compare_u32(const void *v1, const void *v2)
{
        __u32 a = *(const __u32 *)v1;
        __u32 b = *(const __u32 *)v2;

        return (a > b) - (a < b);
}

/*
 * Log a printf-style message at LOG_NOTICE under the program name.
 *
 * The log is opened with LOG_PERROR, so syslog itself also writes the
 * message to stderr.  The former extra fprintf(stderr, fmt, args) handed a
 * va_list to a variadic function (undefined behaviour) and is dropped; the
 * message is formatted once into a local buffer instead.  Messages longer
 * than the buffer are truncated.
 */
static void errlog(const char *fmt, ...)
{
        char    msg[1024];
        va_list args;

        va_start(args, fmt);
        vsnprintf(msg, sizeof(msg), fmt, args);
        va_end(args);

        openlog(progname, LOG_PERROR, LOG_AUTHPRIV);
        syslog(LOG_NOTICE, "%s", msg);
        closelog();
}
errno : EIDRM; + return -1; + } + data->idd_gid = pw->pw_gid; + + maxgroups = sysconf(_SC_NGROUPS_MAX); + if (maxgroups > NGROUPS_MAX) + maxgroups = NGROUPS_MAX; + groups = data->idd_groups; + + groups[ngroups++] = pw->pw_gid; + while ((gr = getgrent())) { + if (gr->gr_gid == pw->pw_gid) + continue; + if (!gr->gr_mem) + continue; + for (i = 0; gr->gr_mem[i]; i++) { + if (!strcmp(gr->gr_mem[i], pw->pw_name)) { + groups[ngroups++] = gr->gr_gid; + break; + } + } + if (ngroups == maxgroups) + break; + } + endgrent(); + qsort(groups, ngroups, sizeof(*groups), compare_u32); + data->idd_ngroups = ngroups; + + return 0; +} + +static inline int comment_line(char *line) +{ + char *p = line; + + while (*p && (*p == ' ' || *p == '\t')) p++; + + if (!*p || *p == '\n' || *p == '#') + return 1; + return 0; +} + +static inline int match_uid(uid_t uid, const char *str) +{ + char *end; + uid_t uid2; + + uid2 = strtoul(str, &end, 0); + if (*end) + return 0; + + return (uid == uid2); +} + +static struct setxid_perm_type_t { + char *name; + __u32 bit; +} setxid_perm_types[] = { + { "setuid", LUSTRE_SETUID_PERM }, + { "setgid", LUSTRE_SETGID_PERM }, + { "setgrp", LUSTRE_SETGRP_PERM }, + { NULL }, +}; + +int parse_setxid_perm(__u32 *perm, char *str) +{ + char *start, *end; + char name[64]; + struct setxid_perm_type_t *pt; + + *perm = 0; + start = str; + while (1) { + memset(name, 0, sizeof(name)); + end = strchr(start, ','); + if (!end) + end = str + strlen(str); + if (start >= end) + break; + strncpy(name, start, end - start); + for (pt = setxid_perm_types; pt->name; pt++) { + if (!strcasecmp(name, pt->name)) { + *perm |= pt->bit; + break; + } + } + + if (!pt->name) { + printf("unkown perm type: %s\n", name); + return -1; + } + + start = end + 1; + } + return 0; +} + +int parse_setxid_perm_line(struct identity_downcall_data *data, char *line) +{ + char uid_str[256], nid_str[256], perm_str[256]; + lnet_nid_t nid; + __u32 perm; + struct setxid_perm_downcall_data *pdd = + 
&data->idd_perms[data->idd_nperms]; + int rc, i; + + if (data->idd_nperms >= N_SETXID_PERMS_MAX) { + errlog("setxid permission count %d > max %d\n", + data->idd_nperms, N_SETXID_PERMS_MAX); + return -1; + } + + rc = sscanf(line, "%s %s %s", nid_str, uid_str, perm_str); + if (rc != 3) { + errlog("can't parse line %s\n", line); + return -1; + } + + if (!match_uid(data->idd_uid, uid_str)) + return 0; + + if (!strcmp(nid_str, "*")) { + nid = LNET_NID_ANY; + } else { + nid = libcfs_str2nid(nid_str); + if (nid == LNET_NID_ANY) { + errlog("can't parse nid %s\n", nid_str); + return -1; + } + } + + if (parse_setxid_perm(&perm, perm_str)) { + errlog("invalid setxid perm %s\n", perm_str); + return -1; + } + + /* merge the perms with the same nid */ + for (i = 0; i < data->idd_nperms; i++) { + if (data->idd_perms[i].pdd_nid == nid) { + data->idd_perms[i].pdd_perm |= perm; + return 0; + } + } + + pdd->pdd_nid = nid; + pdd->pdd_perm = perm; + data->idd_nperms++; + return 0; +} + +int get_setxid_perms(FILE *fp, struct identity_downcall_data *data) +{ + char line[1024]; + + while (fgets(line, 1024, fp)) { + if (comment_line(line)) + continue; + + if (parse_setxid_perm_line(data, line)) { + errlog("parse line %s failed!\n", line); + return -1; + } + } + + return 0; +} + +static void show_result(struct identity_downcall_data *data) +{ + int i; + + if (data->idd_err) { + errlog("failed to get identity for uid %d: %s\n", + data->idd_uid, strerror(data->idd_err)); + return; + } + + printf("uid=%d gid=", data->idd_uid); + for (i = 0; i < data->idd_ngroups; i++) + printf("%s%u", i > 0 ? 
"," : "", data->idd_groups[i]); + printf("\n"); + printf("setxid permissions:\n" + " nid\t\t\tperm\n"); + for (i = 0; i < data->idd_nperms; i++) { + struct setxid_perm_downcall_data *pdd; + + pdd = &data->idd_perms[i]; + + printf(" %#llx\t0x%x\n", pdd->pdd_nid, pdd->pdd_perm); + } + printf("\n"); +} + +int main(int argc, char **argv) +{ + FILE *perms_fp; + char *end; + struct identity_downcall_data *data; + char procname[1024]; + unsigned long uid; + int fd, rc; + + progname = basename(argv[0]); + + if (argc != 3) { + usage(); + return 1; + } + + uid = strtoul(argv[2], &end, 0); + if (*end) { + errlog("%s: invalid uid '%s'\n", progname, argv[2]); + usage(); + return 1; + } + + data = malloc(sizeof(*data)); + if (!data) { + errlog("malloc identity downcall data(%d) failed!\n", + sizeof(*data)); + return 1; + } + memset(data, 0, sizeof(*data)); + data->idd_magic = IDENTITY_DOWNCALL_MAGIC; + data->idd_uid = uid; + + /* get groups for uid */ + rc = get_groups_local(data); + if (rc) + goto downcall; + + /* read permission database */ + perms_fp = fopen(SETXID_PATHNAME, "r"); + if (perms_fp) { + get_setxid_perms(perms_fp, data); + fclose(perms_fp); + } else if (errno != ENOENT) { + errlog("open %s failed: %s\n", + SETXID_PATHNAME, strerror(errno)); + } + +downcall: + if (getenv("L_GETIDENTITY_TEST")) { + show_result(data); + return 0; + } + + snprintf(procname, sizeof(procname), + "/proc/fs/lustre/mds/%s/identity_info", argv[1]); + fd = open(procname, O_WRONLY); + if (fd < 0) { + errlog("can't open file %s: %s\n", procname, strerror(errno)); + return 1; + } + + rc = write(fd, data, sizeof(*data)); + close(fd); + if (rc != sizeof(*data)) { + errlog("partial write ret %d: %s\n", rc, strerror(errno)); + return 1; + } + + return 0; +} diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 2e35469..8fa3500 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,8 @@ static int 
lfs_quota(int argc, char **argv); #endif static int lfs_flushctx(int argc, char **argv); static int lfs_join(int argc, char **argv); +static int lfs_getfacl(int argc, char **argv); +static int lfs_setfacl(int argc, char **argv); /* all avaialable commands */ command_t cmdlist[] = { @@ -134,6 +137,12 @@ command_t cmdlist[] = { #endif {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n" "usage: flushctx [-k] [mountpoint...]"}, + {"getfacl", lfs_getfacl, 0, + "Get file access control list in remote client.\n" + "usage: getfacl [-dRLPvh] file"}, + {"setfacl", lfs_setfacl, 0, + "Set file access control list in remote client.\n" + "usage: setfacl [-bkndRLPvh] [{-m|-x} acl_spec] [{-M|-X} acl_file] file"}, {"help", Parser_help, 0, "help"}, {"exit", Parser_quit, 0, "quit"}, {"quit", Parser_quit, 0, "quit"}, @@ -1567,6 +1576,119 @@ static int lfs_flushctx(int argc, char **argv) return rc; } +/* + * We assume one and only one filename is supplied as the + * last parameter. + */ +static int acl_cmd_parse(int argc, char **argv, char *fname, char *cmd) +{ + char *dname, *rpath = NULL; + char path[PATH_MAX], cwd[PATH_MAX]; + FILE *fp; + struct mntent *mnt; + int i; + + if (argc < 2) + return -1; + + /* FIXME the premise is there is no sub-mounted filesystems under this + * mounted lustre tree. */ + strncpy(fname, argv[argc - 1], PATH_MAX); + + /* get path prefix */ + dname = dirname(fname); + + /* try to resolve the pathname into relative to the root of the mounted + * lustre filesystem. 
+ */ + if (getcwd(cwd, sizeof(cwd)) == NULL) { + fprintf(stderr, "getcwd %s failed: %s\n", cwd, strerror(errno)); + return -1; + } + + if (chdir(dname) == -1) { + fprintf(stderr, "chdir to %s failed: %s\n", + dname, strerror(errno)); + return -1; + } + + if (getcwd(path, sizeof(path)) == NULL) { + fprintf(stderr, "getcwd %s: %s\n", path, strerror(errno)); + return -1; + } + + if (chdir(cwd) == -1) { + fprintf(stderr, "chdir back to %s: %s\n", + cwd, strerror(errno)); + return -1; + } + + strncat(path, "/", PATH_MAX); + strncpy(fname, argv[argc - 1], PATH_MAX); + strncat(path, basename(fname), PATH_MAX); + + fp = setmntent(MOUNTED, "r"); + if (fp == NULL) { + fprintf(stderr, "setmntent %s failed: %s\n", + MOUNTED, strerror(errno)); + return -1; + } + + while (1) { + mnt = getmntent(fp); + if (!mnt) + break; + + if (!llapi_is_lustre_mnttype(mnt)) + continue; + + if (!strncmp(mnt->mnt_dir, path, strlen(mnt->mnt_dir))) { + rpath = path + strlen(mnt->mnt_dir); + break; + } + } + endmntent(fp); + + /* remove char '/' from rpath to be a relative path */ + while (rpath && *rpath == '/') rpath++; + + if (!rpath) { + fprintf(stderr, + "%s: file %s doesn't belong to a lustre file system!\n", + argv[0], argv[argc - 1]); + return -1; + } + + for (i = 0; i < argc - 1; i++) { + strncat(cmd, argv[i], PATH_MAX); + strncat(cmd, " ", PATH_MAX); + } + strncat(cmd, *rpath ? 
rpath : ".", PATH_MAX); + strncpy(fname, argv[argc - 1], sizeof(fname)); + + return 0; +} + +static int lfs_getfacl(int argc, char **argv) +{ + char fname[PATH_MAX] = "", cmd[PATH_MAX] = ""; + + if (acl_cmd_parse(argc, argv, fname, cmd)) + return CMD_HELP; + + return llapi_getfacl(fname, cmd); +} + +static int lfs_setfacl(int argc, char **argv) +{ + char fname[PATH_MAX] = "", cmd[PATH_MAX] = ""; + + if (acl_cmd_parse(argc, argv, fname, cmd)) + return CMD_HELP; + + return llapi_setfacl(fname, cmd); +} + int main(int argc, char **argv) { int rc; diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 53f5c67..1499ca0 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -56,7 +56,6 @@ #include #include #include -#include static void err_msg(char *fmt, ...) { @@ -1253,3 +1252,59 @@ out: find_param_fini(¶m); return ret; } + +int llapi_getfacl(char *fname, char *cmd) +{ + struct rmtacl_ioctl_data data; + char out[RMTACL_SIZE_MAX] = ""; + int fd, rc; + + data.cmd = cmd; + data.cmd_len = strlen(cmd) + 1; + data.res = out; + data.res_len = sizeof(out); + + fd = open(fname, 0); + if (fd == -1) { + err_msg("open %s failed", fname); + return -1; + } + + rc = ioctl(fd, LL_IOC_GETFACL, &data); + close(fd); + if (rc) { + err_msg("getfacl %s failed", fname); + } else { + printf("%s", out); + } + + return rc; +} + +int llapi_setfacl(char *fname, char *cmd) +{ + struct rmtacl_ioctl_data data; + char out[RMTACL_SIZE_MAX] = ""; + int fd, rc; + + data.cmd = cmd; + data.cmd_len = strlen(cmd) + 1; + data.res = out; + data.res_len = sizeof(out); + + fd = open(fname, 0); + if (fd == -1) { + err_msg("open %s failed", fname); + return -1; + } + + rc = ioctl(fd, LL_IOC_SETFACL, &data); + close(fd); + if (rc) { + err_msg("setfacl %s failed", fname); + } else { + printf("%s", out); + } + + return rc; +} -- 1.8.3.1