Whamcloud - gitweb
(1) Remote/local user
author: fanyong <fanyong>
Sun, 24 Sep 2006 14:11:02 +0000 (14:11 +0000)
committer: fanyong <fanyong>
Sun, 24 Sep 2006 14:11:02 +0000 (14:11 +0000)
(2) Permission check

73 files changed:
lustre/cmm/cmm_device.c
lustre/cmm/cmm_internal.h
lustre/cmm/cmm_object.c
lustre/cmm/cmm_split.c
lustre/cmm/mdc_internal.h
lustre/cmm/mdc_object.c
lustre/include/linux/lvfs.h
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre/lustre_user.h
lustre/include/lustre_cfg.h
lustre/include/lustre_disk.h
lustre/include/lustre_dlm.h
lustre/include/lustre_export.h
lustre/include/lustre_lib.h
lustre/include/lustre_mdt.h
lustre/include/lustre_net.h
lustre/include/lustre_param.h
lustre/include/lustre_ucache.h
lustre/include/md_object.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/include/obd_support.h
lustre/ldlm/ldlm_lockd.c
lustre/llite/Makefile.in
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/llite/remote_perm.c [new file with mode: 0644]
lustre/llite/super.c
lustre/llite/super25.c
lustre/lmv/lmv_obd.c
lustre/lvfs/lvfs_linux.c
lustre/lvfs/upcall_cache.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_request.c
lustre/mdd/mdd_handler.c
lustre/mdd/mdd_internal.h
lustre/mdd/mdd_lov.c
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_lib.c
lustre/mds/mds_reint.c
lustre/mds/mds_xattr.c
lustre/mdt/Makefile.in
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_identity.c [new file with mode: 0644]
lustre/mdt/mdt_idmap.c [new file with mode: 0644]
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_lib.c
lustre/mdt/mdt_open.c
lustre/mdt/mdt_recovery.c
lustre/mdt/mdt_reint.c
lustre/mdt/mdt_rmtacl.c [new file with mode: 0644]
lustre/mdt/mdt_xattr.c
lustre/mgs/mgs_llog.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/obd_mount.c
lustre/ptlrpc/gss/sec_gss.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_module.c
lustre/tests/cfg/local.sh
lustre/tests/cfg/lov.sh
lustre/tests/runas.c
lustre/tests/sanity-sec.sh [new file with mode: 0644]
lustre/utils/Makefile.am
lustre/utils/l_facl.c [new file with mode: 0644]
lustre/utils/l_getidentity.c [new file with mode: 0644]
lustre/utils/lfs.c
lustre/utils/liblustreapi.c

index 2b6c74e..8a996aa 100644 (file)
@@ -51,37 +51,37 @@ static inline int lu_device_is_cmm(struct lu_device *d)
        return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &cmm_lu_ops);
 }
 
-static int cmm_root_get(const struct lu_context *ctx, struct md_device *md,
-                 struct lu_fid *fid)
+int cmm_root_get(const struct lu_context *ctx, struct md_device *md,
+                 struct lu_fid *fid, struct md_ucred *uc)
 {
         struct cmm_device *cmm_dev = md2cmm_dev(md);
         /* valid only on master MDS */
         if (cmm_dev->cmm_local_num == 0)
                 return cmm_child_ops(cmm_dev)->mdo_root_get(ctx,
-                                     cmm_dev->cmm_child, fid);
+                                     cmm_dev->cmm_child, fid, uc);
         else
                 return -EINVAL;
 }
 
 static int cmm_statfs(const struct lu_context *ctxt, struct md_device *md,
-                      struct kstatfs *sfs) {
+                      struct kstatfs *sfs, struct md_ucred *uc) {
         struct cmm_device *cmm_dev = md2cmm_dev(md);
        int rc;
 
         ENTRY;
         rc = cmm_child_ops(cmm_dev)->mdo_statfs(ctxt,
-                                                cmm_dev->cmm_child, sfs);
+                                                cmm_dev->cmm_child, sfs, uc);
         RETURN (rc);
 }
 
 static int cmm_maxsize_get(const struct lu_context *ctxt, struct md_device *md,
-                           int *md_size, int *cookie_size)
+                           int *md_size, int *cookie_size, struct md_ucred *uc)
 {
         struct cmm_device *cmm_dev = md2cmm_dev(md);
         int rc;
         ENTRY;
-        rc = cmm_child_ops(cmm_dev)->mdo_maxsize_get(ctxt,
-                                     cmm_dev->cmm_child, md_size, cookie_size);
+        rc = cmm_child_ops(cmm_dev)->mdo_maxsize_get(ctxt, cmm_dev->cmm_child,
+                                                     md_size, cookie_size, uc);
         RETURN(rc);
 }
 
index a429981..8e4a15b 100644 (file)
@@ -127,7 +127,8 @@ int cmm_upcall(const struct lu_context *ctxt, struct md_device *md,
                enum md_upcall_event ev);
 #ifdef HAVE_SPLIT_SUPPORT
 /* cmm_split.c */
-int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo);
+int cml_try_to_split(const struct lu_context *ctx,
+                     struct md_object *mo, struct md_ucred *uc);
 #endif
 
 #endif /* __KERNEL__ */
index 1b88938..77f8115 100644 (file)
@@ -209,122 +209,136 @@ static struct lu_object_operations cml_obj_ops = {
 static int cml_object_create(const struct lu_context *ctx,
                              struct md_object *mo,
                              const struct md_create_spec *spec,
-                             struct md_attr *attr)
+                             struct md_attr *attr,
+                             struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_object_create(ctx, md_object_next(mo), spec, attr);
+        rc = mo_object_create(ctx, md_object_next(mo), spec, attr, uc);
+        RETURN(rc);
+}
+
+static int cml_permission(const struct lu_context *ctx,
+                        struct md_object *mo, int mask, struct md_ucred *uc)
+{
+        int rc;
+        ENTRY;
+        rc = mo_permission(ctx, md_object_next(mo), mask, uc);
         RETURN(rc);
 }
 
 static int cml_attr_get(const struct lu_context *ctx, struct md_object *mo,
-                        struct md_attr *attr)
+                        struct md_attr *attr, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_attr_get(ctx, md_object_next(mo), attr);
+        rc = mo_attr_get(ctx, md_object_next(mo), attr, uc);
         RETURN(rc);
 }
 
 static int cml_attr_set(const struct lu_context *ctx, struct md_object *mo,
-                        const struct md_attr *attr)
+                        const struct md_attr *attr, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_attr_set(ctx, md_object_next(mo), attr);
+        rc = mo_attr_set(ctx, md_object_next(mo), attr, uc);
         RETURN(rc);
 }
 
 static int cml_xattr_get(const struct lu_context *ctx, struct md_object *mo,
-                         void *buf, int buflen, const char *name)
+                         void *buf, int buflen, const char *name,
+                         struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_xattr_get(ctx, md_object_next(mo), buf, buflen, name);
+        rc = mo_xattr_get(ctx, md_object_next(mo), buf, buflen, name, uc);
         RETURN(rc);
 }
 
 static int cml_readlink(const struct lu_context *ctx, struct md_object *mo,
-                        void *buf, int buflen)
+                        void *buf, int buflen, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_readlink(ctx, md_object_next(mo), buf, buflen);
+        rc = mo_readlink(ctx, md_object_next(mo), buf, buflen, uc);
         RETURN(rc);
 }
 
 static int cml_xattr_list(const struct lu_context *ctx, struct md_object *mo,
-                          void *buf, int buflen)
+                          void *buf, int buflen, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_xattr_list(ctx, md_object_next(mo), buf, buflen);
+        rc = mo_xattr_list(ctx, md_object_next(mo), buf, buflen, uc);
         RETURN(rc);
 }
 
 static int cml_xattr_set(const struct lu_context *ctx, struct md_object *mo,
-                         const void *buf, int buflen, const char *name, int fl)
+                         const void *buf, int buflen,
+                         const char *name, int fl, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_xattr_set(ctx, md_object_next(mo), buf, buflen, name, fl);
+        rc = mo_xattr_set(ctx, md_object_next(mo), buf, buflen, name, fl, uc);
         RETURN(rc);
 }
 
 static int cml_xattr_del(const struct lu_context *ctx, struct md_object *mo,
-                         const char *name)
+                         const char *name, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_xattr_del(ctx, md_object_next(mo), name);
+        rc = mo_xattr_del(ctx, md_object_next(mo), name, uc);
         RETURN(rc);
 }
 
-static int cml_ref_add(const struct lu_context *ctx, struct md_object *mo)
+static int cml_ref_add(const struct lu_context *ctx, struct md_object *mo,
+                       struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_ref_add(ctx, md_object_next(mo));
+        rc = mo_ref_add(ctx, md_object_next(mo), uc);
         RETURN(rc);
 }
 
 static int cml_ref_del(const struct lu_context *ctx, struct md_object *mo,
-                       struct md_attr *ma)
+                       struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_ref_del(ctx, md_object_next(mo), ma);
+        rc = mo_ref_del(ctx, md_object_next(mo), ma, uc);
         RETURN(rc);
 }
 
 static int cml_open(const struct lu_context *ctx, struct md_object *mo,
-                    int flags)
+                    int flags, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_open(ctx, md_object_next(mo), flags);
+        rc = mo_open(ctx, md_object_next(mo), flags, uc);
         RETURN(rc);
 }
 
 static int cml_close(const struct lu_context *ctx, struct md_object *mo,
-                     struct md_attr *ma)
+                     struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_close(ctx, md_object_next(mo), ma);
+        rc = mo_close(ctx, md_object_next(mo), ma, uc);
         RETURN(rc);
 }
 
 static int cml_readpage(const struct lu_context *ctxt, struct md_object *mo,
-                        const struct lu_rdpg *rdpg)
+                        const struct lu_rdpg *rdpg, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mo_readpage(ctxt, md_object_next(mo), rdpg);
+        rc = mo_readpage(ctxt, md_object_next(mo), rdpg, uc);
         RETURN(rc);
 }
 
 static struct md_object_operations cml_mo_ops = {
+        .moo_permission    = cml_permission,
         .moo_attr_get      = cml_attr_get,
         .moo_attr_set      = cml_attr_set,
         .moo_xattr_get     = cml_xattr_get,
@@ -342,67 +356,67 @@ static struct md_object_operations cml_mo_ops = {
 
 /* md_dir operations */
 static int cml_lookup(const struct lu_context *ctx, struct md_object *mo_p,
-                      const char *name, struct lu_fid *lf)
+                      const char *name, struct lu_fid *lf, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
-        rc = mdo_lookup(ctx, md_object_next(mo_p), name, lf);
+        rc = mdo_lookup(ctx, md_object_next(mo_p), name, lf, uc);
         RETURN(rc);
 
 }
 
-static int cml_create(const struct lu_context *ctx, struct md_object *mo_p,
-                      const char *child_name, struct md_object *mo_c,
-                      const struct md_create_spec *spec,
-                      struct md_attr *ma)
+static int cml_create(const struct lu_context *ctx,
+                      struct md_object *mo_p, const char *child_name,
+                      struct md_object *mo_c, const struct md_create_spec *spec,
+                      struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
 
 #ifdef HAVE_SPLIT_SUPPORT
-        rc = cml_try_to_split(ctx, mo_p);
+        rc = cml_try_to_split(ctx, mo_p, uc);
         if (rc)
                 RETURN(rc);
 #endif
 
         rc = mdo_create(ctx, md_object_next(mo_p), child_name,
-                        md_object_next(mo_c), spec, ma);
+                        md_object_next(mo_c), spec, ma, uc);
 
 
         RETURN(rc);
 }
 
-static int cml_create_data(const struct lu_context *ctx,
-                           struct md_object *p, struct md_object *o,
+static int cml_create_data(const struct lu_context *ctx, struct md_object *p,
+                           struct md_object *o,
                            const struct md_create_spec *spec,
-                           struct md_attr *ma)
+                           struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
         rc = mdo_create_data(ctx, md_object_next(p), md_object_next(o),
-                             spec, ma);
+                             spec, ma, uc);
         RETURN(rc);
 }
 
 static int cml_link(const struct lu_context *ctx, struct md_object *mo_p,
                     struct md_object *mo_s, const char *name,
-                    struct md_attr *ma)
+                    struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
         rc = mdo_link(ctx, md_object_next(mo_p), md_object_next(mo_s),
-                      name, ma);
+                      name, ma, uc);
         RETURN(rc);
 }
 
 static int cml_unlink(const struct lu_context *ctx, struct md_object *mo_p,
                       struct md_object *mo_c, const char *name,
-                      struct md_attr *ma)
+                      struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
         rc = mdo_unlink(ctx, md_object_next(mo_p), md_object_next(mo_c),
-                        name, ma);
+                        name, ma, uc);
         RETURN(rc);
 }
 
@@ -426,7 +440,8 @@ struct md_object *md_object_find(const struct lu_context *ctx,
 }
 
 static int __cmm_mode_get(const struct lu_context *ctx, struct md_device *md,
-                          const struct lu_fid *lf, struct md_attr *ma)
+                          const struct lu_fid *lf, struct md_attr *ma,
+                          struct md_ucred *uc)
 {
         struct cmm_thread_info *cmi;
         struct md_object *mo_s = md_object_find(ctx, md, lf);
@@ -443,7 +458,7 @@ static int __cmm_mode_get(const struct lu_context *ctx, struct md_device *md,
         tmp_ma->ma_need = MA_INODE;
         
         /* get type from src, can be remote req */
-        rc = mo_attr_get(ctx, md_object_next(mo_s), tmp_ma);
+        rc = mo_attr_get(ctx, md_object_next(mo_s), tmp_ma, uc);
         if (rc == 0) {
                 ma->ma_attr.la_mode = tmp_ma->ma_attr.la_mode;
                 ma->ma_attr.la_flags = tmp_ma->ma_attr.la_flags;
@@ -454,74 +469,76 @@ static int __cmm_mode_get(const struct lu_context *ctx, struct md_device *md,
 }
 
 static int cml_rename(const struct lu_context *ctx, struct md_object *mo_po,
-                       struct md_object *mo_pn, const struct lu_fid *lf,
-                       const char *s_name, struct md_object *mo_t,
-                       const char *t_name, struct md_attr *ma)
+                      struct md_object *mo_pn, const struct lu_fid *lf,
+                      const char *s_name, struct md_object *mo_t,
+                      const char *t_name, struct md_attr *ma,
+                      struct md_ucred *uc)
 {
         int rc;
         ENTRY;
 
-        rc = __cmm_mode_get(ctx, md_obj2dev(mo_po), lf, ma);
+        rc = __cmm_mode_get(ctx, md_obj2dev(mo_po), lf, ma, uc);
         if (rc != 0)
                 RETURN(rc);
 
         if (mo_t && lu_object_exists(&mo_t->mo_lu) < 0) {
                 /* mo_t is remote object and there is RPC to unlink it */
-                rc = mo_ref_del(ctx, md_object_next(mo_t), ma);
+                rc = mo_ref_del(ctx, md_object_next(mo_t), ma, uc);
                 if (rc)
                         RETURN(rc);
                 mo_t = NULL;
         }
-        
+
         /* local rename, mo_t can be NULL */
         rc = mdo_rename(ctx, md_object_next(mo_po),
                         md_object_next(mo_pn), lf, s_name,
-                        md_object_next(mo_t), t_name, ma);
+                        md_object_next(mo_t), t_name, ma, uc);
         RETURN(rc);
 }
 
-static int cml_rename_tgt(const struct lu_context *ctx,
-                          struct md_object *mo_p, struct md_object *mo_t,
-                          const struct lu_fid *lf, const char *name,
-                          struct md_attr *ma)
+static int cml_rename_tgt(const struct lu_context *ctx, struct md_object *mo_p,
+                          struct md_object *mo_t, const struct lu_fid *lf,
+                          const char *name, struct md_attr *ma,
+                          struct md_ucred *uc)
 {
         int rc;
         ENTRY;
 
         rc = mdo_rename_tgt(ctx, md_object_next(mo_p),
-                            md_object_next(mo_t), lf, name, ma);
+                            md_object_next(mo_t), lf, name, ma, uc);
         RETURN(rc);
 }
 /* used only in case of rename_tgt() when the target does not exist */
-static int cml_name_insert(const struct lu_context *ctx,
-                           struct md_object *p, const char *name,
-                           const struct lu_fid *lf, int isdir)
+static int cml_name_insert(const struct lu_context *ctx, struct md_object *p,
+                           const char *name, const struct lu_fid *lf, int isdir,
+                           struct md_ucred *uc)
 {
         int rc;
         ENTRY;
 
-        rc = mdo_name_insert(ctx, md_object_next(p), name, lf, isdir);
+        rc = mdo_name_insert(ctx, md_object_next(p), name, lf, isdir, uc);
 
         RETURN(rc);
 }
 
 /* Common method for remote and local use. */
 static int cmm_is_subdir(const struct lu_context *ctx, struct md_object *mo,
-                         const struct lu_fid *fid, struct lu_fid *sfid)
+                         const struct lu_fid *fid, struct lu_fid *sfid,
+                         struct md_ucred *uc)
 {
         struct cmm_thread_info *cmi;
         int rc;
         ENTRY;
 
         cmi = lu_context_key_get(ctx, &cmm_thread_key);
-        rc = __cmm_mode_get(ctx, md_obj2dev(mo), fid, &cmi->cmi_ma);
+        rc = __cmm_mode_get(ctx, md_obj2dev(mo), fid, &cmi->cmi_ma, uc);
         if (rc)
                 RETURN(rc);
 
         if (!S_ISDIR(cmi->cmi_ma.ma_attr.la_mode))
                 RETURN(0);
         
-        rc = mdo_is_subdir(ctx, md_object_next(mo), fid, sfid);
+        rc = mdo_is_subdir(ctx, md_object_next(mo), fid, sfid, uc);
         RETURN(rc);
 }
 
@@ -621,83 +638,94 @@ static struct lu_object_operations cmr_obj_ops = {
 static int cmr_object_create(const struct lu_context *ctx,
                              struct md_object *mo,
                              const struct md_create_spec *spec,
-                             struct md_attr *ma)
+                             struct md_attr *ma,
+                             struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
+static int cmr_permission(const struct lu_context *ctx, struct md_object *mo,
+                          int mask, struct md_ucred *uc)
+{
+        RETURN(-EREMOTE);
+}
+
 static int cmr_attr_get(const struct lu_context *ctx, struct md_object *mo,
-                        struct md_attr *attr)
+                        struct md_attr *attr, struct md_ucred *uc)
 {
         RETURN(-EREMOTE);
 }
 
 static int cmr_attr_set(const struct lu_context *ctx, struct md_object *mo,
-                        const struct md_attr *attr)
+                        const struct md_attr *attr, struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
 static int cmr_xattr_get(const struct lu_context *ctx, struct md_object *mo,
-                         void *buf, int buflen, const char *name)
+                         void *buf, int buflen, const char *name,
+                         struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
 static int cmr_readlink(const struct lu_context *ctx, struct md_object *mo,
-                        void *buf, int buflen)
+                        void *buf, int buflen, struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
 static int cmr_xattr_list(const struct lu_context *ctx, struct md_object *mo,
-                          void *buf, int buflen)
+                          void *buf, int buflen, struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
 static int cmr_xattr_set(const struct lu_context *ctx, struct md_object *mo,
-                         const void *buf, int buflen, const char *name, int fl)
+                         const void *buf, int buflen, const char *name, int fl,
+                         struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
 static int cmr_xattr_del(const struct lu_context *ctx, struct md_object *mo,
-                         const char *name)
+                         const char *name, struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
-static int cmr_ref_add(const struct lu_context *ctx, struct md_object *mo)
+static int cmr_ref_add(const struct lu_context *ctx, struct md_object *mo,
+                       struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
 static int cmr_ref_del(const struct lu_context *ctx, struct md_object *mo,
-                       struct md_attr *ma)
+                       struct md_attr *ma, struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
 static int cmr_open(const struct lu_context *ctx, struct md_object *mo,
-                    int flags)
+                    int flags, struct md_ucred *uc)
 {
         RETURN(-EREMOTE);
 }
 
 static int cmr_close(const struct lu_context *ctx, struct md_object *mo,
-                     struct md_attr *ma)
+                     struct md_attr *ma, struct md_ucred *uc)
 {
         RETURN(-EFAULT);
 }
 
 static int cmr_readpage(const struct lu_context *ctxt, struct md_object *mo,
-                        const struct lu_rdpg *rdpg)
+                        const struct lu_rdpg *rdpg, struct md_ucred *uc)
 {
         RETURN(-EREMOTE);
 }
 
 static struct md_object_operations cmr_mo_ops = {
+        .moo_permission    = cmr_permission,
         .moo_attr_get      = cmr_attr_get,
         .moo_attr_set      = cmr_attr_set,
         .moo_xattr_get     = cmr_xattr_get,
@@ -715,7 +743,7 @@ static struct md_object_operations cmr_mo_ops = {
 
 /* remote part of md_dir operations */
 static int cmr_lookup(const struct lu_context *ctx, struct md_object *mo_p,
-                      const char *name, struct lu_fid *lf)
+                      const char *name, struct lu_fid *lf, struct md_ucred *uc)
 {
         /*
          * This can happen during rename(). If new parent is remote dir, lookup
@@ -738,7 +766,7 @@ static int cmr_lookup(const struct lu_context *ctx, struct md_object *mo_p,
 static int cmr_create(const struct lu_context *ctx, struct md_object *mo_p,
                       const char *child_name, struct md_object *mo_c,
                       const struct md_create_spec *spec,
-                      struct md_attr *ma)
+                      struct md_attr *ma, struct md_ucred *uc)
 {
         struct cmm_thread_info *cmi;
         struct md_attr *tmp_ma;
@@ -750,7 +778,7 @@ static int cmr_create(const struct lu_context *ctx, struct md_object *mo_p,
         LASSERT(cmi);
         tmp_ma = &cmi->cmi_ma;
         tmp_ma->ma_need = MA_INODE;
-        rc = mo_attr_get(ctx, md_object_next(mo_p), tmp_ma);
+        rc = mo_attr_get(ctx, md_object_next(mo_p), tmp_ma, uc);
         if (rc)
                 RETURN(rc);
         
@@ -762,11 +790,11 @@ static int cmr_create(const struct lu_context *ctx, struct md_object *mo_p,
                 }
         }
         /* remote object creation and local name insert */
-        rc = mo_object_create(ctx, md_object_next(mo_c), spec, ma);
+        rc = mo_object_create(ctx, md_object_next(mo_c), spec, ma, uc);
         if (rc == 0) {
                 rc = mdo_name_insert(ctx, md_object_next(mo_p),
                                      child_name, lu_object_fid(&mo_c->mo_lu),
-                                     S_ISDIR(ma->ma_attr.la_mode));
+                                     S_ISDIR(ma->ma_attr.la_mode), uc);
         }
 
         RETURN(rc);
@@ -774,17 +802,17 @@ static int cmr_create(const struct lu_context *ctx, struct md_object *mo_p,
 
 static int cmr_link(const struct lu_context *ctx, struct md_object *mo_p,
                     struct md_object *mo_s, const char *name,
-                    struct md_attr *ma)
+                    struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
 
         //XXX: make sure that MDT checks name isn't exist
 
-        rc = mo_ref_add(ctx, md_object_next(mo_s));
+        rc = mo_ref_add(ctx, md_object_next(mo_s), uc);
         if (rc == 0) {
                 rc = mdo_name_insert(ctx, md_object_next(mo_p),
-                                     name, lu_object_fid(&mo_s->mo_lu), 0);
+                                     name, lu_object_fid(&mo_s->mo_lu), 0, uc);
         }
 
         RETURN(rc);
@@ -792,30 +820,30 @@ static int cmr_link(const struct lu_context *ctx, struct md_object *mo_p,
 
 static int cmr_unlink(const struct lu_context *ctx, struct md_object *mo_p,
                       struct md_object *mo_c, const char *name,
-                      struct md_attr *ma)
+                      struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
 
-        rc = mo_ref_del(ctx, md_object_next(mo_c), ma);
+        rc = mo_ref_del(ctx, md_object_next(mo_c), ma, uc);
         if (rc == 0) {
-                rc = mdo_name_remove(ctx, md_object_next(mo_p),
-                                     name);
+                rc = mdo_name_remove(ctx, md_object_next(mo_p), name, uc);
         }
 
         RETURN(rc);
 }
 
-static int cmr_rename(const struct lu_context *ctx, struct md_object *mo_po,
-                      struct md_object *mo_pn, const struct lu_fid *lf,
-                      const char *s_name, struct md_object *mo_t,
-                      const char *t_name, struct md_attr *ma)
+static int cmr_rename(const struct lu_context *ctx,
+                      struct md_object *mo_po, struct md_object *mo_pn,
+                      const struct lu_fid *lf, const char *s_name,
+                      struct md_object *mo_t, const char *t_name,
+                      struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
         
         /* get real type of src */
-        rc = __cmm_mode_get(ctx, md_obj2dev(mo_po), lf, ma);
+        rc = __cmm_mode_get(ctx, md_obj2dev(mo_po), lf, ma, uc);
         if (rc != 0)
                 RETURN(rc);
 
@@ -824,11 +852,11 @@ static int cmr_rename(const struct lu_context *ctx, struct md_object *mo_po,
          * mo_t or not. Therefore mo_t is NULL here but remote server should do
          * lookup and process this further */
         rc = mdo_rename_tgt(ctx, md_object_next(mo_pn),
-                            NULL/* mo_t */, lf, t_name, ma);
+                            NULL/* mo_t */, lf, t_name, ma, uc);
         /* only old name is removed locally */
         if (rc == 0)
                 rc = mdo_name_remove(ctx, md_object_next(mo_po),
-                                     s_name);
+                                     s_name, uc);
 
         RETURN(rc);
 }
@@ -838,16 +866,16 @@ static int cmr_rename(const struct lu_context *ctx, struct md_object *mo_po,
 static int cmr_rename_tgt(const struct lu_context *ctx,
                           struct md_object *mo_p, struct md_object *mo_t,
                           const struct lu_fid *lf, const char *name,
-                          struct md_attr *ma)
+                          struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         ENTRY;
         /* target object is remote one */
-        rc = mo_ref_del(ctx, md_object_next(mo_t), ma);
+        rc = mo_ref_del(ctx, md_object_next(mo_t), ma, uc);
         /* continue locally with name handling only */
         if (rc == 0)
                 rc = mdo_rename_tgt(ctx, md_object_next(mo_p),
-                                    NULL, lf, name, ma);
+                                    NULL, lf, name, ma, uc);
         RETURN(rc);
 }
 
@@ -860,5 +888,3 @@ static struct md_dir_operations cmr_dir_ops = {
         .mdo_rename      = cmr_rename,
         .mdo_rename_tgt  = cmr_rename_tgt,
 };
-
-
index 061794d..4f82634 100644 (file)
@@ -54,7 +54,9 @@ static inline struct lu_fid* cmm2_fid(struct cmm_object *obj)
 }
 
 static int cmm_expect_splitting(const struct lu_context *ctx,
-                                struct md_object *mo, struct md_attr *ma)
+                                struct md_object *mo,
+                                struct md_attr *ma,
+                                struct md_ucred *uc)
 {
         struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo));
         struct lu_fid *fid = NULL;
@@ -70,8 +72,7 @@ static int cmm_expect_splitting(const struct lu_context *ctx,
         if (ma->ma_lmv_size)
                 GOTO(cleanup, rc = CMM_NO_SPLIT_EXPECTED);
         OBD_ALLOC_PTR(fid);
-        rc = cmm_child_ops(cmm)->mdo_root_get(ctx, cmm->cmm_child, 
-                                              fid);
+        rc = cmm_child_ops(cmm)->mdo_root_get(ctx, cmm->cmm_child, fid, uc);
         if (rc)
                 GOTO(cleanup, rc);
 
@@ -152,7 +153,7 @@ static int cmm_creat_remote_obj(const struct lu_context *ctx,
                                 struct cmm_device *cmm,
                                 struct lu_fid *fid, struct md_attr *ma,
                                 const struct lmv_stripe_md *lmv,
-                                int lmv_size)
+                                int lmv_size, struct md_ucred *uc)
 {
         struct cmm_object *obj;
         struct md_create_spec *spec;
@@ -170,7 +171,7 @@ static int cmm_creat_remote_obj(const struct lu_context *ctx,
         spec->u.sp_ea.eadatalen = lmv_size;
         spec->sp_cr_flags |= MDS_CREATE_SLAVE_OBJ;
         rc = mo_object_create(ctx, md_object_next(&obj->cmo_obj),
-                              spec, ma);
+                              spec, ma, uc);
         OBD_FREE_PTR(spec);
 
         cmm_object_put(ctx, obj);
@@ -178,7 +179,8 @@ static int cmm_creat_remote_obj(const struct lu_context *ctx,
 }
 
 static int cmm_create_slave_objects(const struct lu_context *ctx,
-                                    struct md_object *mo, struct md_attr *ma)
+                                    struct md_object *mo, struct md_attr *ma,
+                                    struct md_ucred *uc)
 {
         struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo));
         struct lmv_stripe_md *lmv = NULL, *slave_lmv = NULL;
@@ -213,7 +215,7 @@ static int cmm_create_slave_objects(const struct lu_context *ctx,
         slave_lmv->mea_count = 0;
         for (i = 1; i < cmm->cmm_tgt_count + 1; i ++) {
                 rc = cmm_creat_remote_obj(ctx, cmm, &lmv->mea_ids[i], ma,
-                                          slave_lmv, sizeof(slave_lmv));
+                                          slave_lmv, sizeof(slave_lmv), uc);
                 if (rc)
                         GOTO(cleanup, rc);
         }
@@ -228,7 +230,8 @@ cleanup:
 
 static int cmm_send_split_pages(const struct lu_context *ctx,
                                 struct md_object *mo, struct lu_rdpg *rdpg,
-                                struct lu_fid *fid, int len)
+                                struct lu_fid *fid, int len,
+                                struct md_ucred *uc)
 {
         struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo));
         struct cmm_object *obj;
@@ -240,14 +243,14 @@ static int cmm_send_split_pages(const struct lu_context *ctx,
                 RETURN(PTR_ERR(obj));
 
         rc = mdc_send_page(cmm, ctx, md_object_next(&obj->cmo_obj),
-                           rdpg->rp_pages[0], len);
+                           rdpg->rp_pages[0], len, uc);
         cmm_object_put(ctx, obj);
         RETURN(rc);
 }
 
 static int cmm_remove_entries(const struct lu_context *ctx,
                               struct md_object *mo, struct lu_rdpg *rdpg,
-                              __u32 hash_end, __u32 *len)
+                              __u32 hash_end, __u32 *len, struct md_ucred *uc)
 {
         struct lu_dirpage *dp;
         struct lu_dirent  *ent;
@@ -268,7 +271,7 @@ static int cmm_remove_entries(const struct lu_context *ctx,
                                 OBD_ALLOC(name, ent->lde_namelen + 1);
                                 memcpy(name, ent->lde_name, ent->lde_namelen);
                                 rc = mdo_name_remove(ctx, md_object_next(mo),
-                                                     name);
+                                                     name, uc);
                                 OBD_FREE(name, ent->lde_namelen + 1);
                         }
                         if (rc) {
@@ -292,9 +295,9 @@ unmap:
         RETURN(rc);
 }
 
-static int cmm_split_entries(const struct lu_context *ctx, struct md_object *mo,
-                             struct lu_rdpg *rdpg, struct lu_fid *lf,
-                             __u32 end)
+static int cmm_split_entries(const struct lu_context *ctx,
+                             struct md_object *mo, struct lu_rdpg *rdpg,
+                             struct lu_fid *lf, __u32 end, struct md_ucred *uc)
 {
         int rc, done = 0;
         ENTRY;
@@ -310,7 +313,7 @@ static int cmm_split_entries(const struct lu_context *ctx, struct md_object *mo,
                 memset(kmap(rdpg->rp_pages[0]), 0, CFS_PAGE_SIZE);
                 kunmap(rdpg->rp_pages[0]);
 
-                rc = mo_readpage(ctx, md_object_next(mo), rdpg);
+                rc = mo_readpage(ctx, md_object_next(mo), rdpg, uc);
                 /* -E2BIG means the end of the dir has already been reached */
                 if (rc) {
                         if (rc != -ERANGE) {
@@ -321,13 +324,13 @@ static int cmm_split_entries(const struct lu_context *ctx, struct md_object *mo,
                 }
                 
                 /* Remove the old entries */
-                rc = cmm_remove_entries(ctx, mo, rdpg, end, &len);
+                rc = cmm_remove_entries(ctx, mo, rdpg, end, &len, uc);
                 if (rc)
                         RETURN(rc);
 
                 /* Send page to slave object */ 
                 if (len > 0) {
-                        rc = cmm_send_split_pages(ctx, mo, rdpg, lf, len);
+                        rc = cmm_send_split_pages(ctx, mo, rdpg, lf, len, uc);
                         if (rc)
                                 RETURN(rc);
                 }
@@ -345,7 +348,8 @@ static int cmm_split_entries(const struct lu_context *ctx, struct md_object *mo,
 }
 #define SPLIT_PAGE_COUNT 1
 static int cmm_scan_and_split(const struct lu_context *ctx,
-                              struct md_object *mo, struct md_attr *ma)
+                              struct md_object *mo, struct md_attr *ma,
+                              struct md_ucred *uc)
 {
         struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo));
         __u32 hash_segement;
@@ -376,7 +380,7 @@ static int cmm_scan_and_split(const struct lu_context *ctx,
 
                 rdpg->rp_hash = i * hash_segement;
                 hash_end = rdpg->rp_hash + hash_segement;
-                rc = cmm_split_entries(ctx, mo, rdpg, lf, hash_end);
+                rc = cmm_split_entries(ctx, mo, rdpg, lf, hash_end, uc);
                 if (rc)
                         GOTO(cleanup, rc);
         }
@@ -394,7 +398,8 @@ free_rdpg:
         RETURN(rc);
 }
 
-int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo)
+int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo,
+                     struct md_ucred *uc)
 {
         struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo));
         struct md_attr *ma;
@@ -408,12 +413,12 @@ int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo)
                 RETURN(-ENOMEM);
 
         ma->ma_need = MA_INODE|MA_LMV;
-        rc = mo_attr_get(ctx, mo, ma);
+        rc = mo_attr_get(ctx, mo, ma, uc);
         if (rc)
                 GOTO(cleanup, ma);
 
         /* step1: check whether the dir needs to be split */
-        rc = cmm_expect_splitting(ctx, mo, ma);
+        rc = cmm_expect_splitting(ctx, mo, ma, uc);
         if (rc != CMM_EXPECT_SPLIT)
                 GOTO(cleanup, rc = 0);
 
@@ -425,18 +430,18 @@ int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo)
                 GOTO(cleanup, rc = 0);
 
         /* step2: create slave objects */
-        rc = cmm_create_slave_objects(ctx, mo, ma);
+        rc = cmm_create_slave_objects(ctx, mo, ma, uc);
         if (rc)
                 GOTO(cleanup, ma);
 
         /* step3: scan and split the object */
-        rc = cmm_scan_and_split(ctx, mo, ma);
+        rc = cmm_scan_and_split(ctx, mo, ma, uc);
         if (rc)
                 GOTO(cleanup, ma);
 
         /* step4: set mea to the master object */
-        rc = mo_xattr_set(ctx, md_object_next(mo), ma->ma_lmv, ma->ma_lmv_size,
-                          MDS_LMV_MD_NAME, 0);
+        rc = mo_xattr_set(ctx, md_object_next(mo), ma->ma_lmv,
+                          ma->ma_lmv_size, MDS_LMV_MD_NAME, 0, uc);
 
         if (rc == -ERESTART) 
                 CWARN("Dir"DFID" has been split \n", 
index a86f358..5483b2b 100644 (file)
@@ -35,6 +35,7 @@
 #include <lustre_net.h>
 #include <obd.h>
 #include <md_object.h>
+
 struct mdc_cli_desc {
         struct lustre_handle     cl_conn;
         /* uuid of remote MDT to connect */
@@ -96,8 +97,9 @@ struct lu_object *mdc_object_alloc(const struct lu_context *,
                                    const struct lu_object_header *,
                                    struct lu_device *);
 #ifdef HAVE_SPLIT_SUPPORT
-int mdc_send_page(struct cmm_device *cmm, const struct lu_context *ctx, 
-                  struct md_object *mo, struct page *page, __u32 end);
+int mdc_send_page(struct cmm_device *cmm, const struct lu_context *ctx,
+                  struct md_object *mo, struct page *page, __u32 end,
+                  struct md_ucred *uc);
 #endif
 
 #endif /* __KERNEL__ */
index 1c5e3b5..b1f45da 100644 (file)
@@ -203,7 +203,7 @@ static int mdc_req2attr_update(const struct lu_context *ctx,
 }
 
 static int mdc_attr_get(const struct lu_context *ctx, struct md_object *mo,
-                        struct md_attr *ma)
+                        struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo));
         struct mdc_thread_info *mci;
@@ -232,15 +232,19 @@ static int mdc_attr_get(const struct lu_context *ctx, struct md_object *mo,
 
 
 static int mdc_object_create(const struct lu_context *ctx,
-                             struct md_object *mo, 
+                             struct md_object *mo,
                              const struct md_create_spec *spec,
-                             struct md_attr *ma)
+                             struct md_attr *ma,
+                             struct md_ucred *uc)
 {
         struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo));
         struct lu_attr *la = &ma->ma_attr;
         struct mdc_thread_info *mci;
         const void *symname;
         int rc, symlen;
+        uid_t uid;
+        gid_t gid;
+        __u32 cap;
         ENTRY;
 
         LASSERT(spec->u.sp_pfid != NULL);
@@ -249,6 +253,16 @@ static int mdc_object_create(const struct lu_context *ctx,
         /* parent fid is needed to create dotdot on the remote node */
         mci->mci_opdata.fid1 = *(spec->u.sp_pfid);
         mci->mci_opdata.mod_time = la->la_mtime;
+        if (uc &&
+            ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) {
+                uid = uc->mu_fsuid;
+                gid = uc->mu_fsgid;
+                cap = uc->mu_cap;
+        } else {
+                uid = la->la_uid;
+                gid = la->la_gid;
+                cap = 0;
+        }
 
         /* get data from spec */
         if (spec->sp_cr_flags & MDS_CREATE_SLAVE_OBJ) {
@@ -263,7 +277,7 @@ static int mdc_object_create(const struct lu_context *ctx,
         
         rc = md_create(mc->mc_desc.cl_exp, &mci->mci_opdata,
                        symname, symlen,
-                       la->la_mode, la->la_uid, la->la_gid, 0, la->la_rdev,
+                       la->la_mode, uid, gid, cap, la->la_rdev,
                        &mci->mci_req);
 
         if (rc == 0) {
@@ -276,7 +290,8 @@ static int mdc_object_create(const struct lu_context *ctx,
         RETURN(rc);
 }
 
-static int mdc_ref_add(const struct lu_context *ctx, struct md_object *mo)
+static int mdc_ref_add(const struct lu_context *ctx, struct md_object *mo,
+                       struct md_ucred *uc)
 {
         struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo));
         struct mdc_thread_info *mci;
@@ -291,6 +306,18 @@ static int mdc_ref_add(const struct lu_context *ctx, struct md_object *mo)
         //mci->mci_opdata.mod_time = la->la_ctime;
         //mci->mci_opdata.fsuid = la->la_uid;
         //mci->mci_opdata.fsgid = la->la_gid;
+        mci->mci_opdata.mod_time = CURRENT_SECONDS;
+        if (uc &&
+            ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) {
+                mci->mci_opdata.fsuid = uc->mu_fsuid;
+                mci->mci_opdata.fsgid = uc->mu_fsgid;
+                mci->mci_opdata.cap = uc->mu_cap;
+        } else {
+                mci->mci_opdata.fsuid = current->fsuid;
+                mci->mci_opdata.fsgid = current->fsgid;
+                mci->mci_opdata.cap = current->cap_effective;
+        }
+
 
         rc = md_link(mc->mc_desc.cl_exp, &mci->mci_opdata, &mci->mci_req);
 
@@ -300,7 +327,7 @@ static int mdc_ref_add(const struct lu_context *ctx, struct md_object *mo)
 }
 
 static int mdc_ref_del(const struct lu_context *ctx, struct md_object *mo,
-                       struct md_attr *ma)
+                       struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo));
         struct lu_attr *la = &ma->ma_attr;
@@ -312,8 +339,17 @@ static int mdc_ref_del(const struct lu_context *ctx, struct md_object *mo,
         mci->mci_opdata.fid1 = *lu_object_fid(&mo->mo_lu);
         mci->mci_opdata.create_mode = la->la_mode;
         mci->mci_opdata.mod_time = la->la_ctime;
-        mci->mci_opdata.fsuid = la->la_uid;
-        mci->mci_opdata.fsgid = la->la_gid;
+        if (uc &&
+            ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) {
+                mci->mci_opdata.fsuid = uc->mu_fsuid;
+                mci->mci_opdata.fsgid = uc->mu_fsgid;
+                mci->mci_opdata.cap = uc->mu_cap;
+        } else {
+                mci->mci_opdata.fsuid = la->la_uid;
+                mci->mci_opdata.fsgid = la->la_gid;
+                mci->mci_opdata.cap = current->cap_effective;
+        }
+
         rc = md_unlink(mc->mc_desc.cl_exp, &mci->mci_opdata, &mci->mci_req);
         if (rc == 0) {
                 /* get attr from request */
@@ -327,7 +363,8 @@ static int mdc_ref_del(const struct lu_context *ctx, struct md_object *mo,
 
 #ifdef HAVE_SPLIT_SUPPORT
 int mdc_send_page(struct cmm_device *cm, const struct lu_context *ctx,
-                  struct md_object *mo, struct page *page, __u32 offset)
+                  struct md_object *mo, struct page *page, __u32 offset,
+                  struct md_ucred *uc)
 {
         struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo));
         int rc;
@@ -349,10 +386,10 @@ static struct md_object_operations mdc_mo_ops = {
 };
 
 /* md_dir_operations */
-static int mdc_rename_tgt(const struct lu_context *ctx,
-                          struct md_object *mo_p, struct md_object *mo_t,
-                          const struct lu_fid *lf, const char *name,
-                          struct md_attr *ma)
+static int mdc_rename_tgt(const struct lu_context *ctx, struct md_object *mo_p,
+                          struct md_object *mo_t, const struct lu_fid *lf,
+                          const char *name, struct md_attr *ma,
+                          struct md_ucred *uc)
 {
         struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo_p));
         struct lu_attr *la = &ma->ma_attr;
@@ -365,8 +402,16 @@ static int mdc_rename_tgt(const struct lu_context *ctx,
         mci->mci_opdata.fid2 = *lf;
         mci->mci_opdata.create_mode = la->la_mode;
         mci->mci_opdata.mod_time = la->la_ctime;
-        mci->mci_opdata.fsuid = la->la_uid;
-        mci->mci_opdata.fsgid = la->la_gid;
+        if (uc &&
+            ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))) {
+                mci->mci_opdata.fsuid = uc->mu_fsuid;
+                mci->mci_opdata.fsgid = uc->mu_fsgid;
+                mci->mci_opdata.cap = uc->mu_cap;
+        } else {
+                mci->mci_opdata.fsuid = la->la_uid;
+                mci->mci_opdata.fsgid = la->la_gid;
+                mci->mci_opdata.cap = current->cap_effective;
+        }
 
         rc = md_rename(mc->mc_desc.cl_exp, &mci->mci_opdata, NULL, 0,
                        name, strlen(name), &mci->mci_req);
@@ -381,7 +426,8 @@ static int mdc_rename_tgt(const struct lu_context *ctx,
 }
 
 static int mdc_is_subdir(const struct lu_context *ctx, struct md_object *mo,
-                         const struct lu_fid *fid, struct lu_fid *sfid)
+                         const struct lu_fid *fid, struct lu_fid *sfid,
+                         struct md_ucred *uc)
 {
         struct mdc_device *mc = md2mdc_dev(md_obj2dev(mo));
         struct mdc_thread_info *mci;
@@ -419,4 +465,3 @@ static struct md_dir_operations mdc_dir_ops = {
         .mdo_is_subdir   = mdc_is_subdir,
         .mdo_rename_tgt  = mdc_rename_tgt
 };
-
index cf341fb..11ca3a3 100644 (file)
@@ -41,13 +41,14 @@ struct group_info { /* unused */ };
 /* simple.c */
 
 struct lvfs_ucred {    /* new field set matches the mu_uid..mu_identity tail of struct md_ucred */
-        struct upcall_cache_entry *luc_uce;
-        __u32 luc_fsuid;
-        __u32 luc_fsgid;
-        __u32 luc_cap;
-        __u32 luc_suppgid1;
-        __u32 luc_suppgid2;
-        __u32 luc_umask;
+        __u32                   luc_uid;
+        __u32                   luc_gid;
+        __u32                   luc_fsuid;
+        __u32                   luc_fsgid;
+        __u32                   luc_cap;
+        __u32                   luc_umask;
+       struct group_info      *luc_ginfo;
+       struct mdt_identity    *luc_identity;
 };
 
 struct lvfs_callback_ops {
index e4730f8..1cc81d1 100644 (file)
@@ -61,4 +61,6 @@ extern int llapi_quotacheck(char *mnt, int check_type);
 extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk);
 extern int llapi_quotactl(char *mnt, struct if_quotactl *qctl);
 extern int llapi_target_iterate(int type_num, char **obd_type, void *args, llapi_cb_t cb);
+extern int llapi_getfacl(char *fname, char *cmd);
+extern int llapi_setfacl(char *fname, char *cmd);
 #endif
index eb7a1d7..8cc1454 100644 (file)
@@ -449,7 +449,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define MDT_CONNECT_SUPPORTED  (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
                                 OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
                                 OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
-                                OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID)
+                                OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID | \
+                                OBD_CONNECT_LCL_CLIENT | OBD_CONNECT_RMT_CLIENT)
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                 OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
                                 OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
@@ -605,6 +606,7 @@ struct md_op_data {
         __u32                 suppgids[2];
         __u32                 fsuid;
         __u32                 fsgid;
+        __u32                 cap;
 
         /* iattr fields and blocks. */
         struct iattr          attr;
@@ -657,6 +659,9 @@ struct lov_mds_md_v1 {            /* LOV EA mds/wire data (little-endian) */
 #define XATTR_NAME_ACL_ACCESS   "system.posix_acl_access"
 #define XATTR_NAME_LOV          "trusted.lov"
 
+/* remote ACL */
+#define XATTR_NAME_LUSTRE_ACL   "system.lustre_acl"
+
 #define OBD_MD_FLID        (0x00000001ULL) /* object ID */
 #define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
 #define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
@@ -698,6 +703,7 @@ struct lov_mds_md_v1 {            /* LOV EA mds/wire data (little-endian) */
 #define OBD_MD_FLXATTRLS   (0x0000002000000000ULL) /* xattr list */
 #define OBD_MD_FLXATTRRM   (0x0000004000000000ULL) /* xattr remove */
 #define OBD_MD_FLACL       (0x0000008000000000ULL) /* ACL */
+#define OBD_MD_FLRMTPERM   (0x0000010000000000ULL) /* remote permission */
 
 #define OBD_MD_FLGETATTR (OBD_MD_FLID    | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
                           OBD_MD_FLCTIME | OBD_MD_FLSIZE  | OBD_MD_FLBLKSZ | \
@@ -1022,6 +1028,7 @@ struct lustre_md {
 #ifdef CONFIG_FS_POSIX_ACL
         struct posix_acl        *posix_acl;
 #endif
+        struct mdt_remote_perm   *remote_perm;
 };
 
 #define Q_QUOTACHECK    0x800100
@@ -1046,6 +1053,33 @@ struct obd_quotactl {
 
 extern void lustre_swab_obd_quotactl(struct obd_quotactl *q);
 
+/* Inode access permission for a remote user; the inode info is omitted
+ * because the client already knows it. */
+struct mds_remote_perm {        /* same five fields, in the same order, as struct mdt_remote_perm */
+        __u32           rp_uid;
+        __u32           rp_gid;
+        __u32           rp_fsuid;
+        __u32           rp_fsgid;
+        __u32           rp_access_perm; /* MAY_READ/WRITE/EXEC */
+};
+
+/* setxid permissions for mds_setxid_perm.mp_perm */
+#define LUSTRE_SETUID_PERM 0x01
+#define LUSTRE_SETGID_PERM 0x02
+#define LUSTRE_SETGRP_PERM 0x04
+
+extern void lustre_swab_mds_remote_perm(struct mds_remote_perm *p);
+
+struct mdt_remote_perm {        /* mirrors struct mds_remote_perm field for field; pointed to by lustre_md.remote_perm */
+        __u32           rp_uid;
+        __u32           rp_gid;
+        __u32           rp_fsuid;
+        __u32           rp_fsgid;
+        __u32           rp_access_perm; /* MAY_READ/WRITE/EXEC */
+};
+
+extern void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p);
+
 struct mds_rec_setattr {
         __u32           sa_opcode;
         __u32           sa_fsuid;
@@ -1104,6 +1138,8 @@ extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa);
 #define FMODE_SOM                04000000
 #define FMODE_CLOSED             0
 
+#define MDS_OPEN_CREATED         00000010
+
 #define MDS_FMODE_EXEC           00000004
 #define MDS_OPEN_CREAT           00000100
 #define MDS_OPEN_EXCL            00000200
index 7c76d9d..158be62 100644 (file)
@@ -59,6 +59,8 @@ struct obd_statfs;
 #define IOC_OBD_STATFS                  _IOWR('f', 164, struct obd_statfs *)
 #define IOC_LOV_GETINFO                 _IOWR('f', 165, struct lov_user_mds_data *)
 #define LL_IOC_FLUSHCTX                 _IOW ('f', 166, long)
+#define LL_IOC_GETFACL                  _IOWR('f', 167, struct rmtacl_ioctl_data *)
+#define LL_IOC_SETFACL                  _IOWR('f', 168, struct rmtacl_ioctl_data *)
 
 #define LL_STATFS_MDC           1
 #define LL_STATFS_LOV           2
@@ -74,8 +76,8 @@ struct obd_statfs;
 #define IOC_MDC_GETSTRIPE       IOC_MDC_GETFILESTRIPE
 
 #define O_LOV_DELAY_CREATE 0100000000  /* hopefully this does not conflict */
-#define O_JOIN_FILE        0400000000  /* hopefully this does not conflict */
 #define O_CHECK_STALE      0200000000  /* hopefully this does not conflict */
+#define O_JOIN_FILE        0400000000  /* hopefully this does not conflict */
 
 #define LL_FILE_IGNORE_LOCK             0x00000001
 #define LL_FILE_GROUP_LOCKED            0x00000002
@@ -177,15 +179,36 @@ struct if_quotacheck {
         struct obd_uuid         obd_uuid;
 };
 
-#define MDS_GRP_DOWNCALL_MAGIC 0x6d6dd620
+#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd620
+
+/* setxid permission */
+#define N_SETXID_PERMS_MAX      64
+
+struct setxid_perm_downcall_data {      /* element type of identity_downcall_data.idd_perms[] */
+        __u64 pdd_nid;
+        __u32 pdd_perm;         /* NOTE(review): presumably a mask of LUSTRE_SET{UID,GID,GRP}_PERM -- confirm */
+};
+
+struct identity_downcall_data {         /* supersedes the removed struct mds_grp_downcall_data; adds idd_nperms/idd_perms */
+        __u32                            idd_magic;     /* NOTE(review): presumably IDENTITY_DOWNCALL_MAGIC -- confirm */
+        __u32                            idd_err;
+        __u32                            idd_uid;
+        __u32                            idd_gid;
+        __u32                            idd_nperms;    /* NOTE(review): presumably the number of idd_perms[] slots in use -- confirm */
+        struct setxid_perm_downcall_data idd_perms[N_SETXID_PERMS_MAX]; /* fixed-size table, N_SETXID_PERMS_MAX (64) slots */
+        __u32                            idd_ngroups;   /* NOTE(review): presumably the element count of idd_groups[] -- confirm */
+        __u32                            idd_groups[0]; /* zero-length trailing array (GNU C extension) */
+};
+
+#define RMTACL_DOWNCALL_MAGIC 0x6d6dd620 /* NOTE(review): same value as IDENTITY_DOWNCALL_MAGIC -- confirm this is intended */
+#define RMTACL_SIZE_MAX     (4096)
 
-struct mds_grp_downcall_data {
-        __u32           mgd_magic;
-        __u32           mgd_err;
-        __u32           mgd_uid;
-        __u32           mgd_gid;
-        __u32           mgd_ngroups;
-        __u32           mgd_groups[0];
+struct rmtacl_downcall_data {
+        __u32           add_magic;
+        __u32           add_handle;
+        __u64           add_ino;
+        __u32           add_buflen;
+        __u8            add_buf[0];
 };
 
 #ifdef NEED_QUOTA_DEFS
@@ -253,4 +276,12 @@ struct if_quotactl {
 # define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
 #endif
 
+/* remote acl ioctl */
+struct rmtacl_ioctl_data {      /* argument type named by LL_IOC_GETFACL / LL_IOC_SETFACL */
+        char           *cmd;            /* IN */
+        unsigned long   cmd_len;        /* NOTE(review): presumably the byte length of cmd -- confirm */
+        char           *res;            /* OUT */
+        unsigned long   res_len;        /* NOTE(review): presumably the capacity of res -- confirm */
+};
+
 #endif /* _LUSTRE_USER_H */
index f916bc7..13fa4e5 100644 (file)
@@ -256,4 +256,9 @@ static inline int lustre_cfg_sanity_check(void *buf, int len)
         RETURN(0);
 }
 
+/* default value for nllu/nllg for llite */
+#define NOBODY_UID      99
+#define NOBODY_GID      99
+#define INVALID_UID     (-1)
+
 #endif // _LUSTRE_CFG_H
index 73fb1d8..e445bf4 100644 (file)
@@ -141,6 +141,8 @@ struct lustre_mount_data {
         char      *lmd_profile;       /* client only */
         char      *lmd_sec_mdt;       /* sec from mdt (to ost/mdt) */
         char      *lmd_sec_cli;       /* sec from client (to ost/mdt) */
+        uid_t      lmd_nllu;          /* non-lustre-local-user id */
+        gid_t      lmd_nllg;          /* non-lustre-local-group id */
         char      *lmd_opts;          /* lustre mount options (as opposed to 
                                          _device_ mount options) */
         __u32     *lmd_exclude;       /* array of OSTs to ignore */
index 5ee5a81..b5625c1 100644 (file)
@@ -432,6 +432,7 @@ int ldlm_handle_convert(struct ptlrpc_request *req);
 int ldlm_handle_cancel(struct ptlrpc_request *req);
 int ldlm_del_waiting_lock(struct ldlm_lock *lock);
 int ldlm_refresh_waiting_lock(struct ldlm_lock *lock);
+void ldlm_revoke_export_locks(struct obd_export *exp);
 int ldlm_get_ref(void);
 void ldlm_put_ref(int force);
 
index ca5cabb..896d3e8 100644 (file)
@@ -11,6 +11,8 @@
 /* Data stored per client in the last_rcvd file.  In le32 order. */
 struct mds_client_data;
 struct mdt_client_data;
+struct mds_idmap_table;
+struct mdt_idmap_table;
 
 struct mds_export_data {
         struct list_head        med_open_head;
@@ -19,6 +21,10 @@ struct mds_export_data {
         __u64                   med_ibits_known;
         loff_t                  med_lr_off;
         int                     med_lr_idx;
+        unsigned int            med_rmtclient:1; /* remote client? */
+        __u32                   med_nllu;
+        __u32                   med_nllg;
+        struct mds_idmap_table *med_idmap;
 };
 
 struct mdt_export_data {
@@ -29,7 +35,12 @@ struct mdt_export_data {
         __u64                   med_ibits_known;
         loff_t                  med_lr_off;
         int                     med_lr_idx;
+        unsigned int            med_rmtclient:1; /* remote client? */
+        __u32                   med_nllu;
+        __u32                   med_nllg;
+        struct mdt_idmap_table *med_idmap;
 };
+
 struct osc_creator {
         spinlock_t              oscc_lock;
         struct list_head        oscc_list;
index b9a6903..8e865ca 100644 (file)
@@ -449,8 +449,12 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 #define OBD_IOC_SET_READONLY           _IOW ('f', 141, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_ABORT_RECOVERY         _IOR ('f', 142, OBD_IOC_DATA_TYPE)
 
+#define OBD_IOC_ROOT_SQUASH            _IOWR('f', 143, OBD_IOC_DATA_TYPE)
+
 #define OBD_GET_VERSION                _IOWR ('f', 144, OBD_IOC_DATA_TYPE)
 
+#define OBD_IOC_GSS_SUPPORT            _IOWR('f', 145, OBD_IOC_DATA_TYPE)
+
 #define OBD_IOC_CLOSE_UUID             _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_GETDEVICE              _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
index 732d3a4..911c8b4 100644 (file)
@@ -40,4 +40,26 @@ struct com_thread_info {
         struct req_capsule cti_pill;
 };
 
+/* id map */
+#define MDT_IDMAP_HASHSIZE      (32) /* power of two, so the mask below always yields an index in [0, 31] */
+#define MDT_IDMAP_HASHFUNC(id)  ((id) & (MDT_IDMAP_HASHSIZE - 1)) /* keeps the low 5 bits of id */
+
+enum mdt_idmap_idx {    /* values size the first dimension of mdt_idmap_table.mit_idmaps */
+        RMT_UIDMAP_IDX,
+        LCL_UIDMAP_IDX,
+        RMT_GIDMAP_IDX,
+        LCL_GIDMAP_IDX,
+        MDT_IDMAP_N_HASHES      /* number of entries above (4); not a table index itself */
+};
+
+struct mdt_idmap_table {        /* MDT_IDMAP_N_HASHES hash tables of MDT_IDMAP_HASHSIZE list heads each */
+        spinlock_t       mit_lock;      /* NOTE(review): presumably guards mit_idmaps -- confirm */
+        struct list_head mit_idmaps[MDT_IDMAP_N_HASHES]
+                                   [MDT_IDMAP_HASHSIZE];
+};
+
+/* remote perm */
+extern int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
+                               struct ptlrpc_request **request);
+
 #endif
index 8bf199e..3c65717 100644 (file)
@@ -363,7 +363,7 @@ struct ptlrpc_request {
                                  rq_auth_gss:1,      /* authenticated by gss */
                                  rq_auth_remote:1,   /* authed as remote user */
                                  rq_auth_usr_root:1, /* authed as root */
-                                 rq_auth_usr_mds:1;  /* authed as mds */
+                                 rq_auth_usr_mdt:1;  /* authed as mdt */
 
         uid_t                    rq_auth_uid;        /* authed uid */
         uid_t                    rq_auth_mapped_uid; /* authed uid mapped to */
index 95f8a73..c95744f 100644 (file)
@@ -52,5 +52,10 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
 #define PARAM_SEC_RPC              PARAM_SEC"rpc."
 #define PARAM_SEC_RPC_MDT          PARAM_SEC_RPC"mdt="
 #define PARAM_SEC_RPC_CLI          PARAM_SEC_RPC"cli="
+#define PARAM_ROOTSQUASH           PARAM_SEC"rootsquash."
+#define PARAM_ROOTSQUASH_UID       PARAM_ROOTSQUASH"uid="
+#define PARAM_ROOTSQUASH_GID       PARAM_ROOTSQUASH"gid="
+#define PARAM_ROOTSQUASH_SKIPS     PARAM_ROOTSQUASH"skips="
+#define PARAM_GSS_SUPPORT          PARAM_SEC"gss="
 
 #endif // _LUSTRE_PARAM_H
index 16b5c1a..5e33e88 100644 (file)
@@ -5,6 +5,12 @@
 #ifndef _UPCALL_CACHE_H
 #define _UPCALL_CACHE_H
 
+#ifdef __KERNEL__
+#include <linux/sched.h>
+#else
+struct group_info {};
+#endif
+
 #define UC_CACHE_NEW            0x01
 #define UC_CACHE_ACQUIRING      0x02
 #define UC_CACHE_INVALID        0x04
 #define UC_CACHE_CLEAR_INVALID(i)   (i)->ue_flags &= ~UC_CACHE_INVALID
 #define UC_CACHE_CLEAR_EXPIRED(i)   (i)->ue_flags &= ~UC_CACHE_EXPIRED
 
+struct upcall_cache_entry;
+
+struct mdt_setxid_perm {        /* element type behind mdt_identity.mi_perms */
+        lnet_nid_t      mp_nid;
+        __u32           mp_perm;
+};
+
+struct mdt_identity {   /* one of the two payloads in the union upcall_cache_entry.u */
+        struct upcall_cache_entry *mi_uc_entry; /* NOTE(review): presumably points back at the entry embedding this struct -- confirm */
+        uid_t                      mi_uid;
+        gid_t                      mi_gid;
+        struct group_info         *mi_ginfo;
+        int                        mi_nperms;   /* NOTE(review): presumably the element count of mi_perms -- confirm */
+        struct mdt_setxid_perm    *mi_perms;
+};
+
+struct rmtacl_upcall_data {
+        char                      *aud_cmd;
+};
+
+struct mdt_rmtacl {     /* one of the two payloads in the union upcall_cache_entry.u */
+        unsigned long              ra_ino;
+        __u32                      ra_handle;
+        char                      *ra_cmd;
+        char                      *ra_buf;
+};
+
 struct upcall_cache_entry {
         struct list_head        ue_hash;
         __u64                   ue_key;
-        __u64                   ue_primary;
-        struct group_info      *ue_group_info;
+//        __u64                   ue_primary;
+//        struct group_info      *ue_group_info;
         atomic_t                ue_refcount;
         int                     ue_flags;      /* holds UC_CACHE_* bits (see UC_CACHE_CLEAR_INVALID/EXPIRED) */
         cfs_waitq_t             ue_waitq;
         cfs_time_t              ue_acquire_expire;
         cfs_time_t              ue_expire;
+        union {        /* cache-specific payload, added by the change that comments out ue_primary/ue_group_info */
+                struct mdt_identity     identity;
+                struct mdt_rmtacl       acl;
+        } u;
 };
 
 #define UC_CACHE_HASH_SIZE        (128)
 #define UC_CACHE_HASH_INDEX(id)   ((id) & (UC_CACHE_HASH_SIZE - 1))
 #define UC_CACHE_UPCALL_MAXPATH   (1024UL)
 
+struct upcall_cache;
+
+struct upcall_cache_ops {       /* callback table passed to upcall_cache_init() and kept in upcall_cache.uc_ops */
+        void            (*init_entry)(struct upcall_cache_entry *, void *args);
+        void            (*free_entry)(struct upcall_cache *,
+                                      struct upcall_cache_entry *);
+        int             (*upcall_compare)(struct upcall_cache *,
+                                          struct upcall_cache_entry *,
+                                          __u64 key, void *args);
+        int             (*downcall_compare)(struct upcall_cache *,
+                                            struct upcall_cache_entry *,
+                                            __u64 key, void *args);
+        int             (*do_upcall)(struct upcall_cache *,
+                                     struct upcall_cache_entry *);
+        int             (*parse_downcall)(struct upcall_cache *,
+                                          struct upcall_cache_entry *, void *);
+};
+
 struct upcall_cache {
         struct list_head        uc_hashtable[UC_CACHE_HASH_SIZE];
         spinlock_t              uc_lock;
@@ -51,8 +106,23 @@ struct upcall_cache {
         char                    uc_upcall[UC_CACHE_UPCALL_MAXPATH];
         cfs_time_t              uc_acquire_expire;      /* jiffies */
         cfs_time_t              uc_entry_expire;        /* jiffies */
+        struct upcall_cache_ops *uc_ops;
 };
 
+struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
+                                                  __u64 key, void *args);
+void upcall_cache_put_entry(struct upcall_cache *cache,
+                            struct upcall_cache_entry *entry);
+int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
+                          void *args);
+void upcall_cache_flush_idle(struct upcall_cache *cache);
+void upcall_cache_flush_all(struct upcall_cache *cache);
+void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args);
+struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
+                                       struct upcall_cache_ops *ops);
+void upcall_cache_cleanup(struct upcall_cache *cache);
+
+#if 0
 struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
                                                   __u64 key, __u32 primary,
                                                   __u32 ngroups, __u32 *groups);
@@ -65,4 +135,5 @@ void upcall_cache_flush_all(struct upcall_cache *cache);
 struct upcall_cache *upcall_cache_init(const char *name);
 void upcall_cache_cleanup(struct upcall_cache *hash);
 
+#endif
 #endif /* _UPCALL_CACHE_H */
index c2a2c4d..d049892 100644 (file)
  * super-class definitions.
  */
 #include <lu_object.h>
+#include <lvfs.h>
 
 struct md_device;
 struct md_device_operations;
 struct md_object;
 
 
+typedef enum {  /* type of md_ucred.mu_valid */
+        UCRED_INVALID   = -1,
+        UCRED_INIT      = 0,
+        UCRED_OLD       = 1,    /* mdc_* ops take fsuid/fsgid/cap from the md_ucred in this state */
+        UCRED_NEW       = 2,    /* likewise; NOTE(review): the OLD/NEW distinction is not visible here */
+} ucred_t;
+
+struct md_ucred {       /* user credential handed to md operations as their last argument */
+        ucred_t                 mu_valid;       /* mdc_* ops read mu_fsuid/mu_fsgid/mu_cap only for UCRED_OLD or UCRED_NEW */
+        __u32                   mu_o_uid;       /* NOTE(review): mu_o_* presumably keep the original (unmapped) ids -- confirm */
+        __u32                   mu_o_gid;
+        __u32                   mu_o_fsuid;
+        __u32                   mu_o_fsgid;
+        __u32                   mu_uid;
+        __u32                   mu_gid;
+        __u32                   mu_fsuid;       /* copied to mci_opdata.fsuid by mdc_ref_add/mdc_ref_del/mdc_rename_tgt */
+        __u32                   mu_fsgid;       /* copied to mci_opdata.fsgid by the same functions */
+        __u32                   mu_cap;         /* copied to mci_opdata.cap by the same functions */
+        __u32                   mu_umask;
+       struct group_info      *mu_ginfo;
+       struct mdt_identity    *mu_identity;
+};
+
 /* metadata attributes */
 enum ma_valid {
         MA_INODE = (1 << 0),
@@ -92,96 +116,179 @@ struct md_create_spec {
  * Operations implemented for each md object (both directory and leaf).
  */
 struct md_object_operations {
-        int (*moo_attr_get)(const struct lu_context *ctxt, struct md_object *obj,
-                            struct md_attr *attr);
+        int (*moo_permission)(const struct lu_context *ctxt,
+                              struct md_object *obj,
+                              int mask,
+                              struct md_ucred *uc);
+
+        int (*moo_attr_get)(const struct lu_context *ctxt,
+                            struct md_object *obj,
+                            struct md_attr *attr,
+                            struct md_ucred *uc);
 
-        int (*moo_attr_set)(const struct lu_context *ctxt, struct md_object *obj,
-                            const struct md_attr *attr);
+        int (*moo_attr_set)(const struct lu_context *ctxt,
+                            struct md_object *obj,
+                            const struct md_attr *attr,
+                            struct md_ucred *uc);
 
         int (*moo_xattr_get)(const struct lu_context *ctxt,
                              struct md_object *obj,
-                             void *buf, int buf_len, const char *name);
+                             void *buf,
+                             int buf_len,
+                             const char *name,
+                             struct md_ucred *uc);
 
         int (*moo_xattr_list)(const struct lu_context *ctxt,
                               struct md_object *obj,
-                              void *buf, int buf_len);
+                              void *buf,
+                              int buf_len,
+                              struct md_ucred *uc);
 
         int (*moo_xattr_set)(const struct lu_context *ctxt,
-                             struct md_object *obj, const void *buf,
-                             int buf_len, const char *name, int fl);
+                             struct md_object *obj,
+                             const void *buf,
+                             int buf_len,
+                             const char *name,
+                             int fl,
+                             struct md_ucred *uc);
+
         int (*moo_xattr_del)(const struct lu_context *ctxt,
-                             struct md_object *obj, const char *name);
+                             struct md_object *obj,
+                             const char *name,
+                             struct md_ucred *uc);
 
-        int (*moo_readpage)(const struct lu_context *, struct md_object *,
-                            const struct lu_rdpg *);
+        int (*moo_readpage)(const struct lu_context *ctxt,
+                            struct md_object *obj,
+                            const struct lu_rdpg *rdpg,
+                            struct md_ucred *uc);
 
         int (*moo_readlink)(const struct lu_context *ctxt,
                             struct md_object *obj,
-                            void *buf, int buf_len);
+                            void *buf,
+                            int buf_len,
+                            struct md_ucred *uc);
 
         /* part of cross-ref operation */
-        int (*moo_object_create)(const struct lu_context *,
-                                 struct md_object *,
+        int (*moo_object_create)(const struct lu_context *ctxt,
+                                 struct md_object *obj,
                                  const struct md_create_spec *spec,
-                                 struct md_attr *);
-        int (*moo_ref_add)(const struct lu_context *, struct md_object *);
-        int (*moo_ref_del)(const struct lu_context *, struct md_object *,
-                           struct md_attr *);
-        int (*moo_open)(const struct lu_context *, struct md_object *, int flags);
-        int (*moo_close)(const struct lu_context *, struct md_object *,
-                         struct md_attr *);
+                                 struct md_attr *ma,
+                                 struct md_ucred *uc);
+
+        int (*moo_ref_add)(const struct lu_context * ctxt,
+                           struct md_object *obj,
+                           struct md_ucred *uc);
+
+        int (*moo_ref_del)(const struct lu_context *ctxt,
+                           struct md_object *obj,
+                           struct md_attr *ma,
+                           struct md_ucred *uc);
+
+        int (*moo_open)(const struct lu_context *ctxt,
+                        struct md_object *obj,
+                        int flag,
+                        struct md_ucred *uc);
+
+        int (*moo_close)(const struct lu_context *ctxt,
+                         struct md_object *obj,
+                         struct md_attr *ma,
+                         struct md_ucred *uc);
 };
 
 /*
  * Operations implemented for each directory object.
  */
 struct md_dir_operations {
-        int (*mdo_is_subdir) (const struct lu_context *, struct md_object *,
-                              const struct lu_fid *, struct lu_fid *);
+        int (*mdo_is_subdir) (const struct lu_context *ctxt,
+                              struct md_object *obj,
+                              const struct lu_fid *fid,
+                              struct lu_fid *sfid,
+                              struct md_ucred *uc);
         
-        int (*mdo_lookup)(const struct lu_context *, struct md_object *,
-                          const char *, struct lu_fid *);
-
-        int (*mdo_create)(const struct lu_context *, struct md_object *,
-                          const char *child_name, struct md_object *,
+        int (*mdo_lookup)(const struct lu_context *ctxt,
+                          struct md_object *obj,
+                          const char *name,
+                          struct lu_fid *fid,
+                          struct md_ucred *uc);
+
+        int (*mdo_create)(const struct lu_context *ctxt,
+                          struct md_object *pobj,
+                          const char *name,
+                          struct md_object *child,
                           const struct md_create_spec *spec,
-                          struct md_attr *);
+                          struct md_attr *ma,
+                          struct md_ucred *uc);
+
         /* This method is used for creating data object for this meta object*/
-        int (*mdo_create_data)(const struct lu_context *cx, struct md_object *p,
+        int (*mdo_create_data)(const struct lu_context *ctxt,
+                               struct md_object *p,
                                struct md_object *o,
                                const struct md_create_spec *spec,
-                               struct md_attr *ma);
-        int (*mdo_rename)(const struct lu_context *ctxt,
-                          struct md_object *spobj, struct md_object *tpobj,
-                          const struct lu_fid *lf, const char *sname,
-                          struct md_object *tobj, const char *tname,
-                          struct md_attr *);
-
-        int (*mdo_link)(const struct lu_context *, struct md_object *,
-                        struct md_object *, const char *, struct md_attr *);
+                               struct md_attr *ma,
+                               struct md_ucred *uc);
 
-        int (*mdo_unlink)(const struct lu_context *, struct md_object *,
-                          struct md_object *, const char *, struct md_attr *);
+        int (*mdo_rename)(const struct lu_context *ctxt,
+                          struct md_object *spobj,
+                          struct md_object *tpobj,
+                          const struct lu_fid *lf,
+                          const char *sname,
+                          struct md_object *tobj,
+                          const char *tname,
+                          struct md_attr *ma,
+                          struct md_ucred *uc);
+
+        int (*mdo_link)(const struct lu_context *ctxt,
+                        struct md_object *tgt_obj,
+                        struct md_object *src_obj,
+                        const char *name,
+                        struct md_attr *ma,
+                        struct md_ucred *uc);
+
+        int (*mdo_unlink)(const struct lu_context *ctxt,
+                          struct md_object *pobj,
+                          struct md_object *cobj,
+                          const char *name,
+                          struct md_attr *ma,
+                          struct md_ucred *uc);
 
         /* partial ops for cross-ref case */
-        int (*mdo_name_insert)(const struct lu_context *, struct md_object *,
-                               const char *, const struct lu_fid *, int);
-        int (*mdo_name_remove)(const struct lu_context *, struct md_object *,
-                               const char *);
-        int (*mdo_rename_tgt)(const struct lu_context *, struct md_object *,
-                              struct md_object *, const struct lu_fid *,
-                              const char *, struct md_attr *);
+        int (*mdo_name_insert)(const struct lu_context *ctxt,
+                               struct md_object *obj,
+                               const char *name,
+                               const struct lu_fid *fid,
+                               int isdir,
+                               struct md_ucred *uc);
+
+        int (*mdo_name_remove)(const struct lu_context *ctxt,
+                               struct md_object *obj, const char *name,
+                               struct md_ucred *uc);
+
+        int (*mdo_rename_tgt)(const struct lu_context *ctxt,
+                              struct md_object *pobj,
+                              struct md_object *tobj,
+                              const struct lu_fid *fid,
+                              const char *name,
+                              struct md_attr *ma,
+                              struct md_ucred *uc);
 };
 
 struct md_device_operations {
         /* meta-data device related handlers. */
         int (*mdo_root_get)(const struct lu_context *ctx,
-                            struct md_device *m, struct lu_fid *f);
+                            struct md_device *m,
+                            struct lu_fid *f,
+                            struct md_ucred *uc);
+
         int (*mdo_maxsize_get)(const struct lu_context *ctx,
-                               struct md_device *m, int *md_size,
-                               int *cookie_size);
+                               struct md_device *m,
+                               int *md_size,
+                               int *cookie_size,
+                               struct md_ucred *uc);
+
         int (*mdo_statfs)(const struct lu_context *ctx,
-                          struct md_device *m, struct kstatfs *sfs);
+                          struct md_device *m,
+                          struct kstatfs *sfs,
+                          struct md_ucred *uc);
 };
 
 enum md_upcall_event {
@@ -252,187 +359,255 @@ static inline void md_device_fini(struct md_device *md)
 }
 
 /* md operations */
-static inline int mo_attr_get(const struct lu_context *cx, struct md_object *m,
-                              struct md_attr *at)
+static inline int mo_permission(const struct lu_context *cx,
+                                struct md_object *m,
+                                int mask,
+                                struct md_ucred *uc)
 {
-        LASSERT(m->mo_ops->moo_attr_get);
-        return m->mo_ops->moo_attr_get(cx, m, at);
+        LASSERT(m->mo_ops->moo_permission);
+        return m->mo_ops->moo_permission(cx, m, mask, uc);
 }
 
-static inline int mo_readlink(const struct lu_context *cx, struct md_object *m,
-                              void *buf, int buf_len)
+static inline int mo_attr_get(const struct lu_context *cx,
+                              struct md_object *m,
+                              struct md_attr *at,
+                              struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_attr_get);
-        return m->mo_ops->moo_readlink(cx, m, buf, buf_len);
+        return m->mo_ops->moo_attr_get(cx, m, at, uc);
+}
+
+static inline int mo_readlink(const struct lu_context *cx,
+                              struct md_object *m,
+                              void *buf,
+                              int buf_len,
+                              struct md_ucred *uc)
+{
+        LASSERT(m->mo_ops->moo_readlink);
+        return m->mo_ops->moo_readlink(cx, m, buf, buf_len, uc);
 }
 
-static inline int mo_attr_set(const struct lu_context *cx, struct md_object *m,
-                              const struct md_attr *at)
+static inline int mo_attr_set(const struct lu_context *cx,
+                              struct md_object *m,
+                              const struct md_attr *at,
+                              struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_attr_set);
-        return m->mo_ops->moo_attr_set(cx, m, at);
+        return m->mo_ops->moo_attr_set(cx, m, at, uc);
 }
 
 static inline int mo_xattr_get(const struct lu_context *cx,
                                struct md_object *m,
-                               void *buf, int buf_len, const char *name)
+                               void *buf,
+                               int buf_len,
+                               const char *name,
+                               struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_xattr_get);
-        return m->mo_ops->moo_xattr_get(cx, m, buf, buf_len, name);
+        return m->mo_ops->moo_xattr_get(cx, m, buf, buf_len, name, uc);
 }
 
 static inline int mo_xattr_del(const struct lu_context *cx,
                                struct md_object *m,
-                               const char *name)
+                               const char *name,
+                               struct md_ucred *uc)
 {
-        LASSERT(m->mo_ops->moo_xattr_set);
-        return m->mo_ops->moo_xattr_del(cx, m, name);
+        LASSERT(m->mo_ops->moo_xattr_del);
+        return m->mo_ops->moo_xattr_del(cx, m, name, uc);
 }
 
 static inline int mo_xattr_set(const struct lu_context *cx,
-                               struct md_object *m, const void *buf,
-                               int buf_len, const char *name, int flags)
+                               struct md_object *m,
+                               const void *buf,
+                               int buf_len,
+                               const char *name,
+                               int flags,
+                               struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_xattr_set);
-        return m->mo_ops->moo_xattr_set(cx, m, buf, buf_len, name, flags);
+        return m->mo_ops->moo_xattr_set(cx, m, buf, buf_len, name, flags, uc);
 }
 
 static inline int mo_xattr_list(const struct lu_context *cx,
-                               struct md_object *m,
-                               void *buf, int buf_len)
+                                struct md_object *m,
+                                void *buf,
+                                int buf_len,
+                                struct md_ucred *uc)
 {
-        LASSERT(m->mo_ops->moo_xattr_get);
-        return m->mo_ops->moo_xattr_list(cx, m, buf, buf_len);
+        LASSERT(m->mo_ops->moo_xattr_list);
+        return m->mo_ops->moo_xattr_list(cx, m, buf, buf_len, uc);
 }
 
-static inline int mo_open(const struct lu_context *cx, struct md_object *m,
-                          int flags)
+static inline int mo_open(const struct lu_context *cx,
+                          struct md_object *m,
+                          int flags,
+                          struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_open);
-        return m->mo_ops->moo_open(cx, m, flags);
+        return m->mo_ops->moo_open(cx, m, flags, uc);
 }
 
-static inline int mo_close(const struct lu_context *cx, struct md_object *m,
-                           struct md_attr *ma)
+static inline int mo_close(const struct lu_context *cx,
+                           struct md_object *m,
+                           struct md_attr *ma,
+                           struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_close);
-        return m->mo_ops->moo_close(cx, m, ma);
+        return m->mo_ops->moo_close(cx, m, ma, uc);
 }
 
-static inline int mo_readpage(const struct lu_context *cx, struct md_object *m,
-                              const struct lu_rdpg *rdpg)
+static inline int mo_readpage(const struct lu_context *cx,
+                              struct md_object *m,
+                              const struct lu_rdpg *rdpg,
+                              struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_readpage);
-        return m->mo_ops->moo_readpage(cx, m, rdpg);
+        return m->mo_ops->moo_readpage(cx, m, rdpg, uc);
 }
 
 static inline int mo_object_create(const struct lu_context *cx,
                                    struct md_object *m,
                                    const struct md_create_spec *spc,
-                                   struct md_attr *at)
+                                   struct md_attr *at,
+                                   struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_object_create);
-        return m->mo_ops->moo_object_create(cx, m, spc, at);
+        return m->mo_ops->moo_object_create(cx, m, spc, at, uc);
 }
 
 static inline int mo_ref_add(const struct lu_context *cx,
-                             struct md_object *m)
+                             struct md_object *m,
+                             struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_ref_add);
-        return m->mo_ops->moo_ref_add(cx, m);
+        return m->mo_ops->moo_ref_add(cx, m, uc);
 }
 
 static inline int mo_ref_del(const struct lu_context *cx,
-                             struct md_object *m, struct md_attr *ma)
+                             struct md_object *m,
+                             struct md_attr *ma,
+                             struct md_ucred *uc)
 {
         LASSERT(m->mo_ops->moo_ref_del);
-        return m->mo_ops->moo_ref_del(cx, m, ma);
+        return m->mo_ops->moo_ref_del(cx, m, ma, uc);
 }
 
-static inline int mdo_lookup(const struct lu_context *cx, struct md_object *p,
-                             const char *name, struct lu_fid *f)
+static inline int mdo_lookup(const struct lu_context *cx,
+                             struct md_object *p,
+                             const char *name,
+                             struct lu_fid *f,
+                             struct md_ucred *uc)
 {
         LASSERT(p->mo_dir_ops->mdo_lookup);
-        return p->mo_dir_ops->mdo_lookup(cx, p, name, f);
+        return p->mo_dir_ops->mdo_lookup(cx, p, name, f, uc);
 }
 
-static inline int mdo_create(const struct lu_context *cx, struct md_object *p,
-                             const char *child_name, struct md_object *c,
+static inline int mdo_create(const struct lu_context *cx,
+                             struct md_object *p,
+                             const char *child_name,
+                             struct md_object *c,
                              const struct md_create_spec *spc,
-                             struct md_attr *at)
+                             struct md_attr *at,
+                             struct md_ucred *uc)
 {
         LASSERT(c->mo_dir_ops->mdo_create);
-        return c->mo_dir_ops->mdo_create(cx, p, child_name, c, spc, at);
+        return c->mo_dir_ops->mdo_create(cx, p, child_name, c, spc, at, uc);
 }
+
 static inline int mdo_create_data(const struct lu_context *cx,
-                                  struct md_object *p, struct md_object *c,
+                                  struct md_object *p,
+                                  struct md_object *c,
                                   const struct md_create_spec *spec,
-                                  struct md_attr *ma)
+                                  struct md_attr *ma,
+                                  struct md_ucred *uc)
 {
         LASSERT(c->mo_dir_ops->mdo_create_data);
-        return c->mo_dir_ops->mdo_create_data(cx, p, c, spec, ma);
+        return c->mo_dir_ops->mdo_create_data(cx, p, c, spec, ma, uc);
 }
 
 static inline int mdo_rename(const struct lu_context *cx,
-                             struct md_object *sp, struct md_object *tp,
-                             const struct lu_fid *lf, const char *sname,
-                             struct md_object *t, const char *tname,
-                             struct md_attr *ma)
+                             struct md_object *sp,
+                             struct md_object *tp,
+                             const struct lu_fid *lf,
+                             const char *sname,
+                             struct md_object *t,
+                             const char *tname,
+                             struct md_attr *ma,
+                             struct md_ucred *uc)
 {
         LASSERT(tp->mo_dir_ops->mdo_rename);
-        return tp->mo_dir_ops->mdo_rename(cx, sp, tp, lf, sname, t, tname, ma);
+        return tp->mo_dir_ops->mdo_rename(cx, sp, tp, lf, sname, t, tname,
+                                          ma, uc);
 }
 
-static inline int mdo_is_subdir(const struct lu_context *cx, struct md_object *mo,
-                                const struct lu_fid *fid, struct lu_fid *sfid)
+static inline int mdo_is_subdir(const struct lu_context *cx,
+                                struct md_object *mo,
+                                const struct lu_fid *fid,
+                                struct lu_fid *sfid,
+                                struct md_ucred *uc)
 {
         LASSERT(mo->mo_dir_ops->mdo_is_subdir);
-        return mo->mo_dir_ops->mdo_is_subdir(cx, mo, fid, sfid);
+        return mo->mo_dir_ops->mdo_is_subdir(cx, mo, fid, sfid, uc);
 }
 
-static inline int mdo_link(const struct lu_context *cx, struct md_object *p,
-                           struct md_object *s, const char *name,
-                           struct md_attr *ma)
+static inline int mdo_link(const struct lu_context *cx,
+                           struct md_object *p,
+                           struct md_object *s,
+                           const char *name,
+                           struct md_attr *ma,
+                           struct md_ucred *uc)
 {
         LASSERT(s->mo_dir_ops->mdo_link);
-        return s->mo_dir_ops->mdo_link(cx, p, s, name, ma);
+        return s->mo_dir_ops->mdo_link(cx, p, s, name, ma, uc);
 }
 
-static inline int mdo_unlink(const struct lu_context *cx, struct md_object *p,
-                             struct md_object *c, const char *name,
-                             struct md_attr *ma)
+static inline int mdo_unlink(const struct lu_context *cx,
+                             struct md_object *p,
+                             struct md_object *c,
+                             const char *name,
+                             struct md_attr *ma,
+                             struct md_ucred *uc)
 {
         LASSERT(c->mo_dir_ops->mdo_unlink);
-        return c->mo_dir_ops->mdo_unlink(cx, p, c, name, ma);
+        return c->mo_dir_ops->mdo_unlink(cx, p, c, name, ma, uc);
 }
 
 static inline int mdo_name_insert(const struct lu_context *cx,
-                                  struct md_object *p, const char *name,
-                                  const struct lu_fid *f, int isdir)
+                                  struct md_object *p,
+                                  const char *name,
+                                  const struct lu_fid *f,
+                                  int isdir,
+                                  struct md_ucred *uc)
 {
         LASSERT(p->mo_dir_ops->mdo_name_insert);
-        return p->mo_dir_ops->mdo_name_insert(cx, p, name, f, isdir);
+        return p->mo_dir_ops->mdo_name_insert(cx, p, name, f, isdir, uc);
 }
 
 static inline int mdo_name_remove(const struct lu_context *cx,
                                   struct md_object *p,
-                                  const char *name)
+                                  const char *name,
+                                  struct md_ucred *uc)
 {
         LASSERT(p->mo_dir_ops->mdo_name_remove);
-        return p->mo_dir_ops->mdo_name_remove(cx, p, name);
+        return p->mo_dir_ops->mdo_name_remove(cx, p, name, uc);
 }
 
 static inline int mdo_rename_tgt(const struct lu_context *cx,
-                                 struct md_object *p, struct md_object *t,
-                                 const struct lu_fid *lf, const char *name,
-                                 struct md_attr *ma)
+                                 struct md_object *p,
+                                 struct md_object *t,
+                                 const struct lu_fid *lf,
+                                 const char *name,
+                                 struct md_attr *ma,
+                                 struct md_ucred *uc)
 {
         if (t) {
                 LASSERT(t->mo_dir_ops->mdo_rename_tgt);
-                return t->mo_dir_ops->mdo_rename_tgt(cx, p, t, lf, name, ma);
+                return t->mo_dir_ops->mdo_rename_tgt(cx, p, t, lf, name,
+                                                     ma, uc);
         } else {
                 LASSERT(p->mo_dir_ops->mdo_rename_tgt);
-                return p->mo_dir_ops->mdo_rename_tgt(cx, p, t, lf, name, ma);
+                return p->mo_dir_ops->mdo_rename_tgt(cx, p, t, lf, name,
+                                                     ma, uc);
         }
 }
 
index 140c51f..65cd41a 100644 (file)
@@ -507,6 +507,15 @@ struct mgs_obd {
         struct semaphore                 mgs_sem;
 };
 
+/* Capacity of rootsquash_info.rsi_nosquash_nids; 64 is assumed sufficient. */
+#define N_NOSQUASH_NIDS 64
+struct rootsquash_info {
+        uid_t           rsi_uid;        /* presumably squash-to uid - confirm */
+        gid_t           rsi_gid;        /* presumably squash-to gid - confirm */
+        int             rsi_n_nosquash_nids; /* presumably # of used slots */
+        lnet_nid_t      rsi_nosquash_nids[N_NOSQUASH_NIDS];
+};
+
 struct mds_obd {
         /* NB this field MUST be first */
         struct obd_device_target         mds_obt;
@@ -545,7 +554,7 @@ struct mds_obd {
         struct file                     *mds_lov_objid_filp;
         struct file                     *mds_health_check_filp;
         unsigned long                   *mds_client_bitmap;
-        struct upcall_cache             *mds_group_hash;
+//        struct upcall_cache             *mds_group_hash;
 
         struct lustre_quota_info         mds_quota_info;
         struct semaphore                 mds_qonoff_sem;
@@ -557,6 +566,11 @@ struct mds_obd {
         /* For CMD add mds_num */
         int                              mds_num;
 
+        struct upcall_cache             *mds_identity_cache;
+        struct upcall_cache             *mds_rmtacl_cache;
+
+        /* root squash */
+        struct rootsquash_info          *mds_rootsquash_info;
 };
 
 struct echo_obd {
@@ -1190,6 +1204,9 @@ struct md_ops {
         int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *,
                                int flags, void *opaque);
 
+        int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *,
+                                 struct ptlrpc_request **);
+
         /*
          * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
          * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
index e7602db..94b3743 100644 (file)
@@ -415,8 +415,9 @@ static inline int obd_precleanup(struct obd_device *obd,
                                 lu_context_exit(&ctx);
                                 lu_context_fini(&ctx);
                         }
-                } else 
+                } else {
                         rc = 0;
+                }
         } else
 #endif
         {
@@ -1896,6 +1897,16 @@ static inline int md_init_ea_size(struct obd_export *exp,
                                                cookiesize));
 }
 
+static inline int md_get_remote_perm(struct obd_export *exp,
+                                     const struct lu_fid *fid,
+                                     struct ptlrpc_request **request)
+{       /* Wrapper: calls the get_remote_perm md op found via exp->exp_obd. */
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, get_remote_perm); /* NOTE(review): may return */
+        MD_COUNTER_INCREMENT(exp->exp_obd, get_remote_perm);
+        RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, request));
+}
+
 /* OBD Metadata Support */
 extern int obd_init_caches(void);
 extern void obd_cleanup_caches(void);
index 3cfed08..955c6cd 100644 (file)
@@ -184,9 +184,10 @@ extern int obd_race_state;
 #define OBD_FAIL_FLD                     0x1100
 #define OBD_FAIL_FLD_QUERY_NET           0x1101
 
-#define OBD_FAIL_SEC_CTX_INIT_NET        0x1200
-#define OBD_FAIL_SEC_CTX_INIT_CONT_NET   0x1210
-#define OBD_FAIL_SEC_CTX_FINI_NET        0x1220
+#define OBD_FAIL_SEC_CTX                 0x1200
+#define OBD_FAIL_SEC_CTX_INIT_NET        0x1201
+#define OBD_FAIL_SEC_CTX_INIT_CONT_NET   0x1202
+#define OBD_FAIL_SEC_CTX_FINI_NET        0x1203
 
 #define OBD_FAIL_QUOTA_QD_COUNT_32BIT    0xA00
 
index e683de7..29c9360 100644 (file)
@@ -1500,6 +1500,61 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req)
         RETURN(0);
 }
 
+void ldlm_revoke_export_locks(struct obd_export *exp)
+{
+        struct list_head *locklist = &exp->exp_ldlm_data.led_held_locks;
+        struct list_head  rpc_list;
+        struct ldlm_lock *lock, *next;
+        struct ldlm_lock_desc desc;
+        /* Run the blocking AST on every granted IBITS/PLAIN lock of exp. */
+        ENTRY;
+        INIT_LIST_HEAD(&rpc_list);
+        /* Pass 1, under led_lock: pick locks and move them to rpc_list. */
+        spin_lock(&exp->exp_ldlm_data.led_lock);
+        list_for_each_entry_safe(lock, next, locklist, l_export_chain) {
+                lock_res_and_lock(lock);
+                if (lock->l_req_mode != lock->l_granted_mode) {
+                        unlock_res_and_lock(lock);
+                        continue;       /* req mode != granted mode */
+                }
+                /* Only IBITS and PLAIN resources are handled here. */
+                LASSERT(lock->l_resource);
+                if (lock->l_resource->lr_type != LDLM_IBITS &&
+                    lock->l_resource->lr_type != LDLM_PLAIN) {
+                        unlock_res_and_lock(lock);
+                        continue;
+                }
+                /* Skip locks already flagged LDLM_FL_AST_SENT. */
+                if (lock->l_flags & LDLM_FL_AST_SENT) {
+                        unlock_res_and_lock(lock);
+                        continue;
+                }
+
+                LASSERT(lock->l_blocking_ast);
+                LASSERT(!lock->l_blocking_lock);
+                /* Set AST_SENT before unlock_res_and_lock() is called. */
+                lock->l_flags |= LDLM_FL_AST_SENT;
+                unlock_res_and_lock(lock);
+                /* NOTE(review): never put back on led_held_locks - ok? */
+                list_move(&lock->l_export_chain, &rpc_list);
+        }
+        spin_unlock(&exp->exp_ldlm_data.led_lock);
+        /* Pass 2, led_lock dropped: call each lock's blocking AST. */
+        while (!list_empty(&rpc_list)) {
+                lock = list_entry(rpc_list.next, struct ldlm_lock,
+                                  l_export_chain);
+                list_del_init(&lock->l_export_chain);
+
+                /* The desc merely pretends to be an exclusive request. */
+                ldlm_lock2desc(lock, &desc);
+                desc.l_req_mode = LCK_EX;
+                desc.l_granted_mode = 0;
+                /* NOTE(review): no lock ref is taken before this call. */
+                lock->l_blocking_ast(lock, &desc, NULL, LDLM_CB_BLOCKING);
+        }
+        EXIT;
+}
+
 #ifdef __KERNEL__
 static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp)
 {
@@ -1853,6 +1908,7 @@ EXPORT_SYMBOL(ldlm_del_waiting_lock);
 EXPORT_SYMBOL(ldlm_get_ref);
 EXPORT_SYMBOL(ldlm_put_ref);
 EXPORT_SYMBOL(ldlm_refresh_waiting_lock);
+EXPORT_SYMBOL(ldlm_revoke_export_locks);
 
 /* ldlm_resource.c */
 EXPORT_SYMBOL(ldlm_namespace_new);
index d67f148..2a671f3 100644 (file)
@@ -1,5 +1,5 @@
 MODULES := lustre
-lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o
+lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o remote_perm.o
 
 ifeq ($(PATCHLEVEL),4)
 lustre-objs += rw24.o super.o
index 2e86f72..81f0dc7 100644 (file)
@@ -1100,7 +1100,22 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
         }
         case LL_IOC_FLUSHCTX:
                 RETURN(ll_flush_ctx(inode));
+        case LL_IOC_GETFACL: {
+                struct rmtacl_ioctl_data ioc;
 
+                if (copy_from_user(&ioc, (void *)arg, sizeof(ioc)))
+                        RETURN(-EFAULT);
+
+                RETURN(ll_ioctl_getfacl(inode, &ioc));
+        }
+        case LL_IOC_SETFACL: {
+                struct rmtacl_ioctl_data ioc;
+
+                if (copy_from_user(&ioc, (void *)arg, sizeof(ioc)))
+                        RETURN(-EFAULT);
+
+                RETURN(ll_ioctl_setfacl(inode, &ioc));
+        }
         default:
                 RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg));
         }
index 9af7595..0f55396 100644 (file)
@@ -2139,6 +2139,22 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
         */
         case LL_IOC_FLUSHCTX:
                 RETURN(ll_flush_ctx(inode));
+        case LL_IOC_GETFACL: {
+                struct rmtacl_ioctl_data ioc;
+
+                if (copy_from_user(&ioc, (void *)arg, sizeof(ioc)))
+                        RETURN(-EFAULT);
+
+                RETURN(ll_ioctl_getfacl(inode, &ioc));
+        }
+        case LL_IOC_SETFACL: {
+                struct rmtacl_ioctl_data ioc;
+
+                if (copy_from_user(&ioc, (void *)arg, sizeof(ioc)))
+                        RETURN(-EFAULT);
+
+                RETURN(ll_ioctl_setfacl(inode, &ioc));
+        }
         default:
                 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
                                      (void *)arg));
@@ -2550,6 +2566,8 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
                inode->i_ino, inode->i_generation, inode, mask);
+        if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
+                return lustre_check_remote_perm(inode, mask);
         return generic_permission(inode, mask, lustre_check_acl);
 }
 #else
@@ -2565,6 +2583,9 @@ int ll_inode_permission(struct inode *inode, int mask)
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
                inode->i_ino, inode->i_generation, inode, mask);
 
+        if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
+                return lustre_check_remote_perm(inode, mask);
+
         if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
             (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
                 return -EROFS;
index 54c5c07..8eff9fa 100644 (file)
@@ -57,6 +57,21 @@ extern struct file_operations ll_pgcache_seq_fops;
 #define LLI_INODE_MAGIC                 0x111d0de5
 #define LLI_INODE_DEAD                  0xdeadd00d
 
+/* remote client permission cache */
+#define REMOTE_PERM_HASHSIZE 16
+
+/* llite setxid/access permission for user on remote client.
+ *
+ * One entry caches the access permission obtained from the MDS for a single
+ * uid/gid/fsuid/fsgid combination on one inode.  Entries are chained into
+ * the per-inode hash ll_inode_info::lli_remote_perms, bucketed by uid. */
+struct ll_remote_perm {
+        struct hlist_node       lrp_list;        /* hash bucket linkage */
+        uid_t                   lrp_uid;         /* compared with current->uid */
+        gid_t                   lrp_gid;         /* compared with current->gid */
+        uid_t                   lrp_fsuid;       /* compared with current->fsuid */
+        gid_t                   lrp_fsgid;       /* compared with current->fsgid */
+        int                     lrp_access_perm; /* MAY_READ/WRITE/EXEC, this
+                                                    is access permission with
+                                                    lrp_fsuid/lrp_fsgid. */
+};
+
 enum lli_flags {
         /* MDS has an authority for the Size-on-MDS attributes. */
         LLIF_MDS_SIZE_LOCK      = (1 << 0),
@@ -96,6 +111,10 @@ struct ll_inode_info {
 
         struct posix_acl       *lli_posix_acl;
 
+        /* remote permission hash */
+        struct hlist_head      *lli_remote_perms;
+        struct semaphore        lli_rmtperm_sem;
+
         struct list_head        lli_dead_list;
 
         struct semaphore        lli_och_sem; /* Protects access to och pointers
@@ -205,6 +224,7 @@ struct ll_rw_process_info {
 #define LL_SBI_USER_XATTR       0x08 /* support user xattr */
 #define LL_SBI_ACL              0x10 /* support ACL */
 #define LL_SBI_JOIN             0x20 /* support JOIN */
+#define LL_SBI_RMT_CLIENT       0x40 /* remote client */
 
 struct ll_sb_info {
         struct list_head          ll_list;
@@ -547,6 +567,8 @@ struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
 int ll_obd_statfs(struct inode *inode, void *arg);
 int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
 int ll_process_config(struct lustre_cfg *lcfg);
+int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc);
+int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc);
 
 /* llite/llite_nfs.c */
 extern struct export_operations lustre_export_operations;
@@ -685,6 +707,15 @@ ssize_t ll_getxattr(struct dentry *dentry, const char *name,
 ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int ll_removexattr(struct dentry *dentry, const char *name);
 
+/* llite/remote_perm.c */
+extern kmem_cache_t *ll_remote_perm_cachep;
+extern kmem_cache_t *ll_rmtperm_hash_cachep;
+
+struct hlist_head *alloc_rmtperm_hash(void);
+void free_rmtperm_hash(struct hlist_head *hash);
+int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm);
+int lustre_check_remote_perm(struct inode *inode, int mask);
+
 /* llite/llite_fid.c*/
 int ll_fid_md_init(struct ll_sb_info *sbi);
 int ll_fid_dt_init(struct ll_sb_info *sbi);
index 87b7b42..bd24410 100644 (file)
@@ -149,7 +149,8 @@ static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
 }
 
 static int client_common_fill_super(struct super_block *sb, 
-                                    char *md, char *dt)
+                                    char *md, char *dt,
+                                    uid_t nllu, gid_t nllg)
 {
         struct inode *root = 0;
         struct ll_sb_info *sbi = ll_s2sbi(sb);
@@ -200,6 +201,13 @@ static int client_common_fill_super(struct super_block *sb,
 
         /* real client */
         data->ocd_connect_flags |= OBD_CONNECT_REAL;
+        if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
+                data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT;
+                data->ocd_nllu = nllu;
+                data->ocd_nllg = nllg;
+        } else {
+                data->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT;
+        }
 
         err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data);
         if (err == -EBUSY) {
@@ -238,12 +246,23 @@ static int client_common_fill_super(struct super_block *sb,
                 sb->s_flags |= MS_POSIXACL;
 #endif
                 sbi->ll_flags |= LL_SBI_ACL;
-        } else
+        } else {
                 sbi->ll_flags &= ~LL_SBI_ACL;
+        }
 
         if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
                 sbi->ll_flags |= LL_SBI_JOIN;
 
+        if ((sbi->ll_flags & LL_SBI_RMT_CLIENT) &&
+            !(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT)) {
+                /* sometimes local client claims to be remote, but mds
+                 * will disagree when client gss not applied.
+                 *
+                 * ll_flags holds LL_SBI_* bits, not OBD_CONNECT_* bits, so
+                 * the remote state must be dropped with LL_SBI_RMT_CLIENT;
+                 * a clear bit is what marks the client as local. */
+                LCONSOLE_INFO("client claims to be remote, but server "
+                              "rejected, forced to be local\n");
+                sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
+        }
+
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
         /* We set sb->s_dev equal on all lustre clients in order to support
          * NFS export clustering.  NFSD requires that the FSID be the same
@@ -687,6 +706,11 @@ static int ll_options(char *options, int *flags)
                 if (tmp) {
                         goto next;
                 }
+                tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT);
+                if (tmp) {
+                        *flags |= tmp;
+                        goto next;
+                }
 
                 LCONSOLE_ERROR("Unknown option '%s', won't mount.\n", s1);
                 RETURN(-EINVAL);
@@ -718,6 +742,7 @@ void ll_lli_init(struct ll_inode_info *lli)
         lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
         lli->lli_open_fd_exec_count = 0;
         INIT_LIST_HEAD(&lli->lli_dead_list);
+        sema_init(&lli->lli_rmtperm_sem, 1);
 }
 
 /* COMPAT_146 */
@@ -991,7 +1016,9 @@ int ll_fill_super(struct super_block *sb)
         sprintf(md, "%s-%s", lprof->lp_md, ll_instance);
 
         /* connections, registrations, sb setup */
-        err = client_common_fill_super(sb, md, dt);
+        err = client_common_fill_super(sb, md, dt,
+                                       lsi->lsi_lmd->lmd_nllu,
+                                       lsi->lsi_lmd->lmd_nllg);
 
 out_free:
         if (md)
@@ -1167,10 +1194,17 @@ void ll_clear_inode(struct inode *inode)
 #ifdef CONFIG_FS_POSIX_ACL
         if (lli->lli_posix_acl) {
                 LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
+//                LASSERT(lli->lli_remote_perms == NULL);
                 posix_acl_release(lli->lli_posix_acl);
                 lli->lli_posix_acl = NULL;
         }
 #endif
+        if (lli->lli_remote_perms) {
+                LASSERT(sbi->ll_flags & LL_SBI_RMT_CLIENT);
+                LASSERT(lli->lli_posix_acl == NULL);
+                free_rmtperm_hash(lli->lli_remote_perms);
+                lli->lli_remote_perms = NULL;
+        }
 
         lli->lli_inode_magic = LLI_INODE_DEAD;
 
@@ -1670,6 +1704,8 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
                 spin_unlock(&lli->lli_lock);
         }
 #endif
+        if (body->valid & OBD_MD_FLRMTPERM)
+                ll_update_remote_perm(inode, md->remote_perm);
 
         if (body->valid & OBD_MD_FLATIME &&
             body->atime > LTIME_S(inode->i_atime))
@@ -2149,3 +2185,82 @@ int ll_process_config(struct lustre_cfg *lcfg)
         return(rc);
 }
 
+/* Backend of the LL_IOC_GETFACL ioctl.
+ *
+ * @ioc is the kernel copy of the ioctl argument, but ioc->cmd and ioc->res
+ * still point into user space.  The command is copied in, sent to the MDS as
+ * a getxattr of XATTR_NAME_LUSTRE_ACL, and the reply text is copied back out
+ * to ioc->res.
+ *
+ * Returns the non-negative md_getxattr() result on success, -EINVAL for an
+ * incomplete @ioc, -ENOMEM, -EFAULT, or the md_getxattr() error. */
+int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc)
+{
+        struct ptlrpc_request *req = NULL;
+        struct mds_body *body;
+        char *cmd, *buf;
+        int rc, buflen;
+        ENTRY;
+
+        /* every field comes straight from user space: reject bad input
+         * with an error instead of asserting on it. */
+        if (!ioc->cmd || !ioc->cmd_len || !ioc->res || !ioc->res_len)
+                RETURN(-EINVAL);
+
+        OBD_ALLOC(cmd, ioc->cmd_len);
+        if (!cmd)
+                RETURN(-ENOMEM);
+        if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len))
+                GOTO(out, rc = -EFAULT);
+
+        rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode),
+                          OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd,
+                          ioc->cmd_len, ioc->res_len, 0, &req);
+        if (rc < 0) {
+                CERROR("mdc_getxattr %s [%s] failed: %d\n",
+                       XATTR_NAME_LUSTRE_ACL, cmd, rc);
+                GOTO(out, rc);
+        }
+
+        body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body));
+        LASSERT(body);
+
+        /* the length must describe the same reply buffer the text is read
+         * from, i.e. REPLY_REC_OFF + 1 (the one following the mds_body). */
+        buflen = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF + 1);
+        LASSERT(buflen <= ioc->res_len);
+        buf = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF + 1, ioc->res_len);
+        LASSERT(buf);
+        if (copy_to_user(ioc->res, buf, buflen))
+                GOTO(out, rc = -EFAULT);
+        EXIT;
+out:
+        /* req is still NULL when we bailed out before the RPC was sent */
+        if (req)
+                ptlrpc_req_finished(req);
+        OBD_FREE(cmd, ioc->cmd_len);
+        return rc;
+}
+
+/* Backend of the LL_IOC_SETFACL ioctl.
+ *
+ * @ioc is the kernel copy of the ioctl argument, but ioc->cmd and ioc->res
+ * still point into user space.  The command is copied in, sent to the MDS as
+ * a setxattr of XATTR_NAME_LUSTRE_ACL, and the reply text is copied back out
+ * to ioc->res.
+ *
+ * Returns 0 on success, -EINVAL for an incomplete @ioc, -ENOMEM, -EFAULT,
+ * or the md_setxattr() error. */
+int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc)
+{
+        struct ptlrpc_request *req = NULL;
+        char *cmd, *buf;
+        int buflen, rc;
+        ENTRY;
+
+        /* every field comes straight from user space: reject bad input
+         * with an error instead of asserting on it. */
+        if (!ioc->cmd || !ioc->cmd_len || !ioc->res || !ioc->res_len)
+                RETURN(-EINVAL);
+
+        OBD_ALLOC(cmd, ioc->cmd_len);
+        if (!cmd)
+                RETURN(-ENOMEM);
+        if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len))
+                GOTO(out, rc = -EFAULT);
+
+        rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode),
+                          OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd,
+                          ioc->cmd_len, ioc->res_len, 0, &req);
+        if (rc) {
+                CERROR("mdc_setxattr %s [%s] failed: %d\n",
+                       XATTR_NAME_LUSTRE_ACL, cmd, rc);
+                GOTO(out, rc);
+        }
+
+        /* here the reply text is taken from the REPLY_REC_OFF buffer */
+        buflen = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF);
+        LASSERT(buflen <= ioc->res_len);
+        buf = lustre_msg_string(req->rq_repmsg, REPLY_REC_OFF, ioc->res_len);
+        LASSERT(buf);
+        if (copy_to_user(ioc->res, buf, buflen))
+                GOTO(out, rc = -EFAULT);
+        EXIT;
+out:
+        /* req is still NULL when we bailed out before the RPC was sent */
+        if (req)
+                ptlrpc_req_finished(req);
+        OBD_FREE(cmd, ioc->cmd_len);
+        return rc;
+}
index 12f06f5..d9f4c15 100644 (file)
@@ -315,6 +315,9 @@ void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
         op_data->namelen = namelen;
         op_data->create_mode = mode;
         op_data->mod_time = CURRENT_SECONDS;
+        op_data->fsuid = current->fsuid;
+        op_data->fsgid = current->fsgid;
+        op_data->cap = current->cap_effective;
 }
 
 static void ll_d_add(struct dentry *de, struct inode *inode)
diff --git a/lustre/llite/remote_perm.c b/lustre/llite/remote_perm.c
new file mode 100644 (file)
index 0000000..6eaf01e
--- /dev/null
@@ -0,0 +1,285 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Permission Cache for Remote Client
+ *   Author: Lai Siyao <lsy@clusterfs.com>
+ *   Author: Fan Yong <fanyong@clusterfs.com>
+ *
+ *  Copyright (c) 2004-2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+#include <lustre_lite.h>
+#include <lustre_ha.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include "llite_internal.h"
+
+kmem_cache_t *ll_remote_perm_cachep = NULL;
+kmem_cache_t *ll_rmtperm_hash_cachep = NULL;
+
+/* Take one permission entry from ll_remote_perm_cachep and initialise its
+ * hash linkage.  Returns NULL when the slab allocation fails. */
+static inline struct ll_remote_perm *alloc_ll_remote_perm(void)
+{
+        struct ll_remote_perm *entry;
+
+        OBD_SLAB_ALLOC(entry, ll_remote_perm_cachep, SLAB_KERNEL,
+                       sizeof(*entry));
+        if (entry == NULL)
+                return NULL;
+
+        INIT_HLIST_NODE(&entry->lrp_list);
+        return entry;
+}
+
+/* Unlink @lrp from its hash bucket, if it is on one, and hand it back to
+ * ll_remote_perm_cachep. */
+static inline void free_ll_remote_perm(struct ll_remote_perm *lrp)
+{
+        struct hlist_node *link = &lrp->lrp_list;
+
+        if (!hlist_unhashed(link))
+                hlist_del(link);
+
+        OBD_SLAB_FREE(lrp, ll_remote_perm_cachep, sizeof(*lrp));
+}
+
+/* Allocate a hash table of REMOTE_PERM_HASHSIZE empty buckets from
+ * ll_rmtperm_hash_cachep.  Returns NULL when the slab allocation fails. */
+struct hlist_head *alloc_rmtperm_hash(void)
+{
+        struct hlist_head *table;
+        struct hlist_head *bucket;
+
+        OBD_SLAB_ALLOC(table, ll_rmtperm_hash_cachep, SLAB_KERNEL,
+                       REMOTE_PERM_HASHSIZE * sizeof(*table));
+        if (table == NULL)
+                return NULL;
+
+        for (bucket = table; bucket < table + REMOTE_PERM_HASHSIZE; bucket++)
+                INIT_HLIST_HEAD(bucket);
+
+        return table;
+}
+
+/* Free every entry chained on @hash, then the table itself. */
+void free_rmtperm_hash(struct hlist_head *hash)
+{
+        struct ll_remote_perm *entry;
+        struct hlist_node *pos, *n;
+        int bucket;
+
+        for (bucket = 0; bucket < REMOTE_PERM_HASHSIZE; bucket++) {
+                /* _safe variant: each entry is unlinked while we walk */
+                hlist_for_each_entry_safe(entry, pos, n, &hash[bucket],
+                                          lrp_list) {
+                        free_ll_remote_perm(entry);
+                }
+        }
+
+        OBD_SLAB_FREE(hash, ll_rmtperm_hash_cachep,
+                      REMOTE_PERM_HASHSIZE * sizeof(*hash));
+}
+
+/* Map @uid to a bucket index by keeping its low bits; the mask assumes
+ * REMOTE_PERM_HASHSIZE is a power of two. */
+static inline int remote_perm_hashfunc(uid_t uid)
+{
+        const uid_t bucket_mask = REMOTE_PERM_HASHSIZE - 1;
+
+        return uid & bucket_mask;
+}
+
+/* Look up the cached permission of the calling task (uid/gid/fsuid/fsgid)
+ * and test it against @mask.
+ *
+ * Returns 0 if every bit of @mask is granted, -EACCES if not, and -ENOENT
+ * when nothing is cached for the caller.
+ *
+ * NB: setxid permission is not checked here, instead it's done on
+ * MDS when client get remote permission. (lookup/mdc_get_remote_perm). */
+static int do_check_remote_perm(struct ll_inode_info *lli, int mask)
+{
+        struct ll_remote_perm *entry, *match = NULL;
+        struct hlist_head *bucket;
+        struct hlist_node *pos;
+        int rc;
+        ENTRY;
+
+        if (lli->lli_remote_perms == NULL)
+                RETURN(-ENOENT);
+
+        bucket = &lli->lli_remote_perms[remote_perm_hashfunc(current->uid)];
+
+        spin_lock(&lli->lli_lock);
+        hlist_for_each_entry(entry, pos, bucket, lrp_list) {
+                if (entry->lrp_uid == current->uid &&
+                    entry->lrp_gid == current->gid &&
+                    entry->lrp_fsuid == current->fsuid &&
+                    entry->lrp_fsgid == current->fsgid) {
+                        match = entry;
+                        break;
+                }
+        }
+
+        if (match == NULL)
+                GOTO(out, rc = -ENOENT);
+
+        CDEBUG(D_SEC, "found remote perm: %u/%u/%u/%u - %#x\n",
+               match->lrp_uid, match->lrp_gid, match->lrp_fsuid,
+               match->lrp_fsgid, match->lrp_access_perm);
+        if ((match->lrp_access_perm & mask) == mask)
+                rc = 0;
+        else
+                rc = -EACCES;
+        GOTO(out, rc);
+out:
+        spin_unlock(&lli->lli_lock);
+        return rc;
+}
+
+/* Store the permission record @perm in the remote permission hash of
+ * @inode, creating the hash on first use and overwriting an existing entry
+ * for the same uid/gid/fsuid/fsgid.
+ *
+ * Returns 0 on success, -EAGAIN when @perm does not describe the calling
+ * task's current uid/gid/fsuid/fsgid, or -ENOMEM. */
+int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct ll_remote_perm *lrp, *tmp = NULL;
+        struct hlist_head *head, *perm_hash = NULL;
+        struct hlist_node *node;
+        ENTRY;
+
+        LASSERT(ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT);
+
+        if (perm->rp_uid != current->uid ||
+            perm->rp_gid != current->gid ||
+            perm->rp_fsuid != current->fsuid ||
+            perm->rp_fsgid != current->fsgid) {
+                /* user might setxid in this small period */
+                CDEBUG(D_SEC,
+                       "remote perm user %u/%u/%u/%u != current %u/%u/%u/%u\n",
+                       perm->rp_uid, perm->rp_gid, perm->rp_fsuid,
+                       perm->rp_fsgid, current->uid, current->gid,
+                       current->fsuid, current->fsgid);
+                RETURN(-EAGAIN);
+        }
+
+        /* allocate everything before taking the spinlock */
+        if (!lli->lli_remote_perms) {
+                perm_hash = alloc_rmtperm_hash();
+                if (perm_hash == NULL) {
+                        CERROR("alloc lli_remote_perms failed!\n");
+                        RETURN(-ENOMEM);
+                }
+        }
+
+        lrp = alloc_ll_remote_perm();
+        if (!lrp) {
+                CERROR("alloc memory for ll_remote_perm failed!\n");
+                /* don't leak the hash table allocated just above */
+                if (perm_hash)
+                        free_rmtperm_hash(perm_hash);
+                RETURN(-ENOMEM);
+        }
+
+        spin_lock(&lli->lli_lock);
+
+        /* install our table, or drop it if the inode got one meanwhile */
+        if (!lli->lli_remote_perms)
+                lli->lli_remote_perms = perm_hash;
+        else if (perm_hash)
+                free_rmtperm_hash(perm_hash);
+
+        head = lli->lli_remote_perms + remote_perm_hashfunc(perm->rp_uid);
+        hlist_for_each_entry(tmp, node, head, lrp_list) {
+                if (tmp->lrp_uid != current->uid)
+                        continue;
+                if (tmp->lrp_gid != current->gid)
+                        continue;
+                if (tmp->lrp_fsuid != current->fsuid)
+                        continue;
+                if (tmp->lrp_fsgid != current->fsgid)
+                        continue;
+                /* an entry already exists: reuse it, discard the new one */
+                free_ll_remote_perm(lrp);
+                lrp = tmp;
+                break;
+        }
+
+        lrp->lrp_uid         = perm->rp_uid;
+        lrp->lrp_gid         = perm->rp_gid;
+        lrp->lrp_fsuid       = perm->rp_fsuid;
+        lrp->lrp_fsgid       = perm->rp_fsgid;
+        lrp->lrp_access_perm = perm->rp_access_perm;
+        /* lrp == tmp only when the loop above matched, in which case the
+         * entry is already on the list and must not be added again. */
+        if (lrp != tmp)
+                hlist_add_head(&lrp->lrp_list, head);
+        spin_unlock(&lli->lli_lock);
+
+        CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n",
+               lrp, lrp->lrp_uid, lrp->lrp_gid, lrp->lrp_fsuid, lrp->lrp_fsgid,
+               lrp->lrp_access_perm);
+
+        RETURN(0);
+}
+
+/* Permission check used by ll_inode_permission() on remote clients.
+ *
+ * The cached permission of the calling task is tried first.  On a cache
+ * miss (-ENOENT) the permission is fetched from the MDS with
+ * md_get_remote_perm(), stored with ll_update_remote_perm(), and the check
+ * is retried.  lli_rmtperm_sem is held around the fetch and the update.
+ *
+ * Returns 0 or -EACCES from the cache, the md_get_remote_perm() error, or
+ * -ENOMEM from the cache update.  May sleep. */
+int lustre_check_remote_perm(struct inode *inode, int mask)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct ptlrpc_request *req = NULL;
+        struct mdt_remote_perm *perm;
+        int i = 0, rc;
+        ENTRY;
+
+check:
+        /* fast path: answer from the cache without taking the semaphore */
+        rc = do_check_remote_perm(lli, mask);
+        if (rc != -ENOENT)
+                RETURN(rc);
+
+        might_sleep();
+
+        down(&lli->lli_rmtperm_sem);
+        /* check again */
+        rc = do_check_remote_perm(lli, mask);
+        if (rc != -ENOENT) {
+                up(&lli->lli_rmtperm_sem);
+                RETURN(rc);
+        }
+
+        /* bound the number of fetch attempts; note that LBUG() is reached
+         * with lli_rmtperm_sem still held. */
+        if (i++ > 5) {
+                CERROR("check remote perm falls in dead loop!\n");
+                LBUG();
+        }
+
+        rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), &req);
+        if (rc) {
+                up(&lli->lli_rmtperm_sem);
+                RETURN(rc);
+        }
+
+        perm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, sizeof(*perm));
+        LASSERT(perm);
+        LASSERT_REPSWABBED(req, REPLY_REC_OFF + 1);
+
+        rc = ll_update_remote_perm(inode, perm);
+        up(&lli->lli_rmtperm_sem);
+
+        ptlrpc_req_finished(req);
+
+        /* only -ENOMEM is fatal; any other result (including -EAGAIN when
+         * the caller's identity changed) goes back to the cache lookup. */
+        if (rc == -ENOMEM)
+                RETURN(rc);
+
+        goto check;
+}
+
+#if 0  /* NB: remote perms can't be freed in ll_mdc_blocking_ast of UPDATE lock,
+        * because it will fail sanity test 48.
+        */
+/* Disabled: drop every cached permission entry of @inode under lli_lock.
+ * The hash table itself is left allocated; only its entries are freed. */
+void ll_free_remote_perms(struct inode *inode)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct hlist_head *hash = lli->lli_remote_perms;
+        struct ll_remote_perm *lrp;
+        struct hlist_node *node, *next;
+        int i;
+
+        LASSERT(hash);
+
+        spin_lock(&lli->lli_lock);
+
+        for (i = 0; i < REMOTE_PERM_HASHSIZE; i++) {
+                hlist_for_each_entry_safe(lrp, node, next, hash + i, lrp_list)
+                        free_ll_remote_perm(lrp);
+        }
+
+        spin_unlock(&lli->lli_lock);
+}
+#endif
index 1b1351c..77cc526 100644 (file)
@@ -71,6 +71,31 @@ static int __init init_lustre_lite(void)
         if (ll_file_data_slab == NULL)
                 return -ENOMEM;
 
+        LASSERT(ll_remote_perm_cachep == NULL);
+        ll_remote_perm_cachep = kmem_cache_create("ll_remote_perm",
+                                                  sizeof(struct ll_remote_perm),
+                                                  0, SLAB_HWCACHE_ALIGN, NULL,
+                                                  NULL);
+        if (!ll_remote_perm_cachep) {
+                kmem_cache_destroy(ll_file_data_slab);
+                ll_file_data_slab = NULL;
+                return -ENOMEM;
+        }
+
+        LASSERT(ll_rmtperm_hash_cachep == NULL);
+        ll_rmtperm_hash_cachep = kmem_cache_create("ll_rmtperm_hash",
+                                                  REMOTE_PERM_HASHSIZE *
+                                                  sizeof(struct list_head),
+                                                  0, SLAB_HWCACHE_ALIGN, NULL,
+                                                  NULL);
+        if (!ll_rmtperm_hash_cachep) {
+                kmem_cache_destroy(ll_remote_perm_cachep);
+                kmem_cache_destroy(ll_file_data_slab);
+                ll_remote_perm_cachep = NULL;
+                ll_file_data_slab = NULL;
+                return -ENOMEM;
+        }
+
         if (proc_lustre_root)
                 proc_lustre_fs_root = proc_mkdir("llite", proc_lustre_root);
 
@@ -94,6 +119,14 @@ static void __exit exit_lustre_lite(void)
 
         ll_unregister_cache(&ll_cache_definition);
 
+        rc = kmem_cache_destroy(ll_rmtperm_hash_cachep);
+        LASSERTF(rc == 0, "couldn't destroy ll_rmtperm_hash_cachep\n");
+        ll_rmtperm_hash_cachep = NULL;
+
+        rc = kmem_cache_destroy(ll_remote_perm_cachep);
+        LASSERTF(rc == 0, "couldn't destroy ll_remote_perm_cachep\n");
+        ll_remote_perm_cachep = NULL;
+
         rc = kmem_cache_destroy(ll_file_data_slab);
         LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n");
         if (ll_async_page_slab) {
index 2dd87ad..1e6052c 100644 (file)
@@ -116,6 +116,29 @@ static int __init init_lustre_lite(void)
                 return -ENOMEM;
         }
 
+        LASSERT(ll_remote_perm_cachep == NULL);
+        ll_remote_perm_cachep = kmem_cache_create("ll_remote_perm_cache",
+                                                  sizeof(struct ll_remote_perm),
+                                                  0, 0, NULL, NULL);
+        if (!ll_remote_perm_cachep) {
+                kmem_cache_destroy(ll_file_data_slab);
+                ll_file_data_slab = NULL;
+                return -ENOMEM;
+        }
+
+        LASSERT(ll_rmtperm_hash_cachep == NULL);
+        ll_rmtperm_hash_cachep = kmem_cache_create("ll_rmtperm_hash_cache",
+                                                   REMOTE_PERM_HASHSIZE *
+                                                   sizeof(struct list_head),
+                                                   0, 0, NULL, NULL);
+        if (!ll_rmtperm_hash_cachep) {
+                kmem_cache_destroy(ll_remote_perm_cachep);
+                kmem_cache_destroy(ll_file_data_slab);
+                ll_remote_perm_cachep = NULL;
+                ll_file_data_slab = NULL;
+                return -ENOMEM;
+        }
+
         proc_lustre_fs_root = proc_lustre_root ?
                               proc_mkdir("llite", proc_lustre_root) : NULL;
 
@@ -141,6 +164,14 @@ static void __exit exit_lustre_lite(void)
 
         ll_destroy_inodecache();
 
+        rc = kmem_cache_destroy(ll_rmtperm_hash_cachep);
+        LASSERTF(rc == 0, "couldn't destroy ll_rmtperm_hash_cachep\n");
+        ll_rmtperm_hash_cachep = NULL;
+
+        rc = kmem_cache_destroy(ll_remote_perm_cachep);
+        LASSERTF(rc == 0, "couldn't destroy ll_remote_perm_cachep\n");
+        ll_remote_perm_cachep = NULL;
+
         rc = kmem_cache_destroy(ll_file_data_slab);
         LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n");
         if (ll_async_page_slab) {
index b103c42..83ed667 100644 (file)
@@ -1677,6 +1677,7 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
         
         op_data->fsuid = current->fsuid;
         op_data->fsgid = current->fsgid;
+        op_data->cap   = current->cap_effective;
         rc = md_link(lmv->tgts[mds].ltd_exp, op_data, request);
 
         RETURN(rc);
@@ -1776,6 +1777,7 @@ request:
         }
         op_data->fsuid = current->fsuid;
         op_data->fsgid = current->fsgid;
+        op_data->cap   = current->cap_effective;
         rc = md_rename(lmv->tgts[mds].ltd_exp, op_data, old, oldlen,
                        new, newlen, request);
         RETURN(rc);
@@ -2121,6 +2123,7 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
                 RETURN(PTR_ERR(tgt_exp));
         op_data->fsuid = current->fsuid;
         op_data->fsgid = current->fsgid;
+        op_data->cap   = current->cap_effective;
         rc = md_unlink(tgt_exp, op_data, request);
         RETURN(rc);
 }
@@ -2452,8 +2455,9 @@ int lmv_set_open_replay_data(struct obd_export *exp,
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
         struct obd_export *tgt_exp;
+
         ENTRY;
-        
+
         tgt_exp = lmv_get_export(lmv, och->och_fid);
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
@@ -2470,10 +2474,31 @@ int lmv_clear_open_replay_data(struct obd_export *exp,
         ENTRY;
 
         tgt_exp = lmv_get_export(lmv, och->och_fid);
+        /* lmv_get_export() may hand back an ERR_PTR; check it here as
+         * lmv_set_open_replay_data() does before using the export. */
+        if (IS_ERR(tgt_exp))
+                RETURN(PTR_ERR(tgt_exp));
+
+        RETURN(md_clear_open_replay_data(tgt_exp, och));
+}
+
+/* LMV implementation of m_get_remote_perm: after checking that the LMV is
+ * connected, pick the target export for @fid and forward the request to it
+ * with md_get_remote_perm().  Returns that call's result, or the error from
+ * lmv_check_connect() / lmv_get_export(). */
+static int lmv_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
+                               struct ptlrpc_request **request)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct lmv_obd *lmv = &obd->u.lmv;
+        struct obd_export *tgt_exp;
+        int rc;
+
+        ENTRY;
+
+        rc = lmv_check_connect(obd);
+        if (rc)
+                RETURN(rc);
+
+        tgt_exp = lmv_get_export(lmv, fid);
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
 
-        RETURN(md_clear_open_replay_data(tgt_exp, och));
+        rc = md_get_remote_perm(tgt_exp, fid, request);
+
+        RETURN(rc);
 }
 
 struct obd_ops lmv_obd_ops = {
@@ -2524,7 +2549,8 @@ struct md_ops lmv_md_ops = {
         .m_get_lustre_md        = lmv_get_lustre_md,
         .m_free_lustre_md       = lmv_free_lustre_md,
         .m_set_open_replay_data = lmv_set_open_replay_data,
-        .m_clear_open_replay_data = lmv_clear_open_replay_data
+        .m_clear_open_replay_data = lmv_clear_open_replay_data,
+        .m_get_remote_perm      = lmv_get_remote_perm
 };
 
 int __init lmv_init(void)
index f89cc99..8ae1790 100644 (file)
@@ -66,10 +66,8 @@ int obd_memmax;
 #endif
 
 static void push_group_info(struct lvfs_run_ctxt *save,
-                            struct upcall_cache_entry *uce)
+                            struct group_info *ginfo)
 {
-        struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
-
         if (!ginfo) {
                 save->ngroups = current_ngroups;
                 current_ngroups = 0;
@@ -97,10 +95,8 @@ static void push_group_info(struct lvfs_run_ctxt *save,
 }
 
 static void pop_group_info(struct lvfs_run_ctxt *save,
-                           struct upcall_cache_entry *uce)
+                           struct group_info *ginfo)
 {
-        struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
-
         if (!ginfo) {
                 current_ngroups = save->ngroups;
         } else {
@@ -142,6 +138,7 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
         save->pwd = dget(current->fs->pwd);
         save->pwdmnt = mntget(current->fs->pwdmnt);
         save->luc.luc_umask = current->fs->umask;
+        save->ngroups = current->group_info->ngroups;
 
         LASSERT(save->pwd);
         LASSERT(save->pwdmnt);
@@ -149,14 +146,22 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
         LASSERT(new_ctx->pwdmnt);
 
         if (uc) {
+                save->luc.luc_uid = current->uid;
+                save->luc.luc_gid = current->gid;
                 save->luc.luc_fsuid = current->fsuid;
                 save->luc.luc_fsgid = current->fsgid;
                 save->luc.luc_cap = current->cap_effective;
 
+                current->uid = uc->luc_uid;
+                current->gid = uc->luc_gid;
                 current->fsuid = uc->luc_fsuid;
                 current->fsgid = uc->luc_fsgid;
                 current->cap_effective = uc->luc_cap;
-                push_group_info(save, uc->luc_uce);
+
+                push_group_info(save,
+                                uc->luc_ginfo ?:
+                                uc->luc_identity ? uc->luc_identity->mi_ginfo :
+                                                   NULL);
         }
         current->fs->umask = 0; /* umask already applied on client */
         set_fs(new_ctx->fs);
@@ -206,10 +211,15 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
         mntput(saved->pwdmnt);
         current->fs->umask = saved->luc.luc_umask;
         if (uc) {
+                current->uid = saved->luc.luc_uid;
+                current->gid = saved->luc.luc_gid;
                 current->fsuid = saved->luc.luc_fsuid;
                 current->fsgid = saved->luc.luc_fsgid;
                 current->cap_effective = saved->luc.luc_cap;
-                pop_group_info(saved, uc->luc_uce);
+                pop_group_info(saved,
+                               uc->luc_ginfo ?:
+                               uc->luc_identity ? uc->luc_identity->mi_ginfo :
+                                                  NULL);
         }
 
         /*
index d2f5b0c..9c94c77 100644 (file)
@@ -74,11 +74,12 @@ void groups_free(struct group_info *ginfo)
 }
 #endif
 
-static struct upcall_cache_entry *alloc_entry(__u64 key)
+static struct upcall_cache_entry *alloc_entry(struct upcall_cache *cache,
+                                              __u64 key, void *args)
 {
         struct upcall_cache_entry *entry;
 
-        OBD_ALLOC(entry, sizeof(*entry));
+        OBD_ALLOC_PTR(entry);
         if (!entry)
                 return NULL;
 
@@ -87,34 +88,66 @@ static struct upcall_cache_entry *alloc_entry(__u64 key)
         entry->ue_key = key;
         atomic_set(&entry->ue_refcount, 0);
         init_waitqueue_head(&entry->ue_waitq);
+        if (cache->uc_ops->init_entry)
+                cache->uc_ops->init_entry(entry, args);
         return entry;
 }
 
-/* protected by hash lock */
-static void free_entry(struct upcall_cache_entry *entry)
+/* Destroy @entry: call the cache's optional free_entry hook, unlink the
+ * entry from its hash chain and release its memory.
+ * protected by cache lock */
+static void free_entry(struct upcall_cache *cache,
+                       struct upcall_cache_entry *entry)
 {
-        if (entry->ue_group_info)
-                groups_free(entry->ue_group_info);
+        if (cache->uc_ops->free_entry)
+                cache->uc_ops->free_entry(cache, entry);
+
         list_del(&entry->ue_hash);
         CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
                entry, entry->ue_key);
-        OBD_FREE(entry, sizeof(*entry));
+        OBD_FREE_PTR(entry);
+}
+
+/* Decide whether @entry is the one a lookup for @key/@args is after.
+ * Returns 0 on a match and non-zero otherwise.  The keys have to be equal
+ * first; after that the cache's optional upcall_compare method, when set,
+ * makes the final decision. */
+static inline int upcall_compare(struct upcall_cache *cache,
+                                 struct upcall_cache_entry *entry,
+                                 __u64 key, void *args)
+{
+        if (entry->ue_key == key) {
+                if (cache->uc_ops->upcall_compare == NULL)
+                        return 0;
+
+                return cache->uc_ops->upcall_compare(cache, entry, key, args);
+        }
+
+        return -1;
+}
+
+/* Decide whether @entry is the one a downcall for @key/@args refers to.
+ * Returns 0 on a match and non-zero otherwise.  The keys have to be equal
+ * first; after that the cache's optional downcall_compare method, when set,
+ * makes the final decision. */
+static inline int downcall_compare(struct upcall_cache *cache,
+                                   struct upcall_cache_entry *entry,
+                                   __u64 key, void *args)
+{
+        if (entry->ue_key == key) {
+                if (cache->uc_ops->downcall_compare == NULL)
+                        return 0;
+
+                return cache->uc_ops->downcall_compare(cache, entry, key, args);
+        }
+
+        return -1;
 }
 
-static void get_entry(struct upcall_cache_entry *entry)
+/* Take one more reference on @entry. */
+static inline void get_entry(struct upcall_cache_entry *entry)
 {
         atomic_inc(&entry->ue_refcount);
 }
 
-static void put_entry(struct upcall_cache_entry *entry)
+/* Drop one reference on @entry.  If that was the last reference and the
+ * entry is flagged invalid or expired, the entry is freed; the caller must
+ * not touch @entry afterwards. */
+static inline void put_entry(struct upcall_cache *cache,
+                             struct upcall_cache_entry *entry)
 {
         if (atomic_dec_and_test(&entry->ue_refcount) &&
             (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
-                free_entry(entry);
+                free_entry(cache, entry);
         }
 }
 
-static int check_unlink_entry(struct upcall_cache_entry *entry)
+static int check_unlink_entry(struct upcall_cache *cache,
+                              struct upcall_cache_entry *entry)
 {
         if (UC_CACHE_IS_VALID(entry) &&
             time_before(jiffies, entry->ue_expire))
@@ -132,86 +165,19 @@ static int check_unlink_entry(struct upcall_cache_entry *entry)
 
         list_del_init(&entry->ue_hash);
         if (!atomic_read(&entry->ue_refcount))
-                free_entry(entry);
+                free_entry(cache, entry);
         return 1;
 }
 
-static int refresh_entry(struct upcall_cache *hash,
+/* Start the upcall for @entry by delegating to the cache's do_upcall
+ * method, which every cache must provide (asserted), and return whatever
+ * that method returns. */
+static inline int refresh_entry(struct upcall_cache *cache,
                          struct upcall_cache_entry *entry)
 {
-        char *argv[4];
-        char *envp[3];
-        char keystr[16];
-        int rc;
-        ENTRY;
-
-        snprintf(keystr, 16, LPU64, entry->ue_key);
-
-        CDEBUG(D_INFO, "The groups upcall is: %s \n", hash->uc_upcall);
-        argv[0] = hash->uc_upcall;
-        argv[1] = hash->uc_name;
-        argv[2] = keystr;
-        argv[3] = NULL;
-
-        envp[0] = "HOME=/";
-        envp[1] = "PATH=/sbin:/usr/sbin";
-        envp[2] = NULL;
-
-        rc = USERMODEHELPER(argv[0], argv, envp);
-        if (rc < 0) {
-                CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; "
-                       "check /proc/fs/lustre/mds/%s/group_upcall\n",
-                       hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]);
-        } else {
-                CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name,
-                       argv[0], argv[1], argv[2]);
-                rc = 0;
-        }
-        RETURN(rc);
+        LASSERT(cache->uc_ops->do_upcall);
+        return cache->uc_ops->do_upcall(cache, entry);
 }
 
-static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary,
-                                __u32 ngroups, __u32 *groups)
-{
-        struct group_info *ginfo;
-        int i, j;
-        ENTRY;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
-        if (ngroups > NGROUPS)
-                ngroups = NGROUPS;
-#endif
-
-        if (ngroups > NGROUPS_MAX) {
-                CERROR("using first %d supplementary groups for uid "LPU64"\n",
-                       NGROUPS_MAX, entry->ue_key);
-                ngroups = NGROUPS_MAX;
-        }
-
-        ginfo = groups_alloc(ngroups);
-        if (!ginfo) {
-                CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n",
-                       entry->ue_key, ngroups);
-                RETURN(-ENOMEM);
-        }
-        entry->ue_group_info = ginfo;
-        entry->ue_primary = primary;
-
-        for (i = 0; i < ginfo->nblocks; i++) {
-                int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups);
-                int off = i * NGROUPS_PER_BLOCK;
-
-                for (j = 0; j < cp_count; j++)
-                        ginfo->blocks[i][j] = groups[off + j];
-
-                ngroups -= cp_count;
-        }
-        RETURN(0);
-}
-
-struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
-                                                  __u64 key, __u32 primary,
-                                                  __u32 ngroups, __u32 *groups)
+struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
+                                                  __u64 key, void *args)
 {
         struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
         struct list_head *head;
@@ -219,49 +185,17 @@ struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
         int rc, found;
         ENTRY;
 
-        LASSERT(hash);
+        LASSERT(cache);
 
-        if (strcmp(hash->uc_upcall, "NONE") == 0) {
-                new = alloc_entry(key);
-                if (!new) {
-                        CERROR("fail to alloc entry\n");
-                        RETURN(NULL);
-                }
-                get_entry(new);
-
-                /* We have to sort the groups for 2.6 kernels */
-                LASSERT(ngroups <= 2);
-                if (ngroups == 2 && groups[1] == -1)
-                        ngroups--;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
-                /* 2.6 needs groups array sorted */
-                if (ngroups == 2 && groups[0] > groups[1]) {
-                        __u32 tmp = groups[1];
-                        groups[1] = groups[0];
-                        groups[0] = tmp;
-                }
-#endif
-                if (ngroups > 0 && groups[0] == -1) {
-                        groups[0] = groups[1];
-                        ngroups--;
-                }
-
-                rc = entry_set_group_info(new, primary, ngroups, groups);
-
-                /* We can't cache this entry as it only has a subset of
-                 * the user's groups, as sent in suppgid1, suppgid2. */
-                UC_CACHE_SET_EXPIRED(new);
-                RETURN(new);
-        }
-        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+        head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
 find_again:
         found = 0;
-        spin_lock(&hash->uc_lock);
+        spin_lock(&cache->uc_lock);
         list_for_each_entry_safe(entry, next, head, ue_hash) {
                 /* check invalid & expired items */
-                if (check_unlink_entry(entry))
+                if (check_unlink_entry(cache, entry))
                         continue;
-                if (entry->ue_key == key) {
+                if (upcall_compare(cache, entry, key, args) == 0) {
                         found = 1;
                         break;
                 }
@@ -269,8 +203,8 @@ find_again:
 
         if (!found) { /* didn't find it */
                 if (!new) {
-                        spin_unlock(&hash->uc_lock);
-                        new = alloc_entry(key);
+                        spin_unlock(&cache->uc_lock);
+                        new = alloc_entry(cache, key, args);
                         if (!new) {
                                 CERROR("fail to alloc entry\n");
                                 RETURN(ERR_PTR(-ENOMEM));
@@ -282,7 +216,7 @@ find_again:
                 }
         } else {
                 if (new) {
-                        free_entry(new);
+                        free_entry(cache, new);
                         new = NULL;
                 }
                 list_move(&entry->ue_hash, head);
@@ -293,10 +227,10 @@ find_again:
         if (UC_CACHE_IS_NEW(entry)) {
                 UC_CACHE_SET_ACQUIRING(entry);
                 UC_CACHE_CLEAR_NEW(entry);
-                entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire;
-                spin_unlock(&hash->uc_lock);
-                rc = refresh_entry(hash, entry);
-                spin_lock(&hash->uc_lock);
+                entry->ue_acquire_expire = jiffies + cache->uc_acquire_expire;
+                spin_unlock(&cache->uc_lock);
+                rc = refresh_entry(cache, entry);
+                spin_lock(&cache->uc_lock);
                 if (rc < 0) {
                         UC_CACHE_CLEAR_ACQUIRING(entry);
                         UC_CACHE_SET_INVALID(entry);
@@ -307,32 +241,31 @@ find_again:
          * this item, just wait it complete
          */
         if (UC_CACHE_IS_ACQUIRING(entry)) {
+                unsigned long expiry = jiffies + cache->uc_acquire_expire;
+
                 init_waitqueue_entry(&wait, current);
                 add_wait_queue(&entry->ue_waitq, &wait);
                 set_current_state(TASK_INTERRUPTIBLE);
-                spin_unlock(&hash->uc_lock);
+                spin_unlock(&cache->uc_lock);
 
-                schedule_timeout(hash->uc_acquire_expire);
+                schedule_timeout(cache->uc_acquire_expire);
 
-                spin_lock(&hash->uc_lock);
+                spin_lock(&cache->uc_lock);
                 remove_wait_queue(&entry->ue_waitq, &wait);
                 if (UC_CACHE_IS_ACQUIRING(entry)) {
-                        static unsigned long next;
                         /* we're interrupted or upcall failed in the middle */
-                        if (time_after(jiffies, next)) {
-                                CERROR("key "LPU64" update failed: check %s\n",
-                                       entry->ue_key, hash->uc_upcall);
-                                next = jiffies + 1800;
-                        }
-                        put_entry(entry);
-                        GOTO(out, entry = ERR_PTR(-EIDRM));
+                        rc = time_before(jiffies, expiry) ? -EINTR : -ETIMEDOUT;
+                        put_entry(cache, entry);
+                        CERROR("key "LPU64" update failed: rc = %d, check %s\n",
+                               entry->ue_key, rc, cache->uc_upcall);
+                        GOTO(out, entry = ERR_PTR(rc));
                 }
                 /* fall through */
         }
 
         /* invalid means error, don't need to try again */
         if (UC_CACHE_IS_INVALID(entry)) {
-                put_entry(entry);
+                put_entry(cache, entry);
                 GOTO(out, entry = ERR_PTR(-EIDRM));
         }
 
@@ -340,15 +273,15 @@ find_again:
          * We can't refresh the existing one because some
          * memory might be shared by multiple processes.
          */
-        if (check_unlink_entry(entry)) {
+        if (check_unlink_entry(cache, entry)) {
                 /* if expired, try again. but if this entry is
                  * created by me but too quickly turn to expired
                  * without any error, should at least give a
                  * chance to use it once.
                  */
                 if (entry != new) {
-                        put_entry(entry);
-                        spin_unlock(&hash->uc_lock);
+                        put_entry(cache, entry);
+                        spin_unlock(&cache->uc_lock);
                         new = NULL;
                         goto find_again;
                 }
@@ -356,12 +289,12 @@ find_again:
 
         /* Now we know it's good */
 out:
-        spin_unlock(&hash->uc_lock);
+        spin_unlock(&cache->uc_lock);
         RETURN(entry);
 }
 EXPORT_SYMBOL(upcall_cache_get_entry);
 
-void upcall_cache_put_entry(struct upcall_cache *hash,
+void upcall_cache_put_entry(struct upcall_cache *cache,
                             struct upcall_cache_entry *entry)
 {
         ENTRY;
@@ -372,28 +305,28 @@ void upcall_cache_put_entry(struct upcall_cache *hash,
         }
 
         LASSERT(atomic_read(&entry->ue_refcount) > 0);
-        spin_lock(&hash->uc_lock);
-        put_entry(entry);
-        spin_unlock(&hash->uc_lock);
+        spin_lock(&cache->uc_lock);
+        put_entry(cache, entry);
+        spin_unlock(&cache->uc_lock);
         EXIT;
 }
 EXPORT_SYMBOL(upcall_cache_put_entry);
 
-int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
-                          __u32 primary, __u32 ngroups, __u32 *groups)
+int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
+                          void *args)
 {
         struct upcall_cache_entry *entry = NULL;
         struct list_head *head;
         int found = 0, rc = 0;
         ENTRY;
 
-        LASSERT(hash);
+        LASSERT(cache);
 
-        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+        head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
 
-        spin_lock(&hash->uc_lock);
+        spin_lock(&cache->uc_lock);
         list_for_each_entry(entry, head, ue_hash) {
-                if (entry->ue_key == key) {
+                if (downcall_compare(cache, entry, key, args) == 0) {
                         found = 1;
                         get_entry(entry);
                         break;
@@ -402,73 +335,74 @@ int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
 
         if (!found) {
                 CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
-                       hash->uc_name, entry->ue_key);
+                       cache->uc_name, key);
                 /* haven't found, it's possible */
-                spin_unlock(&hash->uc_lock);
+                spin_unlock(&cache->uc_lock);
                 RETURN(-EINVAL);
         }
 
         if (err) {
                 CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
-                       hash->uc_name, entry->ue_key, err);
+                       cache->uc_name, entry->ue_key, err);
                 GOTO(out, rc = -EINVAL);
         }
 
         if (!UC_CACHE_IS_ACQUIRING(entry)) {
-                CWARN("%s: found uptodate entry %p (key "LPU64") in ioctl\n",
-                       hash->uc_name, entry, entry->ue_key);
+                CERROR("%s: found uptodate entry %p (key "LPU64") in ioctl\n",
+                       cache->uc_name, entry, entry->ue_key);
                 GOTO(out, rc = 0);
         }
 
         if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
                 CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
-                       hash->uc_name, entry, entry->ue_key);
+                       cache->uc_name, entry, entry->ue_key);
                 GOTO(out, rc = -EINVAL);
         }
 
-        spin_unlock(&hash->uc_lock);
-        rc = entry_set_group_info(entry, primary, ngroups, groups);
-        spin_lock(&hash->uc_lock);
+        spin_unlock(&cache->uc_lock);
+        if (cache->uc_ops->parse_downcall)
+                rc = cache->uc_ops->parse_downcall(cache, entry, args);
+        spin_lock(&cache->uc_lock);
         if (rc)
                 GOTO(out, rc);
 
-        entry->ue_expire = jiffies + hash->uc_entry_expire;
+        entry->ue_expire = jiffies + cache->uc_entry_expire;
         UC_CACHE_SET_VALID(entry);
         CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
-               hash->uc_name, entry, entry->ue_key);
+               cache->uc_name, entry, entry->ue_key);
 out:
         if (rc) {
                 UC_CACHE_SET_INVALID(entry);
                 list_del_init(&entry->ue_hash);
         }
         UC_CACHE_CLEAR_ACQUIRING(entry);
-        spin_unlock(&hash->uc_lock);
+        spin_unlock(&cache->uc_lock);
         wake_up_all(&entry->ue_waitq);
-        put_entry(entry);
+        put_entry(cache, entry);
 
         RETURN(rc);
 }
 EXPORT_SYMBOL(upcall_cache_downcall);
 
-static void cache_flush(struct upcall_cache *hash, int force)
+/* Walk every hash chain of @cache under uc_lock and free the entries.
+ * Without @force, an entry that is still referenced is only marked expired
+ * and left on its chain.  With @force, every entry is asserted to be
+ * unreferenced before it is freed. */
+static void cache_flush(struct upcall_cache *cache, int force)
 {
         struct upcall_cache_entry *entry, *next;
         int i;
         ENTRY;
 
-        spin_lock(&hash->uc_lock);
+        spin_lock(&cache->uc_lock);
         for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
                 list_for_each_entry_safe(entry, next,
-                                         &hash->uc_hashtable[i], ue_hash) {
+                                         &cache->uc_hashtable[i], ue_hash) {
                         if (!force && atomic_read(&entry->ue_refcount)) {
                                 UC_CACHE_SET_EXPIRED(entry);
                                 continue;
                         }
                         LASSERT(!atomic_read(&entry->ue_refcount));
-                        free_entry(entry);
+                        free_entry(cache, entry);
                 }
         }
-        spin_unlock(&hash->uc_lock);
+        spin_unlock(&cache->uc_lock);
         EXIT;
 }
 
@@ -484,34 +418,68 @@ void upcall_cache_flush_all(struct upcall_cache *cache)
 }
 EXPORT_SYMBOL(upcall_cache_flush_all);
 
-struct upcall_cache *upcall_cache_init(const char *name)
+/* Expire the cached entry that matches @key/@args, if there is one.
+ *
+ * The hash chain selected by @key is searched under uc_lock with
+ * upcall_compare().  A matching entry is logged, marked expired and, when
+ * nobody holds a reference on it, freed right away.  Nothing happens when
+ * no entry matches. */
+void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args)
+{
+        struct list_head *head;
+        struct upcall_cache_entry *entry;
+        int found = 0;
+        ENTRY;
+
+        head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+
+        spin_lock(&cache->uc_lock);
+        list_for_each_entry(entry, head, ue_hash) {
+                if (upcall_compare(cache, entry, key, args) == 0) {
+                        found = 1;
+                        break;
+                }
+        }
+
+        if (found) {
+                /* both expiry fields are set from jiffies, so print the
+                 * current time in jiffies too to keep the values comparable */
+                CWARN("%s: flush entry %p: key "LPU64", ref %d, fl %x, "
+                      "cur %lu, ex %ld/%ld\n",
+                      cache->uc_name, entry, entry->ue_key,
+                      atomic_read(&entry->ue_refcount), entry->ue_flags,
+                      jiffies, entry->ue_acquire_expire,
+                      entry->ue_expire);
+                UC_CACHE_SET_EXPIRED(entry);
+                if (!atomic_read(&entry->ue_refcount))
+                        free_entry(cache, entry);
+        }
+        spin_unlock(&cache->uc_lock);
+        /* pair the ENTRY above, as the other void functions here do */
+        EXIT;
+}
+EXPORT_SYMBOL(upcall_cache_flush_one);
+
+/* Allocate and initialise an upcall cache.
+ *
+ * @name and @upcall are copied into the cache (at most size - 1 bytes
+ * each), @ops is stored as the cache's method table.  The entry expiry is
+ * set to 5 * 60 * HZ and the acquire expiry to 5 * HZ.
+ *
+ * Returns the new cache, or ERR_PTR(-ENOMEM) if the allocation fails. */
+struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
+                                       struct upcall_cache_ops *ops)
 {
-        struct upcall_cache *hash;
+        struct upcall_cache *cache;
         int i;
         ENTRY;
 
-        OBD_ALLOC(hash, sizeof(*hash));
-        if (!hash)
+        OBD_ALLOC(cache, sizeof(*cache));
+        if (!cache)
                 RETURN(ERR_PTR(-ENOMEM));
 
-        spin_lock_init(&hash->uc_lock);
+        spin_lock_init(&cache->uc_lock);
         for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
-                INIT_LIST_HEAD(&hash->uc_hashtable[i]);
-        strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1);
-        /* set default value, proc tunable */
-        strcpy(hash->uc_upcall, "NONE");
-        hash->uc_entry_expire = 5 * 60 * HZ;
-        hash->uc_acquire_expire = 5 * HZ;
-
-        RETURN(hash);
+                INIT_LIST_HEAD(&cache->uc_hashtable[i]);
+        /* NOTE(review): strncpy() does not terminate on truncation; this
+         * presumably relies on OBD_ALLOC zero-filling the cache - confirm */
+        strncpy(cache->uc_name, name, sizeof(cache->uc_name) - 1);
+        /* upcall pathname proc tunable */
+        strncpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall) - 1);
+        cache->uc_entry_expire = 5 * 60 * HZ;
+        cache->uc_acquire_expire = 5 * HZ;
+        cache->uc_ops = ops;
+
+        RETURN(cache);
 }
 EXPORT_SYMBOL(upcall_cache_init);
 
-void upcall_cache_cleanup(struct upcall_cache *hash)
+/* Tear down @cache: call upcall_cache_flush_all() on it and then free the
+ * cache structure itself.  A NULL @cache is silently ignored. */
+void upcall_cache_cleanup(struct upcall_cache *cache)
 {
-        if (!hash)
+        if (!cache)
                 return;
-        upcall_cache_flush_all(hash);
-        OBD_FREE(hash, sizeof(*hash));
+        upcall_cache_flush_all(cache);
+        OBD_FREE(cache, sizeof(*cache));
 }
 EXPORT_SYMBOL(upcall_cache_cleanup);
index fc33ca9..311f5df 100644 (file)
@@ -101,6 +101,12 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
         EXIT;
 }
 
+/* Return 1 when OBD_CONNECT_RMT_CLIENT is set in the connect flags of the
+ * import behind @exp, 0 otherwise. */
+static inline int client_is_remote(struct obd_export *exp)
+{
+        return !!(class_exp2cliimp(exp)->imp_connect_data.ocd_connect_flags &
+                  OBD_CONNECT_RMT_CLIENT);
+}
+
 /* Quota stuff */
 extern quota_interface_t *quota_interface;
 
index ad24dbd..6b688eb 100644 (file)
@@ -366,7 +366,10 @@ int mdc_enqueue(struct obd_export *exp,
                               it->it_create_mode, 0, it->it_flags, 
                               lmm, lmmsize);
 
-                repsize[repbufcnt++] = LUSTRE_POSIX_ACL_MAX_SIZE;
+                /* for remote client, fetch remote perm for current user */
+                repsize[repbufcnt++] = client_is_remote(exp) ?
+                                                sizeof(struct mdt_remote_perm) :
+                                                LUSTRE_POSIX_ACL_MAX_SIZE;
         } else if (it->it_op & IT_UNLINK) {
                 size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_unlink);
                 size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1;
@@ -387,8 +390,9 @@ int mdc_enqueue(struct obd_export *exp,
                 repsize[repbufcnt++] = obddev->u.cli.cl_max_mds_cookiesize;
         } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
                 obd_valid valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE |
-                                  OBD_MD_FLACL | OBD_MD_FLMODEASIZE |
-                                  OBD_MD_FLDIREA;
+                                  OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA;
+                valid |= client_is_remote(exp) ? OBD_MD_FLRMTPERM :
+                                                 OBD_MD_FLACL;
                 size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_body);
                 size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1;
 
@@ -409,7 +413,9 @@ int mdc_enqueue(struct obd_export *exp,
                 mdc_getattr_pack(req, DLM_INTENT_REC_OFF, valid,
                                  it->it_flags, op_data);
 
-                repsize[repbufcnt++] = LUSTRE_POSIX_ACL_MAX_SIZE;
+                repsize[repbufcnt++] = client_is_remote(exp) ?
+                                                sizeof(struct mdt_remote_perm) :
+                                                LUSTRE_POSIX_ACL_MAX_SIZE;
         } else if (it->it_op == IT_READDIR) {
                 policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
@@ -493,8 +499,9 @@ int mdc_enqueue(struct obd_export *exp,
         LASSERT(repbufcnt == 5 || repbufcnt == 2);
         if (repbufcnt == 5) {
                 struct mdt_body *body;
+                int offset = DLM_REPLY_REC_OFF;
 
-                body = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, sizeof(*body),
+                body = lustre_swab_repbuf(req, offset++, sizeof(*body),
                                          lustre_swab_mdt_body);
                 if (body == NULL) {
                         CERROR ("Can't swab mdt_body\n");
@@ -511,8 +518,7 @@ int mdc_enqueue(struct obd_export *exp,
 
                 if ((body->valid & OBD_MD_FLDIREA) != 0) {
                         if (body->eadatasize) {
-                                eadata = lustre_swab_repbuf(req, 
-                                                DLM_REPLY_REC_OFF + 1,
+                                eadata = lustre_swab_repbuf(req, offset++,
                                                 body->eadatasize, NULL);
                                 if (eadata == NULL) {
                                         CERROR ("Missing/short eadata\n");
@@ -523,7 +529,7 @@ int mdc_enqueue(struct obd_export *exp,
                 if ((body->valid & OBD_MD_FLEASIZE)) {
                         /* The eadata is opaque; just check that it is there.
                          * Eventually, obd_unpackmd() will check the contents */
-                        eadata = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF + 1,
+                        eadata = lustre_swab_repbuf(req, offset++,
                                                     body->eadatasize, NULL);
                         if (eadata == NULL) {
                                 CERROR ("Missing/short eadata\n");
@@ -550,18 +556,29 @@ int mdc_enqueue(struct obd_export *exp,
                          * large enough request buffer above we need to
                          * reallocate it here to hold the actual LOV EA. */
                         if (it->it_op & IT_OPEN) {
-                                int offset = DLM_INTENT_REC_OFF + 2;
-
-                                if (lustre_msg_buflen(req->rq_reqmsg, offset) <
+                                if (lustre_msg_buflen(req->rq_reqmsg,
+                                                      DLM_INTENT_REC_OFF + 2) <
                                     body->eadatasize)
                                         mdc_realloc_openmsg(req, body, size);
 
-                                lmm = lustre_msg_buf(req->rq_reqmsg, offset,
+                                lmm = lustre_msg_buf(req->rq_reqmsg,
+                                                     DLM_INTENT_REC_OFF + 2,
                                                      body->eadatasize);
                                 if (lmm)
                                         memcpy(lmm, eadata, body->eadatasize);
                         }
                 }
+                if (body->valid & OBD_MD_FLRMTPERM) {
+                        struct mdt_remote_perm *perm;
+
+                        LASSERT(client_is_remote(exp));
+                        perm = lustre_swab_repbuf(req, offset++, sizeof(*perm),
+                                                  lustre_swab_mdt_remote_perm);
+                        if (perm == NULL) {
+                                CERROR("missing remote permission!\n");
+                                RETURN(-EPROTO);
+                        }
+                }
         }
 
         RETURN(rc);
@@ -628,10 +645,10 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
                 /* As not all attributes are kept under update lock, e.g. 
                    owner/group/acls are under lookup lock, we need both 
                    ibits for GETATTR. */
-                
+
                 /* For CMD, UPDATE lock and LOOKUP lock can not be got 
                  * at the same for cross-object, so we can not match 
-                 * the 2 lock at the same time FIXME: but how to handle
+                 * the 2 lock at the same time FIXME: but how to handle 
                  * the above situation */
                 policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ?
                         MDS_INODELOCK_UPDATE : MDS_INODELOCK_LOOKUP;
index ed773ca..2871f15 100644 (file)
@@ -284,7 +284,7 @@ int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid,
         struct ptlrpc_request *req;
         int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
         // int size[3] = {sizeof(struct mdt_body)}, bufcnt = 1;
-        int rc, xattr_namelen = 0, bufcnt = 2, offset;
+        int rc, xattr_namelen = 0, bufcnt = 2, offset, remote_acl = 0;
         void *tmp;
         ENTRY;
 
@@ -310,6 +310,8 @@ int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid,
         if (xattr_name) {
                 tmp = lustre_msg_buf(req->rq_reqmsg, offset++, xattr_namelen);
                 memcpy(tmp, xattr_name, xattr_namelen);
+                if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL))
+                        remote_acl = 1;
         }
         if (input_size) {
                 tmp = lustre_msg_buf(req->rq_reqmsg, offset++, input_size);
@@ -329,12 +331,15 @@ int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid,
         ptlrpc_req_set_repsize(req, bufcnt, size);
 
         /* make rpc */
-        if (opcode == MDS_SETXATTR)
+        /* NB: setting a remote ACL does not need to hold the rpc lock, because
+         * it only sends the command to the MDS; when that is executed on the
+         * MDS mountpoint, another mdc_xattr_common() is invoked there. */
+        if (opcode == MDS_SETXATTR && !remote_acl)
                 mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 
         rc = ptlrpc_queue_wait(req);
 
-        if (opcode == MDS_SETXATTR)
+        if (opcode == MDS_SETXATTR && !remote_acl)
                 mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 
         if (rc != 0)
@@ -496,6 +501,14 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req,
                         GOTO(err_out, rc);
                 offset++;
         }
+
+        /* remote permission */
+        if (md->body->valid & OBD_MD_FLRMTPERM) {
+                md->remote_perm = lustre_msg_buf(req->rq_repmsg, offset,
+                                                sizeof(struct mdt_remote_perm));
+                LASSERT(md->remote_perm);
+                offset++;
+        }
 out:
         RETURN(rc);
 
@@ -1447,6 +1460,47 @@ static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf)
         return(rc);
 }
 
+/*
+ * Get the remote permission of the current user on @fid.
+ *
+ * Sends an MDS_GETATTR request (ptlrpc_body + mdt_body) with
+ * OBD_MD_FLRMTPERM set and expects a reply made of an mdt_body followed
+ * by an mdt_remote_perm.
+ *
+ * Returns 0 and hands the request back through *request on success; the
+ * caller then owns it (presumably releasing it with ptlrpc_req_finished(),
+ * as the error path here does).  On failure returns a negative errno and
+ * leaves *request NULL:
+ *   -ENOMEM  the request could not be allocated,
+ *   -EPROTO  the reply lacks the mdt_body, the OBD_MD_FLRMTPERM flag or
+ *            the mdt_remote_perm buffer,
+ *   or whatever ptlrpc_queue_wait() reported.
+ */
+int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
+                        struct ptlrpc_request **request)
+{
+        struct ptlrpc_request *req;
+        struct mdt_body *body;
+        struct mdt_remote_perm *perm;
+        int size[3] = { sizeof(struct ptlrpc_body),
+                        sizeof(*body),
+                        sizeof(*perm) };
+        int rc;
+        ENTRY;
+
+        *request = NULL;
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
+                              MDS_GETATTR, 2, size, NULL);
+        if (!req)
+                RETURN(-ENOMEM);
+
+        mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, 0, 0);
+
+        ptlrpc_req_set_repsize(req, 3, size);
+        rc = ptlrpc_queue_wait(req);
+        if (rc)
+                GOTO(out, rc);
+
+        /*
+         * The reply comes from the server: a short or malformed one must
+         * fail this call with -EPROTO rather than assert on the client.
+         */
+        body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body),
+                                  lustre_swab_mdt_body);
+        if (body == NULL || !(body->valid & OBD_MD_FLRMTPERM))
+                GOTO(out, rc = -EPROTO);
+
+        perm = lustre_swab_repbuf(req, REPLY_REC_OFF + 1, sizeof(*perm),
+                                  lustre_swab_mdt_remote_perm);
+        if (perm == NULL)
+                GOTO(out, rc = -EPROTO);
+
+        *request = req;
+        RETURN(0);
+out:
+        ptlrpc_req_finished(req);
+        RETURN(rc);
+}
+
 struct obd_ops mdc_obd_ops = {
         .o_owner            = THIS_MODULE,
         .o_setup            = mdc_setup,
@@ -1497,7 +1551,8 @@ struct md_ops mdc_md_ops = {
         .m_get_lustre_md    = mdc_get_lustre_md,
         .m_free_lustre_md   = mdc_free_lustre_md,
         .m_set_open_replay_data = mdc_set_open_replay_data,
-        .m_clear_open_replay_data = mdc_clear_open_replay_data
+        .m_clear_open_replay_data = mdc_clear_open_replay_data,
+        .m_get_remote_perm  = mdc_get_remote_perm
 };
 
 extern quota_interface_t mdc_quota_interface;
index 3ff593e..d707dca 100644 (file)
@@ -54,8 +54,17 @@ static void __mdd_ref_add(const struct lu_context *ctxt, struct mdd_object *obj,
                           struct thandle *handle);
 static void __mdd_ref_del(const struct lu_context *ctxt, struct mdd_object *obj,
                           struct thandle *handle);
-static int mdd_lookup(const struct lu_context *ctxt, struct md_object *pobj,
-                      const char *name, struct lu_fid* fid);
+static int mdd_lookup_intent(const struct lu_context *ctxt,
+                             struct md_object *pobj,
+                             const char *name, const struct lu_fid* fid,
+                             int mask, struct md_ucred *uc);
+static int mdd_exec_permission_lite(const struct lu_context *ctxt,
+                                    struct mdd_object *obj,
+                                    struct md_ucred *uc);
+static int __mdd_permission_internal(const struct lu_context *ctxt,
+                                     struct mdd_object *obj,
+                                     int mask, int getattr,
+                                     struct md_ucred *uc);
 
 static struct md_object_operations mdd_obj_ops;
 static struct md_dir_operations    mdd_dir_ops;
@@ -68,6 +77,76 @@ static const char dot[] = ".";
 static const char dotdot[] = "..";
 
 
+/* Take a usage reference on a group_info. */
+#define mdd_get_group_info(group_info) do {             \
+        atomic_inc(&(group_info)->usage);               \
+} while (0)
+
+/* Drop a usage reference; the put that reaches zero calls groups_free(). */
+#define mdd_put_group_info(group_info) do {             \
+        if (atomic_dec_and_test(&(group_info)->usage))  \
+                groups_free(group_info);                \
+} while (0)
+
+/* Number of gid_t entries that fit in one CFS_PAGE_SIZE-sized block. */
+#define MDD_NGROUPS_PER_BLOCK       ((int)(CFS_PAGE_SIZE / sizeof(gid_t)))
+
+/* i-th gid of gi: indexes gi->blocks[] by block, then by slot in the block. */
+#define MDD_GROUP_AT(gi, i) \
+    ((gi)->blocks[(i) / MDD_NGROUPS_PER_BLOCK][(i) % MDD_NGROUPS_PER_BLOCK])
+
+/*
+ * Binary search for @grp among the groups of @group_info.
+ *
+ * Derived from groups_search() in the Linux kernel.  The bisection only
+ * works if the entries are sorted in ascending order (presumably guaranteed
+ * by whoever fills the group_info -- verify against the caller).
+ *
+ * Returns 1 if @grp is present, 0 if it is not or @group_info is NULL.
+ */
+static int mdd_groups_search(struct group_info *group_info, gid_t grp)
+{
+        int left, right;
+
+        if (!group_info)
+                return 0;
+
+        left = 0;
+        right = group_info->ngroups;
+        while (left < right) {
+                int mid = left + (right - left) / 2;
+                gid_t cur = MDD_GROUP_AT(group_info, mid);
+
+                /*
+                 * Compare the gids directly.  Subtracting them and storing
+                 * the result in an int gives the wrong sign once the two
+                 * values are more than INT_MAX apart, which sends the
+                 * search into the wrong half and misses a present group.
+                 */
+                if (grp > cur)
+                        left = mid + 1;
+                else if (grp < cur)
+                        right = mid;
+                else
+                        return 1;
+        }
+        return 0;
+}
+
+/*
+ * Tell whether credential @uc is a member of group @grp: either @grp is
+ * the fsgid of @uc, or it is found in the group list at uc->mu_ginfo.
+ *
+ * Returns 1 for a member, 0 otherwise (including when no group list is
+ * attached to @uc).
+ */
+static int mdd_in_group_p(struct md_ucred *uc, gid_t grp)
+{
+        struct group_info *ginfo;
+        int found;
+
+        if (grp == uc->mu_fsgid)
+                return 1;
+
+        ginfo = uc->mu_ginfo;
+#if 0
+        if (ginfo == NULL && uc->mu_identity)
+                ginfo = uc->mu_identity->mi_ginfo;
+#endif
+        if (ginfo == NULL)
+                return 0;
+
+        /* hold a usage reference on the list while it is being searched */
+        mdd_get_group_info(ginfo);
+        found = mdd_groups_search(ginfo, grp);
+        mdd_put_group_info(ginfo);
+
+        return found;
+}
+
+/*
+ * Convenience wrapper: forwards to __mdd_permission_internal() with its
+ * getattr argument fixed to 1 and returns that call's result unchanged.
+ */
+static inline int mdd_permission_internal(const struct lu_context *ctxt,
+                                          struct mdd_object *obj, int mask,
+                                          struct md_ucred *uc)
+{
+        const int getattr = 1;
+
+        return __mdd_permission_internal(ctxt, obj, mask, getattr, uc);
+}
+
 struct mdd_thread_info *mdd_ctx_info(const struct lu_context *ctx)
 {
         struct mdd_thread_info *info;
@@ -178,9 +257,12 @@ static inline int mdd_is_dead_obj(struct mdd_object *obj)
 
 /*Check whether it may create the cobj under the pobj*/
 static int mdd_may_create(const struct lu_context *ctxt,
-                          struct mdd_object *pobj, struct mdd_object *cobj)
+                          struct mdd_object *pobj, struct mdd_object *cobj,
+                          int need_check, struct md_ucred *uc)
 {
+        int rc = 0;
         ENTRY;
+
         if (cobj && lu_object_exists(&cobj->mod_obj.mo_lu))
                 RETURN(-EEXIST);
 
@@ -188,7 +270,11 @@ static int mdd_may_create(const struct lu_context *ctxt,
                 RETURN(-ENOENT);
 
         /*check pobj may create or not*/
-        RETURN(0);
+        if (need_check)
+                rc = mdd_permission_internal(ctxt, pobj,
+                                             MAY_WRITE | MAY_EXEC, uc);
+
+        RETURN(rc);
 }
 
 static inline int __mdd_la_get(const struct lu_context *ctxt,
@@ -224,10 +310,54 @@ static int mdd_get_flags(const struct lu_context *ctxt, struct mdd_object *obj)
         RETURN(rc);
 }
 
+/* A capability set is used as a plain value here: identity mapping. */
+#define mdd_cap_t(x) (x)
+
+/* Bit mask for capability number x. */
+#define MDD_CAP_TO_MASK(x) (1 << (x))
+
+/* Non-zero when the bit for capability 'flag' is set in capability set 'c'. */
+#define mdd_cap_raised(c, flag) (mdd_cap_t(c) & MDD_CAP_TO_MASK(flag))
+
+/*
+ * Derived from capable() in the Linux kernel, but evaluated against the
+ * capability mask carried in @uc (uc->mu_cap).
+ *
+ * Returns 1 when capability @cap is raised in that mask, 0 otherwise.
+ */
+static inline int mdd_capable(struct md_ucred *uc, int cap)
+{
+        return mdd_cap_raised(uc->mu_cap, cap) ? 1 : 0;
+}
+
+/*
+ * Sticky-bit check for removing @cobj from directory @pobj on behalf of @uc.
+ *
+ * Returns 0 when the sticky bit does not stand in the way: @uc owns @cobj,
+ * @pobj does not have S_ISVTX set, or @uc owns @pobj.  Otherwise returns 1
+ * unless @uc holds CAP_FOWNER.  If the attributes of either object cannot
+ * be read, the non-zero result of __mdd_la_get() is returned instead.
+ *
+ * Both reads go through the per-context mti_la buffer, so the child's owner
+ * is examined before the parent's attributes overwrite it.
+ */
+static inline int mdd_is_sticky(const struct lu_context *ctxt,
+                                struct mdd_object *pobj,
+                                struct mdd_object *cobj,
+                                struct md_ucred *uc)
+{
+        struct lu_attr *la = &mdd_ctx_info(ctxt)->mti_la;
+        int rc;
+
+        /* child first: its owner is never blocked by the sticky bit */
+        rc = __mdd_la_get(ctxt, cobj, la);
+        if (rc)
+                return rc;
+        if (la->la_uid == uc->mu_fsuid)
+                return 0;
+
+        /* then the parent: sticky bit set and not owned by the caller? */
+        rc = __mdd_la_get(ctxt, pobj, la);
+        if (rc)
+                return rc;
+        if (!(la->la_mode & S_ISVTX))
+                return 0;
+        if (la->la_uid == uc->mu_fsuid)
+                return 0;
+
+        return !mdd_capable(uc, CAP_FOWNER);
+}
+
 /*Check whether it may delete the cobj under the pobj*/
 static int mdd_may_delete(const struct lu_context *ctxt,
                           struct mdd_object *pobj, struct mdd_object *cobj,
-                          int is_dir)
+                          int is_dir, int need_check, struct md_ucred *uc)
 {
         struct mdd_device *mdd = mdo2mdd(&cobj->mod_obj);
         int rc = 0;
@@ -248,14 +378,24 @@ static int mdd_may_delete(const struct lu_context *ctxt,
                 if (lu_fid_eq(mdo2fid(cobj), &mdd->mdd_root_fid))
                         RETURN(-EBUSY);
 
-        } else if (S_ISDIR(mdd_object_type(cobj)))
+        } else if (S_ISDIR(mdd_object_type(cobj))) {
                         RETURN(-EISDIR);
+        }
 
-        if (pobj && mdd_is_dead_obj(pobj))
-                RETURN(-ENOENT);
+        if (pobj) {
+                if (mdd_is_dead_obj(pobj))
+                        RETURN(-ENOENT);
 
+                if (mdd_is_sticky(ctxt, pobj, cobj, uc))
+                        RETURN(-EPERM);
+
+                if (need_check)
+                        rc = mdd_permission_internal(ctxt, pobj,
+                                                     MAY_WRITE | MAY_EXEC, uc);
+        }
         RETURN(rc);
 }
+
 /* get only inode attributes */
 static int __mdd_iattr_get(const struct lu_context *ctxt,
                            struct mdd_object *mdd_obj, struct md_attr *ma)
@@ -268,6 +408,7 @@ static int __mdd_iattr_get(const struct lu_context *ctxt,
                 ma->ma_valid = MA_INODE;
         RETURN(rc);
 }
+
 /* get lov EA only */
 static int __mdd_lmm_get(const struct lu_context *ctxt,
                          struct mdd_object *mdd_obj, struct md_attr *ma)
@@ -335,8 +476,11 @@ static inline int mdd_attr_get_internal_locked(const struct lu_context *ctxt,
         return rc;
 }
 
-static int mdd_attr_get(const struct lu_context *ctxt,
-                        struct md_object *obj, struct md_attr *ma)
+/*
+ * No permission check is needed.
+ */
+static int mdd_attr_get(const struct lu_context *ctxt, struct md_object *obj,
+                        struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         int                rc;
@@ -346,8 +490,12 @@ static int mdd_attr_get(const struct lu_context *ctxt,
         RETURN(rc);
 }
 
-static int mdd_xattr_get(const struct lu_context *ctxt, struct md_object *obj,
-                         void *buf, int buf_len, const char *name)
+/*
+ * No permission check is needed.
+ */
+static int mdd_xattr_get(const struct lu_context *ctxt,
+                         struct md_object *obj, void *buf, int buf_len,
+                         const char *name, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct dt_object  *next;
@@ -365,8 +513,12 @@ static int mdd_xattr_get(const struct lu_context *ctxt, struct md_object *obj,
         RETURN(rc);
 }
 
+/*
+ * The permission check is done at open time,
+ * so there is no need to check again.
+ */
 static int mdd_readlink(const struct lu_context *ctxt, struct md_object *obj,
-                        void *buf, int buf_len)
+                        void *buf, int buf_len, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct dt_object  *next;
@@ -380,8 +532,9 @@ static int mdd_readlink(const struct lu_context *ctxt, struct md_object *obj,
         rc = next->do_body_ops->dbo_read(ctxt, next, buf, buf_len, &pos);
         RETURN(rc);
 }
+
 static int mdd_xattr_list(const struct lu_context *ctxt, struct md_object *obj,
-                          void *buf, int buf_len)
+                          void *buf, int buf_len, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct dt_object  *next;
@@ -742,6 +895,7 @@ static int __mdd_xattr_set(const struct lu_context *ctxt, struct mdd_object *o,
         }
         RETURN(rc);
 }
+
 /* this gives the same functionality as the code between
  * sys_chmod and inode_setattr
  * chown_common and inode_setattr
@@ -750,64 +904,97 @@ static int __mdd_xattr_set(const struct lu_context *ctxt, struct mdd_object *o,
  * and port to
  */
 int mdd_fix_attr(const struct lu_context *ctxt, struct mdd_object *obj,
-                 const struct md_attr *ma, struct lu_attr *la)
+                 struct lu_attr *la, struct md_ucred *uc)
 {
-        struct lu_attr   *tmp_la = &mdd_ctx_info(ctxt)->mti_la;
-        time_t            now = CURRENT_SECONDS;
+        struct lu_attr   *tmp_la     = &mdd_ctx_info(ctxt)->mti_la;
+        time_t            now        = CURRENT_SECONDS;
         int               rc;
         ENTRY;
 
+        if (!la->la_valid)
+                RETURN(0);
+
+        /* Do not permit change file type */
+        if (la->la_valid & LA_TYPE)
+                RETURN(-EPERM);
+
+        /* They should not be processed by setattr */
+        if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE))
+                RETURN(-EPERM);
+
         rc = __mdd_la_get(ctxt, obj, tmp_la);
         if (rc)
                 RETURN(rc);
-        /*XXX Check permission */
+
         if (mdd_is_immutable(obj) || mdd_is_append(obj)) {
 
-                /*If only change flags of the object, we should
+                /*
+                 * If only change flags of the object, we should
                  * let it pass, but also need capability check
                  * here if (!capable(CAP_LINUX_IMMUTABLE)),
-                 * fix it, when implement capable in mds*/
+                 * fix it, when implement capable in mds
+                 */
                 if (la->la_valid & ~LA_FLAGS)
                         RETURN(-EPERM);
 
-                /*According to Ext3 implementation on this, the
-                 *Ctime will be changed, but not clear why?*/
+                if (!mdd_capable(uc, CAP_LINUX_IMMUTABLE))
+                        RETURN(-EPERM);
+
+                if ((uc->mu_fsuid != tmp_la->la_uid) &&
+                    !mdd_capable(uc, CAP_FOWNER))
+                        RETURN(-EPERM);
+
+                /*
+                 * According to Ext3 implementation on this, the
+                 * Ctime will be changed, but not clear why?
+                 */
                 la->la_ctime = now;
                 la->la_valid |= LA_CTIME;
                 RETURN(0);
         }
-        if (!(la->la_valid & LA_CTIME)) {
-                la->la_ctime = now;
-                la->la_valid |= LA_CTIME;
-        }
 
-#if 0
-        /* times */
-        if ((ia_valid & (ATTR_MTIME|ATTR_ATIME)) == (ATTR_MTIME|ATTR_ATIME)) {
-                if (current->fsuid != inode->i_uid &&
-                    (error = ll_permission(inode, MAY_WRITE, NULL)) != 0)
-                        RETURN(error);
-        }
-        if (ia_valid & ATTR_SIZE &&
-            /* NFSD hack for open(O_CREAT|O_TRUNC)=mknod+truncate (bug 5781) */
-            !(rec->ur_uc.luc_fsuid == inode->i_uid &&
-              ia_valid & MDS_OPEN_OWNEROVERRIDE)) {
-                if ((error = ll_permission(inode, MAY_WRITE, NULL)) != 0)
-                        RETURN(error);
+        /* Check for setting the obj time. */
+        if (la->la_valid & (LA_MTIME | LA_ATIME | LA_CTIME)) {
+                if ((uc->mu_fsuid != tmp_la->la_uid) &&
+                    !mdd_capable(uc, CAP_FOWNER) &&
+                    !(la->la_valid & (LA_SIZE | LA_BLOCKS)))
+                        RETURN(-EPERM);
         }
-#endif
 
-        if (la->la_valid & (LA_UID | LA_GID)) {
-                /* chown */
-
-                if (mdd_is_immutable(obj) || mdd_is_append(obj))
+        /* Make sure a caller can chmod. */
+        if (la->la_valid & LA_MODE) {
+                /*
+                 * Bypass la_valid == LA_MODE,
+                 * this is for changing file with SUID or SGID.
+                 */
+                if ((la->la_valid & ~LA_MODE) &&
+                    (uc->mu_fsuid != tmp_la->la_uid) &&
+                    !mdd_capable(uc, CAP_FOWNER))
                         RETURN(-EPERM);
+
+                if (la->la_mode == (umode_t) -1)
+                        la->la_mode = tmp_la->la_mode;
+                else
+                        la->la_mode = (la->la_mode & S_IALLUGO) |
+                                      (tmp_la->la_mode & ~S_IALLUGO);
+
+                /* Also check the setgid bit! */
+                if (!mdd_in_group_p(uc, (la->la_valid & LA_GID) ? la->la_gid :
+                                tmp_la->la_gid) && !mdd_capable(uc, CAP_FSETID))
+                        la->la_mode &= ~S_ISGID;
+        } else {
+               la->la_mode = tmp_la->la_mode;
+        }
+
+        /* Make sure a caller can chown. */
+        if (la->la_valid & LA_UID) {
                 if (la->la_uid == (uid_t) -1)
                         la->la_uid = tmp_la->la_uid;
-                if (la->la_gid == (gid_t) -1)
-                        la->la_gid = tmp_la->la_gid;
-                if (!(la->la_valid & LA_MODE))
-                        la->la_mode = tmp_la->la_mode;
+                if (((uc->mu_fsuid != tmp_la->la_uid) ||
+                    (la->la_uid != tmp_la->la_uid)) &&
+                    !mdd_capable(uc, CAP_CHOWN))
+                        RETURN(-EPERM);
+
                 /*
                  * If the user or group of a non-directory has been
                  * changed by a non-root user, remove the setuid bit.
@@ -819,11 +1006,23 @@ int mdd_fix_attr(const struct lu_context *ctxt, struct mdd_object *obj,
                  * for 2.2 anyway, as it should have been using
                  * CAP_FSETID rather than fsuid -- 19990830 SD.
                  */
-                if ((tmp_la->la_mode & S_ISUID) == S_ISUID &&
+                if (((tmp_la->la_mode & S_ISUID) == S_ISUID) &&
                     !S_ISDIR(tmp_la->la_mode)) {
                         la->la_mode &= ~S_ISUID;
                         la->la_valid |= LA_MODE;
                 }
+        }
+
+        /* Make sure caller can chgrp. */
+        if (la->la_valid & LA_GID) {
+                if (la->la_gid == (gid_t) -1)
+                        la->la_gid = tmp_la->la_gid;
+                if (((uc->mu_fsuid != tmp_la->la_uid) ||
+                    ((la->la_gid != tmp_la->la_gid) &&
+                    !mdd_in_group_p(uc, la->la_gid))) &&
+                    !mdd_capable(uc, CAP_CHOWN))
+                        RETURN(-EPERM);
+
                 /*
                  * Likewise, if the user or group of a non-directory
                  * has been changed by a non-root user, remove the
@@ -839,35 +1038,44 @@ int mdd_fix_attr(const struct lu_context *ctxt, struct mdd_object *obj,
                         la->la_mode &= ~S_ISGID;
                         la->la_valid |= LA_MODE;
                 }
-        } else if (la->la_valid & LA_MODE) {
-                int mode = la->la_mode;
-                /* chmod */
-                if (la->la_mode == (umode_t)-1)
-                        mode = tmp_la->la_mode;
-                la->la_mode =
-                        (mode & S_IALLUGO) | (tmp_la->la_mode & ~S_IALLUGO);
         }
 
-        /* For the "Size-on-MDS" setattr update, merge coming attributes with
-         * the set in the inode. */
-        if (la->la_valid & LA_SIZE) {
+        /* For truncate (or setsize), we should have MAY_WRITE perm */
+        if (la->la_valid & (LA_SIZE | LA_BLOCKS)) {
+                rc = mdd_permission_internal(ctxt, obj, MAY_WRITE, uc);
+                if (rc)
+                        RETURN(rc);
+
+                /* 
+                 * For the "Size-on-MDS" setattr update, merge coming
+                 * attributes with the set in the inode. BUG 10641
+                 */
                 if ((la->la_valid & LA_ATIME) &&
                     (la->la_atime < tmp_la->la_atime))
                         la->la_valid &= ~LA_ATIME;
-
-                if ((la->la_valid & LA_CTIME) &&
+                
+                if ((la->la_valid & LA_CTIME) && 
                     (la->la_ctime < tmp_la->la_ctime))
                         la->la_valid &= ~(LA_MTIME | LA_CTIME);
+
+                if (!(la->la_valid & LA_MTIME) && (now > tmp_la->la_mtime)) {
+                        la->la_mtime = now;
+                        la->la_valid |= LA_MTIME;
+                }
         }
 
-        RETURN(rc);
-}
+        /* Finally, ctime must be fixed up */
+        if (!(la->la_valid & LA_CTIME) && (now > tmp_la->la_ctime)) {
+                la->la_ctime = now;
+                la->la_valid |= LA_CTIME;
+        }
 
+        RETURN(0);
+}
 
 /* set attr and LOV EA at once, return updated attr */
-static int mdd_attr_set(const struct lu_context *ctxt,
-                        struct md_object *obj,
-                        const struct md_attr *ma)
+static int mdd_attr_set(const struct lu_context *ctxt, struct md_object *obj,
+                        const struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct mdd_device *mdd = mdo2mdd(obj);
@@ -903,7 +1111,7 @@ static int mdd_attr_set(const struct lu_context *ctxt,
 
         *la_copy = ma->ma_attr;
         mdd_write_lock(ctxt, mdd_obj);
-        rc = mdd_fix_attr(ctxt, mdd_obj, ma, la_copy);
+        rc = mdd_fix_attr(ctxt, mdd_obj, la_copy, uc);
         mdd_write_unlock(ctxt, mdd_obj);
         if (rc)
                 GOTO(cleanup, rc);
@@ -960,15 +1168,43 @@ int mdd_xattr_set_txn(const struct lu_context *ctxt, struct mdd_object *obj,
         RETURN(rc);
 }
 
+/*
+ * Check run before an extended attribute of @obj is set or removed.
+ *
+ * Returns -EPERM if @obj is immutable or append-only, or if @uc neither
+ * owns @obj nor holds CAP_FOWNER; returns the error from __mdd_la_get() if
+ * the attributes cannot be read; returns 0 when the change may proceed.
+ *
+ * The attributes are read into the per-context mti_la buffer under the
+ * object's read lock.
+ */
+static int mdd_xattr_sanity_check(const struct lu_context *ctxt,
+                                  struct mdd_object *obj,
+                                  struct md_ucred *uc)
+{
+        struct lu_attr *la = &mdd_ctx_info(ctxt)->mti_la;
+        int rc;
+        ENTRY;
+
+        if (mdd_is_immutable(obj) || mdd_is_append(obj))
+                RETURN(-EPERM);
+
+        mdd_read_lock(ctxt, obj);
+        rc = __mdd_la_get(ctxt, obj, la);
+        mdd_read_unlock(ctxt, obj);
+        if (rc)
+                RETURN(rc);
+
+        /* the owner, or anyone holding CAP_FOWNER, may go ahead */
+        if (uc->mu_fsuid == la->la_uid || mdd_capable(uc, CAP_FOWNER))
+                RETURN(0);
+
+        RETURN(-EPERM);
+}
+
 static int mdd_xattr_set(const struct lu_context *ctxt, struct md_object *obj,
-                         const void *buf, int buf_len, const char *name,
-                         int fl)
+                         const void *buf, int buf_len, const char *name, int fl,
+                         struct md_ucred *uc)
 {
+        struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct mdd_device *mdd = mdo2mdd(obj);
         struct thandle *handle;
         int  rc;
         ENTRY;
 
+        rc = mdd_xattr_sanity_check(ctxt, mdd_obj, uc);
+        if (rc)
+                RETURN(rc);
+
         mdd_txn_param_build(ctxt, &MDD_TXN_XATTR_SET);
         handle = mdd_trans_start(ctxt, mdd);
         if (IS_ERR(handle))
@@ -1005,7 +1241,7 @@ static int __mdd_xattr_del(const struct lu_context *ctxt,struct mdd_device *mdd,
 }
 
 int mdd_xattr_del(const struct lu_context *ctxt, struct md_object *obj,
-                  const char *name)
+                  const char *name, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct mdd_device *mdd = mdo2mdd(obj);
@@ -1013,6 +1249,10 @@ int mdd_xattr_del(const struct lu_context *ctxt, struct md_object *obj,
         int  rc;
         ENTRY;
 
+        rc = mdd_xattr_sanity_check(ctxt, mdd_obj, uc);
+        if (rc)
+                RETURN(rc);
+
         mdd_txn_param_build(ctxt, &MDD_TXN_XATTR_SET);
         handle = mdd_trans_start(ctxt, mdd);
         if (IS_ERR(handle))
@@ -1096,13 +1336,16 @@ static int __mdd_index_delete(const struct lu_context *ctxt,
 
 static int mdd_link_sanity_check(const struct lu_context *ctxt,
                                  struct mdd_object *tgt_obj,
-                                 struct mdd_object *src_obj)
+                                 struct mdd_object *src_obj,
+                                 struct md_ucred *uc)
 {
         int rc;
+        ENTRY;
 
-        rc = mdd_may_create(ctxt, tgt_obj, NULL);
+        rc = mdd_may_create(ctxt, tgt_obj, NULL, 1, uc);
         if (rc)
                 RETURN(rc);
+
         if (S_ISDIR(mdd_object_type(src_obj)))
                 RETURN(-EPERM);
 
@@ -1114,7 +1357,7 @@ static int mdd_link_sanity_check(const struct lu_context *ctxt,
 
 static int mdd_link(const struct lu_context *ctxt, struct md_object *tgt_obj,
                     struct md_object *src_obj, const char *name,
-                    struct md_attr *ma)
+                    struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdd_object *mdd_tobj = md2mdd_obj(tgt_obj);
         struct mdd_object *mdd_sobj = md2mdd_obj(src_obj);
@@ -1131,7 +1374,7 @@ static int mdd_link(const struct lu_context *ctxt, struct md_object *tgt_obj,
 
         mdd_lock2(ctxt, mdd_tobj, mdd_sobj);
 
-        rc = mdd_link_sanity_check(ctxt, mdd_tobj, mdd_sobj);
+        rc = mdd_link_sanity_check(ctxt, mdd_tobj, mdd_sobj, uc);
         if (rc)
                 GOTO(out, rc);
 
@@ -1211,12 +1454,13 @@ int __mdd_object_kill(const struct lu_context *ctxt,
         mdd_set_dead_obj(obj);
         if (S_ISREG(mdd_object_type(obj)) && ma->ma_need&MA_LOV) {
                 rc = __mdd_lmm_get(ctxt, obj, ma);
-                if (ma->ma_valid&MA_LOV && ma->ma_need&MA_COOKIE)
+                if ((ma->ma_valid & MA_LOV) && (ma->ma_need & MA_COOKIE))
                         rc = mdd_unlink_log(ctxt, mdo2mdd(&obj->mod_obj),
                                             obj, ma);
         }
         RETURN(rc);
 }
+
 /* caller should take a lock before calling */
 static int __mdd_finish_unlink(const struct lu_context *ctxt,
                                struct mdd_object *obj, struct md_attr *ma,
@@ -1240,29 +1484,31 @@ static int __mdd_finish_unlink(const struct lu_context *ctxt,
 static int mdd_unlink_sanity_check(const struct lu_context *ctxt,
                                    struct mdd_object *pobj,
                                    struct mdd_object *cobj,
-                                   struct md_attr *ma)
+                                   struct md_attr *ma,
+                                   struct md_ucred *uc)
 {
         struct dt_object  *dt_cobj  = mdd_object_child(cobj);
         int rc = 0;
         ENTRY;
 
-        rc = mdd_may_delete(ctxt, pobj, cobj, S_ISDIR(ma->ma_attr.la_mode));
+        rc = mdd_may_delete(ctxt, pobj, cobj,
+                            S_ISDIR(ma->ma_attr.la_mode), 1, uc);
         if (rc)
                 RETURN(rc);
 
-        if (S_ISDIR(mdd_object_type(cobj)) &&
-            dt_try_as_dir(ctxt, dt_cobj)) {
-                rc = mdd_dir_is_empty(ctxt, cobj);
-                if (rc != 0)
-                        RETURN(rc);
+        if (S_ISDIR(mdd_object_type(cobj))) {
+                if (dt_try_as_dir(ctxt, dt_cobj))
+                        rc = mdd_dir_is_empty(ctxt, cobj);
+                else
+                        rc = -ENOTDIR;
         }
 
         RETURN(rc);
 }
 
-static int mdd_unlink(const struct lu_context *ctxt, struct md_object *pobj,
-                      struct md_object *cobj, const char *name,
-                      struct md_attr *ma)
+static int mdd_unlink(const struct lu_context *ctxt,
+                      struct md_object *pobj, struct md_object *cobj,
+                      const char *name, struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdd_device *mdd = mdo2mdd(pobj);
         struct mdd_object *mdd_pobj = md2mdd_obj(pobj);
@@ -1279,11 +1525,10 @@ static int mdd_unlink(const struct lu_context *ctxt, struct md_object *pobj,
 
         mdd_lock2(ctxt, mdd_pobj, mdd_cobj);
 
-        rc = mdd_unlink_sanity_check(ctxt, mdd_pobj, mdd_cobj, ma);
+        rc = mdd_unlink_sanity_check(ctxt, mdd_pobj, mdd_cobj, ma, uc);
         if (rc)
                 GOTO(cleanup, rc);
 
-
         rc = __mdd_index_delete(ctxt, mdd_pobj, name, handle);
         if (rc)
                 GOTO(cleanup, rc);
@@ -1314,9 +1559,10 @@ cleanup:
         mdd_trans_stop(ctxt, mdd, rc, handle);
         RETURN(rc);
 }
+
 /* partial unlink */
 static int mdd_ref_del(const struct lu_context *ctxt, struct md_object *obj,
-                       struct md_attr *ma)
+                       struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct mdd_device *mdd = mdo2mdd(obj);
@@ -1331,7 +1577,7 @@ static int mdd_ref_del(const struct lu_context *ctxt, struct md_object *obj,
 
         mdd_write_lock(ctxt, mdd_obj);
 
-        rc = mdd_unlink_sanity_check(ctxt, NULL, mdd_obj, ma);
+        rc = mdd_unlink_sanity_check(ctxt, NULL, mdd_obj, ma, uc);
         if (rc)
                 GOTO(cleanup, rc);
 
@@ -1355,7 +1601,7 @@ static int mdd_parent_fid(const struct lu_context *ctxt,
                           struct mdd_object *obj,
                           struct lu_fid *fid)
 {
-        return mdd_lookup(ctxt, &obj->mod_obj, dotdot, fid);
+        return mdd_lookup_intent(ctxt, &obj->mod_obj, dotdot, fid, 0, NULL);
 }
 
 /*
@@ -1463,39 +1709,54 @@ static int mdd_rename_sanity_check(const struct lu_context *ctxt,
                                    struct mdd_object *tgt_pobj,
                                    const struct lu_fid *sfid,
                                    int src_is_dir,
-                                   struct mdd_object *tobj)
+                                   struct mdd_object *sobj,
+                                   struct mdd_object *tobj,
+                                   struct md_ucred *uc)
 {
-        int rc = 0, tgt_is_dir;
+        struct mdd_device *mdd = mdo2mdd(&src_pobj->mod_obj);
+        int rc = 0, need_check = 1;
         ENTRY;
 
-        if (mdd_is_dead_obj(src_pobj))
-                RETURN(-ENOENT);
+        mdd_read_lock(ctxt, src_pobj);
+        rc = mdd_may_delete(ctxt, src_pobj, sobj, src_is_dir, need_check, uc);
+        mdd_read_unlock(ctxt, src_pobj);
+        if (rc)
+                RETURN(rc);
+
+        if (src_pobj == tgt_pobj)
+                need_check = 0;
 
         if (!tobj) {
-                rc = mdd_may_create(ctxt, tgt_pobj, NULL);
+                mdd_read_lock(ctxt, tgt_pobj);
+                rc = mdd_may_create(ctxt, tgt_pobj, NULL, need_check, uc);
+                mdd_read_unlock(ctxt, tgt_pobj);
         } else {
-                rc = mdd_may_delete(ctxt, tgt_pobj, tobj, src_is_dir);
-                if (rc == 0) {
-                        tgt_is_dir = S_ISDIR(mdd_object_type(tobj));
-                        if (tgt_is_dir && mdd_dir_is_empty(ctxt, tobj))
-                                rc = -ENOTEMPTY;
-                }
+                mdd_read_lock(ctxt, tgt_pobj);
+                rc = mdd_may_delete(ctxt, tgt_pobj, tobj, src_is_dir,
+                                    need_check, uc);
+                mdd_read_unlock(ctxt, tgt_pobj);
+                if (!rc && S_ISDIR(mdd_object_type(tobj)) &&
+                    mdd_dir_is_empty(ctxt, tobj))
+                                RETURN(-ENOTEMPTY);
         }
-        if (rc)
-                RETURN(rc);
+
+        /* source should not be ancestor of target dir */
+        if (!rc && src_is_dir && mdd_is_parent(ctxt, mdd, tgt_pobj, sfid, NULL))
+                RETURN(-EINVAL);
 
         RETURN(rc);
 }
 /* src object can be remote that is why we use only fid and type of object */
-static int mdd_rename(const struct lu_context *ctxt, struct md_object *src_pobj,
-                      struct md_object *tgt_pobj, const struct lu_fid *lf,
-                      const char *sname, struct md_object *tobj,
-                      const char *tname, struct md_attr *ma)
+static int mdd_rename(const struct lu_context *ctxt,
+                      struct md_object *src_pobj, struct md_object *tgt_pobj,
+                      const struct lu_fid *lf, const char *sname,
+                      struct md_object *tobj, const char *tname,
+                      struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdd_device *mdd = mdo2mdd(src_pobj);
         struct mdd_object *mdd_spobj = md2mdd_obj(src_pobj);
         struct mdd_object *mdd_tpobj = md2mdd_obj(tgt_pobj);
-        struct mdd_object *mdd_sobj = NULL;
+        struct mdd_object *mdd_sobj = mdd_object_find(ctxt, mdd, lf);
         struct mdd_object *mdd_tobj = NULL;
         struct lu_attr    *la_copy = &mdd_ctx_info(ctxt)->mti_la_for_fix;
         struct thandle *handle;
@@ -1507,21 +1768,21 @@ static int mdd_rename(const struct lu_context *ctxt, struct md_object *src_pobj,
         is_dir = S_ISDIR(ma->ma_attr.la_mode);
         if (ma->ma_attr.la_valid & LA_FLAGS &&
             ma->ma_attr.la_flags & (LUSTRE_APPEND_FL | LUSTRE_IMMUTABLE_FL))
-                RETURN(-EPERM);
+                GOTO(out, rc = -EPERM);
 
         if (tobj)
                 mdd_tobj = md2mdd_obj(tobj);
 
         /*XXX: shouldn't this check be done under lock below? */
         rc = mdd_rename_sanity_check(ctxt, mdd_spobj, mdd_tpobj,
-                                     lf, is_dir, mdd_tobj);
+                                     lf, is_dir, mdd_sobj, mdd_tobj, uc);
         if (rc)
-                RETURN(rc);
+                GOTO(out, rc);
 
         mdd_txn_param_build(ctxt, &MDD_TXN_RENAME);
         handle = mdd_trans_start(ctxt, mdd);
         if (IS_ERR(handle))
-                RETURN(PTR_ERR(handle));
+                GOTO(out, rc = PTR_ERR(handle));
 
         /*FIXME: Should consider tobj and sobj too in rename_lock*/
         rc = mdd_rename_lock(ctxt, mdd, mdd_spobj, mdd_tpobj);
@@ -1546,7 +1807,6 @@ static int mdd_rename(const struct lu_context *ctxt, struct md_object *src_pobj,
         if (rc)
                 GOTO(cleanup, rc);
 
-        mdd_sobj = mdd_object_find(ctxt, mdd, lf);
         *la_copy = ma->ma_attr;
         la_copy->la_valid = LA_CTIME;
         if (mdd_sobj) {
@@ -1587,13 +1847,16 @@ cleanup:
         mdd_rename_unlock(ctxt, mdd_spobj, mdd_tpobj);
 cleanup_unlocked:
         mdd_trans_stop(ctxt, mdd, rc, handle);
+out:
         if (mdd_sobj)
                 mdd_object_put(ctxt, mdd_sobj);
         RETURN(rc);
 }
 
-static int mdd_lookup(const struct lu_context *ctxt, struct md_object *pobj,
-                      const char *name, struct lu_fid* fid)
+static int mdd_lookup_intent(const struct lu_context *ctxt,
+                             struct md_object *pobj,
+                             const char *name, const struct lu_fid* fid,
+                             int mask, struct md_ucred *uc)
 {
         struct mdd_object   *mdd_obj = md2mdd_obj(pobj);
         struct dt_object    *dir = mdd_object_child(mdd_obj);
@@ -1604,15 +1867,35 @@ static int mdd_lookup(const struct lu_context *ctxt, struct md_object *pobj,
 
         if (mdd_is_dead_obj(mdd_obj))
                 RETURN(-ESTALE);
+
         mdd_read_lock(ctxt, mdd_obj);
+        if (mask == MAY_EXEC)
+                rc = mdd_exec_permission_lite(ctxt, mdd_obj, uc);
+        else
+                rc = mdd_permission_internal(ctxt, mdd_obj, mask, uc);
+        if (rc)
+                GOTO(out_unlock, rc);
+
         if (S_ISDIR(mdd_object_type(mdd_obj)) && dt_try_as_dir(ctxt, dir))
                 rc = dir->do_index_ops->dio_lookup(ctxt, dir, rec, key);
         else
                 rc = -ENOTDIR;
+
+out_unlock:
         mdd_read_unlock(ctxt, mdd_obj);
         RETURN(rc);
 }
 
+static int mdd_lookup(const struct lu_context *ctxt,
+                      struct md_object *pobj, const char *name,
+                      struct lu_fid* fid, struct md_ucred *uc)
+{
+        int rc; /* plain lookup == intent lookup with mask MAY_EXEC */
+        ENTRY;
+        rc = mdd_lookup_intent(ctxt, pobj, name, fid, MAY_EXEC, uc);
+        RETURN(rc);
+}
+
 /*
  * returns 1: if fid is ancestor of @mo;
  * returns 0: if fid is not a ancestor of @mo;
@@ -1622,8 +1905,9 @@ static int mdd_lookup(const struct lu_context *ctxt, struct md_object *pobj,
  *
  * returns < 0: if error
  */
-static int mdd_is_subdir(const struct lu_context *ctx, struct md_object *mo,
-                         const struct lu_fid *fid, struct lu_fid *sfid)
+static int mdd_is_subdir(const struct lu_context *ctx,
+                         struct md_object *mo, const struct lu_fid *fid,
+                         struct lu_fid *sfid, struct md_ucred *uc)
 {
         struct mdd_device *mdd = mdo2mdd(mo);
         int rc;
@@ -1679,10 +1963,33 @@ static int __mdd_object_initialize(const struct lu_context *ctxt,
         RETURN(rc);
 }
 
+/*
+ * Sanity check for mdd_create_data(). XXX: should MAY_WRITE be checked too?
+ */
+static int mdd_cd_sanity_check(const struct lu_context *ctxt,
+                               struct mdd_object *obj, struct md_ucred *uc)
+{
+        int rc = 0; /* stays 0 while the MAY_WRITE check is compiled out */
+        ENTRY;
+
+        /* the object must be given and must not be dead */
+        if (!obj || mdd_is_dead_obj(obj))
+                RETURN(-ENOENT);
+
+#if 0
+        mdd_read_lock(ctxt, obj);
+        rc = mdd_permission_internal(ctxt, obj, MAY_WRITE, uc);
+        mdd_read_unlock(ctxt, obj);
+#endif
+
+        RETURN(rc);
+
+}
+
 static int mdd_create_data(const struct lu_context *ctxt,
                            struct md_object *pobj, struct md_object *cobj,
                            const struct md_create_spec *spec,
-                           struct md_attr *ma)
+                           struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdd_device *mdd = mdo2mdd(cobj);
         struct mdd_object *mdd_pobj = md2mdd_obj(pobj);/* XXX maybe NULL */
@@ -1694,6 +2001,10 @@ static int mdd_create_data(const struct lu_context *ctxt,
         int                rc;
         ENTRY;
 
+        rc = mdd_cd_sanity_check(ctxt, son, uc);
+        if (rc)
+                RETURN(rc);
+
         if (spec->sp_cr_flags & MDS_OPEN_DELAY_CREATE ||
                         !(spec->sp_cr_flags & FMODE_WRITE))
                 RETURN(0);
@@ -1732,9 +2043,9 @@ static int mdd_create_data(const struct lu_context *ctxt,
 }
 
 static int mdd_create_sanity_check(const struct lu_context *ctxt,
-                                   struct mdd_device *mdd,
                                    struct md_object *pobj,
-                                   const char *name, struct md_attr *ma)
+                                   const char *name, struct md_attr *ma,
+                                   struct md_ucred *uc)
 {
         struct mdd_thread_info *info = mdd_ctx_info(ctxt);
         struct lu_attr    *la        = &info->mti_la;
@@ -1746,7 +2057,8 @@ static int mdd_create_sanity_check(const struct lu_context *ctxt,
         /* EEXIST check */
         if (mdd_is_dead_obj(obj))
                 RETURN(-ENOENT);
-        rc = mdd_lookup(ctxt, pobj, name, fid);
+
+        rc = mdd_lookup_intent(ctxt, pobj, name, fid, MAY_WRITE | MAY_EXEC, uc);
         if (rc != -ENOENT)
                 RETURN(rc ? : -EEXIST);
 
@@ -1754,7 +2066,7 @@ static int mdd_create_sanity_check(const struct lu_context *ctxt,
         mdd_read_lock(ctxt, obj);
         rc = __mdd_la_get(ctxt, obj, la);
         mdd_read_unlock(ctxt, obj);
-        if (rc != 0)
+        if (rc)
                 RETURN(rc);
 
         if (la->la_mode & S_ISGID) {
@@ -1785,10 +2097,11 @@ static int mdd_create_sanity_check(const struct lu_context *ctxt,
 /*
  * Create object and insert it into namespace.
  */
-static int mdd_create(const struct lu_context *ctxt, struct md_object *pobj,
-                      const char *name, struct md_object *child,
+static int mdd_create(const struct lu_context *ctxt,
+                      struct md_object *pobj, const char *name,
+                      struct md_object *child,
                       const struct md_create_spec *spec,
-                      struct md_attr* ma)
+                      struct md_attr* ma, struct md_ucred *uc)
 {
         struct mdd_device *mdd = mdo2mdd(pobj);
         struct mdd_object *mdd_pobj = md2mdd_obj(pobj);
@@ -1801,9 +2114,10 @@ static int mdd_create(const struct lu_context *ctxt, struct md_object *pobj,
         ENTRY;
 
         /* sanity checks before big job */
-        rc = mdd_create_sanity_check(ctxt, mdd, pobj, name, ma);
+        rc = mdd_create_sanity_check(ctxt, pobj, name, ma, uc);
         if (rc)
                 RETURN(rc);
+
         /* no RPC inside the transaction, so OST objects should be created at
          * first */
         if (S_ISREG(attr->la_mode)) {
@@ -1945,11 +2259,42 @@ cleanup:
         mdd_trans_stop(ctxt, mdd, rc, handle);
         RETURN(rc);
 }
+
 /* partial operation */
+static int mdd_oc_sanity_check(const struct lu_context *ctxt,
+                               struct mdd_object *obj,
+                               struct md_attr *ma,
+                               struct md_ucred *uc)
+{
+        int rc;
+        ENTRY;
+
+        /* EEXIST check: the object to be created must not exist yet */
+        if (lu_object_exists(&obj->mod_obj.mo_lu))
+                RETURN(-EEXIST);
+
+        switch (ma->ma_attr.la_mode & S_IFMT) {
+        case S_IFREG:
+        case S_IFDIR:
+        case S_IFLNK:
+        case S_IFCHR:
+        case S_IFBLK:
+        case S_IFIFO:
+        case S_IFSOCK:
+                rc = 0;
+                break;
+        default: /* la_mode carries none of the file types listed above */
+                rc = -EINVAL;
+                break;
+        }
+        RETURN(rc);
+}
+
 static int mdd_object_create(const struct lu_context *ctxt,
                              struct md_object *obj,
                              const struct md_create_spec *spec,
-                             struct md_attr *ma)
+                             struct md_attr *ma,
+                             struct md_ucred *uc)
 {
 
         struct mdd_device *mdd = mdo2mdd(obj);
@@ -1959,6 +2304,10 @@ static int mdd_object_create(const struct lu_context *ctxt,
         int rc;
         ENTRY;
 
+        rc = mdd_oc_sanity_check(ctxt, mdd_obj, ma, uc);
+        if (rc)
+                RETURN(rc);
+
         mdd_txn_param_build(ctxt, &MDD_TXN_OBJECT_CREATE);
         handle = mdd_trans_start(ctxt, mdd);
         if (IS_ERR(handle))
@@ -1982,13 +2331,37 @@ static int mdd_object_create(const struct lu_context *ctxt,
 
         if (rc == 0)
                 rc = mdd_attr_get_internal_locked(ctxt, mdd_obj, ma);
+
         mdd_trans_stop(ctxt, mdd, rc, handle);
         RETURN(rc);
 }
+
 /* partial operation */
+static int mdd_ni_sanity_check(const struct lu_context *ctxt,
+                               struct md_object *pobj,
+                               const char *name,
+                               const struct lu_fid *fid,
+                               struct md_ucred *uc)
+{
+        struct mdd_object *obj       = md2mdd_obj(pobj);
+        int rc;
+        ENTRY;
+
+        /* the parent directory must not be dead */
+        if (mdd_is_dead_obj(obj))
+                RETURN(-ENOENT);
+
+        rc = mdd_lookup_intent(ctxt, pobj, name, fid, MAY_WRITE | MAY_EXEC, uc);
+        if (rc != -ENOENT) /* EEXIST check: a hit (0) or any other error fails */
+                RETURN(rc ? : -EEXIST);
+        else /* -ENOENT from the lookup: the name is free */
+                RETURN(0);
+}
+
 static int mdd_name_insert(const struct lu_context *ctxt,
-                           struct md_object *pobj, const char *name,
-                           const struct lu_fid *fid, int isdir)
+                           struct md_object *pobj,
+                           const char *name, const struct lu_fid *fid,
+                           int isdir, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(pobj);
         struct thandle *handle;
@@ -2001,16 +2374,42 @@ static int mdd_name_insert(const struct lu_context *ctxt,
                 RETURN(PTR_ERR(handle));
 
         mdd_write_lock(ctxt, mdd_obj);
+        rc = mdd_ni_sanity_check(ctxt, pobj, name, fid, uc);
+        if (rc)
+                GOTO(out_unlock, rc);
+
         rc = __mdd_index_insert(ctxt, mdd_obj, fid, name, isdir, handle);
+
+out_unlock:
         mdd_write_unlock(ctxt, mdd_obj);
 
         mdd_trans_stop(ctxt, mdo2mdd(pobj), rc, handle);
         RETURN(rc);
 }
 
+static int mdd_nr_sanity_check(const struct lu_context *ctxt,
+                               struct md_object *pobj,
+                               const char *name,
+                               struct md_ucred *uc)
+{
+        struct mdd_thread_info *info = mdd_ctx_info(ctxt);
+        struct lu_fid     *fid       = &info->mti_fid; /* scratch fid for the lookup */
+        struct mdd_object *obj       = md2mdd_obj(pobj);
+        int rc;
+        ENTRY;
+
+        /* the parent directory must not be dead */
+        if (mdd_is_dead_obj(obj))
+                RETURN(-ENOENT);
+
+        rc = mdd_lookup_intent(ctxt, pobj, name, fid, MAY_WRITE | MAY_EXEC, uc);
+        RETURN(rc); /* any lookup or permission error is returned as is */
+}
+
 static int mdd_name_remove(const struct lu_context *ctxt,
                            struct md_object *pobj,
-                           const char *name)
+                           const char *name,
+                           struct md_ucred *uc)
 {
         struct mdd_device *mdd = mdo2mdd(pobj);
         struct mdd_object *mdd_obj = md2mdd_obj(pobj);
@@ -2024,18 +2423,55 @@ static int mdd_name_remove(const struct lu_context *ctxt,
                 RETURN(PTR_ERR(handle));
 
         mdd_write_lock(ctxt, mdd_obj);
+        rc = mdd_nr_sanity_check(ctxt, pobj, name, uc);
+        if (rc)
+                GOTO(out_unlock, rc);
 
         rc = __mdd_index_delete(ctxt, mdd_obj, name, handle);
 
+out_unlock:
         mdd_write_unlock(ctxt, mdd_obj);
 
         mdd_trans_stop(ctxt, mdd, rc, handle);
         RETURN(rc);
 }
 
-static int mdd_rename_tgt(const struct lu_context *ctxt, struct md_object *pobj,
-                          struct md_object *tobj, const struct lu_fid *lf,
-                          const char *name, struct md_attr *ma)
+static int mdd_rt_sanity_check(const struct lu_context *ctxt,
+                               struct mdd_object *tgt_pobj,
+                               struct mdd_object *tobj,
+                               const struct lu_fid *sfid,
+                               const char *name, struct md_attr *ma,
+                               struct md_ucred *uc)
+{
+        struct mdd_device *mdd = mdo2mdd(&tgt_pobj->mod_obj);
+        int rc, src_is_dir;
+        ENTRY;
+        /* Sanity check for mdd_rename_tgt(), which calls this after locking
+         * the target parent (and target, if any): the parent must be alive. */
+        if (mdd_is_dead_obj(tgt_pobj))
+                RETURN(-ENOENT);
+
+        src_is_dir = S_ISDIR(ma->ma_attr.la_mode);
+        if (tobj) { /* an existing target entry gets replaced */
+                rc = mdd_may_delete(ctxt, tgt_pobj, tobj, src_is_dir, 1, uc);
+                if (!rc && S_ISDIR(mdd_object_type(tobj)) &&
+                     mdd_dir_is_empty(ctxt, tobj))
+                                RETURN(-ENOTEMPTY);
+        } else { /* no target yet: a new entry gets created */
+                rc = mdd_may_create(ctxt, tgt_pobj, NULL, 1, uc);
+        }
+
+        /* a directory source must not be an ancestor of the target dir */
+        if (!rc && src_is_dir && mdd_is_parent(ctxt, mdd, tgt_pobj, sfid, NULL))
+                RETURN(-EINVAL);
+
+        RETURN(rc);
+}
+
+static int mdd_rename_tgt(const struct lu_context *ctxt,
+                          struct md_object *pobj, struct md_object *tobj,
+                          const struct lu_fid *lf, const char *name,
+                          struct md_attr *ma, struct md_ucred *uc)
 {
         struct mdd_device *mdd = mdo2mdd(pobj);
         struct mdd_object *mdd_tpobj = md2mdd_obj(pobj);
@@ -2049,12 +2485,18 @@ static int mdd_rename_tgt(const struct lu_context *ctxt, struct md_object *pobj,
         if (IS_ERR(handle))
                 RETURN(PTR_ERR(handle));
 
-        if (tobj)
+        if (tobj) {
                 mdd_tobj = md2mdd_obj(tobj);
-
-        mdd_lock2(ctxt, mdd_tpobj, mdd_tobj);
+                mdd_lock2(ctxt, mdd_tpobj, mdd_tobj);
+        } else {
+                mdd_write_lock(ctxt, mdd_tpobj);
+        }
 
         /*TODO rename sanity checking*/
+        rc = mdd_rt_sanity_check(ctxt, mdd_tpobj, mdd_tobj, lf, name, ma, uc);
+        if (rc)
+                GOTO(cleanup, rc);
+
         if (tobj) {
                 rc = __mdd_index_delete(ctxt, mdd_tpobj, name, handle);
                 if (rc)
@@ -2068,13 +2510,20 @@ static int mdd_rename_tgt(const struct lu_context *ctxt, struct md_object *pobj,
         if (tobj && lu_object_exists(&tobj->mo_lu))
                 __mdd_ref_del(ctxt, mdd_tobj, handle);
 cleanup:
-        mdd_unlock2(ctxt, mdd_tpobj, mdd_tobj);
+        if (tobj)
+                mdd_unlock2(ctxt, mdd_tpobj, mdd_tobj);
+        else
+                mdd_write_unlock(ctxt, mdd_tpobj);
         mdd_trans_stop(ctxt, mdd, rc, handle);
         RETURN(rc);
 }
 
+/*
+ * No permission check is needed.
+ */
 static int mdd_root_get(const struct lu_context *ctx,
-                        struct md_device *m, struct lu_fid *f)
+                        struct md_device *m, struct lu_fid *f,
+                        struct md_ucred *uc)
 {
         struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
 
@@ -2083,8 +2532,11 @@ static int mdd_root_get(const struct lu_context *ctx,
         RETURN(0);
 }
 
-static int mdd_statfs(const struct lu_context *ctx,
-                      struct md_device *m, struct kstatfs *sfs)
+/*
+ * No permission check is needed.
+ */
+static int mdd_statfs(const struct lu_context *ctx, struct md_device *m,
+                      struct kstatfs *sfs, struct md_ucred *uc)
 {
        struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
         int rc;
@@ -2096,9 +2548,11 @@ static int mdd_statfs(const struct lu_context *ctx,
         RETURN(rc);
 }
 
-static int mdd_maxsize_get(const struct lu_context *ctx,
-                           struct md_device *m, int *md_size,
-                           int *cookie_size)
+/*
+ * No permission check is needed.
+ */
+static int mdd_maxsize_get(const struct lu_context *ctx, struct md_device *m,
+                           int *md_size, int *cookie_size, struct md_ucred *uc)
 {
        struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
         ENTRY;
@@ -2119,7 +2573,11 @@ static void __mdd_ref_add(const struct lu_context *ctxt, struct mdd_object *obj,
         next->do_ops->do_ref_add(ctxt, next, handle);
 }
 
-static int mdd_ref_add(const struct lu_context *ctxt, struct md_object *obj)
+/*
+ * XXX: is a permission check needed here?
+ */
+static int mdd_ref_add(const struct lu_context *ctxt,
+                       struct md_object *obj, struct md_ucred *uc)
 {
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct mdd_device *mdd = mdo2mdd(obj);
@@ -2167,26 +2625,78 @@ static int accmode(struct mdd_object *mdd_obj, int flags)
 #endif
         if (flags & FMODE_READ)
                 res = MAY_READ;
-        if (flags & (FMODE_WRITE|MDS_OPEN_TRUNC))
+        if (flags & (FMODE_WRITE | MDS_OPEN_TRUNC | MDS_OPEN_APPEND))
                 res |= MAY_WRITE;
         if (flags & MDS_FMODE_EXEC)
                 res = MAY_EXEC;
         return res;
 }
 
+static int mdd_open_sanity_check(const struct lu_context *ctxt,
+                                 struct mdd_object *obj, int flag,
+                                 struct md_ucred *uc)
+{
+        struct lu_attr *tmp_la = &mdd_ctx_info(ctxt)->mti_la;
+        int mode = accmode(obj, flag);
+        int rc;
+        ENTRY;
+
+        /* the object to be opened must not be dead */
+        if (mdd_is_dead_obj(obj))
+                RETURN(-ENOENT);
+
+        rc = __mdd_la_get(ctxt, obj, tmp_la);
+        if (rc)
+               RETURN(rc);
+
+        if (S_ISLNK(tmp_la->la_mode))
+                RETURN(-ELOOP);
+
+        if (S_ISDIR(tmp_la->la_mode) && (mode & MAY_WRITE))
+                RETURN(-EISDIR);
+
+        if (!(flag & MDS_OPEN_CREATED)) { /* getattr == 0: mti_la was filled above */
+                rc = __mdd_permission_internal(ctxt, obj, mode, 0, uc);
+                if (rc)
+                        RETURN(rc);
+        }
+
+        /*
+         * FIFO's, sockets and device files are special: they don't
+         * actually live on the filesystem itself, and as such you
+         * can write to them even if the filesystem is read-only.
+         */
+        if (S_ISFIFO(tmp_la->la_mode) || S_ISSOCK(tmp_la->la_mode) ||
+            S_ISBLK(tmp_la->la_mode) || S_ISCHR(tmp_la->la_mode))
+                flag &= ~O_TRUNC;
+
+        /*
+         * An append-only file must be opened in append mode for writing.
+         */
+        if (mdd_is_append(obj)) {
+                if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
+                        RETURN(-EPERM);
+                if (flag & O_TRUNC)
+                        RETURN(-EPERM);
+        }
+
+        /* O_NOATIME is owner/superuser only; a NULL uc means "no check" */
+        if ((flag & O_NOATIME) && uc != NULL)
+                if (uc->mu_fsuid != tmp_la->la_uid && !mdd_capable(uc, CAP_FOWNER))
+                        RETURN(-EPERM);
+
+        RETURN(0);
+}
+
 static int mdd_open(const struct lu_context *ctxt, struct md_object *obj,
-                    int flags)
+                    int flags, struct md_ucred *uc)
 {
-        int mode = accmode(md2mdd_obj(obj), flags);
+        struct mdd_object *mdd_obj = md2mdd_obj(obj);
         int rc = 0;
 
         mdd_write_lock(ctxt, md2mdd_obj(obj));
 
-        if (mode & MAY_WRITE) {
-                if (mdd_is_immutable(md2mdd_obj(obj)))
-                        rc = -EACCES;
-        }
-
+        rc = mdd_open_sanity_check(ctxt, mdd_obj, flags, uc);
         if (rc == 0)
                 md2mdd_obj(obj)->mod_count ++;
 
@@ -2194,8 +2704,11 @@ static int mdd_open(const struct lu_context *ctxt, struct md_object *obj,
         return rc;
 }
 
+/*
+ * No permission check is needed.
+ */
 static int mdd_close(const struct lu_context *ctxt, struct md_object *obj,
-                     struct md_attr *ma)
+                     struct md_attr *ma, struct md_ucred *uc)
 {
         int rc;
         struct mdd_object *mdd_obj;
@@ -2223,26 +2736,306 @@ static int mdd_close(const struct lu_context *ctxt, struct md_object *obj,
         RETURN(rc);
 }
 
+static int mdd_readpage_sanity_check(const struct lu_context *ctxt,
+                                     struct mdd_object *obj,
+                                     struct md_ucred *uc)
+{
+        struct dt_object *next = mdd_object_child(obj); /* probed as a dir below */
+        int rc;
+        ENTRY;
+
+        if (S_ISDIR(mdd_object_type(obj)) &&
+            dt_try_as_dir(ctxt, next))
+                rc = mdd_permission_internal(ctxt, obj, MAY_READ, uc);
+        else /* not a directory, or it cannot be used as one */
+                rc = -ENOTDIR;
+
+        RETURN(rc);
+}
+
 static int mdd_readpage(const struct lu_context *ctxt, struct md_object *obj,
-                        const struct lu_rdpg *rdpg)
+                        const struct lu_rdpg *rdpg, struct md_ucred *uc)
 {
         struct dt_object *next;
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         int rc;
+        ENTRY;
 
         LASSERT(lu_object_exists(mdd2lu_obj(mdd_obj)));
         next = mdd_object_child(mdd_obj);
 
         mdd_read_lock(ctxt, mdd_obj);
-        if (S_ISDIR(mdd_object_type(mdd_obj)) &&
-            dt_try_as_dir(ctxt, next))
-                rc = next->do_ops->do_readpage(ctxt, next, rdpg);
-        else
-                rc = -ENOTDIR;
+        rc = mdd_readpage_sanity_check(ctxt, mdd_obj, uc); /* dir + MAY_READ */
+        if (rc)
+                GOTO(out_unlock, rc);
+
+        rc = next->do_ops->do_readpage(ctxt, next, rdpg);
+
+out_unlock:
         mdd_read_unlock(ctxt, mdd_obj);
+        RETURN(rc);
+}
+
+#ifdef CONFIG_FS_POSIX_ACL
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+
+static int mdd_posix_acl_permission(struct md_ucred *uc, struct lu_attr *la,
+                                    int want, posix_acl_xattr_entry *entry,
+                                    int count)
+{
+        posix_acl_xattr_entry *pa, *pe, *mask_obj;
+        int found = 0; /* set once a group entry matched the caller */
+        ENTRY;
+
+        if (count <= 0) /* no ACL entries to evaluate */
+                RETURN(-EACCES);
+
+        pa = &entry[0];
+        pe = &entry[count - 1]; /* last entry, inclusive bound */
+        for (; pa <= pe; pa++) {
+                switch(pa->e_tag) {
+                        case ACL_USER_OBJ:
+                                /* (May have been checked already) */
+                                if (la->la_uid == uc->mu_fsuid)
+                                        goto check_perm;
+                                break;
+                        case ACL_USER:
+                                if (pa->e_id == uc->mu_fsuid)
+                                        goto mask;
+                                break;
+                        case ACL_GROUP_OBJ:
+                                if (mdd_in_group_p(uc, la->la_gid)) {
+                                        found = 1;
+                                        if ((pa->e_perm & want) == want)
+                                                goto mask;
+                                }
+                                break;
+                        case ACL_GROUP:
+                                if (mdd_in_group_p(uc, pa->e_id)) {
+                                        found = 1;
+                                        if ((pa->e_perm & want) == want)
+                                                goto mask;
+                                }
+                                break;
+                        case ACL_MASK:
+                                break;
+                        case ACL_OTHER:
+                                if (found) /* a group matched but did not grant @want */
+                                        RETURN(-EACCES);
+                                else
+                                        goto check_perm;
+                        default:
+                                RETURN(-EIO);
+                }
+        }
+        RETURN(-EIO); /* list ended without reaching an ACL_OTHER entry */
+
+mask: /* pa matched: limit it by the first ACL_MASK entry after it, if any */
+        for (mask_obj = pa + 1; mask_obj <= pe; mask_obj++) {
+                if (mask_obj->e_tag == ACL_MASK) {
+                        if ((pa->e_perm & mask_obj->e_perm & want) == want)
+                                RETURN(0);
+
+                        RETURN(-EACCES);
+                }
+        }
+
+check_perm: /* no mask applies: test the matched entry on its own */
+        if ((pa->e_perm & want) == want)
+                RETURN(0);
+
+        RETURN(-EACCES);
+}
+#endif
+
+static int mdd_check_acl(const struct lu_context *ctxt, struct mdd_object *obj,
+                         struct lu_attr* la, int mask, struct md_ucred *uc)
+{
+#ifdef CONFIG_FS_POSIX_ACL
+        struct dt_object  *next;
+        void *buf;
+        int buf_len;
+        posix_acl_xattr_entry *entry;
+        int entry_count;
+        int rc;
+        ENTRY;
+        /* XXX(review): both xattr reads below pass "" as the name -- confirm */
+        next = mdd_object_child(obj);
+        buf_len = next->do_ops->do_xattr_get(ctxt, next, NULL, 0, "");
+        if (buf_len <= 0)
+                RETURN(buf_len ? : -EACCES);
+
+        OBD_ALLOC(buf, buf_len);
+        if (buf == NULL)
+                RETURN(-ENOMEM);
+        rc = next->do_ops->do_xattr_get(ctxt, next, buf, buf_len, "");
+        if (rc <= 0)
+                GOTO(out, rc = rc ? : -EACCES);
+        /* signed math: a value shorter than the header must yield count <= 0 */
+        entry = ((posix_acl_xattr_header *)buf)->a_entries;
+        entry_count = (rc - (int)sizeof(posix_acl_xattr_header)) /
+                      (int)sizeof(posix_acl_xattr_entry);
+        rc = mdd_posix_acl_permission(uc, la, mask, entry, entry_count);
+
+out:
+        OBD_FREE(buf, buf_len);
+        RETURN(rc);
+#else /* no ACL support: -EAGAIN lets the caller go on with the mode bits */
+        ENTRY;
+        RETURN(-EAGAIN);
+#endif
+}
+
+static int mdd_exec_permission_lite(const struct lu_context *ctxt,
+                                    struct mdd_object *obj,
+                                    struct md_ucred *uc)
+{
+        struct lu_attr *la = &mdd_ctx_info(ctxt)->mti_la;
+        umode_t mode;
+        int rc;
+        ENTRY;
+
+        /* No permission check is needed in these cases */
+        if ((uc == NULL) || (uc->mu_valid == UCRED_INIT))
+                RETURN(0);
+
+        /* Invalid user credentials */
+        if (uc->mu_valid == UCRED_INVALID)
+                RETURN(-EACCES);
+
+        rc = __mdd_la_get(ctxt, obj, la);
+        if (rc)
+                RETURN(rc);
+
+        mode = la->la_mode; /* shifted below to the owner or group rwx bits */
+        if (uc->mu_fsuid == la->la_uid)
+                mode >>= 6;
+        else if (mdd_in_group_p(uc, la->la_gid))
+                mode >>= 3;
+
+        if (mode & MAY_EXEC)
+                RETURN(0);
+
+        if (((la->la_mode & S_IXUGO) || S_ISDIR(la->la_mode)) &&
+            mdd_capable(uc, CAP_DAC_OVERRIDE))
+                RETURN(0);
+
+        if (S_ISDIR(la->la_mode) && mdd_capable(uc, CAP_DAC_READ_SEARCH))
+                RETURN(0);
+
+        RETURN(-EACCES);
+}
+
+static int __mdd_permission_internal(const struct lu_context *ctxt,
+                                     struct mdd_object *obj,
+                                     int mask, int getattr,
+                                     struct md_ucred *uc)
+{
+        struct lu_attr *la = &mdd_ctx_info(ctxt)->mti_la;
+        __u32 mode;
+        int rc;
+
+        ENTRY;
+
+        if (mask == 0)
+                RETURN(0);
+
+        /* No permission check is needed in these cases */
+        if ((uc == NULL) || (uc->mu_valid == UCRED_INIT))
+                RETURN(0);
+
+        /* Invalid user credentials */
+        if (uc->mu_valid == UCRED_INVALID)
+                RETURN(-EACCES);
+
+        /*
+         * Nobody gets write access to an immutable file.
+         */
+        if ((mask & MAY_WRITE) && mdd_is_immutable(obj))
+                RETURN(-EACCES);
+
+        if (getattr) {
+                rc = __mdd_la_get(ctxt, obj, la);
+                if (rc)
+                        RETURN(rc);
+        }
+
+        mode = la->la_mode; /* with getattr == 0 the caller must have filled mti_la */
+        if (uc->mu_fsuid == la->la_uid) {
+                mode >>= 6;
+        } else {
+                if (mode & S_IRWXG) {
+                        if (((mode >> 3) & mask & S_IRWXO) != mask)
+                                goto check_groups;
+
+                        rc = mdd_check_acl(ctxt, obj, la, mask, uc);
+                        if (rc == -EACCES)
+                                goto check_capabilities;
+                        else if ((rc != -EAGAIN) && (rc != -EOPNOTSUPP))
+                                RETURN(rc);
+                }
+
+check_groups:
+                if (mdd_in_group_p(uc, la->la_gid))
+                        mode >>= 3;
+        }
+
+        /*
+         * If the DACs are ok we don't need any capability check.
+         */
+        if (((mode & mask & S_IRWXO) == mask))
+                RETURN(0);
+
+check_capabilities:
+
+        /*
+         * Read/write DACs are always overridable.
+         * Executable DACs are overridable if at least one exec bit is set.
+         * Dir's DACs are always overridable.
+         */
+        if (!(mask & MAY_EXEC) ||
+            (la->la_mode & S_IXUGO) || S_ISDIR(la->la_mode))
+                if (mdd_capable(uc, CAP_DAC_OVERRIDE))
+                        RETURN(0);
+
+        /*
+         * Searching includes executable on directories, else just read.
+         */
+        if ((mask == MAY_READ) ||
+            (S_ISDIR(la->la_mode) && !(mask & MAY_WRITE)))
+                if (mdd_capable(uc, CAP_DAC_READ_SEARCH))
+                        RETURN(0);
+
+        RETURN(-EACCES);
+}
+
+static inline int mdd_permission_internal_locked(const struct lu_context *ctxt,
+                                                 struct mdd_object *obj,
+                                                 int mask, struct md_ucred *uc)
+{
+        int rc; /* result of the check made under @obj's read lock */
+
+        mdd_read_lock(ctxt, obj);
+        rc = mdd_permission_internal(ctxt, obj, mask, uc);
+        mdd_read_unlock(ctxt, obj);
+
         return rc;
 }
 
+static int mdd_permission(const struct lu_context *ctxt, struct md_object *obj,
+                          int mask, struct md_ucred *uc)
+{
+        struct mdd_object *mdd_obj = md2mdd_obj(obj);
+        int rc; /* installed as .moo_permission; takes the read lock itself */
+        ENTRY;
+
+        rc = mdd_permission_internal_locked(ctxt, mdd_obj, mask, uc);
+
+        RETURN(rc);
+}
+
 struct md_device_operations mdd_ops = {
         .mdo_statfs         = mdd_statfs,
         .mdo_root_get       = mdd_root_get,
@@ -2262,8 +3055,8 @@ static struct md_dir_operations mdd_dir_ops = {
         .mdo_create_data   = mdd_create_data
 };
 
-
 static struct md_object_operations mdd_obj_ops = {
+        .moo_permission    = mdd_permission,
         .moo_attr_get      = mdd_attr_get,
         .moo_attr_set      = mdd_attr_set,
         .moo_xattr_get     = mdd_xattr_get,
index 6483ee3..e92dd44 100644 (file)
@@ -31,6 +31,8 @@
 
 #include <md_object.h>
 #include <dt_object.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
 
 struct dt_device;
 
index a0b4002..3f1bbca 100644 (file)
@@ -533,8 +533,9 @@ int mdd_unlink_log(const struct lu_context *ctxt, struct mdd_device *mdd,
 {
         struct obd_device *obd = mdd2obd_dev(mdd);
 
-        if (mds_log_op_unlink(obd, ma->ma_lmm, ma->ma_lmm_size,
-                              ma->ma_cookie, ma->ma_cookie_size) > 0) {
+        if ((ma->ma_cookie_size > 0) &&
+            (mds_log_op_unlink(obd, ma->ma_lmm, ma->ma_lmm_size,
+                              ma->ma_cookie, ma->ma_cookie_size) > 0)) {
                 ma->ma_valid |= MA_COOKIE;
         }
         return 0;
index 1bd28c3..b93dbb5 100644 (file)
@@ -812,7 +812,7 @@ static int mds_getattr_lock(struct ptlrpc_request *req, int offset,
         struct lvfs_run_ctxt saved;
         struct mds_body *body;
         struct dentry *dparent = NULL, *dchild = NULL;
-        struct lvfs_ucred uc = {NULL,};
+        struct lvfs_ucred uc = {0,};
         struct lustre_handle parent_lockh;
         int namesize;
         int rc = 0, cleanup_phase = 0, resent_req = 0;
@@ -978,7 +978,7 @@ static int mds_getattr(struct ptlrpc_request *req, int offset)
         struct lvfs_run_ctxt saved;
         struct dentry *de;
         struct mds_body *body;
-        struct lvfs_ucred uc = { NULL, };
+        struct lvfs_ucred uc = {0,};
         int rc = 0;
         ENTRY;
 
@@ -1131,7 +1131,7 @@ static int mds_readpage(struct ptlrpc_request *req, int offset)
         struct mds_body *body, *repbody;
         struct lvfs_run_ctxt saved;
         int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*repbody) };
-        struct lvfs_ucred uc = {NULL,};
+        struct lvfs_ucred uc = {0,};
         ENTRY;
 
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_READPAGE_PACK))
@@ -1970,12 +1970,14 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
         if (rc)
                 GOTO(err_fs, rc);
 
+#if 0
         mds->mds_group_hash = upcall_cache_init(obd->obd_name);
         if (IS_ERR(mds->mds_group_hash)) {
                 rc = PTR_ERR(mds->mds_group_hash);
                 mds->mds_group_hash = NULL;
                 GOTO(err_qctxt, rc);
         }
+#endif
 
         /* Don't wait for mds_postrecov trying to clear orphans */
         obd->obd_async_recov = 1;
@@ -2051,8 +2053,10 @@ err_qctxt:
 err_fs:
         /* No extra cleanup needed for llog_init_commit_thread() */
         mds_fs_cleanup(obd);
+#if 0
         upcall_cache_cleanup(mds->mds_group_hash);
         mds->mds_group_hash = NULL;
+#endif
 err_ns:
         ldlm_namespace_free(obd->obd_namespace, 0);
         obd->obd_namespace = NULL;
@@ -2255,8 +2259,10 @@ static int mds_cleanup(struct obd_device *obd)
                 OBD_FREE(mds->mds_lov_objids, mds->mds_lov_objids_size);
         mds_fs_cleanup(obd);
 
+#if 0
         upcall_cache_cleanup(mds->mds_group_hash);
         mds->mds_group_hash = NULL;
+#endif
 
         must_put = server_put_mount(obd->obd_name, mds->mds_vfsmnt);
         /* must_put is for old method (l_p_m returns non-0 on err) */
index 1140a61..8ca1319 100644 (file)
@@ -82,6 +82,7 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer,
         return count;
 }
 
+#if 0
 static int lprocfs_wr_group_info(struct file *file, const char *buffer,
                                  unsigned long count, void *data)
 {
@@ -235,6 +236,7 @@ static int lprocfs_wr_group_flush(struct file *file, const char *buffer,
         upcall_cache_flush_idle(obd->u.mds.mds_group_hash);
         return count;
 }
+#endif
 
 #ifdef HAVE_QUOTA_SUPPORT        
 static int lprocfs_mds_rd_bunit(char *page, char **start, off_t off, int count, 
@@ -406,6 +408,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = {
         { "quota_iunit_sz",  lprocfs_mds_rd_iunit, lprocfs_mds_wr_iunit, 0 },
         { "quota_itune_sz",  lprocfs_mds_rd_itune, lprocfs_mds_wr_itune, 0 },
 #endif
+#if 0
         { "group_expire_interval", lprocfs_rd_group_expire,
                              lprocfs_wr_group_expire, 0},
         { "group_acquire_expire", lprocfs_rd_group_acquire_expire,
@@ -414,6 +417,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = {
                              lprocfs_wr_group_upcall, 0},
         { "group_flush",     0, lprocfs_wr_group_flush, 0},
         { "group_info",      0, lprocfs_wr_group_info, 0 },
+#endif
         { "atime_diff",      lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 },
         { 0 }
 };
index c9d33f4..e46c3e2 100644 (file)
@@ -104,8 +104,10 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
         r->ur_uc.luc_fsuid = rec->sa_fsuid;
         r->ur_uc.luc_fsgid = rec->sa_fsgid;
         r->ur_uc.luc_cap = rec->sa_cap;
+#if 0
         r->ur_uc.luc_suppgid1 = rec->sa_suppgid;
         r->ur_uc.luc_suppgid2 = -1;
+#endif
         r->ur_fid1 = &rec->sa_fid;
         attr->ia_valid = rec->sa_valid;
         attr->ia_mode = rec->sa_mode;
@@ -150,8 +152,10 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset,
         r->ur_uc.luc_fsuid = rec->cr_fsuid;
         r->ur_uc.luc_fsgid = rec->cr_fsgid;
         r->ur_uc.luc_cap = rec->cr_cap;
+#if 0
         r->ur_uc.luc_suppgid1 = rec->cr_suppgid;
         r->ur_uc.luc_suppgid2 = -1;
+#endif
         r->ur_fid1 = &rec->cr_fid;
         r->ur_fid2 = &rec->cr_replayfid;
         r->ur_mode = rec->cr_mode;
@@ -196,8 +200,10 @@ static int mds_link_unpack(struct ptlrpc_request *req, int offset,
         r->ur_uc.luc_fsuid = rec->lk_fsuid;
         r->ur_uc.luc_fsgid = rec->lk_fsgid;
         r->ur_uc.luc_cap = rec->lk_cap;
+#if 0
         r->ur_uc.luc_suppgid1 = rec->lk_suppgid1;
         r->ur_uc.luc_suppgid2 = rec->lk_suppgid2;
+#endif
         r->ur_fid1 = &rec->lk_fid1;
         r->ur_fid2 = &rec->lk_fid2;
         r->ur_time = rec->lk_time;
@@ -224,8 +230,10 @@ static int mds_unlink_unpack(struct ptlrpc_request *req, int offset,
         r->ur_uc.luc_fsuid = rec->ul_fsuid;
         r->ur_uc.luc_fsgid = rec->ul_fsgid;
         r->ur_uc.luc_cap = rec->ul_cap;
+#if 0
         r->ur_uc.luc_suppgid1 = rec->ul_suppgid;
         r->ur_uc.luc_suppgid2 = -1;
+#endif
         r->ur_mode = rec->ul_mode;
         r->ur_fid1 = &rec->ul_fid1;
         r->ur_fid2 = &rec->ul_fid2;
@@ -253,8 +261,10 @@ static int mds_rename_unpack(struct ptlrpc_request *req, int offset,
         r->ur_uc.luc_fsuid = rec->rn_fsuid;
         r->ur_uc.luc_fsgid = rec->rn_fsgid;
         r->ur_uc.luc_cap = rec->rn_cap;
+#if 0
         r->ur_uc.luc_suppgid1 = rec->rn_suppgid1;
         r->ur_uc.luc_suppgid2 = rec->rn_suppgid2;
+#endif
         r->ur_fid1 = &rec->rn_fid1;
         r->ur_fid2 = &rec->rn_fid2;
         r->ur_time = rec->rn_time;
@@ -287,8 +297,10 @@ static int mds_open_unpack(struct ptlrpc_request *req, int offset,
         r->ur_uc.luc_fsuid = rec->cr_fsuid;
         r->ur_uc.luc_fsgid = rec->cr_fsgid;
         r->ur_uc.luc_cap = rec->cr_cap;
+#if 0
         r->ur_uc.luc_suppgid1 = rec->cr_suppgid;
         r->ur_uc.luc_suppgid2 = -1;
+#endif
         r->ur_fid1 = &rec->cr_fid;
         r->ur_fid2 = &rec->cr_replayfid;
         r->ur_mode = rec->cr_mode;
@@ -358,8 +370,10 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req,
 {
         struct mds_body *body = lustre_msg_buf(req->rq_reqmsg, offset,
                                                sizeof(*body));
+#if 0
         struct mds_obd *mds = mds_req2mds(req);
         int rc;
+#endif
 
         LASSERT(body != NULL); /* previously verified & swabbed by caller */
 
@@ -376,6 +390,7 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req,
                 ucred->luc_cap = body->capability;
         }
 
+#if 0
         ucred->luc_uce = upcall_cache_get_entry(mds->mds_group_hash,
                                                 ucred->luc_fsuid,
                                                 ucred->luc_fsgid, 1,
@@ -390,11 +405,14 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req,
         if (ucred->luc_uce)
                 ucred->luc_fsgid = ucred->luc_uce->ue_primary;
 #endif
+#endif
 
         return 0;
 }
 
+/*
+ * The only statement of this function, the upcall_cache_put_entry() call on
+ * mds->mds_group_hash, is compiled out with #if 0, so the function currently
+ * does nothing and neither @ucred nor @mds is touched.
+ */
 void mds_exit_ucred(struct lvfs_ucred *ucred, struct mds_obd *mds)
 {
+#if 0
         upcall_cache_put_entry(mds->mds_group_hash, ucred->luc_uce);
+#endif
 }
index e857405..82f3a9f 100644 (file)
@@ -2306,7 +2306,9 @@ int mds_reint_rec(struct mds_update_record *rec, int offset,
                   struct ptlrpc_request *req, struct lustre_handle *lockh)
 {
         struct obd_device *obd = req->rq_export->exp_obd;
+#if 0
         struct mds_obd *mds = &obd->u.mds;
+#endif
         struct lvfs_run_ctxt saved;
         int rc;
         ENTRY;
@@ -2321,6 +2323,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset,
         }
 #endif
 
+#if 0
         /* get group info of this user */
         rec->ur_uc.luc_uce = upcall_cache_get_entry(mds->mds_group_hash,
                                                     rec->ur_uc.luc_fsuid,
@@ -2340,11 +2343,14 @@ int mds_reint_rec(struct mds_update_record *rec, int offset,
         if (rec->ur_uc.luc_uce)
                 rec->ur_uc.luc_fsgid = rec->ur_uc.luc_uce->ue_primary;
 #endif
+#endif
 
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc);
         rc = reinters[rec->ur_opcode] (rec, offset, req, lockh);
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc);
 
+#if 0
         upcall_cache_put_entry(mds->mds_group_hash, rec->ur_uc.luc_uce);
+#endif
         RETURN(rc);
 }
index ca46092..33695d0 100644 (file)
@@ -170,7 +170,7 @@ int mds_getxattr(struct ptlrpc_request *req)
         struct lvfs_run_ctxt saved;
         struct dentry *de;
         struct mds_body *body;
-        struct lvfs_ucred uc = { NULL, };
+        struct lvfs_ucred uc = {0,};
         int rc = 0;
         ENTRY;
 
@@ -332,7 +332,7 @@ int mds_setxattr(struct ptlrpc_request *req)
         struct obd_device *obd = req->rq_export->exp_obd;
         struct lvfs_run_ctxt saved;
         struct mds_body *body;
-        struct lvfs_ucred uc = { NULL, };
+        struct lvfs_ucred uc = {0,};
         int rc;
         ENTRY;
 
index b1d1c39..3dcf8bf 100644 (file)
@@ -1,4 +1,5 @@
 MODULES := mdt
 mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o mdt_open.o
+mdt-objs += mdt_idmap.o mdt_identity.o mdt_rmtacl.o
 
 @INCLUDE_RULES@
index d6503ce..34e3695 100644 (file)
@@ -57,6 +57,7 @@
 #include <lustre_mdt.h>
 #include "mdt_internal.h"
 #include <linux/lustre_acl.h>
+#include <lustre_param.h>
 /*
  * Initialized in mdt_mod_init().
  */
@@ -168,12 +169,12 @@ static int mdt_getstatus(struct mdt_thread_info *info)
 
         ENTRY;
 
-        if (MDT_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK))
+        if (MDT_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) {
                 rc = -ENOMEM;
-        else {
+        } else {
                 body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
-                rc = next->md_ops->mdo_root_get(info->mti_ctxt,
-                                                next, &body->fid1);
+                rc = next->md_ops->mdo_root_get(info->mti_ctxt, next,
+                                                &body->fid1, NULL);
                 if (rc == 0)
                         body->valid |= OBD_MD_FLID;
         }
@@ -199,8 +200,8 @@ static int mdt_statfs(struct mdt_thread_info *info)
         } else {
                 osfs = req_capsule_server_get(&info->mti_pill,&RMF_OBD_STATFS);
                 /* XXX max_age optimisation is needed here. See mds_statfs */
-                rc = next->md_ops->mdo_statfs(info->mti_ctxt,
-                                              next, &info->mti_u.ksfs);
+                rc = next->md_ops->mdo_statfs(info->mti_ctxt, next,
+                                              &info->mti_u.ksfs, NULL);
                 statfs_pack(osfs, &info->mti_u.ksfs);
         }
 
@@ -292,7 +293,7 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                 ma->ma_lmm_size = req_capsule_get_size(pill, &RMF_MDT_MD,
                                                              RCL_SERVER);
         }
-        rc = mo_attr_get(ctxt, next, ma);
+        rc = mo_attr_get(ctxt, next, ma, &info->mti_uc);
         if (rc == -EREMOTE) {
                 /* This object is located on remote node.*/
                 repbody->fid1 = *mdt_object_fid(o);
@@ -304,10 +305,12 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                 RETURN(rc);
         }
 
-        if (ma->ma_valid & MA_INODE)
+        if (ma->ma_valid & MA_INODE) {
                 mdt_pack_attr2body(repbody, la, mdt_object_fid(o));
-        else
+                mdt_body_reverse_idmap(info, repbody);
+        } else {
                 RETURN(-EFAULT);
+        }
 
         if (mdt_body_has_lov(la, reqbody)) {
                 if (ma->ma_valid & MA_LOV) {
@@ -327,7 +330,8 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                 }
         } else if (S_ISLNK(la->la_mode) &&
                           reqbody->valid & OBD_MD_LINKNAME) {
-                rc = mo_readlink(ctxt, next, ma->ma_lmm, ma->ma_lmm_size);
+                rc = mo_readlink(ctxt, next, ma->ma_lmm, ma->ma_lmm_size,
+                                 &info->mti_uc);
                 if (rc <= 0) {
                         CERROR("readlink failed: %d\n", rc);
                         rc = -EFAULT;
@@ -351,14 +355,26 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                                 repbody->max_cookiesize);
         }
 
+        if (reqbody->valid & OBD_MD_FLRMTPERM) {
+                buffer = req_capsule_server_get(pill, &RMF_ACL);
+                /* mdt_getattr_lock only */
+                rc = mdt_pack_remote_perm(info, o, buffer);
+                if (rc) {
+                        RETURN(rc);
+                } else {
+                        repbody->valid |= OBD_MD_FLRMTPERM;
+                        repbody->aclsize = sizeof(struct mdt_remote_perm);
+                }
+        }
+
 #ifdef CONFIG_FS_POSIX_ACL
-        if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) &&
-            (reqbody->valid & OBD_MD_FLACL)) {
+        else if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) &&
+                 (reqbody->valid & OBD_MD_FLACL)) {
                 buffer = req_capsule_server_get(pill, &RMF_ACL);
                 length = req_capsule_get_size(pill, &RMF_ACL, RCL_SERVER);
                 if (length > 0) {
-                        rc = mo_xattr_get(ctxt, next, buffer,
-                                          length, XATTR_NAME_ACL_ACCESS);
+                        rc = mo_xattr_get(ctxt, next, buffer, length,
+                                          XATTR_NAME_ACL_ACCESS, &info->mti_uc);
                         if (rc < 0) {
                                 if (rc == -ENODATA || rc == -EOPNOTSUPP)
                                         rc = 0;
@@ -379,14 +395,24 @@ static int mdt_getattr(struct mdt_thread_info *info)
 {
         int rc;
         struct mdt_object *obj;
+        struct mdt_body *reqbody;
 
         obj = info->mti_object;
         LASSERT(obj != NULL);
         LASSERT(lu_object_assert_exists(&obj->mot_obj.mo_lu));
         ENTRY;
 
+        reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY);
+        if (reqbody == NULL)
+                RETURN(-EFAULT);
+
+        rc = mdt_init_ucred(info, reqbody);
+        if (rc)
+                RETURN(rc);
+
         rc = mdt_getattr_internal(info, obj);
         mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+        mdt_exit_ucred(info);
         RETURN(rc);
 }
 
@@ -410,7 +436,7 @@ static int mdt_is_subdir(struct mdt_thread_info *info)
          */
         LASSERT(fid_is_sane(&info->mti_body->fid2));
         rc = mdo_is_subdir(info->mti_ctxt, mdt_object_child(obj),
-                           &info->mti_body->fid2, &repbody->fid1);
+                           &info->mti_body->fid2, &repbody->fid1, NULL);
         if (rc < 0)
                 RETURN(rc);
         
@@ -511,7 +537,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                 RETURN(rc);
 
         /*step 2: lookup child's fid by name */
-        rc = mdo_lookup(info->mti_ctxt, next, name, child_fid);
+        rc = mdo_lookup(info->mti_ctxt, next, name, child_fid, &info->mti_uc);
         if (rc != 0) {
                 if (rc == -ENOENT)
                         mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG);
@@ -592,16 +618,26 @@ out:
+/*
+ * Handler built around mdt_getattr_name_lock().
+ *
+ * Reads the client's RMF_MDT_BODY (returns -EFAULT when it is missing),
+ * initialises the request credentials from it with mdt_init_ucred(), then
+ * calls mdt_getattr_name_lock() with MDS_INODELOCK_UPDATE.  Afterwards the
+ * child lock handle is released if it is in use, the reply is shrunk and the
+ * credentials are dropped with mdt_exit_ucred().  Returns the result of
+ * mdt_getattr_name_lock(), or the earlier error if setup failed.
+ */
 static int mdt_getattr_name(struct mdt_thread_info *info)
 {
         struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_CHILD];
+        struct mdt_body *reqbody;
         int rc;
 
         ENTRY;
 
+        reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY);
+        if (reqbody == NULL)
+                RETURN(-EFAULT);
+
+        /* nothing to undo yet, so a failure here can return directly */
+        rc = mdt_init_ucred(info, reqbody);
+        if (rc)
+                RETURN(rc);
+
         rc = mdt_getattr_name_lock(info, lhc, MDS_INODELOCK_UPDATE, NULL);
         if (lustre_handle_is_used(&lhc->mlh_lh)) {
                 ldlm_lock_decref(&lhc->mlh_lh, lhc->mlh_mode);
                 lhc->mlh_lh.cookie = 0;
         }
         mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+        mdt_exit_ucred(info);
         RETURN(rc);
 }
 
@@ -628,6 +664,7 @@ static int mdt_connect(struct mdt_thread_info *info)
         if (rc == 0) {
                 LASSERT(req->rq_export != NULL);
                 info->mti_mdt = mdt_dev(req->rq_export->exp_obd->obd_lu_dev);
+                rc = mdt_init_idmap(info);
         }
         return rc;
 }
@@ -712,6 +749,7 @@ static int mdt_write_dir_page(struct mdt_thread_info *info, struct page *page)
         struct lu_dirent *ent;
         int rc = 0;
 
+        ENTRY;
 
         /* Disable trans for this name insert, since it will 
          * include many trans for this */
@@ -733,7 +771,7 @@ static int mdt_write_dir_page(struct mdt_thread_info *info, struct page *page)
                         memcpy(name, ent->lde_name, ent->lde_namelen);
                         rc = mdo_name_insert(info->mti_ctxt,
                                              md_object_next(&object->mot_obj),
-                                             name, lf, 0);
+                                             name, lf, 0, &info->mti_uc);
                         OBD_FREE(name, ent->lde_namelen + 1);
                         if (rc)
                                 GOTO(out, rc);
@@ -852,6 +890,10 @@ static int mdt_readpage(struct mdt_thread_info *info)
         if (reqbody == NULL || repbody == NULL)
                 RETURN(-EFAULT);
 
+        rc = mdt_init_ucred(info, reqbody);
+        if (rc)
+                RETURN(rc);
+
         /*
          * prepare @rdpg before calling lower layers and transfer itself. Here
          * reqbody->size contains offset of where to start to read and
@@ -861,13 +903,13 @@ static int mdt_readpage(struct mdt_thread_info *info)
         if ((__u64)rdpg->rp_hash != reqbody->size) {
                 CERROR("Invalid hash: %#llx != %#llx\n",
                        (__u64)rdpg->rp_hash, reqbody->size);
-                RETURN(-EFAULT);
+                GOTO(out, rc = -EFAULT);
         }
         rdpg->rp_count  = reqbody->nlink;
         rdpg->rp_npages = (rdpg->rp_count + CFS_PAGE_SIZE - 1)>>CFS_PAGE_SHIFT;
         OBD_ALLOC(rdpg->rp_pages, rdpg->rp_npages * sizeof rdpg->rp_pages[0]);
         if (rdpg->rp_pages == NULL)
-                RETURN(-ENOMEM);
+                GOTO(out, rc = -ENOMEM);
 
         for (i = 0; i < rdpg->rp_npages; ++i) {
                 rdpg->rp_pages[i] = alloc_pages(GFP_KERNEL, 0);
@@ -876,11 +918,12 @@ static int mdt_readpage(struct mdt_thread_info *info)
         }
 
         /* call lower layers to fill allocated pages with directory data */
-        rc = mo_readpage(info->mti_ctxt, mdt_object_child(object), rdpg);
+        rc = mo_readpage(info->mti_ctxt, mdt_object_child(object), rdpg,
+                         &info->mti_uc);
         if (rc) {
                 if (rc == -ERANGE)
                         rc1 = rc;
-                else 
+                else
                         GOTO(free_rdpg, rc);
         }
 
@@ -895,8 +938,11 @@ free_rdpg:
                         __free_pages(rdpg->rp_pages[i], 0);
         OBD_FREE(rdpg->rp_pages, rdpg->rp_npages * sizeof rdpg->rp_pages[0]);
 
+        mdt_exit_ucred(info);
         MDT_FAIL_RETURN(OBD_FAIL_MDS_SENDPAGE, 0);
 
+out:
+        mdt_exit_ucred(info);
         return rc ? rc : rc1;
 }
 
@@ -937,6 +983,14 @@ static int mdt_reint_internal(struct mdt_thread_info *info,
                 RETURN(rc);
         }
 
+        rc = mdt_init_ucred_reint(info);
+        if (rc != 0)
+                RETURN(rc);
+
+        rc = mdt_fix_attr_ucred(info, op);
+        if (rc != 0)
+                GOTO(out, rc);
+
         if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
                 struct mdt_client_data *mcd;
 
@@ -944,13 +998,16 @@ static int mdt_reint_internal(struct mdt_thread_info *info,
                 if (mcd->mcd_last_xid == req->rq_xid ||
                     mcd->mcd_last_close_xid == req->rq_xid) {
                         mdt_reconstruct(info, lhc);
-                        RETURN(lustre_msg_get_status(req->rq_repmsg));
+                        rc = lustre_msg_get_status(req->rq_repmsg);
+                        GOTO(out, rc);
                 }
                 DEBUG_REQ(D_HA, req, "no reply for RESENT (xid "LPD64")",
                           mcd->mcd_last_xid);
         }
         rc = mdt_reint_rec(info, lhc);
 
+out:
+        mdt_exit_ucred(info);
         RETURN(rc);
 }
 
@@ -996,8 +1053,9 @@ static int mdt_reint(struct mdt_thread_info *info)
                  * path.
                  */
                 rc = mdt_reint_internal(info, NULL, opc);
-        } else
+        } else {
                 rc = opc;
+        }
 
         info->mti_fail_id = OBD_FAIL_MDS_REINT_NET_REP;
         RETURN(rc);
@@ -1031,8 +1089,12 @@ static int mdt_sync(struct mdt_thread_info *info)
         if (body == NULL)
                 RETURN(-EINVAL);
 
+        rc = mdt_init_ucred(info, body);
+        if (rc)
+                RETURN(rc);
+
         if (MDT_FAIL_CHECK(OBD_FAIL_MDS_SYNC_PACK))
-                RETURN(-ENOMEM);
+                GOTO(out, rc = -ENOMEM);
 
         if (fid_seq(&body->fid1) == 0) {
                 /* sync the whole device */
@@ -1052,17 +1114,22 @@ static int mdt_sync(struct mdt_thread_info *info)
                                 next = mdt_object_child(info->mti_object);
                                 info->mti_attr.ma_need = MA_INODE;
                                 rc = mo_attr_get(info->mti_ctxt, next,
-                                                 &info->mti_attr);
+                                                 &info->mti_attr,
+                                                 &info->mti_uc);
                                 if (rc == 0) {
                                         body = req_capsule_server_get(pill,
                                                                 &RMF_MDT_BODY);
                                         fid = mdt_object_fid(info->mti_object);
                                         mdt_pack_attr2body(body, la, fid);
+                                        mdt_body_reverse_idmap(info, body);
                                 }
                         }
                 }
         }
-        RETURN(rc);
+        EXIT;
+out:
+        mdt_exit_ucred(info);
+        return rc;
 }
 
 static int mdt_quotacheck_handle(struct mdt_thread_info *info)
@@ -1160,7 +1227,7 @@ static int mdt_cp_callback(struct mdt_thread_info *info)
  */
 static int mdt_sec_ctx_handle(struct mdt_thread_info *info)
 {
-        return 0;
+        /* hand the request to the idmap handler and propagate its result */
+        return mdt_handle_idmap(info);
 }
 
 static struct mdt_object *mdt_obj(struct lu_object *o)
@@ -1518,7 +1585,7 @@ static int mdt_req_handle(struct mdt_thread_info *info,
         /* If we're DISCONNECTing, the mdt_export_data is already freed */
         if (rc == 0 && h->mh_opc != MDS_DISCONNECT)
                 target_committed_to_req(req);
-        
+
         RETURN(rc);
 }
 
@@ -1579,8 +1646,9 @@ extern int mds_filter_recovery_request(struct ptlrpc_request *req,
  *       -ve: abort immediately with the given error code;
  *         0: send reply with error code in req->rq_status;
  */
-static int mdt_recovery(struct ptlrpc_request *req)
+static int mdt_recovery(struct mdt_thread_info *info)
 {
+        struct ptlrpc_request *req = mdt_info_req(info);
         int recovering;
         int abort_recovery;
         struct obd_device *obd;
@@ -1592,6 +1660,7 @@ static int mdt_recovery(struct ptlrpc_request *req)
         case SEC_CTX_INIT:
         case SEC_CTX_INIT_CONT:
         case SEC_CTX_FINI:
+                mdt_handle_idmap(info);
                 RETURN(+1);
         }
 
@@ -1687,7 +1756,7 @@ static int mdt_handle0(struct ptlrpc_request *req,
         msg = req->rq_reqmsg;
         rc = mds_msg_check_version(msg);
         if (rc == 0) {
-                rc = mdt_recovery(req);
+                rc = mdt_recovery(info);
                 switch (rc) {
                 case +1:
                         h = mdt_handler_find(lustre_msg_get_opc(msg),
@@ -2010,6 +2079,8 @@ static int mdt_intent_getattr(enum mdt_it_code opcode,
         __u64                   child_bits;
         struct ldlm_reply      *ldlm_rep;
         struct ptlrpc_request  *req;
+        struct mdt_body        *reqbody;
+        int                     rc;
 
         ENTRY;
 
@@ -2025,6 +2096,14 @@ static int mdt_intent_getattr(enum mdt_it_code opcode,
                 RETURN(-EINVAL);
         }
 
+        reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY);
+        if (reqbody == NULL)
+                RETURN(-EFAULT);
+
+        rc = mdt_init_ucred(info, reqbody);
+        if (rc)
+                RETURN(rc);
+
         req = info->mti_pill.rc_req;
         ldlm_rep = req_capsule_server_get(&info->mti_pill, &RMF_DLM_REP);
         mdt_set_disposition(info, ldlm_rep, DISP_IT_EXECD);
@@ -2040,10 +2119,14 @@ static int mdt_intent_getattr(enum mdt_it_code opcode,
                 ldlm_rep->lock_policy_res2 = 0;
         if (!mdt_get_disposition(ldlm_rep, DISP_LOOKUP_POS) ||
                     ldlm_rep->lock_policy_res2) {
-                RETURN(ELDLM_LOCK_ABORTED);
+                GOTO(out, rc = ELDLM_LOCK_ABORTED);
         }
 
-        return mdt_intent_lock_replace(info, lockp, new_lock, lhc, flags);
+        rc = mdt_intent_lock_replace(info, lockp, new_lock, lhc, flags);
+        EXIT;
+out:
+        mdt_exit_ucred(info);
+        return rc;
 }
 
 static int mdt_intent_reint(enum mdt_it_code opcode,
@@ -2081,7 +2164,7 @@ static int mdt_intent_reint(enum mdt_it_code opcode,
         rep = req_capsule_server_get(&info->mti_pill, &RMF_DLM_REP);
         if (rep == NULL)
                 RETURN(-EFAULT);
-        
+
         /* MDC expects this in any case */
         if (rc != 0)
                 mdt_set_disposition(info, rep, DISP_LOOKUP_EXECD);
@@ -2092,7 +2175,7 @@ static int mdt_intent_reint(enum mdt_it_code opcode,
         if (rc == -EREMOTE) {
                 LASSERT(lustre_handle_is_used(&lhc->mlh_lh));
                 rep->lock_policy_res2 = 0;
-                return mdt_intent_lock_replace(info, lockp, NULL, lhc, flags);
+                RETURN(mdt_intent_lock_replace(info, lockp, NULL, lhc, flags));
         }
         rep->lock_policy_res2 = rc;
 
@@ -2935,6 +3018,17 @@ static void mdt_fini(const struct lu_context *ctx, struct mdt_device *m)
 
         ENTRY;
         target_cleanup_recovery(m->mdt_md_dev.md_lu_dev.ld_obd);
+
+        upcall_cache_cleanup(m->mdt_rmtacl_cache);
+        m->mdt_rmtacl_cache = NULL;
+
+        upcall_cache_cleanup(m->mdt_identity_cache);
+        m->mdt_identity_cache = NULL;
+
+        if (m->mdt_rootsquash_info)
+                OBD_FREE_PTR(m->mdt_rootsquash_info);
+
+        mdt_fs_cleanup(ctx, m);
         ping_evictor_stop();
         mdt_stop_ptlrpc_service(m);
 
@@ -3052,13 +3146,41 @@ static int mdt_init0(const struct lu_context *ctx, struct mdt_device *m,
                 GOTO(err_free_ns, rc);
 
         ping_evictor_start();
-        rc = mdt_fs_setup(ctx, m);
+        rc = mdt_fs_setup(ctx, m, obd);
         if (rc)
                 GOTO(err_stop_service, rc);
+
+        m->mdt_identity_cache = upcall_cache_init(obd->obd_name,
+                                                MDT_IDENTITY_UPCALL_PATH,
+                                                &mdt_identity_upcall_cache_ops);
+        if (IS_ERR(m->mdt_identity_cache)) {
+                rc = PTR_ERR(m->mdt_identity_cache);
+                m->mdt_identity_cache = NULL;
+                GOTO(err_fs, rc);
+        }
+
+        m->mdt_rmtacl_cache = upcall_cache_init(obd->obd_name,
+                                                MDT_RMTACL_UPCALL_PATH,
+                                                &mdt_rmtacl_upcall_cache_ops);
+        if (IS_ERR(m->mdt_rmtacl_cache)) {
+                rc = PTR_ERR(m->mdt_rmtacl_cache);
+                m->mdt_rmtacl_cache = NULL;
+                GOTO(err_fs, rc);
+        }
+
         if(obd->obd_recovering == 0)
                 mdt_postrecov(ctx, m);
+
+        m->no_gss_support = 1;
+
         RETURN(0);
 
+err_fs:
+        upcall_cache_cleanup(m->mdt_rmtacl_cache);
+        m->mdt_rmtacl_cache = NULL;
+        upcall_cache_cleanup(m->mdt_identity_cache);
+        m->mdt_identity_cache = NULL;
+        mdt_fs_cleanup(ctx, m);
 err_stop_service:
         mdt_stop_ptlrpc_service(m);
 err_free_ns:
@@ -3079,6 +3201,39 @@ err_free_site:
         return (rc);
 }
 
+/* FIXME: this macro is copied from lnet/libcfs/nidstring.c */
+#define LNET_NIDSTR_SIZE   32      /* size of each one (see below for usage) */
+/*
+ * Parse the comma-separated list of NID strings in @buf into
+ * m->mdt_rootsquash_info->rsi_nosquash_nids[].
+ *
+ * The list is rebuilt from scratch (the entry count is reset to 0) and at
+ * most N_NOSQUASH_NIDS entries are stored.  The token "*" is stored as
+ * LNET_NID_ANY; every other token is converted with libcfs_str2nid() and is
+ * skipped when the conversion yields LNET_NID_ANY.  Tokens that do not fit
+ * into a LNET_NIDSTR_SIZE buffer together with their terminating NUL are
+ * skipped as well.  Parsing stops at the end of @buf or after a trailing
+ * comma.
+ *
+ * m->mdt_rootsquash_info must already be allocated (asserted).
+ */
+static void do_process_nosquash_nids(struct mdt_device *m, char *buf)
+{
+        struct rootsquash_info *rsi = m->mdt_rootsquash_info;
+        char str[LNET_NIDSTR_SIZE], *end;
+        lnet_nid_t nid;
+        size_t len;
+
+        LASSERT(rsi);
+        rsi->rsi_n_nosquash_nids = 0;
+        while (rsi->rsi_n_nosquash_nids < N_NOSQUASH_NIDS) {
+                end = strchr(buf, ',');
+                len = end ? (size_t)(end - buf) : strlen(buf);
+
+                /*
+                 * str is handed to strcmp() and libcfs_str2nid(), so it must
+                 * always be NUL-terminated: a token that would fill the whole
+                 * buffer cannot be copied safely and is ignored.
+                 */
+                if (len >= sizeof(str))
+                        goto ignore;
+                memcpy(str, buf, len);
+                str[len] = 0;
+
+                if (!strcmp(str, "*")) {
+                        nid = LNET_NID_ANY;
+                } else {
+                        nid = libcfs_str2nid(str);
+                        /* LNET_NID_ANY here means the token did not parse */
+                        if (nid == LNET_NID_ANY)
+                                goto ignore;
+                }
+                rsi->rsi_nosquash_nids[rsi->rsi_n_nosquash_nids++] = nid;
+ignore:
+                /* last token, or nothing after the final comma: done */
+                if (!end || (*(end + 1) == 0))
+                        return;
+                buf = end + 1;
+        }
+}
+
 /* used by MGS to process specific configurations */
 static int mdt_process_config(const struct lu_context *ctx,
                               struct lu_device *d, struct lustre_cfg *cfg)
@@ -3086,26 +3241,90 @@ static int mdt_process_config(const struct lu_context *ctx,
         struct mdt_device *m = mdt_dev(d);
         struct md_device *md_next  = m->mdt_child;
         struct lu_device *next = md2lu_dev(md_next);
-        int err;
+        int rc = 0;
         ENTRY;
 
         switch (cfg->lcfg_command) {
+        case LCFG_PARAM: {
+                int i;
+
+                for (i = 1; i < cfg->lcfg_bufcount; i++) {
+                        char *key, *val;
+
+                        key = lustre_cfg_buf(cfg, i);
+                        val = strchr(key, '=');
+                        if (!val || (*(val + 1) == 0)) {
+                                CERROR("Can't parse param %s\n", key);
+                                rc = -EINVAL;
+                                /* continue parsing other params */
+                                continue;
+                        }
+
+                        val++;
+                        if (class_match_param(key,
+                                              PARAM_GSS_SUPPORT, 0) == 0) {
+                                if (memcmp(val, "no", 2) == 0) {
+                                        m->no_gss_support = 1;
+                                } else if (memcmp(val, "yes", 3) == 0) {
+                                        m->no_gss_support = 0;
+                                } else {
+                                        CERROR("Can't parse param %s\n", key);
+                                        rc = -EINVAL;
+                                        /* continue parsing other params */
+                                        continue;
+                                }
+                        } else if (class_match_param(key,
+                                        PARAM_ROOTSQUASH_UID, 0) == 0) {
+                                if (!m->mdt_rootsquash_info)
+                                        OBD_ALLOC_PTR(m->mdt_rootsquash_info);
+                                if (!m->mdt_rootsquash_info)
+                                        RETURN(-ENOMEM);
+
+                                m->mdt_rootsquash_info->rsi_uid = 
+                                        simple_strtoul(val, NULL, 0);
+                        } else if (class_match_param(key,
+                                        PARAM_ROOTSQUASH_GID, 0) == 0) {
+                                if (!m->mdt_rootsquash_info)
+                                        OBD_ALLOC_PTR(m->mdt_rootsquash_info);
+                                if (!m->mdt_rootsquash_info)
+                                        RETURN(-ENOMEM);
+
+                                m->mdt_rootsquash_info->rsi_gid =
+                                        simple_strtoul(val, NULL, 0);
+                        } else if (class_match_param(key,
+                                        PARAM_ROOTSQUASH_SKIPS, 0) == 0) {
+                                if (!m->mdt_rootsquash_info)
+                                        OBD_ALLOC_PTR(m->mdt_rootsquash_info);
+                                if (!m->mdt_rootsquash_info)
+                                        RETURN(-ENOMEM);
+
+                                do_process_nosquash_nids(m, val);
+                        } else {
+                                rc = -EINVAL;
+                        }
+                }
+
+                if (rc)
+                        /* others are passed further */
+                        rc = next->ld_ops->ldo_process_config(ctx, next, cfg);
+                break;
+        }
         case LCFG_ADD_MDC:
                 /*
                  * Add mdc hook to get first MDT uuid and connect it to
                  * ls->controller to use for seq manager.
                  */
-                err = mdt_seq_init_cli(ctx, mdt_dev(d), cfg);
-                if (err) {
+                rc = mdt_seq_init_cli(ctx, mdt_dev(d), cfg);
+                if (rc) {
                         CERROR("can't initialize controller export, "
-                               "rc %d\n", err);
+                               "rc %d\n", rc);
                 }
         default:
                 /* others are passed further */
-                err = next->ld_ops->ldo_process_config(ctx, next, cfg);
+                rc = next->ld_ops->ldo_process_config(ctx, next, cfg);
                 break;
         }
-        RETURN(err);
+        RETURN(rc);
 }
 
 static struct lu_object *mdt_object_alloc(const struct lu_context *ctxt,
@@ -3190,6 +3409,8 @@ static int mdt_connect_internal(struct obd_export *exp,
                                 struct mdt_device *mdt,
                                 struct obd_connect_data *data)
 {
+        __u64 flags;
+
         if (data != NULL) {
                 data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED;
                 data->ocd_ibits_known &= MDS_INODELOCK_FULL;
@@ -3218,6 +3439,14 @@ static int mdt_connect_internal(struct obd_export *exp,
                       mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
                 return -EBADE;
         }
+
+        flags = OBD_CONNECT_LCL_CLIENT | OBD_CONNECT_RMT_CLIENT;
+        if ((exp->exp_connect_flags & flags) == flags) {
+                CWARN("%s: both local and remote client flags are set\n",
+                      mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
+                return -EBADE;
+        }
+
         return 0;
 }
 
@@ -3341,6 +3570,8 @@ static int mdt_destroy_export(struct obd_export *export)
         ENTRY;
 
         med = &export->exp_mdt_data;
+        if (med->med_rmtclient)
+                mdt_cleanup_idmap(med);
 
         target_destroy_export(export);
 
@@ -3419,7 +3650,7 @@ static int mdt_upcall(const struct lu_context *ctx, struct md_device *md,
                 case MD_LOV_SYNC:
                         rc = next->md_ops->mdo_maxsize_get(ctx, next,
                                         &m->mdt_max_mdsize,
-                                        &m->mdt_max_cookiesize);
+                                        &m->mdt_max_cookiesize, NULL);
                         CDEBUG(D_INFO, "get max mdsize %d max cookiesize %d\n",
                                      m->mdt_max_mdsize, m->mdt_max_cookiesize);
                         break;
diff --git a/lustre/mdt/mdt_identity.c b/lustre/mdt/mdt_identity.c
new file mode 100644 (file)
index 0000000..17f78b8
--- /dev/null
@@ -0,0 +1,307 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *   Author: Lai Siyao <lsy@clusterfs.com>
+ *   Author: Fan Yong <fanyong@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/kmod.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/version.h>
+#include <linux/unistd.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <asm/segment.h>
+
+#include <libcfs/kp30.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_lib.h>
+#include <lustre_ucache.h>
+
+#include "mdt_internal.h"
+
+/*
+ * Upcall-cache init hook: store in the identity embedded in @entry a back
+ * pointer to @entry itself.  The second argument is not used.
+ */
+static void mdt_identity_entry_init(struct upcall_cache_entry *entry,
+                                    void *unused)
+{
+        struct mdt_identity *identity = &entry->u.identity;
+
+        identity->mi_uc_entry = entry;
+}
+
+/*
+ * Upcall-cache free hook: release what the identity embedded in @entry
+ * owns, i.e. its group info (if set) and its permission array (if
+ * mi_nperms is non-zero).  @cache is not used.
+ */
+static void mdt_identity_entry_free(struct upcall_cache *cache,
+                                    struct upcall_cache_entry *entry)
+{
+        struct mdt_identity *identity = &entry->u.identity;
+
+        if (identity->mi_ginfo)
+                groups_free(identity->mi_ginfo);
+
+        /* no permissions recorded, nothing more to release */
+        if (!identity->mi_nperms)
+                return;
+
+        LASSERT(identity->mi_perms);
+        OBD_FREE(identity->mi_perms,
+                 identity->mi_nperms * sizeof(struct mdt_setxid_perm));
+}
+
+/*
+ * Upcall-cache hook: start the helper named by cache->uc_upcall, passing
+ * it the cache name and the entry key (formatted with LPU64).
+ *
+ * Returns 0 when USERMODEHELPER() reports a non-negative result, otherwise
+ * the negative value it returned.
+ */
+static int mdt_identity_do_upcall(struct upcall_cache *cache,
+                                  struct upcall_cache_entry *entry)
+{
+        char keystr[16];
+        char *argv[] = { cache->uc_upcall, cache->uc_name, keystr, NULL };
+        char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin", NULL };
+        int rc;
+        ENTRY;
+
+        snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key);
+
+        LASSERTF(strcmp(cache->uc_upcall, "NONE"), "no upcall set!");
+        CDEBUG(D_INFO, "The upcall is: %s \n", cache->uc_upcall);
+
+        rc = USERMODEHELPER(argv[0], argv, envp);
+        if (rc < 0) {
+                CERROR("%s: error invoking upcall %s %s %s: rc %d; "
+                       "check /proc/fs/lustre/mdt/%s/identity_upcall\n",
+                       cache->uc_name, argv[0], argv[1], argv[2], rc,
+                       cache->uc_name);
+                RETURN(rc);
+        }
+
+        /* any non-negative helper status is reported as plain success */
+        CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", cache->uc_name,
+               argv[0], argv[1], argv[2]);
+        RETURN(0);
+}
+
+/*
+ * Upcall-cache hook: fill the identity embedded in @entry from the
+ * downcall payload @args (a struct identity_downcall_data).
+ *
+ * A sorted group_info is built from the supplied group list and, when
+ * idd_nperms is non-zero, the per-NID permissions are copied into a newly
+ * allocated array.  The identity is written only after every allocation
+ * has succeeded, so a failed call never leaves it pointing at memory that
+ * was released here.
+ *
+ * Returns 0 on success, -E2BIG when more than NGROUPS_MAX groups are
+ * supplied, -ENOMEM when an allocation fails.  @cache is not used.
+ */
+static int mdt_identity_parse_downcall(struct upcall_cache *cache,
+                                       struct upcall_cache_entry *entry,
+                                       void *args)
+{
+        struct mdt_identity *identity = &entry->u.identity;
+        struct identity_downcall_data *data = args;
+        struct group_info *ginfo;
+        struct mdt_setxid_perm *perms = NULL;
+        int size, i;
+        ENTRY;
+
+        LASSERT(data);
+        if (data->idd_ngroups > NGROUPS_MAX)
+                RETURN(-E2BIG);
+
+        ginfo = groups_alloc(data->idd_ngroups);
+        if (!ginfo) {
+                CERROR("failed to alloc %d groups\n", data->idd_ngroups);
+                RETURN(-ENOMEM);
+        }
+
+        groups_from_list(ginfo, data->idd_groups);
+        groups_sort(ginfo);
+
+        if (data->idd_nperms) {
+                size = data->idd_nperms * sizeof(*perms);
+                OBD_ALLOC(perms, size);
+                if (!perms) {
+                        CERROR("failed to alloc %d permissions\n",
+                               data->idd_nperms);
+                        /*
+                         * ginfo has not been published in the identity
+                         * yet, so dropping it here cannot leave a stale
+                         * mi_ginfo for the free hook to release again.
+                         */
+                        put_group_info(ginfo);
+                        RETURN(-ENOMEM);
+                }
+                for (i = 0; i < data->idd_nperms; i++) {
+                        perms[i].mp_nid = data->idd_perms[i].pdd_nid;
+                        perms[i].mp_perm = data->idd_perms[i].pdd_perm;
+                }
+        }
+
+        identity->mi_uid = data->idd_uid;
+        identity->mi_gid = data->idd_gid;
+        identity->mi_ginfo = ginfo;
+        identity->mi_nperms = data->idd_nperms;
+        identity->mi_perms = perms;
+
+        CDEBUG(D_OTHER, "parse mdt identity@%p: %d:%d, ngroups %u, nperms %u\n",
+               identity, identity->mi_uid, identity->mi_gid,
+               identity->mi_ginfo->ngroups, identity->mi_nperms);
+
+        RETURN(0);
+}
+
+/*
+ * Look up the identity cached under @uid in @cache.
+ *
+ * Returns the identity embedded in the cache entry, or NULL (after logging
+ * the error) when upcall_cache_get_entry() returns an error pointer.  A
+ * successful result is handed back with mdt_identity_put().
+ */
+struct mdt_identity *mdt_identity_get(struct upcall_cache *cache, __u32 uid)
+{
+        struct upcall_cache_entry *entry =
+                upcall_cache_get_entry(cache, (__u64)uid, NULL);
+
+        if (!IS_ERR(entry))
+                return &entry->u.identity;
+
+        CERROR("upcall_cache_get_entry failed: %ld\n", PTR_ERR(entry));
+        return NULL;
+}
+
+#if 0
+struct mdt_identity *mdt_identity_get(struct mdt_thread_info *info,
+                                      struct upcall_cache *cache, __u32 uid)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        struct lvfs_run_ctxt saved;
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct upcall_cache_entry *entry;
+
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, &info->mti_uc);
+        entry = upcall_cache_get_entry(cache, (__u64)uid, NULL);
+        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &info->mti_uc);
+        if (IS_ERR(entry)) {
+                CERROR("upcall_cache_get_entry failed: %ld\n", PTR_ERR(entry));
+                return NULL;
+        }
+
+        return &entry->u.identity;
+}
+#endif
+
+/*
+ * Hand @identity back to @cache by putting the cache entry recorded in its
+ * mi_uc_entry back pointer.  @identity must not be NULL.
+ */
+void mdt_identity_put(struct upcall_cache *cache, struct mdt_identity *identity)
+{
+        struct upcall_cache_entry *entry;
+
+        LASSERT(identity);
+
+        entry = identity->mi_uc_entry;
+        upcall_cache_put_entry(cache, entry);
+}
+
+struct upcall_cache_ops mdt_identity_upcall_cache_ops = { /* identity hooks */
+        .init_entry     = mdt_identity_entry_init,     /* set back pointer */
+        .free_entry     = mdt_identity_entry_free,     /* drop groups/perms */
+        .do_upcall      = mdt_identity_do_upcall,      /* run the helper */
+        .parse_downcall = mdt_identity_parse_downcall, /* fill the identity */
+};
+
+/*
+ * Flush cached identities from @cache: a @uid of -1 flushes the idle
+ * entries, any other value flushes only the entry keyed by that uid.
+ */
+void mdt_flush_identity(struct upcall_cache *cache, __u32 uid)
+{
+        if (uid != (__u32)-1) {
+                upcall_cache_flush_one(cache, (__u64)uid, NULL);
+                return;
+        }
+
+        upcall_cache_flush_idle(cache);
+}
+
+/*
+ * Return the setxid permission bits @identity grants to the client at
+ * @nid: the mp_perm of the first permission record whose NID is either
+ * LNET_NID_ANY or equal to @nid.
+ *
+ * When no record matches, the default is 0 for a remote client and
+ * LUSTRE_SETGRP_PERM otherwise.
+ */
+__u32 mdt_identity_get_setxid_perm(struct mdt_identity *identity,
+                                   __u32 is_rmtclient, lnet_nid_t nid)
+{
+        struct mdt_setxid_perm *perm = identity->mi_perms;
+        int i;
+
+        for (i = 0; i < identity->mi_nperms; i++) {
+                if ((perm[i].mp_nid == LNET_NID_ANY) || (perm[i].mp_nid == nid))
+                        return perm[i].mp_perm;
+        }
+
+        /* default */
+        if (is_rmtclient)
+                return 0;
+
+        return LUSTRE_SETGRP_PERM;
+}
+
+/*
+ * Fill the struct mdt_remote_perm at @buf for object @o on behalf of the
+ * request carried by @info.
+ *
+ * The uid/gid/fsuid/fsgid fields are copied from the mu_o_* members of
+ * info->mti_uc.  rp_access_perm gets MAY_READ, MAY_WRITE and MAY_EXEC set
+ * for each mode that mo_permission() accepts (returns 0) on the child
+ * object.
+ *
+ * Returns 0 on success, -EBADE when the export is not a remote client.
+ * @buf must not be NULL.
+ */
+int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o,
+                         void *buf)
+{
+        static const int        modes[] = { MAY_READ, MAY_WRITE, MAY_EXEC };
+        struct ptlrpc_request   *req = mdt_info_req(info);
+        struct md_ucred         *uc = &info->mti_uc;
+        struct md_object        *next = mdt_object_child(o);
+        struct mdt_export_data  *med = mdt_req2med(req);
+        struct mdt_remote_perm  *perm = buf;
+        int                     i;
+
+        ENTRY;
+
+        /* the caller has to supply the reply buffer */
+        LASSERT(perm);
+
+        if (!med->med_rmtclient)
+                RETURN(-EBADE);
+
+        perm->rp_uid = uc->mu_o_uid;
+        perm->rp_gid = uc->mu_o_gid;
+        perm->rp_fsuid = uc->mu_o_fsuid;
+        perm->rp_fsgid = uc->mu_o_fsgid;
+
+        /* probe the modes one by one, in read/write/exec order */
+        perm->rp_access_perm = 0;
+        for (i = 0; i < ARRAY_SIZE(modes); i++) {
+                if (mo_permission(info->mti_ctxt, next, modes[i], uc) == 0)
+                        perm->rp_access_perm |= modes[i];
+        }
+
+        RETURN(0);
+}
+
+#if 0
+int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o,
+                         void *buf)
+{
+        struct ptlrpc_request   *req = mdt_info_req(info);
+        struct lvfs_ucred       *uc = &info->mti_uc;
+        struct md_object        *next = mdt_object_child(o);
+        struct mdt_export_data  *med = mdt_req2med(req);
+        struct ptlrpc_user_desc *pud = req->rq_user_desc;
+        struct mdt_remote_perm  *perm = buf;
+        int                     rc;
+        ENTRY;
+
+        /* remote client request always pack ptlrpc_user_desc! */
+        LASSERT(pud);
+        LASSERT(perm);
+
+        if (!med->med_rmtclient)
+                RETURN(-EBADE);
+
+        perm->rp_uid = pud->pud_uid;
+        perm->rp_gid = pud->pud_gid;
+        perm->rp_fsuid = pud->pud_fsuid;
+        perm->rp_fsgid = pud->pud_fsgid;
+
+        rc = mdt_remote_perm_reverse_idmap(req, perm);
+        if (rc)
+                RETURN(rc);
+
+        return mo_permission(ctxt, &info->mti_uc, next,
+                             (MAY_EXEC | MAY_WRITE | MAY_READ),
+                             &perm->rp_access_perm);
+}
+#endif
diff --git a/lustre/mdt/mdt_idmap.c b/lustre/mdt/mdt_idmap.c
new file mode 100644 (file)
index 0000000..88f67fe
--- /dev/null
@@ -0,0 +1,739 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *   Author: Lai Siyao <lsy@clusterfs.com>
+ *   Author: Fan Yong <fanyong@clusterfs.com>
+
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/kmod.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/version.h>
+#include <linux/unistd.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <asm/segment.h>
+
+#include <libcfs/kp30.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_sec.h>
+#include <lustre_lib.h>
+#include <lustre_ucache.h>
+
+#include "mdt_internal.h"
+
+enum {
+        MDT_IDMAP_NOTFOUND      = -1, /* lookup result: no mapping found */
+};
+
+struct mdt_idmap_entry { /* one remote <-> local id pair */
+        struct list_head mie_rmt_hash; /* bucket link, hashed by mie_rmt_id */
+        struct list_head mie_lcl_hash; /* bucket link, hashed by mie_lcl_id */
+        int              mie_refcount; /* 1 at alloc, +1 per matching search */
+        uid_t            mie_rmt_id;   /* remote uid/gid */
+        uid_t            mie_lcl_id;   /* local uid/gid */
+};
+
+/* uid/gid mapping */
+
+/*
+ * Allocate an idmap table with its lock initialised and every hash bucket
+ * set up as an empty list.  Returns NULL when the allocation fails.
+ */
+static struct mdt_idmap_table *mdt_idmap_alloc(void)
+{
+        struct mdt_idmap_table *tbl;
+        int i, j;
+
+        OBD_ALLOC_PTR(tbl);
+        if (tbl == NULL)
+                return NULL;
+
+        spin_lock_init(&tbl->mit_lock);
+        for (i = 0; i < ARRAY_SIZE(tbl->mit_idmaps); i++) {
+                struct list_head *bucket = tbl->mit_idmaps[i];
+
+                for (j = 0; j < ARRAY_SIZE(tbl->mit_idmaps[i]); j++, bucket++)
+                        INIT_LIST_HEAD(bucket);
+        }
+
+        return tbl;
+}
+
+/*
+ * Allocate a mapping entry for the pair (@mie_rmt_id, @mie_lcl_id).  The
+ * entry starts with a reference count of 1 and is linked into no hash
+ * list.  Returns NULL when the allocation fails.
+ */
+static struct mdt_idmap_entry *idmap_entry_alloc(__u32 mie_rmt_id,
+                                                 __u32 mie_lcl_id)
+{
+        struct mdt_idmap_entry *e;
+
+        OBD_ALLOC_PTR(e);
+        if (e != NULL) {
+                e->mie_rmt_id = mie_rmt_id;
+                e->mie_lcl_id = mie_lcl_id;
+                e->mie_refcount = 1;
+                INIT_LIST_HEAD(&e->mie_rmt_hash);
+                INIT_LIST_HEAD(&e->mie_lcl_hash);
+        }
+
+        return e;
+}
+
+/*
+ * Unlink @e from whichever of its two hash lists it is on and free it.
+ * The reference count is not consulted.
+ */
+static void idmap_entry_free(struct mdt_idmap_entry *e)
+{
+        struct list_head *rmt_link = &e->mie_rmt_hash;
+        struct list_head *lcl_link = &e->mie_lcl_hash;
+
+        if (!list_empty(rmt_link))
+                list_del(rmt_link);
+        if (!list_empty(lcl_link))
+                list_del(lcl_link);
+
+        OBD_FREE_PTR(e);
+}
+
+/*
+ * Decide at connect time whether the export behind @info is a remote or a
+ * local client and set it up accordingly.
+ *
+ * The export becomes remote only when the client asked for
+ * OBD_CONNECT_RMT_CLIENT and the request has a valid rq_auth_uid;
+ * mismatches between what was asked and what authentication reports are
+ * only warned about.  For a remote client the idmap table is allocated if
+ * needed, the reply flags are set to "remote" and mdt_handle_idmap() is
+ * called; otherwise the reply flags are set to "local".
+ *
+ * Returns 0 on success, -EFAULT when the connect data cannot be fetched,
+ * -ENOMEM when the idmap table cannot be allocated, or the result of
+ * mdt_handle_idmap().
+ */
+int mdt_init_idmap(struct mdt_thread_info *info)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        char *client = libcfs_nid2str(req->rq_peer.nid);
+        struct mdt_export_data *med = mdt_req2med(req);
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct obd_connect_data *data, *reply;
+        int rc = 0, remote;
+        ENTRY;
+
+        data = req_capsule_client_get(&info->mti_pill, &RMF_CONNECT_DATA);
+        reply = req_capsule_server_get(&info->mti_pill, &RMF_CONNECT_DATA);
+        if (data == NULL || reply == NULL)
+                RETURN(-EFAULT);
+
+        remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT;
+
+        if (remote) {
+                if (req->rq_auth_uid == INVALID_UID) {
+                        CWARN("client %s -> target %s null sec is used, force "
+                              "to be local!\n", client, obd->obd_name);
+                } else {
+                        if (!req->rq_auth_remote) {
+                                CWARN("client %s -> target %s local realm asked"
+                                      " to be remote!\n",
+                                      client, obd->obd_name);
+                        }
+                        med->med_rmtclient = 1;
+                        med->med_nllu = data->ocd_nllu;
+                        med->med_nllg = data->ocd_nllg;
+                }
+        } else if (req->rq_auth_uid != INVALID_UID && req->rq_auth_remote) {
+                CWARN("client %s -> target %s remote realm asked to be "
+                      "local!\n", client, obd->obd_name);
+        }
+
+        if (!med->med_rmtclient) {
+                /* local client: advertise it as such and stop here */
+                reply->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT;
+                reply->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT;
+                RETURN(rc);
+        }
+
+        if (!med->med_idmap)
+                med->med_idmap = mdt_idmap_alloc();
+        if (!med->med_idmap) {
+                CERROR("client %s -> target %s failed to alloc idmap!\n",
+                       client, obd->obd_name);
+                RETURN(-ENOMEM);
+        }
+
+        reply->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT;
+        reply->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT;
+        CDEBUG(D_SEC, "client %s -> target %s is remote.\n",
+               client, obd->obd_name);
+
+        /* NB, MDT_CONNECT establish root idmap too! */
+        rc = mdt_handle_idmap(info);
+
+        RETURN(rc);
+}
+
+/*
+ * Free every entry linked (through mie_rmt_hash) into the
+ * MDT_IDMAP_HASHSIZE buckets starting at @list.  idmap_entry_free() also
+ * unlinks each entry from its mie_lcl_hash list.
+ */
+static void idmap_clear_mie_rmt_hash(struct list_head *list)
+{
+        struct list_head *head;
+        struct mdt_idmap_entry *e;
+
+        for (head = list; head < list + MDT_IDMAP_HASHSIZE; head++) {
+                /* freeing unlinks the entry, so keep taking the first one */
+                while (!list_empty(head)) {
+                        e = list_entry(head->next, struct mdt_idmap_entry,
+                                       mie_rmt_hash);
+                        idmap_entry_free(e);
+                }
+        }
+}
+
+/*
+ * Destroy the idmap table of a remote-client export: free all uid and gid
+ * mapping entries, free the table and clear med->med_idmap.  The export
+ * must be a remote client with a table attached.
+ */
+void mdt_cleanup_idmap(struct mdt_export_data *med)
+{
+        struct mdt_idmap_table *tbl = med->med_idmap;
+        int i = 0;
+
+        LASSERT(med->med_rmtclient);
+        LASSERT(tbl);
+
+        spin_lock(&tbl->mit_lock);
+        idmap_clear_mie_rmt_hash(tbl->mit_idmaps[RMT_UIDMAP_IDX]);
+        idmap_clear_mie_rmt_hash(tbl->mit_idmaps[RMT_GIDMAP_IDX]);
+
+        /*
+         * paranoid checking: entries are unlinked from both of their lists
+         * when freed, so the local-id buckets must be empty by now
+         */
+        while (i < MDT_IDMAP_HASHSIZE) {
+                LASSERT(list_empty(&tbl->mit_idmaps[LCL_UIDMAP_IDX][i]));
+                LASSERT(list_empty(&tbl->mit_idmaps[LCL_GIDMAP_IDX][i]));
+                i++;
+        }
+        spin_unlock(&tbl->mit_lock);
+
+        med->med_idmap = NULL;
+        OBD_FREE_PTR(tbl);
+}
+
+/*
+ * Revoke the locks held through @exp, but only for a remote-client export
+ * and never while its obd device is recovering.
+ */
+static inline void mdd_revoke_export_locks(struct obd_export *exp)
+{
+        /* local clients are left alone; don't revoke locks during recovery */
+        if (exp->exp_mdt_data.med_rmtclient && !exp->exp_obd->obd_recovering)
+                ldlm_revoke_export_locks(exp);
+}
+
+/*
+ * Search the remote-id hash @mie_rmt_hash for the exact pair
+ * (@mie_rmt_id, @mie_lcl_id).
+ *
+ * On a match the entry's reference count is incremented and the entry is
+ * returned; otherwise NULL is returned.  When @warn_msg is not NULL, every
+ * entry visited in the bucket that shares only one of the two ids is
+ * reported with a warning prefixed by @warn_msg.
+ */
+static
+struct mdt_idmap_entry *idmap_search_entry(struct list_head *mie_rmt_hash,
+                                           uid_t mie_rmt_id, uid_t mie_lcl_id,
+                                           const char *warn_msg)
+{
+        struct list_head *rmt_head =
+                         &mie_rmt_hash[MDT_IDMAP_HASHFUNC(mie_rmt_id)];
+        struct mdt_idmap_entry *e;
+
+        list_for_each_entry(e, rmt_head, mie_rmt_hash) {
+                int same_rmt = (e->mie_rmt_id == mie_rmt_id);
+                int same_lcl = (e->mie_lcl_id == mie_lcl_id);
+
+                if (same_rmt && same_lcl) {
+                        e->mie_refcount++;
+                        return e;
+                }
+
+                /* partial matches are only worth a warning, if wanted */
+                if (!warn_msg)
+                        continue;
+
+                if (same_rmt)
+                        CWARN("%s: rmt id %u already map to %u (new %u)\n",
+                              warn_msg, e->mie_rmt_id, e->mie_lcl_id,
+                              mie_lcl_id);
+                if (same_lcl)
+                        CWARN("%s: lcl id %u already be mapped from %u "
+                              "(new %u)\n", warn_msg,
+                              e->mie_lcl_id, e->mie_rmt_id, mie_rmt_id);
+        }
+
+        return NULL;
+}
+
+/*
+ * Link @new into both hash tables unless an entry with the same
+ * remote/local pair is already present.
+ *
+ * Returns 0 when @new was inserted.  Returns 1 when an identical mapping
+ * exists; in that case the search has taken one more reference on the
+ * existing entry and @new is left untouched for the caller to dispose of.
+ */
+static int idmap_insert_entry(struct list_head *mie_rmt_hash,
+                              struct list_head *mie_lcl_hash,
+                              struct mdt_idmap_entry *new,
+                              const char *warn_msg)
+{
+        struct list_head *rmt_head =
+                         &mie_rmt_hash[MDT_IDMAP_HASHFUNC(new->mie_rmt_id)];
+        struct list_head *lcl_head =
+                         &mie_lcl_hash[MDT_IDMAP_HASHFUNC(new->mie_lcl_id)];
+
+        if (idmap_search_entry(mie_rmt_hash, new->mie_rmt_id,
+                               new->mie_lcl_id, warn_msg) != NULL)
+                return 1;
+
+        list_add_tail(&new->mie_rmt_hash, rmt_head);
+        list_add_tail(&new->mie_lcl_hash, lcl_head);
+        return 0;
+}
+
+/*
+ * Drop one reference on the mapping (@mie_rmt_id, @mie_lcl_id).
+ *
+ * Returns -ENOENT when no such mapping exists, 1 when the mapping is still
+ * referenced afterwards, and 0 when the last reference went away and the
+ * entry was unlinked and freed.
+ */
+static int idmap_remove_entry(struct list_head *mie_rmt_hash,
+                              struct list_head *mie_lcl_hash,
+                              __u32 mie_rmt_id, __u32 mie_lcl_id)
+{
+        struct mdt_idmap_entry *e;
+
+        e = idmap_search_entry(mie_rmt_hash, mie_rmt_id, mie_lcl_id, NULL);
+        if (!e)
+                return -ENOENT;
+
+        /* one for the reference just taken by the search, one to drop */
+        e->mie_refcount -= 2;
+        if (e->mie_refcount > 0)
+                return 1;
+
+        list_del(&e->mie_rmt_hash);
+        list_del(&e->mie_lcl_hash);
+        OBD_FREE_PTR(e);
+        return 0;
+}
+
+/*
+ * Take one reference on the uid mapping (@ruid -> @luid) and one on the
+ * gid mapping (@rgid -> @lgid) in @tbl, creating whichever does not exist
+ * yet.
+ *
+ * Entries are allocated with tbl->mit_lock dropped, so another thread may
+ * add the same mapping in between; idmap_insert_entry() detects that,
+ * takes the reference on the existing entry instead and the new
+ * allocation is discarded.
+ *
+ * Returns 0 on success or -ENOMEM.  When the gid entry cannot be
+ * allocated, the uid reference taken above is dropped again.
+ */
+static int mdt_idmap_add(struct mdt_idmap_table *tbl,
+                         uid_t ruid, uid_t luid,
+                         gid_t rgid, gid_t lgid)
+{
+        struct mdt_idmap_entry *ue, *ge;
+        ENTRY;
+
+        LASSERT(tbl);
+
+        spin_lock(&tbl->mit_lock);
+        ue = idmap_search_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX],
+                                ruid, luid, "UID mapping");
+        spin_unlock(&tbl->mit_lock);
+        if (!ue) {
+                ue = idmap_entry_alloc(ruid, luid);
+                if (!ue)
+                        RETURN(-ENOMEM);
+
+                spin_lock(&tbl->mit_lock);
+                if (idmap_insert_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX],
+                                       tbl->mit_idmaps[LCL_UIDMAP_IDX],
+                                       ue, "UID mapping"))
+                        idmap_entry_free(ue);
+                spin_unlock(&tbl->mit_lock);
+        }
+
+        spin_lock(&tbl->mit_lock);
+        ge = idmap_search_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX],
+                                rgid, lgid, "GID mapping");
+        spin_unlock(&tbl->mit_lock);
+        if (!ge) {
+                ge = idmap_entry_alloc(rgid, lgid);
+                spin_lock(&tbl->mit_lock);
+                if (!ge) {
+                        /*
+                         * Undo the uid reference by id, not through ue:
+                         * ue was freed above if another thread inserted
+                         * the same mapping first, and the entry may also
+                         * have changed while the lock was dropped.
+                         */
+                        idmap_remove_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX],
+                                           tbl->mit_idmaps[LCL_UIDMAP_IDX],
+                                           ruid, luid);
+                        spin_unlock(&tbl->mit_lock);
+                        RETURN(-ENOMEM);
+                }
+
+                if (idmap_insert_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX],
+                                       tbl->mit_idmaps[LCL_GIDMAP_IDX],
+                                       ge, "GID mapping"))
+                        idmap_entry_free(ge);
+                spin_unlock(&tbl->mit_lock);
+        }
+
+        RETURN(0);
+}
+
+/*
+ * Drop one reference on the uid mapping (@ruid -> @luid) and one on the
+ * gid mapping (@rgid -> @lgid) in @tbl.  A NULL @tbl is accepted and
+ * mappings that do not exist are ignored.  Always returns 0.
+ */
+static int mdt_idmap_del(struct mdt_idmap_table *tbl,
+                         uid_t ruid, uid_t luid,
+                         gid_t rgid, gid_t lgid)
+{
+        ENTRY;
+
+        if (tbl != NULL) {
+                spin_lock(&tbl->mit_lock);
+                idmap_remove_entry(tbl->mit_idmaps[RMT_UIDMAP_IDX],
+                                   tbl->mit_idmaps[LCL_UIDMAP_IDX],
+                                   ruid, luid);
+                idmap_remove_entry(tbl->mit_idmaps[RMT_GIDMAP_IDX],
+                                   tbl->mit_idmaps[LCL_GIDMAP_IDX],
+                                   rgid, lgid);
+                spin_unlock(&tbl->mit_lock);
+        }
+
+        RETURN(0);
+}
+
+/*
+ * Maintain the uid/gid mapping of a remote-client export for the request
+ * carried by @info.
+ *
+ * Nothing is done (0 is returned) when the request has no export, the
+ * export is not a remote client, rq_auth_usr_mdt is set, or the opcode is
+ * not one of SEC_CTX_INIT, SEC_CTX_INIT_CONT, SEC_CTX_FINI, MDS_CONNECT.
+ * Otherwise the identity of rq_auth_mapped_uid is fetched and the pair
+ * (pud uid/gid -> identity uid/gid) is removed for SEC_CTX_FINI and added
+ * for the other opcodes.  After a successful change for a SEC_CTX_*
+ * opcode the export's locks are revoked.
+ *
+ * Returns 0 on success, -EACCES when GSS support is off, the mapped uid is
+ * invalid or the identity cannot be obtained, or the error from
+ * mdt_idmap_add()/mdt_idmap_del().
+ */
+int mdt_handle_idmap(struct mdt_thread_info *info)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        struct mdt_device *mdt = info->mti_mdt;
+        struct mdt_export_data *med;
+        struct ptlrpc_user_desc *pud = req->rq_user_desc;
+        struct mdt_identity *identity;
+        __u32 opc;
+        int rc;
+
+        ENTRY;
+
+        if (!req->rq_export)
+                RETURN(0);
+
+        med = mdt_req2med(req);
+        if (!med->med_rmtclient || req->rq_auth_usr_mdt)
+                RETURN(0);
+
+        opc = lustre_msg_get_opc(req->rq_reqmsg);
+        switch (opc) {
+        case SEC_CTX_INIT:
+        case SEC_CTX_INIT_CONT:
+        case SEC_CTX_FINI:
+        case MDS_CONNECT:
+                break;
+        default:
+                /* Bypass other opc */
+                RETURN(0);
+        }
+
+        LASSERT(pud);
+        LASSERT(med->med_idmap);
+
+        if (mdt->no_gss_support) {
+                CWARN("The server is running with no GSS support now! "
+                      "and don't permit remote client to access!\n");
+                RETURN(-EACCES);
+        }
+
+        if (req->rq_auth_mapped_uid == INVALID_UID) {
+                CERROR("invalid authorized mapped uid, please check "
+                       "/etc/lustre/idmap.conf!\n");
+                RETURN(-EACCES);
+        }
+
+        identity = mdt_identity_get(mdt->mdt_identity_cache,
+                                    req->rq_auth_mapped_uid);
+        if (!identity) {
+                CERROR("can't get mdt identity(%u), no mapping added\n",
+                       req->rq_auth_mapped_uid);
+                RETURN(-EACCES);
+        }
+
+        /* only a context teardown removes the mapping */
+        if (opc == SEC_CTX_FINI)
+                rc = mdt_idmap_del(med->med_idmap,
+                                   pud->pud_uid, identity->mi_uid,
+                                   pud->pud_gid, identity->mi_gid);
+        else
+                rc = mdt_idmap_add(med->med_idmap,
+                                   pud->pud_uid, identity->mi_uid,
+                                   pud->pud_gid, identity->mi_gid);
+
+        mdt_identity_put(mdt->mdt_identity_cache, identity);
+
+        if (rc)
+                RETURN(rc);
+
+        /* a connect only sets the mapping up; context changes revoke locks */
+        if (opc != MDS_CONNECT)
+                mdd_revoke_export_locks(req->rq_export);
+
+        RETURN(0);
+}
+
+/*
+ * Search one bucket of an id hash for @id.
+ *
+ * @hash is indexed with MDT_IDMAP_HASHFUNC(id) to pick the bucket.  When
+ * @reverse is zero, entries are matched on their remote id and the local id
+ * is returned; otherwise entries are matched on their local id and the
+ * remote id is returned.  MDT_IDMAP_NOTFOUND is returned if nothing matches.
+ * No lock is taken here.
+ */
+static __u32 idmap_lookup_id(struct list_head *hash, int reverse, __u32 id)
+{
+        struct list_head *bucket = &hash[MDT_IDMAP_HASHFUNC(id)];
+        struct mdt_idmap_entry *entry;
+
+        if (reverse) {
+                /* local -> remote */
+                list_for_each_entry(entry, bucket, mie_lcl_hash)
+                        if (entry->mie_lcl_id == id)
+                                return entry->mie_rmt_id;
+
+                return MDT_IDMAP_NOTFOUND;
+        }
+
+        /* remote -> local */
+        list_for_each_entry(entry, bucket, mie_rmt_hash)
+                if (entry->mie_rmt_id == id)
+                        return entry->mie_lcl_id;
+
+        return MDT_IDMAP_NOTFOUND;
+}
+
+/*
+ * Look up @uid in the uid half of @tbl while holding tbl->mit_lock.
+ *
+ * @reverse selects the local-uid hash (non-zero) or the remote-uid hash
+ * (zero) and is passed on to idmap_lookup_id().  Returns the mapped uid, or
+ * MDT_IDMAP_NOTFOUND if @tbl is NULL or no entry matches.
+ */
+static int mdt_idmap_lookup_uid(struct mdt_idmap_table *tbl, int reverse,
+                                uid_t uid)
+{
+        struct list_head *buckets;
+        uid_t mapped;
+
+        if (tbl == NULL)
+                return MDT_IDMAP_NOTFOUND;
+
+        if (reverse)
+                buckets = tbl->mit_idmaps[LCL_UIDMAP_IDX];
+        else
+                buckets = tbl->mit_idmaps[RMT_UIDMAP_IDX];
+
+        spin_lock(&tbl->mit_lock);
+        mapped = idmap_lookup_id(buckets, reverse, uid);
+        spin_unlock(&tbl->mit_lock);
+
+        return mapped;
+}
+
+/*
+ * Look up @gid in the gid half of @tbl while holding tbl->mit_lock.
+ *
+ * @reverse selects the local-gid hash (non-zero) or the remote-gid hash
+ * (zero) and is passed on to idmap_lookup_id().  Returns the mapped gid, or
+ * MDT_IDMAP_NOTFOUND if @tbl is NULL or no entry matches.
+ */
+static int mdt_idmap_lookup_gid(struct mdt_idmap_table *tbl, int reverse,
+                                gid_t gid)
+{
+        struct list_head *buckets;
+        gid_t mapped;
+
+        if (tbl == NULL)
+                return MDT_IDMAP_NOTFOUND;
+
+        if (reverse)
+                buckets = tbl->mit_idmaps[LCL_GIDMAP_IDX];
+        else
+                buckets = tbl->mit_idmaps[RMT_GIDMAP_IDX];
+
+        spin_lock(&tbl->mit_lock);
+        mapped = idmap_lookup_id(buckets, reverse, gid);
+        spin_unlock(&tbl->mit_lock);
+
+        return mapped;
+}
+
+/*
+ * Replace the uid/gid/fsuid/fsgid in @pud with their forward-mapped
+ * (remote -> local) values from the id map of the export @req came in on.
+ *
+ * Nothing is done, and 0 is returned, when the export is not a remote client
+ * or when req->rq_auth_usr_mdt is set.  If any of the four ids has no
+ * mapping, -EACCES is returned and @pud is left untouched; @pud is only
+ * written once all four lookups have succeeded.
+ */
+int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *req,
+                              struct ptlrpc_user_desc *pud)
+{
+        struct mdt_export_data *med = mdt_req2med(req);
+        struct mdt_idmap_table *idmap = med->med_idmap;
+        uid_t lcl_uid, lcl_fsuid;
+        gid_t lcl_gid, lcl_fsgid;
+
+        /* Only remote client need desc_to_idmap. */
+        if (!med->med_rmtclient || req->rq_auth_usr_mdt)
+                return 0;
+
+        lcl_uid = mdt_idmap_lookup_uid(idmap, 0, pud->pud_uid);
+        if (lcl_uid == MDT_IDMAP_NOTFOUND) {
+                CERROR("no mapping for uid %u\n", pud->pud_uid);
+                return -EACCES;
+        }
+
+        /* fsuid equal to uid shares the mapping, no second lookup needed */
+        lcl_fsuid = lcl_uid;
+        if (pud->pud_fsuid != pud->pud_uid) {
+                lcl_fsuid = mdt_idmap_lookup_uid(idmap, 0, pud->pud_fsuid);
+                if (lcl_fsuid == MDT_IDMAP_NOTFOUND) {
+                        CERROR("no mapping for fsuid %u\n", pud->pud_fsuid);
+                        return -EACCES;
+                }
+        }
+
+        lcl_gid = mdt_idmap_lookup_gid(idmap, 0, pud->pud_gid);
+        if (lcl_gid == MDT_IDMAP_NOTFOUND) {
+                CERROR("no mapping for gid %u\n", pud->pud_gid);
+                return -EACCES;
+        }
+
+        /* likewise for fsgid */
+        lcl_fsgid = lcl_gid;
+        if (pud->pud_fsgid != pud->pud_gid) {
+                lcl_fsgid = mdt_idmap_lookup_gid(idmap, 0, pud->pud_fsgid);
+                if (lcl_fsgid == MDT_IDMAP_NOTFOUND) {
+                        CERROR("no mapping for fsgid %u\n", pud->pud_fsgid);
+                        return -EACCES;
+                }
+        }
+
+        /* everything resolved: commit the mapped ids */
+        pud->pud_uid = lcl_uid;
+        pud->pud_gid = lcl_gid;
+        pud->pud_fsuid = lcl_fsuid;
+        pud->pud_fsgid = lcl_fsgid;
+
+#if 0
+        /* remote client doesn't support setgroups */
+        if (med->med_rmtclient)
+                return 0;
+
+        for (i = 0; i < pud->pud_ngroups; i++) {
+                gid = mdt_idmap_lookup_gid(idmap, 0, pud->pud_groups[i]);
+                if (gid == MDT_IDMAP_NOTFOUND) {
+                        CERROR("no mapping for gid %u\n", pud->pud_gid);
+                        return -EACCES;
+                }
+                pud->pud_groups[i] = gid;
+        }
+#endif
+
+        return 0;
+}
+
+/*
+ * Reverse map (local -> remote) the uid and gid of a reply body.
+ *
+ * Applies only to remote clients whose request does not have
+ * rq_auth_usr_mdt set.  An id equal to the caller's mapped uid/fsuid
+ * (gid/fsgid) is replaced by the id the client originally sent (mu_o_*);
+ * any other id goes through the export's id map.  When no mapping exists,
+ * the id becomes med_nllu / med_nllg and, if the body carries a mode, the
+ * owner (resp. group) permission bits are replaced by a copy of the "other"
+ * bits.
+ */
+void mdt_body_reverse_idmap(struct mdt_thread_info *info, struct mdt_body *body)
+{
+        struct ptlrpc_request   *req = mdt_info_req(info);
+        struct md_ucred         *cred = &info->mti_uc;
+        struct mdt_export_data  *med = mdt_req2med(req);
+        struct mdt_idmap_table  *tbl = med->med_idmap;
+        uid_t rmt_uid;
+        gid_t rmt_gid;
+
+        if (!med->med_rmtclient || req->rq_auth_usr_mdt)
+                return;
+
+        if (body->valid & OBD_MD_FLUID) {
+                if (body->uid == cred->mu_uid)
+                        rmt_uid = cred->mu_o_uid;
+                else if (body->uid == cred->mu_fsuid)
+                        rmt_uid = cred->mu_o_fsuid;
+                else
+                        rmt_uid = mdt_idmap_lookup_uid(tbl, 1, body->uid);
+
+                if (rmt_uid == MDT_IDMAP_NOTFOUND) {
+                        rmt_uid = med->med_nllu;
+                        if (body->valid & OBD_MD_FLMODE) {
+                                /* owner bits := other bits */
+                                body->mode &= ~S_IRWXU;
+                                body->mode |= (body->mode & S_IRWXO) << 6;
+                        }
+                }
+
+                body->uid = rmt_uid;
+        }
+
+        if (body->valid & OBD_MD_FLGID) {
+                if (body->gid == cred->mu_gid)
+                        rmt_gid = cred->mu_o_gid;
+                else if (body->gid == cred->mu_fsgid)
+                        rmt_gid = cred->mu_o_fsgid;
+                else
+                        rmt_gid = mdt_idmap_lookup_gid(tbl, 1, body->gid);
+
+                if (rmt_gid == MDT_IDMAP_NOTFOUND) {
+                        rmt_gid = med->med_nllg;
+                        if (body->valid & OBD_MD_FLMODE) {
+                                /* group bits := other bits */
+                                body->mode &= ~S_IRWXG;
+                                body->mode |= (body->mode & S_IRWXO) << 3;
+                        }
+                }
+
+                body->gid = rmt_gid;
+        }
+}
+
+/*
+ * Reverse map (local -> remote) the four ids of a remote permission reply.
+ *
+ * NB: an error is returned as soon as one id has no reverse mapping.  This
+ * can look strange from the client side: a client that has not run kinit to
+ * establish the mapping for the id in question will always get -EPERM, and
+ * the same holds for the root-squash case.
+ *
+ * @perm is only modified when all four lookups succeed.  Returns 0 on
+ * success or when req->rq_auth_usr_mdt is set, -EPERM otherwise.  The export
+ * must belong to a remote client (asserted).
+ */
+int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *req,
+                                  struct mdt_remote_perm *perm)
+{
+        struct mdt_export_data *med = mdt_req2med(req);
+        uid_t rmt_uid, rmt_fsuid;
+        gid_t rmt_gid, rmt_fsgid;
+
+        LASSERT(med->med_rmtclient);
+
+        if (req->rq_auth_usr_mdt)
+                return 0;
+
+        rmt_uid = mdt_idmap_lookup_uid(med->med_idmap, 1, perm->rp_uid);
+        if (rmt_uid == MDT_IDMAP_NOTFOUND) {
+                CERROR("no mapping for uid %u\n", perm->rp_uid);
+                return -EPERM;
+        }
+
+        rmt_gid = mdt_idmap_lookup_gid(med->med_idmap, 1, perm->rp_gid);
+        if (rmt_gid == MDT_IDMAP_NOTFOUND) {
+                CERROR("no mapping for gid %u\n", perm->rp_gid);
+                return -EPERM;
+        }
+
+        rmt_fsuid = mdt_idmap_lookup_uid(med->med_idmap, 1, perm->rp_fsuid);
+        if (rmt_fsuid == MDT_IDMAP_NOTFOUND) {
+                CERROR("no mapping for fsuid %u\n", perm->rp_fsuid);
+                return -EPERM;
+        }
+
+        rmt_fsgid = mdt_idmap_lookup_gid(med->med_idmap, 1, perm->rp_fsgid);
+        if (rmt_fsgid == MDT_IDMAP_NOTFOUND) {
+                CERROR("no mapping for fsgid %u\n", perm->rp_fsgid);
+                return -EPERM;
+        }
+
+        /* all four ids resolved: commit */
+        perm->rp_uid = rmt_uid;
+        perm->rp_gid = rmt_gid;
+        perm->rp_fsuid = rmt_fsuid;
+        perm->rp_fsgid = rmt_fsgid;
+        return 0;
+}
+
+/*
+ * Adjust the uid/gid in info->mti_attr for a request from a remote client.
+ *
+ * Nothing is done for non-remote exports or when req->rq_auth_usr_mdt is
+ * set.  For every @op other than REINT_SETATTR, a valid uid/gid (not -1) is
+ * overwritten with the caller's mu_fsuid/mu_fsgid.  For REINT_SETATTR the
+ * requested uid/gid is forward mapped: the ids the client originally sent
+ * (mu_o_*) map to the corresponding mapped ids in the ucred, anything else
+ * is looked up in the export's id map.
+ *
+ * Returns 0 on success, -EPERM when a setattr target id has no mapping.
+ */
+int mdt_fix_attr_ucred(struct mdt_thread_info *info, __u32 op)
+{
+        struct ptlrpc_request   *req = mdt_info_req(info);
+        struct md_ucred         *cred = &info->mti_uc;
+        struct lu_attr          *la = &info->mti_attr.ma_attr;
+        struct mdt_export_data  *med = mdt_req2med(req);
+        struct mdt_idmap_table  *tbl = med->med_idmap;
+        uid_t                    new_uid;
+        gid_t                    new_gid;
+
+        ENTRY;
+
+        if (!med->med_rmtclient || req->rq_auth_usr_mdt)
+                RETURN(0);
+
+        if (op != REINT_SETATTR) {
+                if ((la->la_valid & LA_UID) && (la->la_uid != -1))
+                        la->la_uid = cred->mu_fsuid;
+                if ((la->la_valid & LA_GID) && (la->la_gid != -1))
+                        la->la_gid = cred->mu_fsgid;
+
+                RETURN(0);
+        }
+
+        /* NB: -1 case will be handled by mdt_fix_attr() later. */
+        if ((la->la_valid & LA_UID) && (la->la_uid != -1)) {
+                if (la->la_uid == cred->mu_o_uid)
+                        new_uid = cred->mu_uid;
+                else if (la->la_uid == cred->mu_o_fsuid)
+                        new_uid = cred->mu_fsuid;
+                else
+                        new_uid = mdt_idmap_lookup_uid(tbl, 0, la->la_uid);
+
+                if (new_uid == MDT_IDMAP_NOTFOUND) {
+                        CWARN("Deny chown to uid %u\n", la->la_uid);
+                        RETURN(-EPERM);
+                }
+
+                la->la_uid = new_uid;
+        }
+
+        if ((la->la_valid & LA_GID) && (la->la_gid != -1)) {
+                if (la->la_gid == cred->mu_o_gid)
+                        new_gid = cred->mu_gid;
+                else if (la->la_gid == cred->mu_o_fsgid)
+                        new_gid = cred->mu_fsgid;
+                else
+                        new_gid = mdt_idmap_lookup_gid(tbl, 0, la->la_gid);
+
+                if (new_gid == MDT_IDMAP_NOTFOUND) {
+                        CWARN("Deny chown to gid %u\n", la->la_gid);
+                        RETURN(-EPERM);
+                }
+
+                la->la_gid = new_gid;
+        }
+
+        RETURN(0);
+}
index 627d838..d37743a 100644 (file)
@@ -53,6 +53,8 @@
 #include <lustre_req_layout.h>
 /* LR_CLIENT_SIZE, etc. */
 #include <lustre_disk.h>
+#include <lustre_sec.h>
+#include <lvfs.h>
 
 
 /* Data stored per client in the last_rcvd file.  In le32 order. */
@@ -159,6 +161,13 @@ struct mdt_device {
         struct mdt_server_data     mdt_msd;
         spinlock_t                 mdt_client_bitmap_lock;
         unsigned long              mdt_client_bitmap[(LR_MAX_CLIENTS >> 3) / sizeof(long)];
+
+        struct upcall_cache        *mdt_identity_cache;
+        struct upcall_cache        *mdt_rmtacl_cache;
+
+        /* root squash */
+        struct rootsquash_info     *mdt_rootsquash_info;
+        int                        no_gss_support;
 };
 
 /*XXX copied from mds_internal.h */
@@ -243,6 +252,10 @@ struct mdt_thread_info {
          */
         struct mdt_object         *mti_object;
         /*
+         * User credential.
+         */
+        struct md_ucred            mti_uc;
+        /*
          * Object attributes.
          */
         struct md_attr             mti_attr;
@@ -393,7 +406,8 @@ void mdt_lock_handle_fini(struct mdt_lock_handle *lh);
 
 void mdt_reconstruct(struct mdt_thread_info *, struct mdt_lock_handle *);
 
-int mdt_fs_setup(const struct lu_context *, struct mdt_device *);
+int mdt_fs_setup(const struct lu_context *, struct mdt_device *,
+                 struct obd_device *);
 void mdt_fs_cleanup(const struct lu_context *, struct mdt_device *);
 
 int mdt_client_del(const struct lu_context *ctxt,
@@ -436,6 +450,57 @@ void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *);
 
 void mdt_dump_lmm(int level, const struct lov_mds_md *lmm);
 
+int mdt_init_ucred(struct mdt_thread_info *, struct mdt_body *);
+
+int mdt_init_ucred_reint(struct mdt_thread_info *);
+
+void mdt_exit_ucred(struct mdt_thread_info *);
+
+int groups_from_list(struct group_info *, gid_t *);
+
+void groups_sort(struct group_info *);
+
+/* mdt_idmap.c */
+int mdt_init_idmap(struct mdt_thread_info *);
+
+void mdt_cleanup_idmap(struct mdt_export_data *);
+
+int mdt_handle_idmap(struct mdt_thread_info *);
+
+int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *,
+                              struct ptlrpc_user_desc *);
+
+void mdt_body_reverse_idmap(struct mdt_thread_info *,
+                            struct mdt_body *);
+
+int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *,
+                                  struct mdt_remote_perm *);
+
+int mdt_fix_attr_ucred(struct mdt_thread_info *, __u32);
+
+/* mdt/mdt_identity.c */
+#define MDT_IDENTITY_UPCALL_PATH        "/usr/sbin/l_getidentity"
+
+extern struct upcall_cache_ops mdt_identity_upcall_cache_ops;
+
+struct mdt_identity *mdt_identity_get(struct upcall_cache *, __u32);
+
+void mdt_identity_put(struct upcall_cache *, struct mdt_identity *);
+
+void mdt_flush_identity(struct upcall_cache *, __u32);
+
+__u32 mdt_identity_get_setxid_perm(struct mdt_identity *, __u32, lnet_nid_t);
+
+int mdt_pack_remote_perm(struct mdt_thread_info *, struct mdt_object *, void *);
+
+/* mdt/mdt_rmtacl.c */
+#define MDT_RMTACL_UPCALL_PATH          "/usr/sbin/l_facl"
+
+extern struct upcall_cache_ops mdt_rmtacl_upcall_cache_ops;
+
+int mdt_rmtacl_upcall(struct mdt_thread_info *, unsigned long,
+                      char *, char *, int);
+
 extern struct lu_context_key       mdt_thread_key;
 /* debug issues helper starts here*/
 static inline void mdt_fail_write(const struct lu_context *ctx,
@@ -450,6 +515,11 @@ static inline void mdt_fail_write(const struct lu_context *ctx,
         }
 }
 
+/*
+ * Return the address of the MDT export data (exp_mdt_data) inside the export
+ * that @req arrived on.  req->rq_export is not checked for NULL here, so
+ * callers must only use this on requests that have an export.
+ */
+static inline struct mdt_export_data *mdt_req2med(struct ptlrpc_request *req)
+{
+        return &req->rq_export->exp_mdt_data;
+}
+
 #define MDT_FAIL_CHECK(id)                                              \
 ({                                                                      \
         if (OBD_FAIL_CHECK(id))                                         \
index 0b5fd93..9de4efe 100644 (file)
 #include "mdt_internal.h"
 
 
+/*
+ * Fill the gid blocks of @ginfo from the flat gid array @glist.
+ *
+ * ginfo->ngroups entries are copied in total, at most NGROUPS_PER_BLOCK into
+ * each of the ginfo->nblocks blocks.  @glist must hold at least
+ * ginfo->ngroups entries.
+ *
+ * Always returns 0; the int return type is kept so existing callers and the
+ * prototype stay unchanged.
+ */
+int groups_from_list(struct group_info *ginfo, gid_t *glist)
+{
+        int i;
+        int count = ginfo->ngroups;
+
+        /* fill group_info from gid array */
+        for (i = 0; i < ginfo->nblocks; i++) {
+                int cp_count = min(NGROUPS_PER_BLOCK, count);
+                int off = i * NGROUPS_PER_BLOCK;
+                int len = cp_count * sizeof(*glist);
+
+                /* memcpy() returns its destination pointer, not an error
+                 * code, so the result must not be tested: doing so returned
+                 * -EFAULT after the first block and left every later block
+                 * uncopied. */
+                memcpy(ginfo->blocks[i], glist + off, len);
+
+                count -= cp_count;
+        }
+        return 0;
+}
+
+/* groups_sort() is copied from linux kernel! */
+/*
+ * Sort the gids of @group_info into ascending order, in place, with a simple
+ * shell-metzner sort.  The gap sequence is 1, 4, 13, ... (g = 3 * g + 1),
+ * starting from the largest gap below the number of groups.
+ */
+void groups_sort(struct group_info *group_info)
+{
+        int ngids = group_info->ngroups;
+        int gap = 1;
+        int start;
+
+        /* first gap of the sequence that is >= ngids ... */
+        while (gap < ngids)
+                gap = 3 * gap + 1;
+
+        /* ... then step back down through the sequence */
+        for (gap /= 3; gap != 0; gap /= 3) {
+                for (start = 0; start < ngids - gap; start++) {
+                        int probe = start;
+                        int hole = probe + gap;
+                        gid_t key = GROUP_AT(group_info, hole);
+
+                        /* gapped insertion: shift larger gids up by one gap */
+                        for (; probe >= 0 && GROUP_AT(group_info, probe) > key;
+                             probe -= gap) {
+                                GROUP_AT(group_info, hole) =
+                                    GROUP_AT(group_info, probe);
+                                hole = probe;
+                        }
+                        GROUP_AT(group_info, hole) = key;
+                }
+        }
+}
+
+/*
+ * Release whatever info->mti_uc holds and put it back into UCRED_INIT.
+ *
+ * Nothing is done if the ucred is already in UCRED_INIT.  Otherwise the
+ * group list is freed unless the ucred is UCRED_OLD (old_init_ucred() and
+ * old_init_ucred_reint() only borrow identity->mi_ginfo, so it is not ours
+ * to free), and the identity reference, if any, is put.
+ */
+void mdt_exit_ucred(struct mdt_thread_info *info)
+{
+        struct md_ucred   *uc = &info->mti_uc;
+        struct mdt_device *mdt = info->mti_mdt;
+
+        if (uc->mu_valid == UCRED_INIT)
+                return;
+
+        if (uc->mu_ginfo && (uc->mu_valid != UCRED_OLD))
+                groups_free(uc->mu_ginfo);
+        /* Always drop the pointer, including the borrowed one of the
+         * UCRED_OLD case.  Left in place, it would be passed to
+         * groups_free() by the next call here if the following init attempt
+         * failed and left the ucred in UCRED_INVALID. */
+        uc->mu_ginfo = NULL;
+
+        if (uc->mu_identity) {
+                mdt_identity_put(mdt->mdt_identity_cache, uc->mu_identity);
+                uc->mu_identity = NULL;
+        }
+
+        uc->mu_valid = UCRED_INIT;
+}
+
+/*
+ * Return 1 if @nid is exempt from root squash, 0 otherwise.
+ *
+ * A nid is exempt when the no-squash list of mdt->mdt_rootsquash_info holds
+ * either @nid itself or LNET_NID_ANY.  mdt->mdt_rootsquash_info is used
+ * without a NULL check, so the caller must have verified it.
+ */
+static int nid_nosquash(struct mdt_device *mdt, lnet_nid_t nid)
+{
+        struct rootsquash_info *squash = mdt->mdt_rootsquash_info;
+        int idx = 0;
+
+        while (idx < squash->rsi_n_nosquash_nids) {
+                if (squash->rsi_nosquash_nids[idx] == nid ||
+                    squash->rsi_nosquash_nids[idx] == LNET_NID_ANY)
+                        return 1;
+                idx++;
+        }
+
+        return 0;
+}
+
+/*
+ * Apply root squash to the credentials being built in @ucred.
+ *
+ * FIXME: here we follow simple rule: once uid/fsuid is root, we also squash
+ *        the gid/fsgid, don't care setuid/setgid attributes.
+ *
+ * NB: don't change pud fields in root squash, because xid in pud will be
+ *     packed in remote perm reply.
+ *
+ * Returns 0 without touching @ucred when neither pud_uid nor pud_fsuid is 0,
+ * when no squash info is configured, when the squash uid is 0, or when
+ * @peernid is on the no-squash list.  Otherwise fills mu_uid/mu_gid,
+ * mu_fsuid/mu_fsgid and mu_cap in @ucred (root ids replaced by
+ * rsi_uid/rsi_gid, capabilities stripped of CAP_FS_MASK) and returns 1.
+ */
+static int mdt_squash_root(struct mdt_device *mdt, struct md_ucred *ucred,
+                           struct ptlrpc_user_desc *pud, lnet_nid_t peernid)
+{
+        struct rootsquash_info *rsi = mdt->mdt_rootsquash_info;
+
+        if (pud->pud_uid && pud->pud_fsuid)
+                return 0;
+
+        if (!rsi || !rsi->rsi_uid || nid_nosquash(mdt, peernid))
+                return 0;
+
+        CDEBUG(D_SEC, "squash req from "LPX64":"
+               "(%u:%u-%u:%u/%x)=>(%u:%u-%u:%u/%x)\n", peernid,
+               pud->pud_uid, pud->pud_gid,
+               pud->pud_fsuid, pud->pud_fsgid, pud->pud_cap,
+               pud->pud_uid ? pud->pud_uid : rsi->rsi_uid,
+               pud->pud_uid ? pud->pud_gid : rsi->rsi_gid,
+               pud->pud_fsuid ? pud->pud_fsuid : rsi->rsi_uid,
+               pud->pud_fsuid ? pud->pud_fsgid : rsi->rsi_gid,
+               pud->pud_cap & ~CAP_FS_MASK);
+
+        if (pud->pud_uid == 0) {
+                ucred->mu_uid = rsi->rsi_uid;
+                ucred->mu_gid = rsi->rsi_gid;
+        } else {
+                ucred->mu_uid = pud->pud_uid;
+                ucred->mu_gid = pud->pud_gid;
+        }
+
+        if (pud->pud_fsuid == 0) {
+                ucred->mu_fsuid = rsi->rsi_uid;
+                ucred->mu_fsgid = rsi->rsi_gid;
+        } else {
+                ucred->mu_fsuid = pud->pud_fsuid;
+                ucred->mu_fsgid = pud->pud_fsgid;
+        }
+
+        /* Assign rather than and-mask: mu_cap has not been set from @pud on
+         * this path, so "&=" would combine the new value with whatever was
+         * left in the ucred.  The result matches what the debug message
+         * above reports. */
+        ucred->mu_cap = pud->pud_cap & ~CAP_FS_MASK;
+
+        return 1;
+}
+
+/*
+ * Build info->mti_uc from the user descriptor of the request
+ * (req->rq_user_desc); used when the request carries one.
+ *
+ * In order: refuse remote clients when the server runs without GSS, refuse
+ * GSS requests whose rq_auth_uid is INVALID_UID, record the ids the client
+ * sent as mu_o_*, for GSS requests map the ids of remote clients and check
+ * the authenticated uid against the claimed one, fetch the identity and
+ * check setuid/setgid permission, apply root squash, and finally install
+ * supplementary groups taken from the descriptor where permitted.
+ *
+ * Returns 0 with mu_valid == UCRED_NEW on success.  On failure returns
+ * -EACCES or -ENOMEM and leaves mu_valid == UCRED_INVALID with neither an
+ * identity reference nor a group list recorded in the ucred.
+ */
+static int new_init_ucred(struct mdt_thread_info *info)
+{
+        struct ptlrpc_request   *req = mdt_info_req(info);
+        struct mdt_export_data  *med = mdt_req2med(req);
+        struct mdt_device       *mdt = info->mti_mdt;
+        struct ptlrpc_user_desc *pud = req->rq_user_desc;
+        struct md_ucred         *ucred = &info->mti_uc;
+        struct mdt_identity     *identity = NULL;
+        lnet_nid_t              peernid = req->rq_peer.nid;
+        __u32                   setxid_perm = 0;
+        int                     root_squashed = 0;
+        int                     rc = 0;
+
+        ENTRY;
+
+        ucred->mu_valid = UCRED_INVALID;
+        /* Both callers run mdt_exit_ucred() first, so nothing owned is lost
+         * by clearing these.  It guarantees that a failed init cannot leave
+         * a stale pointer behind for the next mdt_exit_ucred() to free or
+         * put. */
+        ucred->mu_ginfo = NULL;
+        ucred->mu_identity = NULL;
+
+        if (mdt->no_gss_support && med->med_rmtclient) {
+                CWARN("The server is running with no GSS support now! "
+                      "and don't permit remote client to access!\n");
+                RETURN(-EACCES);
+        }
+
+        if (req->rq_auth_gss && req->rq_auth_uid == INVALID_UID) {
+                CWARN("user not authenticated, deny access!\n");
+                RETURN(-EACCES);
+        }
+
+        /* remember what the client sent before any mapping or squashing */
+        ucred->mu_o_uid   = pud->pud_uid;
+        ucred->mu_o_gid   = pud->pud_gid;
+        ucred->mu_o_fsuid = pud->pud_fsuid;
+        ucred->mu_o_fsgid = pud->pud_fsgid;
+
+        /* sanity check: if we use strong authentication, we expect the
+         * uid which client claimed is true */
+        if (req->rq_auth_gss) {
+                if (med->med_rmtclient) {
+                        /* rewrites the ids in pud to local ids */
+                        if (ptlrpc_user_desc_do_idmap(req, pud))
+                                RETURN(-EACCES);
+
+                        if (req->rq_auth_mapped_uid != pud->pud_uid) {
+                                CERROR("remote client "LPU64": auth uid %u "
+                                       "while client claim %u:%u/%u:%u\n",
+                                       peernid, req->rq_auth_uid, pud->pud_uid,
+                                       pud->pud_gid, pud->pud_fsuid,
+                                       pud->pud_fsgid);
+                                RETURN(-EACCES);
+                        }
+                } else {
+                        if (req->rq_auth_uid != pud->pud_uid) {
+                                CERROR("local client "LPU64": auth uid %u "
+                                       "while client claim %u:%u/%u:%u\n",
+                                       peernid, req->rq_auth_uid, pud->pud_uid,
+                                       pud->pud_gid, pud->pud_fsuid,
+                                       pud->pud_fsgid);
+                                RETURN(-EACCES);
+                        }
+                }
+        }
+
+        /* without GSS support no identity is fetched; identity stays NULL */
+        if (mdt->no_gss_support)
+                goto check_squash;
+
+        identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid);
+        if (!identity) {
+                /* report the uid that was looked up; mu_fsuid is not filled
+                 * in yet at this point */
+                CERROR("Deny access without identity: uid %u\n",
+                       pud->pud_uid);
+                RETURN(-EACCES);
+        }
+
+        /* check setuid/setgid permissions */
+        if (!req->rq_auth_usr_mdt) {
+                int setuid, setgid;
+
+                /* find out the setuid/setgid attempt */
+                setuid = (pud->pud_uid != pud->pud_fsuid);
+                setgid = (pud->pud_gid != pud->pud_fsgid ||
+                          pud->pud_gid != identity->mi_gid);
+
+                setxid_perm = mdt_identity_get_setxid_perm(identity,
+                                                           med->med_rmtclient,
+                                                           peernid);
+
+                /* check permission of setuid */
+                if (setuid && !(setxid_perm & LUSTRE_SETUID_PERM)) {
+                        CWARN("mdt blocked setuid attempt (%u -> %u) from "
+                              LPX64"\n", pud->pud_uid, pud->pud_fsuid, peernid);
+                        GOTO(out, rc = -EACCES);
+                }
+
+                /* check permission of setgid */
+                if (setgid && !(setxid_perm & LUSTRE_SETGID_PERM)) {
+                        CWARN("mdt blocked setgid attempt (%u:%u/%u:%u -> %u) "
+                              "from "LPX64"\n", pud->pud_uid, pud->pud_gid,
+                              pud->pud_fsuid, pud->pud_fsgid, identity->mi_gid,
+                              peernid);
+                        GOTO(out, rc = -EACCES);
+                }
+        }
+
+check_squash:
+        /* FIXME: The exact behavior of root_squash is not defined. */
+        if (!req->rq_auth_usr_mdt)
+                root_squashed = mdt_squash_root(mdt, ucred, pud, peernid);
+        if (!root_squashed) {
+                ucred->mu_uid   = pud->pud_uid;
+                ucred->mu_gid   = pud->pud_gid;
+                ucred->mu_fsuid = pud->pud_fsuid;
+                ucred->mu_fsgid = pud->pud_fsgid;
+                ucred->mu_cap   = pud->pud_cap;
+                /* remove fs privilege for non-root user */
+                if (pud->pud_fsuid)
+                        ucred->mu_cap &= ~CAP_FS_MASK;
+        }
+
+        /* by now every fields other than groups have been granted */
+        ucred->mu_identity = identity;
+
+        /* setgroups for local client with LUSTRE_SETGRP_PERM, and no_squash_
+         * root, otherwise install groups from local user supplementary groups.
+         *
+         * NB: remote client not allowed to setgroups anyway.
+         */
+        if (req->rq_auth_usr_mdt ||
+            (pud->pud_ngroups && !med->med_rmtclient && !root_squashed &&
+             (setxid_perm & LUSTRE_SETGRP_PERM))) {
+                struct group_info *ginfo;
+
+                /* setgroups for local client */
+                ginfo = groups_alloc(pud->pud_ngroups);
+                if (!ginfo) {
+                        CERROR("failed to alloc %d groups\n",
+                               pud->pud_ngroups);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                groups_from_list(ginfo, pud->pud_groups);
+                groups_sort(ginfo);
+                ucred->mu_ginfo = ginfo;
+        } else {
+                ucred->mu_ginfo = NULL;
+        }
+
+        ucred->mu_valid = UCRED_NEW;
+
+out:
+        if (rc) {
+                /* mu_identity may already point at the identity; forget it
+                 * so the reference released here is not put a second time
+                 * by mdt_exit_ucred().  The identity is NULL when running
+                 * without GSS support, in which case there is nothing to
+                 * put. */
+                ucred->mu_identity = NULL;
+                if (identity)
+                        mdt_identity_put(mdt->mdt_identity_cache, identity);
+        }
+
+        RETURN(rc);
+}
+
+/*
+ * Build info->mti_uc from the ids carried in @body; used when the request
+ * has no user descriptor.
+ *
+ * Unless the server runs without GSS support, the identity for body->fsuid
+ * is fetched and -EACCES is returned if there is none.  The ids from @body
+ * are stored both as the original (mu_o_*) and the effective ids, mu_cap is
+ * taken from body->capability, and mu_ginfo points at the identity's own
+ * group list (or is NULL without an identity).  Returns 0 with
+ * mu_valid == UCRED_OLD on success.
+ */
+static int old_init_ucred(struct mdt_thread_info *info,
+                                 struct mdt_body *body)
+{
+        struct md_ucred     *cred = &info->mti_uc;
+        struct mdt_device   *mdt = info->mti_mdt;
+        struct mdt_identity *ident = NULL;
+
+        ENTRY;
+
+        cred->mu_valid = UCRED_INVALID;
+
+        if (!mdt->no_gss_support) {
+                /* get identity info of this user */
+                ident = mdt_identity_get(mdt->mdt_identity_cache,
+                                         body->fsuid);
+                if (ident == NULL) {
+                        CERROR("Deny access without identity: uid %d\n",
+                               body->fsuid);
+                        RETURN(-EACCES);
+                }
+        }
+
+        /* ids exactly as the client sent them */
+        cred->mu_o_uid   = body->uid;
+        cred->mu_o_gid   = body->gid;
+        cred->mu_o_fsuid = body->fsuid;
+        cred->mu_o_fsgid = body->fsgid;
+
+        /* effective ids: identical on this path */
+        cred->mu_uid   = body->uid;
+        cred->mu_gid   = body->gid;
+        cred->mu_fsuid = body->fsuid;
+        cred->mu_fsgid = body->fsgid;
+        cred->mu_cap   = body->capability;
+
+        /* the group list is borrowed from the identity, not copied */
+        cred->mu_identity = ident;
+        cred->mu_ginfo = ident != NULL ? ident->mi_ginfo : NULL;
+
+        cred->mu_valid = UCRED_OLD;
+
+        RETURN(0);
+}
+
+/*
+ * Initialise info->mti_uc for a request whose ids come from @body.
+ *
+ * Returns 0 at once if the ucred is already UCRED_OLD or UCRED_NEW.
+ * Otherwise any previous state is released with mdt_exit_ucred() and the
+ * ucred is rebuilt from the request's user descriptor when there is one, or
+ * from @body when there is none.
+ */
+int mdt_init_ucred(struct mdt_thread_info *info, struct mdt_body *body)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        struct md_ucred       *cred = &info->mti_uc;
+
+        if ((cred->mu_valid == UCRED_NEW) || (cred->mu_valid == UCRED_OLD))
+                return 0;
+
+        mdt_exit_ucred(info);
+
+        /* !rq_user_desc means null security */
+        if (!req->rq_user_desc)
+                return old_init_ucred(info, body);
+
+        return new_init_ucred(info);
+}
+
+/*
+ * Build info->mti_uc for a reint request that has no user descriptor.
+ *
+ * The mu_fsuid and mu_fsgid already stored in the ucred are the only input;
+ * they are copied into mu_uid/mu_gid and into the original ids (mu_o_*).
+ * Unless the server runs without GSS support, the identity for mu_fsuid is
+ * fetched and -EACCES is returned if there is none.  mu_ginfo points at the
+ * identity's own group list (or is NULL without an identity).  Returns 0
+ * with mu_valid == UCRED_OLD on success.
+ */
+static int old_init_ucred_reint(struct mdt_thread_info *info)
+{
+        struct md_ucred     *cred = &info->mti_uc;
+        struct mdt_device   *mdt = info->mti_mdt;
+        struct mdt_identity *ident = NULL;
+
+        ENTRY;
+
+        cred->mu_valid = UCRED_INVALID;
+
+        if (!mdt->no_gss_support) {
+                /* get identity info of this user */
+                ident = mdt_identity_get(mdt->mdt_identity_cache,
+                                         cred->mu_fsuid);
+                if (ident == NULL) {
+                        CERROR("Deny access without identity: uid %d\n",
+                               cred->mu_fsuid);
+                        RETURN(-EACCES);
+                }
+        }
+
+        /* every uid field follows mu_fsuid */
+        cred->mu_uid     = cred->mu_fsuid;
+        cred->mu_o_fsuid = cred->mu_uid;
+        cred->mu_o_uid   = cred->mu_o_fsuid;
+
+        /* every gid field follows mu_fsgid */
+        cred->mu_gid     = cred->mu_fsgid;
+        cred->mu_o_fsgid = cred->mu_gid;
+        cred->mu_o_gid   = cred->mu_o_fsgid;
+
+        /* the group list is borrowed from the identity, not copied */
+        cred->mu_identity = ident;
+        cred->mu_ginfo = ident != NULL ? ident->mi_ginfo : NULL;
+
+        cred->mu_valid = UCRED_OLD;
+
+        RETURN(0);
+}
+
+/*
+ * Initialise info->mti_uc for a reint request.
+ *
+ * Returns 0 at once if the ucred is already UCRED_OLD or UCRED_NEW.
+ * Otherwise any previous state is released with mdt_exit_ucred() and the
+ * ucred is rebuilt from the request's user descriptor when there is one, or
+ * from the ids already stored in the ucred when there is none.
+ */
+int mdt_init_ucred_reint(struct mdt_thread_info *info)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        struct md_ucred       *cred = &info->mti_uc;
+
+        if ((cred->mu_valid == UCRED_NEW) || (cred->mu_valid == UCRED_OLD))
+                return 0;
+
+        mdt_exit_ucred(info);
+
+        /* !rq_user_desc means null security */
+        if (!req->rq_user_desc)
+                return old_init_ucred_reint(info);
+
+        return new_init_ucred(info);
+}
+
 /* copied from lov/lov_ea.c, just for debugging, will be removed later */
 void mdt_dump_lmm(int level, const struct lov_mds_md *lmm)
 {
@@ -99,6 +484,7 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
 
-        if (ma->ma_valid & MA_INODE)
+        if (ma->ma_valid & MA_INODE) {
                 mdt_pack_attr2body(repbody, la, mdt_object_fid(mo));
+                /* Reverse-map ids only when attributes were packed into the
+                 * body; without the braces this call ran unconditionally
+                 * despite being indented under the if. */
+                mdt_body_reverse_idmap(info, repbody);
+        }
 
         if (ma->ma_valid & MA_LOV) {
                 __u32 mode;
@@ -168,6 +554,7 @@ static __u64 mdt_attr_valid_xlate(__u64 in, struct mdt_reint_record *rr,
 
 static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
 {
+        struct md_ucred         *uc = &info->mti_uc;
         struct md_attr          *ma = &info->mti_attr;
         struct lu_attr          *la = &ma->ma_attr;
         struct req_capsule      *pill = &info->mti_pill;
@@ -179,6 +566,12 @@ static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
         if (rec == NULL)
                 RETURN(-EFAULT);
 
+        uc->mu_fsuid = rec->sa_fsuid;
+        uc->mu_fsgid = rec->sa_fsgid;
+        uc->mu_uid   = rec->sa_uid;
+        uc->mu_gid   = rec->sa_gid;
+        uc->mu_cap   = rec->sa_cap;
         rr->rr_fid1 = &rec->sa_fid;
         la->la_valid = mdt_attr_valid_xlate(rec->sa_valid, rr, ma);
         la->la_mode  = rec->sa_mode;
@@ -255,6 +648,7 @@ int mdt_close_unpack(struct mdt_thread_info *info)
 
 static int mdt_create_unpack(struct mdt_thread_info *info)
 {
+        struct md_ucred         *uc = &info->mti_uc;
         struct mdt_rec_create   *rec;
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
@@ -264,6 +658,10 @@ static int mdt_create_unpack(struct mdt_thread_info *info)
 
         rec = req_capsule_client_get(pill, &RMF_REC_CREATE);
         if (rec != NULL) {
+                uc->mu_fsuid = rec->cr_fsuid;
+                uc->mu_fsgid = rec->cr_fsgid;
+                uc->mu_cap   = rec->cr_cap;
                 rr->rr_fid1 = &rec->cr_fid1;
                 rr->rr_fid2 = &rec->cr_fid2;
                 attr->la_mode = rec->cr_mode;
@@ -315,6 +713,7 @@ static int mdt_create_unpack(struct mdt_thread_info *info)
 
 static int mdt_link_unpack(struct mdt_thread_info *info)
 {
+        struct md_ucred         *uc = &info->mti_uc;
         struct mdt_rec_link     *rec;
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
@@ -324,6 +723,10 @@ static int mdt_link_unpack(struct mdt_thread_info *info)
 
         rec = req_capsule_client_get(pill, &RMF_REC_LINK);
         if (rec != NULL) {
+                uc->mu_fsuid = rec->lk_fsuid;
+                uc->mu_fsgid = rec->lk_fsgid;
+                uc->mu_cap   = rec->lk_cap;
                 attr->la_uid = rec->lk_fsuid;
                 attr->la_gid = rec->lk_fsgid;
                 rr->rr_fid1 = &rec->lk_fid1;
@@ -341,6 +744,7 @@ static int mdt_link_unpack(struct mdt_thread_info *info)
 
 static int mdt_unlink_unpack(struct mdt_thread_info *info)
 {
+        struct md_ucred         *uc = &info->mti_uc;
         struct mdt_rec_unlink   *rec;
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
@@ -350,6 +754,10 @@ static int mdt_unlink_unpack(struct mdt_thread_info *info)
 
         rec = req_capsule_client_get(pill, &RMF_REC_UNLINK);
         if (rec != NULL) {
+                uc->mu_fsuid = rec->ul_fsuid;
+                uc->mu_fsgid = rec->ul_fsgid;
+                uc->mu_cap   = rec->ul_cap;
                 attr->la_uid = rec->ul_fsuid;
                 attr->la_gid = rec->ul_fsgid;
                 rr->rr_fid1 = &rec->ul_fid1;
@@ -370,6 +778,7 @@ static int mdt_unlink_unpack(struct mdt_thread_info *info)
 
 static int mdt_rename_unpack(struct mdt_thread_info *info)
 {
+        struct md_ucred         *uc = &info->mti_uc;
         struct mdt_rec_rename   *rec;
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
@@ -379,6 +788,10 @@ static int mdt_rename_unpack(struct mdt_thread_info *info)
 
         rec = req_capsule_client_get(pill, &RMF_REC_RENAME);
         if (rec != NULL) {
+                uc->mu_fsuid = rec->rn_fsuid;
+                uc->mu_fsgid = rec->rn_fsgid;
+                uc->mu_cap   = rec->rn_cap;
                 attr->la_uid = rec->rn_fsuid;
                 attr->la_gid = rec->rn_fsgid;
                 rr->rr_fid1 = &rec->rn_fid1;
@@ -400,6 +813,7 @@ static int mdt_rename_unpack(struct mdt_thread_info *info)
 
 static int mdt_open_unpack(struct mdt_thread_info *info)
 {
+        struct md_ucred         *uc = &info->mti_uc;
         struct mdt_rec_create   *rec;
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct req_capsule      *pill = &info->mti_pill;
@@ -409,6 +823,10 @@ static int mdt_open_unpack(struct mdt_thread_info *info)
 
         rec = req_capsule_client_get(pill, &RMF_REC_CREATE);
         if (rec != NULL) {
+                uc->mu_fsuid = rec->cr_fsuid;
+                uc->mu_fsgid = rec->cr_fsgid;
+                uc->mu_cap   = rec->cr_cap;
                 rr->rr_fid1   = &rec->cr_fid1;
                 rr->rr_fid2   = &rec->cr_fid2;
                 attr->la_mode = rec->cr_mode;
index 1fa7628..41ca490 100644 (file)
@@ -85,8 +85,9 @@ static int mdt_create_data(struct mdt_thread_info *info,
                 RETURN(0);
 
         ma->ma_need = MA_INODE | MA_LOV;
-        rc = mdo_create_data(info->mti_ctxt, p ? mdt_object_child(p) : NULL,
-                             mdt_object_child(o), spec, ma);
+        rc = mdo_create_data(info->mti_ctxt,
+                             p ? mdt_object_child(p) : NULL,
+                             mdt_object_child(o), spec, ma, &info->mti_uc);
         RETURN(rc);
 }
 
@@ -332,6 +333,7 @@ static int mdt_mfd_open(struct mdt_thread_info *info,
         isdir = S_ISDIR(la->la_mode);
         islnk = S_ISLNK(la->la_mode);
         mdt_pack_attr2body(repbody, la, mdt_object_fid(o));
+        mdt_body_reverse_idmap(info, repbody);
 
         /* if we are following a symlink, don't open; and
          * do not return open handle for special nodes as client required
@@ -339,7 +341,7 @@ static int mdt_mfd_open(struct mdt_thread_info *info,
         if (islnk || (!isreg && !isdir &&
             (req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH))) {
                 lustre_msg_set_transno(req->rq_repmsg, 0);
-                RETURN(0);
+                GOTO(out, rc = 0);
         }
 
         mdt_set_disposition(info, rep, DISP_OPEN_OPEN);
@@ -402,7 +404,9 @@ static int mdt_mfd_open(struct mdt_thread_info *info,
         if (rc)
                 RETURN(rc);
 
-        rc = mo_open(info->mti_ctxt, mdt_object_child(o), flags);
+        rc = mo_open(info->mti_ctxt, mdt_object_child(o),
+                     created ? flags | MDS_OPEN_CREATED : flags,
+                     &info->mti_uc);
         if (rc)
                 RETURN(rc);
         
@@ -427,6 +431,24 @@ static int mdt_mfd_open(struct mdt_thread_info *info,
                 mdt_open_transno(info);
         } else
                 rc = -ENOMEM;
+
+out:
+        if (!rc) {
+                struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
+
+                if (med->med_rmtclient) {
+                        void *buf = req_capsule_server_get(&info->mti_pill,
+                                                           &RMF_ACL);
+
+                        rc = mdt_pack_remote_perm(info, o, buf);
+                        if (!rc) {
+                                repbody->valid |= OBD_MD_FLRMTPERM;
+                                repbody->aclsize =
+                                                sizeof(struct mdt_remote_perm);
+                        }
+                }
+        }
+
         RETURN(rc);
 }
 
@@ -462,7 +484,7 @@ void mdt_reconstruct_open(struct mdt_thread_info *info,
                 mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
                 GOTO(out, 0);
         }
-
         /*
          * XXX: is this correct to set here transno and status to zero? This is
          * especially suspicious after calling mdt_req_from_mcd() above, which
@@ -470,7 +492,7 @@ void mdt_reconstruct_open(struct mdt_thread_info *info,
          */
         lustre_msg_set_transno(req->rq_repmsg, 0);
         lustre_msg_set_status(req->rq_repmsg, 0);
-        
+
         ldlm_rep->lock_policy_res1 = 0;
         ldlm_rep->lock_policy_res2 = 0;
         result = mdt_reint_open(info, lhc);
@@ -502,7 +524,7 @@ static int mdt_open_by_fid(struct mdt_thread_info* info,
                 mdt_set_disposition(info, rep, DISP_IT_EXECD);
                 mdt_set_disposition(info, rep, DISP_LOOKUP_EXECD);
                 mdt_set_disposition(info, rep, DISP_LOOKUP_POS);
-                rc = mo_attr_get(ctxt, mdt_object_child(o), ma);
+                rc = mo_attr_get(ctxt, mdt_object_child(o), ma, &info->mti_uc);
                 if (rc == 0)
                         rc = mdt_mfd_open(info, NULL, o, flags, 0, rep);
         } else if (rc == 0) {
@@ -541,7 +563,8 @@ static int mdt_cross_open(struct mdt_thread_info* info,
 
         rc = lu_object_exists(&o->mot_obj.mo_lu);
         if (rc > 0) {
-                rc = mo_attr_get(info->mti_ctxt, mdt_object_child(o), ma);
+                rc = mo_attr_get(info->mti_ctxt, mdt_object_child(o), ma,
+                                 &info->mti_uc);
                 if (rc == 0)
                         rc = mdt_mfd_open(info, NULL, o, flags, 0, rep);
         } else if (rc == 0) {
@@ -648,7 +671,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
                 GOTO(out, result = PTR_ERR(parent));
 
         result = mdo_lookup(info->mti_ctxt, mdt_object_child(parent),
-                            rr->rr_name, child_fid);
+                            rr->rr_name, child_fid, &info->mti_uc);
         if (result != 0 && result != -ENOENT && result != -ESTALE)
                 GOTO(out_parent, result);
 
@@ -686,7 +709,8 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
                                     rr->rr_name,
                                     mdt_object_child(child),
                                     &info->mti_spec,
-                                    &info->mti_attr);
+                                    &info->mti_attr,
+                                    &info->mti_uc);
                 if (result == -ERESTART) {
                         mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_CREATE);        
                         GOTO(out_child, result);
@@ -698,8 +722,8 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
                 created = 1;
         } else {
                 /* We have to get attr & lov ea for this object */
-                result = mo_attr_get(info->mti_ctxt, 
-                                     mdt_object_child(child), ma);
+                result = mo_attr_get(info->mti_ctxt, mdt_object_child(child),
+                                     ma, &info->mti_uc);
                 /*
                  * The object is on remote node, return its FID for remote open.
                  */
@@ -743,7 +767,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
                         GOTO(out_child, result);
                 }
         }
-        
+
         /* Try to open it now. */
         result = mdt_mfd_open(info, parent, child, create_flags, 
                               created, ldlm_rep);
@@ -753,9 +777,13 @@ finish_open:
         if (result != 0 && created) {
                 int rc2;
                 ma->ma_need = 0;
-                rc2 = mdo_unlink(info->mti_ctxt, mdt_object_child(parent),
-                                     mdt_object_child(child), rr->rr_name,
-                                     &info->mti_attr);
+                ma->ma_cookie_size = 0;
+                rc2 = mdo_unlink(info->mti_ctxt,
+                                 mdt_object_child(parent),
+                                 mdt_object_child(child),
+                                 rr->rr_name,
+                                 &info->mti_attr,
+                                 &info->mti_uc);
                 if (rc2 != 0)
                         CERROR("error in cleanup of open");
         }
@@ -765,6 +793,8 @@ out_parent:
         mdt_object_unlock_put(info, parent, lh, result);
 out:
         mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+        if (result)
+                lustre_msg_set_transno(req->rq_repmsg, 0);
         return result;
 }
 
@@ -800,9 +830,9 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd)
         ma->ma_need |= MA_INODE;
                 
         if (!MFD_CLOSED(mode))
-                rc = mo_close(info->mti_ctxt, next, ma);
+                rc = mo_close(info->mti_ctxt, next, ma, &info->mti_uc);
         else if (ret == -EAGAIN)
-                rc = mo_attr_get(info->mti_ctxt, next, ma);
+                rc = mo_attr_get(info->mti_ctxt, next, ma, &info->mti_uc);
 
         /* If the object is unlinked, do not try to re-enable SIZEONMDS */
         if ((ret == -EAGAIN) && (ma->ma_valid & MA_INODE) &&
index c879799..54e7e9a 100644 (file)
@@ -880,7 +880,8 @@ static int mdt_txn_commit_cb(const struct lu_context *ctx,
         return 0;
 }
 
-int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt)
+int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt,
+                 struct obd_device *obd)
 {
         struct lu_fid last_fid;
         struct dt_object *last;
@@ -909,6 +910,11 @@ int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt)
                 CERROR("cannot open %s: rc = %d\n", LAST_RCVD, rc);
         }
 
+        OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
+        obd->obd_lvfs_ctxt.pwdmnt = current->fs->pwdmnt;
+        obd->obd_lvfs_ctxt.pwd = current->fs->pwd;
+        obd->obd_lvfs_ctxt.fs = get_ds();
+
         RETURN (rc);
 }
 
@@ -976,13 +982,14 @@ static void mdt_reconstruct_create(struct mdt_thread_info *mti,
 
         body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY);
         rc = mo_attr_get(mti->mti_ctxt, mdt_object_child(child),
-                         &mti->mti_attr);
+                         &mti->mti_attr, &mti->mti_uc);
         if (rc == -EREMOTE) {
                 /* object was created on remote server */
                 req->rq_status = rc;
                 body->valid |= OBD_MD_MDS;
         }
         mdt_pack_attr2body(body, &mti->mti_attr.ma_attr, mdt_object_fid(child));
+        mdt_body_reverse_idmap(mti, body);
         mdt_object_put(mti->mti_ctxt, child);
 }
 
@@ -1002,8 +1009,10 @@ static void mdt_reconstruct_setattr(struct mdt_thread_info *mti,
         body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY);
         obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1);
         LASSERT(!IS_ERR(obj));
-        mo_attr_get(mti->mti_ctxt, mdt_object_child(obj), &mti->mti_attr);
+        mo_attr_get(mti->mti_ctxt, mdt_object_child(obj),
+                    &mti->mti_attr, &mti->mti_uc);
         mdt_pack_attr2body(body, &mti->mti_attr.ma_attr, mdt_object_fid(obj));
+        mdt_body_reverse_idmap(mti, body);
 
         /* Don't return OST-specific attributes if we didn't just set them */
 /*
index 7838193..f32fca3 100644 (file)
@@ -67,13 +67,14 @@ static int mdt_md_create(struct mdt_thread_info *info)
                                OBD_FAIL_MDS_REINT_CREATE_WRITE);
 
                 rc = mdo_create(info->mti_ctxt, next, rr->rr_name,
-                                mdt_object_child(child), &info->mti_spec,
-                                ma);
+                                mdt_object_child(child),
+                                &info->mti_spec, ma, &info->mti_uc);
                 if (rc == 0) {
                         /* return fid & attr to client. */
                         if (ma->ma_valid & MA_INODE)
-                                mdt_pack_attr2body(repbody, &ma->ma_attr, 
+                                mdt_pack_attr2body(repbody, &ma->ma_attr,
                                                    mdt_object_fid(child));
+                                mdt_body_reverse_idmap(info, repbody);
                 }
                 mdt_object_put(info->mti_ctxt, child);
         } else
@@ -99,13 +100,14 @@ static int mdt_md_mkobj(struct mdt_thread_info *info)
                 struct md_object *next = mdt_object_child(o);
 
                 ma->ma_need = MA_INODE;
-                rc = mo_object_create(info->mti_ctxt, next,
-                                      &info->mti_spec, ma);
+                rc = mo_object_create(info->mti_ctxt, next, &info->mti_spec,
+                                      ma, &info->mti_uc);
                 if (rc == 0) {
                         /* return fid & attr to client. */
                         if (ma->ma_valid & MA_INODE)
                                 mdt_pack_attr2body(repbody, &ma->ma_attr,
                                                    mdt_object_fid(o));
+                                mdt_body_reverse_idmap(info, repbody);
                 }
                 mdt_object_put(info->mti_ctxt, o);
         } else
@@ -162,7 +164,8 @@ int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, int flags)
                        OBD_FAIL_MDS_REINT_SETATTR_WRITE);
 
         /* all attrs are packed into mti_attr in unpack_setattr */
-        rc = mo_attr_set(info->mti_ctxt, mdt_object_child(mo), ma);
+        rc = mo_attr_set(info->mti_ctxt, mdt_object_child(mo), ma,
+                         &info->mti_uc);
         if (rc != 0)
                 GOTO(out, rc);
 
@@ -260,11 +263,12 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
 
         ma->ma_need = MA_INODE;
         next = mdt_object_child(mo);
-        rc = mo_attr_get(info->mti_ctxt, next, ma);
+        rc = mo_attr_get(info->mti_ctxt, next, ma, &info->mti_uc);
         if (rc != 0)
                 GOTO(out, rc);
 
         mdt_pack_attr2body(repbody, &ma->ma_attr, mdt_object_fid(mo));
+        mdt_body_reverse_idmap(info, repbody);
         EXIT;
 out:
         mdt_object_put(info->mti_ctxt, mo);
@@ -348,13 +352,14 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
 
         if (strlen(rr->rr_name) == 0) {
                 /* remote partial operation */
-                rc = mo_ref_del(info->mti_ctxt, mdt_object_child(mp), ma);
+                rc = mo_ref_del(info->mti_ctxt, mdt_object_child(mp), ma,
+                                &info->mti_uc);
                 GOTO(out_unlock_parent, rc);
         }
 
         /* step 2: find & lock the child */
         rc = mdo_lookup(info->mti_ctxt, mdt_object_child(mp),
-                        rr->rr_name, child_fid);
+                        rr->rr_name, child_fid, &info->mti_uc);
         if (rc != 0)
                  GOTO(out_unlock_parent, rc);
 
@@ -377,7 +382,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
          */
         ma->ma_need = MA_INODE;
         rc = mdo_unlink(info->mti_ctxt, mdt_object_child(mp),
-                        mdt_object_child(mc), rr->rr_name, ma);
+                        mdt_object_child(mc), rr->rr_name, ma, &info->mti_uc);
         if (rc)
                 GOTO(out_unlock_child, rc);
 
@@ -425,7 +430,8 @@ static int mdt_reint_link(struct mdt_thread_info *info,
 
         if (strlen(rr->rr_name) == 0) {
                 /* remote partial operation */
-                rc = mo_ref_add(info->mti_ctxt, mdt_object_child(ms));
+                rc = mo_ref_add(info->mti_ctxt, mdt_object_child(ms),
+                                &info->mti_uc);
                 GOTO(out_unlock_source, rc);
         }
         /*step 2: find & lock the target parent dir*/
@@ -442,7 +448,7 @@ static int mdt_reint_link(struct mdt_thread_info *info,
                        OBD_FAIL_MDS_REINT_LINK_WRITE);
 
         rc = mdo_link(info->mti_ctxt, mdt_object_child(mp),
-                      mdt_object_child(ms), rr->rr_name, ma);
+                      mdt_object_child(ms), rr->rr_name, ma, &info->mti_uc);
         GOTO(out_unlock_target, rc);
 
 out_unlock_target:
@@ -482,7 +488,7 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info)
 
         /*step 2: find & lock the target object if exists*/
         rc = mdo_lookup(info->mti_ctxt, mdt_object_child(mtgtdir),
-                        rr->rr_tgt, tgt_fid);
+                        rr->rr_tgt, tgt_fid, &info->mti_uc);
         if (rc != 0 && rc != -ENOENT) {
                 GOTO(out_unlock_tgtdir, rc);
         } else if (rc == 0) {
@@ -494,12 +500,13 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info)
                         GOTO(out_unlock_tgtdir, rc = PTR_ERR(mtgt));
 
                 rc = mdo_rename_tgt(info->mti_ctxt, mdt_object_child(mtgtdir),
-                                    mdt_object_child(mtgt),
-                                    rr->rr_fid2, rr->rr_tgt, ma);
+                                    mdt_object_child(mtgt), rr->rr_fid2,
+                                    rr->rr_tgt, ma, &info->mti_uc);
         } else /* -ENOENT */ {
                 rc = mdo_name_insert(info->mti_ctxt, mdt_object_child(mtgtdir),
                                      rr->rr_tgt, rr->rr_fid2,
-                                     S_ISDIR(ma->ma_attr.la_mode));
+                                     S_ISDIR(ma->ma_attr.la_mode),
+                                     &info->mti_uc);
         }
 
         /* handle last link of tgt object */
@@ -578,8 +585,9 @@ static int mdt_rename_check(struct mdt_thread_info *info, struct lu_fid *fid)
         do {
                 dst = mdt_object_find(info->mti_ctxt, info->mti_mdt, &dst_fid);
                 if (!IS_ERR(dst)) {
-                        rc = mdo_is_subdir(info->mti_ctxt, mdt_object_child(dst),
-                                           fid, &dst_fid);
+                        rc = mdo_is_subdir(info->mti_ctxt,
+                                           mdt_object_child(dst),
+                                           fid, &dst_fid, NULL);
                         mdt_object_put(info->mti_ctxt, dst);
                         if (rc < 0) {
                                 CERROR("Error while doing mdo_is_subdir(), rc %d\n",
@@ -667,7 +675,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
 
         /*step 3: find & lock the old object*/
         rc = mdo_lookup(info->mti_ctxt, mdt_object_child(msrcdir),
-                        rr->rr_name, old_fid);
+                        rr->rr_name, old_fid, &info->mti_uc);
         if (rc != 0)
                 GOTO(out_unlock_target, rc);
 
@@ -684,7 +692,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
         /*step 4: find & lock the new object*/
         /* new target object may not exist now */
         rc = mdo_lookup(info->mti_ctxt, mdt_object_child(mtgtdir),
-                        rr->rr_tgt, new_fid);
+                        rr->rr_tgt, new_fid, &info->mti_uc);
         if (rc == 0) {
                 /* the new_fid should have been filled at this moment*/
                 if (lu_fid_eq(old_fid, new_fid))
@@ -734,7 +742,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
 
         rc = mdo_rename(info->mti_ctxt, mdt_object_child(msrcdir),
                         mdt_object_child(mtgtdir), old_fid, rr->rr_name,
-                        (mnew ? mdt_object_child(mnew) : NULL), rr->rr_tgt, ma);
+                        (mnew ? mdt_object_child(mnew) : NULL),
+                        rr->rr_tgt, ma, &info->mti_uc);
         
         /* handle last link of tgt object */
         if (rc == 0 && mnew)
diff --git a/lustre/mdt/mdt_rmtacl.c b/lustre/mdt/mdt_rmtacl.c
new file mode 100644 (file)
index 0000000..9e9f082
--- /dev/null
@@ -0,0 +1,248 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *   Author: Lai Siyao <lsy@clusterfs.com>
+ *   Author: Fan Yong <fanyong@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/kmod.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/version.h>
+#include <linux/unistd.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <asm/segment.h>
+
+#include <libcfs/kp30.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_lib.h>
+#include <lustre_ucache.h>
+
+#include "mdt_internal.h"
+
+#define MAX_CMD_LEN     256
+
+/*
+ * init_entry hook of the remote ACL upcall cache.
+ *
+ * Stores a handle in the entry and attaches a private copy of the command
+ * string found in the upcall arguments (struct rmtacl_upcall_data, aud_cmd).
+ * If the copy cannot be allocated ra_cmd is not assigned here; the do_upcall
+ * hook tests ra_cmd and returns -ENOMEM in that case.
+ */
+static void mdt_rmtacl_entry_init(struct upcall_cache_entry *entry, void *args)
+{
+        struct rmtacl_upcall_data *data = args;
+        struct mdt_rmtacl *acl = &entry->u.acl;
+        int len = strlen(data->aud_cmd) + 1;
+        char *cmd;
+
+        /* The address of the cache entry doubles as the handle.
+         * NOTE(review): the (__u32) cast keeps only the low 32 bits where
+         * pointers are wider than that - confirm handles stay unique. */
+        acl->ra_handle = (__u32)entry;
+
+        OBD_ALLOC(cmd, len);
+        if (cmd == NULL)
+                return; /* upcall will fail later! */
+
+        /* len already accounts for the terminating NUL */
+        memcpy(cmd, data->aud_cmd, len);
+        acl->ra_cmd = cmd;
+}
+
+/*
+ * free_entry hook of the remote ACL upcall cache.
+ *
+ * Releases the command string and the result string attached to the entry,
+ * when present.  Each one is freed with a size of its string length plus the
+ * terminating NUL.  The cache argument is not used.
+ */
+static void mdt_rmtacl_entry_free(struct upcall_cache *cache,
+                                  struct upcall_cache_entry *entry)
+{
+        struct mdt_rmtacl *acl = &entry->u.acl;
+        int len;
+
+        if (acl->ra_cmd != NULL) {
+                /* size is taken before the free, while the string is intact */
+                len = strlen(acl->ra_cmd) + 1;
+                OBD_FREE(acl->ra_cmd, len);
+        }
+
+        if (acl->ra_buf != NULL) {
+                len = strlen(acl->ra_buf) + 1;
+                OBD_FREE(acl->ra_buf, len);
+        }
+}
+
+/*
+ * upcall_compare hook: match a cache entry against upcall arguments.
+ *
+ * Compares the command stored in the entry with the command in the
+ * struct rmtacl_upcall_data passed as args, looking at no more than
+ * MAX_CMD_LEN characters.  Returns the strncmp() result, i.e. 0 on a match.
+ * The cache and key arguments are not used.
+ */
+static int mdt_rmtacl_upcall_compare(struct upcall_cache *cache,
+                                     struct upcall_cache_entry *entry,
+                                     __u64 key, void *args)
+{
+        struct rmtacl_upcall_data *data = args;
+        const char *cached;
+        const char *wanted;
+
+        LASSERT(entry && data);
+        LASSERT(entry->u.acl.ra_cmd && data->aud_cmd);
+
+        cached = entry->u.acl.ra_cmd;
+        wanted = data->aud_cmd;
+
+        return strncmp(cached, wanted, MAX_CMD_LEN);
+}
+
+/*
+ * downcall_compare hook: match a cache entry against downcall arguments.
+ *
+ * Returns the difference between the handle stored in the entry and the
+ * handle carried by the struct rmtacl_downcall_data passed as args.
+ * The cache and key arguments are not used.
+ */
+static int mdt_rmtacl_downcall_compare(struct upcall_cache *cache,
+                                       struct upcall_cache_entry *entry,
+                                       __u64 key, void *args)
+{
+        struct rmtacl_downcall_data *data = args;
+        struct mdt_rmtacl *acl = &entry->u.acl;
+
+        return acl->ra_handle - data->add_handle;
+}
+
+/*
+ * do_upcall hook: run the user-space helper for a remote ACL request.
+ *
+ * The helper named by cache->uc_upcall is invoked with the arguments
+ *   <upcall> <cache name> <key> <handle> <command>
+ * where key and handle are printed in decimal.
+ *
+ * Returns -ENOMEM when the entry carries no command string, the negative
+ * result of USERMODEHELPER() when launching the helper fails, and 0
+ * otherwise.
+ */
+static int mdt_rmtacl_do_upcall(struct upcall_cache *cache,
+                                struct upcall_cache_entry *entry)
+{
+        struct mdt_rmtacl *acl = &entry->u.acl;
+        char handle[20] = "";
+        /* a 64-bit key needs up to 20 decimal digits plus the NUL; a
+         * 20-byte buffer would make snprintf() drop the last digit */
+        char keystr[24] = "";
+        char *argv[] = {
+                  [0] = cache->uc_upcall,
+                  [1] = cache->uc_name,
+                  [2] = keystr,
+                  [3] = handle,
+                  [4] = acl->ra_cmd,
+                  [5] = NULL
+        };
+        char *envp[] = {
+                  [0] = "HOME=/",
+                  [1] = "PATH=/bin:/usr/bin:/sbin:/usr/sbin",
+                  [2] = NULL
+        };
+        int rc;
+        ENTRY;
+
+        /* no command string was attached to this entry */
+        if (!acl->ra_cmd)
+                RETURN(-ENOMEM);
+
+        snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key);
+        snprintf(handle, sizeof(handle), "%u", acl->ra_handle);
+
+        LASSERTF(strcmp(cache->uc_upcall, "NONE"), "no upcall set!");
+
+        CDEBUG(D_INFO, "%s: remote acl upcall %s %s %s %s %s\n",
+               cache->uc_name, argv[0], argv[1], argv[2], argv[3], argv[4]);
+
+        rc = USERMODEHELPER(argv[0], argv, envp);
+        if (rc < 0) {
+                CERROR("%s: error invoking upcall %s %s %s %s %s: rc %d; "
+                       "check /proc/fs/lustre/mdt/%s/rmtacl_upcall\n",
+                       cache->uc_name, argv[0], argv[1], argv[2], argv[3],
+                       argv[4], rc, cache->uc_name);
+        } else {
+                CDEBUG(D_HA, "%s: invoked upcall %s %s %s %s %s\n",
+                       cache->uc_name, argv[0], argv[1], argv[2], argv[3],
+                       argv[4]);
+                /* any non-negative helper result is reported as success */
+                rc = 0;
+        }
+        RETURN(rc);
+}
+
+/*
+ * parse_downcall hook: attach the helper's answer to the cache entry.
+ *
+ * args is a struct rmtacl_downcall_data; its add_buf string is duplicated
+ * and stored as the entry's ra_buf.  Returns 0 on success or -ENOMEM when
+ * the copy cannot be allocated.  The cache argument is not used.
+ */
+static int mdt_rmtacl_parse_downcall(struct upcall_cache *cache,
+                                     struct upcall_cache_entry *entry,
+                                     void *args)
+{
+        struct rmtacl_downcall_data *data = args;
+        struct mdt_rmtacl *acl = &entry->u.acl;
+        char *buf;
+        int len;
+        ENTRY;
+
+        LASSERT(data);
+
+        /* length includes the terminating NUL, so memcpy() copies it too */
+        len = strlen(data->add_buf) + 1;
+        OBD_ALLOC(buf, len);
+        if (buf == NULL)
+                RETURN(-ENOMEM);
+
+        memcpy(buf, data->add_buf, len);
+        acl->ra_buf = buf;
+
+        CDEBUG(D_OTHER, "parse mdt acl@%p: %s %s\n",
+               acl, acl->ra_cmd, acl->ra_buf);
+
+        RETURN(0);
+}
+
+/*
+ * Operations table that plugs the remote ACL handlers defined in this file
+ * into the generic upcall cache.
+ */
+struct upcall_cache_ops mdt_rmtacl_upcall_cache_ops = {
+        /* entry life cycle */
+        .init_entry             = mdt_rmtacl_entry_init,
+        .free_entry             = mdt_rmtacl_entry_free,
+        /* entry matching */
+        .upcall_compare         = mdt_rmtacl_upcall_compare,
+        .downcall_compare       = mdt_rmtacl_downcall_compare,
+        /* helper invocation and answer handling */
+        .do_upcall              = mdt_rmtacl_do_upcall,
+        .parse_downcall         = mdt_rmtacl_parse_downcall,
+};
+
+/*
+ * Run a remote ACL command through the upcall cache and hand its textual
+ * result back to the caller.
+ *
+ * info    request context; supplies the credentials and the MDT device
+ * key     cache key for the lookup
+ * cmd     command string passed to the upcall
+ * buf     caller buffer receiving the NUL-terminated result
+ * buflen  size of buf in bytes
+ *
+ * Always returns 0.  Failures are not reported through the return value:
+ * buf is filled with a "server processing error: <rc>" message instead
+ * (truncated to buflen).  A result that does not fit into buf together
+ * with its terminating NUL is treated as -EFAULT.
+ */
+int mdt_rmtacl_upcall(struct mdt_thread_info *info, unsigned long key,
+                      char *cmd, char *buf, int buflen)
+{
+        struct ptlrpc_request           *req = mdt_info_req(info);
+        struct obd_device               *obd = req->rq_export->exp_obd;
+        struct mdt_device               *mdt = info->mti_mdt;
+        struct lvfs_ucred               uc;
+        struct lvfs_run_ctxt            saved;
+        struct rmtacl_upcall_data       data;
+        struct upcall_cache_entry       *entry;
+        int                             len;
+        int                             rc = 0;
+        ENTRY;
+
+        data.aud_cmd = cmd;
+
+        /* credentials for the lookup are taken from the request's ucred */
+        uc.luc_uid      = info->mti_uc.mu_uid;
+        uc.luc_gid      = info->mti_uc.mu_gid;
+        uc.luc_fsuid    = info->mti_uc.mu_fsuid;
+        uc.luc_fsgid    = info->mti_uc.mu_fsgid;
+        uc.luc_cap      = info->mti_uc.mu_cap;
+        uc.luc_umask    = info->mti_uc.mu_umask;
+        uc.luc_ginfo    = info->mti_uc.mu_ginfo;
+        uc.luc_identity = info->mti_uc.mu_identity;
+
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc);
+        entry = upcall_cache_get_entry(mdt->mdt_rmtacl_cache, (__u64)key,
+                                       &data);
+        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc);
+
+        if (IS_ERR(entry))
+                GOTO(out, rc = PTR_ERR(entry));
+
+        /* signed comparison: a negative buflen is rejected as well */
+        len = strlen(entry->u.acl.ra_buf);
+        if (buflen <= len)
+                GOTO(out_put, rc = -EFAULT);
+
+        /* copy the terminating NUL too, callers run strlen() on buf */
+        memcpy(buf, entry->u.acl.ra_buf, len + 1);
+
+out_put:
+        /* remote acl operations expire at once; the entry reference is
+         * dropped on the error path as well */
+        UC_CACHE_SET_EXPIRED(entry);
+        upcall_cache_put_entry(mdt->mdt_rmtacl_cache, entry);
+
+out:
+        if (rc)
+                snprintf(buf, buflen, "server processing error: %d\n", rc);
+        RETURN(0);
+}
index 694042d..190ea74 100644 (file)
@@ -68,13 +68,16 @@ static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
                             sizeof(user_string) - 1) == 0)
                         return -EOPNOTSUPP;
 
-                rc = mo_xattr_get(info->mti_ctxt,
-                                  mdt_object_child(info->mti_object),
-                                  NULL, 0, xattr_name);
+                if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL))
+                        rc = RMTACL_SIZE_MAX;
+                else
+                        rc = mo_xattr_get(info->mti_ctxt,
+                                          mdt_object_child(info->mti_object),
+                                          NULL, 0, xattr_name, &info->mti_uc);
         } else if ((valid & OBD_MD_FLXATTRLS) == OBD_MD_FLXATTRLS) {
                 rc = mo_xattr_list(info->mti_ctxt,
                                    mdt_object_child(info->mti_object),
-                                   NULL, 0);
+                                   NULL, 0, &info->mti_uc);
         } else {
                 CERROR("valid bits: "LPX64"\n", info->mti_body->valid);
                 return -EINVAL;
@@ -96,9 +99,33 @@ static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
         return rc = !rc1? rc1 : rc;
 }
 
+/*
+ * Serve a getxattr request for the remote ACL attribute by running the
+ * client-supplied getfacl command through mdt_rmtacl_upcall().
+ *
+ * info    request context
+ * fid     fid of the object; its object id is used as the upcall key
+ * offset  index of the request buffer holding the command string
+ * buf     reply buffer for the command output
+ * buflen  size of buf, must be exactly RMTACL_SIZE_MAX
+ *
+ * Returns -EINVAL for a missing or wrongly sized buffer, -EFAULT when the
+ * command string is absent, a non-zero upcall result unchanged, and
+ * otherwise the number of bytes placed in buf including the terminating
+ * NUL.  The reply buffer is shrunk to that size in either of the last two
+ * cases.
+ */
+static int do_remote_getfacl(struct mdt_thread_info *info,
+                             struct lu_fid *fid, int offset,
+                             void *buf, int buflen)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        char *cmd;
+        int len;
+        int rc;
+        ENTRY;
+
+        if (buf == NULL || buflen != RMTACL_SIZE_MAX)
+                RETURN(-EINVAL);
+
+        cmd = lustre_msg_string(req->rq_reqmsg, offset, 0);
+        if (cmd == NULL) {
+                CERROR("missing getfacl command!\n");
+                RETURN(-EFAULT);
+        }
+
+        rc = mdt_rmtacl_upcall(info, fid_oid(fid), cmd, buf, buflen);
+
+        /* trim the reply buffer down to the text actually produced */
+        len = strlen(buf) + 1;
+        lustre_shrink_reply(req, REPLY_REC_OFF + 1, len, 0);
+
+        if (rc)
+                RETURN(rc);
+        RETURN(len);
+}
 
 int mdt_getxattr(struct mdt_thread_info *info)
 {
+        struct mdt_body *body = (struct mdt_body *)info->mti_body;
+        struct  mdt_body *reqbody;
         int     rc;
         struct  md_object *next;
         char   *buf;
@@ -113,11 +140,19 @@ int mdt_getxattr(struct mdt_thread_info *info)
         CDEBUG(D_INODE, "getxattr "DFID"\n",
                         PFID(&info->mti_body->fid1));
 
+        reqbody = req_capsule_client_get(&info->mti_pill, &RMF_MDT_BODY);
+        if (reqbody == NULL)
+                RETURN(-EFAULT);
+
+        rc = mdt_init_ucred(info, reqbody);
+        if (rc)
+                RETURN(rc);
+
         next = mdt_object_child(info->mti_object);
 
         rc = mdt_getxattr_pack_reply(info);
         if (rc < 0)
-                RETURN(rc);
+                GOTO(out, rc);
 
         rep_body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
         /*No EA, just go back*/
@@ -132,8 +167,13 @@ int mdt_getxattr(struct mdt_thread_info *info)
                                                           &RMF_NAME);
                 CDEBUG(D_INODE, "getxattr %s\n", xattr_name);
 
-                rc = mo_xattr_get(info->mti_ctxt, next,
-                                  buf, buflen, xattr_name);
+                if (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL)) {
+                        rc = do_remote_getfacl(info, &body->fid1,
+                                               REQ_REC_OFF + 2, buf, buflen);
+                } else {
+                        rc = mo_xattr_get(info->mti_ctxt, next, buf, buflen,
+                                          xattr_name, &info->mti_uc);
+                }
 
                 if (rc < 0 && rc != -ENODATA && rc != -EOPNOTSUPP &&
                     rc != -ERANGE)
@@ -141,7 +181,8 @@ int mdt_getxattr(struct mdt_thread_info *info)
         } else if (info->mti_body->valid & OBD_MD_FLXATTRLS) {
                 CDEBUG(D_INODE, "listxattr\n");
 
-                rc = mo_xattr_list(info->mti_ctxt, next, buf, buflen);
+                rc = mo_xattr_list(info->mti_ctxt, next, buf, buflen,
+                                   &info->mti_uc);
                 if (rc < 0)
                         CDEBUG(D_OTHER, "listxattr failed: %d\n", rc);
         } else
@@ -152,20 +193,48 @@ no_xattr:
                 rep_body->eadatasize = rc;
                 rc = 0;
         }
-
+out:
+        mdt_exit_ucred(info);
         RETURN(rc);
 }
 
+/*
+ * Handle a remote "setfacl" request.
+ *
+ * The command string comes from request buffer \a offset; the text produced
+ * by mdt_rmtacl_upcall() is written into reply buffer REPLY_REC_OFF, which
+ * must be exactly RMTACL_SIZE_MAX bytes, and the reply is then trimmed to
+ * the length of that text.
+ *
+ * Returns -EFAULT if the command string is missing, -EINVAL if the reply
+ * buffer is absent or wrongly sized, otherwise the upcall's rc.
+ */
+static int do_remote_setfacl(struct mdt_thread_info *info, struct lu_fid *fid,
+                             int offset)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        char *command;
+        char *reply;
+        int reply_len;
+        int rc;
+        ENTRY;
+
+        command = lustre_msg_string(req->rq_reqmsg, offset, 0);
+        if (command == NULL) {
+                CERROR("missing setfacl command!\n");
+                RETURN(-EFAULT);
+        }
+
+        reply_len = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF);
+        reply = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, reply_len);
+        if (reply == NULL)
+                RETURN(-EINVAL);
+        if (reply_len != RMTACL_SIZE_MAX)
+                RETURN(-EINVAL);
+
+        rc = mdt_rmtacl_upcall(info, fid_oid(fid), command, reply, reply_len);
+        if (rc != 0)
+                CERROR("remote acl upcall failed: %d\n", rc);
+
+        /* the reply is shrunk even when the upcall reported an error */
+        lustre_shrink_reply(req, REPLY_REC_OFF, strlen(reply) + 1, 0);
+        RETURN(rc);
+}
 
 int mdt_setxattr(struct mdt_thread_info *info)
 {
-        struct ptlrpc_request  *req = mdt_info_req(info);
+        struct ptlrpc_request *req = mdt_info_req(info);
+        struct mdt_body *reqbody;
         const char              user_string[] = "user.";
         const char              trust_string[] = "trusted.";
         struct mdt_lock_handle *lh;
         struct req_capsule       *pill = &info->mti_pill;
         struct mdt_object        *obj  = info->mti_object;
-        const struct mdt_body    *body = info->mti_body;
+        struct mdt_body *body = (struct mdt_body *)info->mti_body;
         const struct lu_context  *ctx  = info->mti_ctxt;
         struct md_object       *child  = mdt_object_child(obj);
         __u64                   valid  = body->valid;
@@ -180,6 +249,14 @@ int mdt_setxattr(struct mdt_thread_info *info)
         if (MDT_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
                 RETURN(-ENOMEM);
 
+        reqbody = req_capsule_client_get(pill, &RMF_MDT_BODY);
+        if (reqbody == NULL)
+                RETURN(-EFAULT);
+
+        rc = mdt_init_ucred(info, reqbody);
+        if (rc)
+                RETURN(rc);
+
         /* various sanity check for xattr name */
         xattr_name = req_capsule_client_get(pill, &RMF_NAME);
         if (!xattr_name)
@@ -188,6 +265,12 @@ int mdt_setxattr(struct mdt_thread_info *info)
         CDEBUG(D_INODE, "%s xattr %s\n",
                   body->valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name);
 
+        if (((valid & OBD_MD_FLXATTR) == OBD_MD_FLXATTR) &&
+            (!strcmp(xattr_name, XATTR_NAME_LUSTRE_ACL))) {
+                rc = do_remote_setfacl(info, &body->fid1, REQ_REC_OFF + 2);
+                GOTO(out, rc);
+        }
+
         if (strncmp(xattr_name, trust_string, sizeof(trust_string) - 1) == 0) {
                 if (strcmp(xattr_name + 8, XATTR_NAME_LOV) == 0)
                         GOTO(out, rc = -EACCES);
@@ -225,14 +308,15 @@ int mdt_setxattr(struct mdt_thread_info *info)
 
                         if (body->flags & XATTR_CREATE)
                                 flags |= LU_XATTR_CREATE;
+
                         mdt_fail_write(ctx, info->mti_mdt->mdt_bottom,
                                        OBD_FAIL_MDS_SETXATTR_WRITE);
 
-                        rc = mo_xattr_set(ctx, child, xattr,
-                                          xattr_len, xattr_name, flags);
+                        rc = mo_xattr_set(ctx, child, xattr, xattr_len,
+                                          xattr_name, flags, &info->mti_uc);
                 }
         } else if ((valid & OBD_MD_FLXATTRRM) == OBD_MD_FLXATTRRM) {
-                rc = mo_xattr_del(ctx, child, xattr_name);
+                rc = mo_xattr_del(ctx, child, xattr_name, &info->mti_uc);
         } else {
                 CERROR("valid bits: "LPX64"\n", body->valid);
                 rc = -EINVAL;
@@ -241,5 +325,6 @@ int mdt_setxattr(struct mdt_thread_info *info)
 out_unlock:
         mdt_object_unlock(info, obj, lh, rc);
 out:
+        mdt_exit_ucred(info);
         return rc;
 }
index fe20569..d87da4c 100644 (file)
@@ -1892,6 +1892,28 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb,
                         GOTO(end_while, rc);
                 }
 
+                if (!class_match_param(ptr, PARAM_ROOTSQUASH, NULL)) {
+                        /* Change mds root_squash params */
+                        lustre_cfg_bufs_reset(&bufs, mti->mti_svname);
+                        lustre_cfg_bufs_set(&bufs, 1, ptr, strlen(ptr));
+                        lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+                        rc = mgs_write_log_direct(obd, fsdb, mti->mti_svname,
+                                                  mti->mti_svname, lcfg);
+                        lustre_cfg_free(lcfg);
+                        GOTO(end_while, rc);
+                }
+
+                if (!class_match_param(ptr, PARAM_GSS_SUPPORT, NULL)) {
+                        /* Change mdt gss_support params */
+                        lustre_cfg_bufs_reset(&bufs, mti->mti_svname);
+                        lustre_cfg_bufs_set(&bufs, 1, ptr, strlen(ptr));
+                        lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+                        rc = mgs_write_log_direct(obd, fsdb, mti->mti_svname,
+                                                  mti->mti_svname, lcfg);
+                        lustre_cfg_free(lcfg);
+                        GOTO(end_while, rc);
+                }
+
                 LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
 
 end_while:
index 3077228..f079d15 100644 (file)
@@ -384,8 +384,8 @@ static const char *obd_connect_names[] = {
         "join_file",
         "getattr_by_fid",
         "no_oh_for_devices",
-        "local_1.8_client",
-        "remote_1.8_client",
+        "local_client",
+        "remote_client",
         "max_byte_per_rpc",
         "64bit_qdata",
         "fid_capability",
@@ -846,6 +846,7 @@ int lprocfs_alloc_md_stats(struct obd_device *obd,
         LPROCFS_MD_OP_INIT(num_private_stats, stats, set_open_replay_data);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, clear_open_replay_data);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, set_lock_data);
+        LPROCFS_MD_OP_INIT(num_private_stats, stats, get_remote_perm);
 
         for (i = num_private_stats; i < num_stats; i++) {
                 if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) {
index 4f24d10..b0ce61d 100644 (file)
@@ -1207,6 +1207,9 @@ struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
 
         /* Default umount style */
         lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
+
+        lsi->lsi_lmd->lmd_nllu = NOBODY_UID;
+        lsi->lsi_lmd->lmd_nllg = NOBODY_GID;
         RETURN(lsi);
 }
 
@@ -1892,6 +1895,12 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                         if (rc)
                                 goto invalid;
                         clear++;
+                } else if (strncmp(s1, "nllu=", 5) == 0) {
+                        lmd->lmd_nllu = simple_strtoul(s1 + 5, NULL, 10);
+                        clear++;
+                } else if (strncmp(s1, "nllg=", 5) == 0) {
+                        lmd->lmd_nllg = simple_strtoul(s1 + 5, NULL, 10);
+                        clear++;
                 } else if (strncmp(s1, "sec", 3) == 0) {
                         rc = lmd_parse_sec_opts(lmd, s1);
                         if (rc)
index f5f7018..4cbf891 100644 (file)
@@ -1903,7 +1903,7 @@ int gss_svc_accept(struct ptlrpc_request *req)
 
                 req->rq_auth_gss = 1;
                 req->rq_auth_remote = grctx->src_ctx->gsc_remote;
-                req->rq_auth_usr_mds = grctx->src_ctx->gsc_usr_mds;
+                req->rq_auth_usr_mdt = grctx->src_ctx->gsc_usr_mds;
                 req->rq_auth_usr_root = grctx->src_ctx->gsc_usr_root;
                 req->rq_auth_uid = grctx->src_ctx->gsc_uid;
                 req->rq_auth_mapped_uid = grctx->src_ctx->gsc_mapped_uid;
index 5869fcf..e17610a 100644 (file)
@@ -1713,6 +1713,24 @@ void lustre_swab_obd_quotactl (struct obd_quotactl *q)
         lustre_swab_obd_dqblk (&q->qc_dqblk);
 }
 
+/*
+ * Byte-swap, in place, the rp_uid, rp_gid, rp_fsuid, rp_fsgid and
+ * rp_access_perm members of a struct mds_remote_perm.
+ */
+void lustre_swab_mds_remote_perm (struct mds_remote_perm *p)
+{
+        __swab32s (&p->rp_uid);
+        __swab32s (&p->rp_gid);
+        __swab32s (&p->rp_fsuid);
+        __swab32s (&p->rp_fsgid);
+        __swab32s (&p->rp_access_perm);
+}
+
+/*
+ * Byte-swap, in place, the rp_uid, rp_gid, rp_fsuid, rp_fsgid and
+ * rp_access_perm members of a struct mdt_remote_perm.
+ */
+void lustre_swab_mdt_remote_perm (struct mdt_remote_perm *p)
+{
+        __swab32s (&p->rp_uid);
+        __swab32s (&p->rp_gid);
+        __swab32s (&p->rp_fsuid);
+        __swab32s (&p->rp_fsgid);
+        __swab32s (&p->rp_access_perm);
+}
+
 void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa)
 {
         __swab32s (&sa->sa_opcode);
index 882bdae..018bdfb 100644 (file)
@@ -219,6 +219,8 @@ EXPORT_SYMBOL(lustre_swab_mds_body);
 EXPORT_SYMBOL(lustre_swab_mdt_body);
 EXPORT_SYMBOL(lustre_swab_mdt_epoch);
 EXPORT_SYMBOL(lustre_swab_obd_quotactl);
+EXPORT_SYMBOL(lustre_swab_mds_remote_perm);
+EXPORT_SYMBOL(lustre_swab_mdt_remote_perm);
 EXPORT_SYMBOL(lustre_swab_mds_rec_setattr);
 EXPORT_SYMBOL(lustre_swab_mdt_rec_setattr);
 EXPORT_SYMBOL(lustre_swab_mds_rec_create);
index adf0418..59eb56c 100644 (file)
@@ -65,6 +65,8 @@ MOUNT=${MOUNT:-/mnt/${FSNAME}}
 MOUNT1=${MOUNT1:-$MOUNT}
 MOUNT2=${MOUNT2:-${MOUNT}2}
 MOUNTOPT=${MOUNTOPT:-"user_xattr,acl"}
+[ "x$RMTCLIENT" != "x" ] &&
+       MOUNTOPT=$MOUNTOPT",remote_client"
 DIR=${DIR:-$MOUNT}
 DIR1=${DIR:-$MOUNT1}
 DIR2=${DIR2:-$MOUNT2}
index 56ca580..68d9ae1 100644 (file)
@@ -59,6 +59,8 @@ MOUNT=${MOUNT:-/mnt/${FSNAME}}
 MOUNT1=${MOUNT1:-$MOUNT}
 MOUNT2=${MOUNT2:-${MOUNT}2}
 MOUNTOPT=${MOUNTOPT:-"user_xattr,"}
+[ "x$RMTCLIENT" != "x" ] &&
+       MOUNTOPT=$MOUNTOPT",remote_client"
 DIR=${DIR:-$MOUNT}
 DIR1=${DIR:-$MOUNT1}
 DIR2=${DIR2:-$MOUNT2}
index 4db7617..416d251 100644 (file)
 #endif
 
 static const char usage[] =
-"Usage: %s -u user_id [-g grp_id] [-G[gid0,gid1,...]] command\n"
+"Usage: %s -u user_id [-g grp_id] [-v euid] [-j egid] [-G[gid0,gid1,...]] command\n"
 "  -u user_id           switch to UID user_id\n"
 "  -g grp_id            switch to GID grp_id\n"
+"  -v euid              switch euid to UID\n"
+"  -j egid              switch egid to GID\n"
 "  -G[gid0,gid1,...]    set supplementary groups\n";
 
 void Usage_and_abort(const char *name)
@@ -37,6 +39,9 @@ int main(int argc, char **argv)
         int gid_is_set = 0, uid_is_set = 0, num_supp = -1;
         uid_t user_id = 0;
         gid_t grp_id = 0, supp_groups[NGROUPS_MAX] = { 0 };
+        int euid_is_set = 0, egid_is_set = 0;
+        uid_t euid = 0;
+        gid_t egid = 0;
 
         if (argc == 1) {
                 fprintf(stderr, "No parameter count\n");
@@ -44,7 +49,7 @@ int main(int argc, char **argv)
         }
 
         // get UID and GID
-        while ((c = getopt(argc, argv, "+u:g:hG::")) != -1) {
+        while ((c = getopt(argc, argv, "+u:g:v:j:hG::")) != -1) {
                 switch (c) {
                 case 'u':
                         if (!isdigit(optarg[0])) {
@@ -78,6 +83,36 @@ int main(int argc, char **argv)
                         gid_is_set = 1;
                         break;
 
+                case 'v':
+                        if (!isdigit(optarg[0])) {
+                                struct passwd *pw = getpwnam(optarg);
+                                if (pw == NULL) {
+                                        fprintf(stderr, "parameter '%s' bad\n",
+                                                optarg);
+                                        Usage_and_abort(name);
+                                }
+                                euid = pw->pw_uid;
+                        } else {
+                                euid = (uid_t)atoi(optarg);
+                        }
+                        euid_is_set = 1;
+                        break;
+
+                case 'j':
+                        if (!isdigit(optarg[0])) {
+                                struct group *gr = getgrnam(optarg);
+                                if (gr == NULL) {
+                                        fprintf(stderr, "getgrname %s failed\n",
+                                                optarg);
+                                        Usage_and_abort(name);
+                                }
+                                egid = gr->gr_gid;
+                        } else {
+                                egid = (gid_t)atoi(optarg);
+                        }
+                        egid_is_set = 1;
+                        break;
+
                 case 'G':
                         num_supp = 0;
                         if (optarg == NULL || !isdigit(optarg[0]))
@@ -126,10 +161,12 @@ int main(int argc, char **argv)
 #endif
 
         // set GID
-        status = setregid(grp_id, grp_id);
+        if (!egid_is_set)
+                egid = grp_id;
+        status = setregid(grp_id, egid);
         if (status == -1) {
-                 fprintf(stderr, "Cannot change grp_ID to %d, errno=%d (%s)\n",
-                         grp_id, errno, strerror(errno) );
+                 fprintf(stderr, "Cannot change gid to %d/%d, errno=%d (%s)\n",
+                         grp_id, egid, errno, strerror(errno) );
                  exit(-1);
         }
 
@@ -142,16 +179,19 @@ int main(int argc, char **argv)
         }
 
         // set UID
-        status = setreuid(user_id, user_id );
+        if (!euid_is_set)
+                euid = user_id;
+        status = setreuid(user_id, euid);
         if(status == -1) {
-                  fprintf(stderr,"Cannot change user_ID to %d, errno=%d (%s)\n",
-                           user_id, errno, strerror(errno) );
+                  fprintf(stderr,"Cannot change uid to %d/%d, errno=%d (%s)\n",
+                           user_id, euid, errno, strerror(errno) );
                   exit(-1);
         }
 
-        fprintf(stderr, "running as UID %d, GID %d", user_id, grp_id);
+        fprintf(stderr, "running as uid/gid/euid/egid %d/%d/%d/%d, groups:",
+                user_id, grp_id, euid, egid);
         for (i = 0; i < num_supp; i++)
-                fprintf(stderr, ":%d", supp_groups[i]);
+                fprintf(stderr, " %d", supp_groups[i]);
         fprintf(stderr, "\n");
 
         for (i = 0; i < argc - optind; i++)
diff --git a/lustre/tests/sanity-sec.sh b/lustre/tests/sanity-sec.sh
new file mode 100644 (file)
index 0000000..27478d7
--- /dev/null
@@ -0,0 +1,285 @@
+#!/bin/bash
+#
+# Run select tests by setting ONLY, or as arguments to the script.
+# Skip specific tests by setting EXCEPT.
+#
+# TODO: support rootsquash test
+set -e
+
+SRCDIR=`dirname $0`
+export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin     # helpers next to this script and in ../utils are found first
+
+ONLY=${ONLY:-"$*"}     # tests to run: $ONLY from the environment, else the script arguments
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-""}
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+
+[ "$ALWAYS_EXCEPT$EXCEPT" ] && \
+       echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT`"
+
+TMP=${TMP:-/tmp}
+
+LFS=${LFS:-lfs}        # tool names may be overridden from the environment
+LCTL=${LCTL:-lctl}
+RUNAS=${RUNAS:-runas}
+
+log() {
+       echo "$*"
+       $LCTL mark "$*" 2> /dev/null || true
+}
+
+run_one() {
+       BEFORE=`date +%s`
+       log "== test $2= `date +%H:%M:%S` ($BEFORE)"
+       export TESTNAME=test_$1
+       test_$1 || error "exit with rc=$?"
+       unset TESTNAME
+       pass "($((`date +%s` - $BEFORE))s)"
+}
+
+build_test_filter() {
+        for O in $ONLY; do
+            eval ONLY_${O}=true
+        done
+        for E in $EXCEPT $ALWAYS_EXCEPT; do
+            eval EXCEPT_${E}=true
+        done
+}
+
+_basetest() {  # echo the arguments after word splitting under the IFS supplied by the caller
+       echo $*
+}
+
+basetest() {   # strip lowercase letters from a test name by splitting on them, e.g. "24a" -> "24"
+       IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
+}
+
+run_test() {
+         base=`basetest $1`
+         if [ "$ONLY" ]; then
+                 testname=ONLY_$1
+                 if [ ${!testname}x != x ]; then
+                       run_one $1 "$2"
+                       return $?
+                 fi
+                 testname=ONLY_$base
+                 if [ ${!testname}x != x ]; then
+                         run_one $1 "$2"
+                         return $?
+                 fi
+                 echo -n "."
+                 return 0
+       fi
+        testname=EXCEPT_$1
+        if [ ${!testname}x != x ]; then
+                 echo "skipping excluded test $1"
+                 return 0
+        fi
+        testname=EXCEPT_$base
+        if [ ${!testname}x != x ]; then
+                 echo "skipping excluded test $1 (base $base)"
+                 return 0
+        fi
+        run_one $1 "$2"
+       return $?
+}
+
+error() { 
+       sysctl -w lustre.fail_loc=0
+       log "FAIL: $TESTNAME $@"
+       exit 1
+}
+
+pass() {       # print "PASS" followed by the arguments (run_one passes the elapsed time)
+       echo PASS $@
+}
+
+mounted_lustre_filesystems() { # list mount points from /proc/mounts whose type matches "lustre" and whose device contains ":"
+       awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts
+}
+MOUNT="`mounted_lustre_filesystems`"
+if [ -z "$MOUNT" ]; then
+       sh llmount.sh
+       MOUNT="`mounted_lustre_filesystems`"
+       [ -z "$MOUNT" ] && error "NAME=$NAME not mounted"
+       I_MOUNTED=yes
+fi
+
+[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once"
+
+DIR=${DIR:-$MOUNT}
+[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99
+
+if [ -z "`lsmod|grep mds`" ]; then
+       echo "skipping $TESTNAME (remote MDS)"
+       exit 0
+fi
+
+# /proc locations and configuration files used by the tests below.
+LPROC=/proc/fs/lustre
+LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1`
+MDS=$(\ls $LPROC/mds 2> /dev/null | grep -v num_refs | tail -n 1)
+TSTDIR="$MOUNT/remote_user_dir"
+LUSTRE_CONF_DIR=/etc/lustre
+SETXID_CONF=$LUSTRE_CONF_DIR/setxid.conf
+IDENTITY_FLUSH=$LPROC/mds/$MDS/identity_flush
+ROOTSQUASH_UID=$LPROC/mds/$MDS/rootsquash_uid
+ROOTSQUASH_GID=$LPROC/mds/$MDS/rootsquash_gid
+ROOTSQUASH_SKIPS=$LPROC/mds/$MDS/rootsquash_skips
+KRB5_REALM=`cat /etc/krb5.conf |grep default_realm| awk '{ print $3 }'`
+# Login names for uid 500 and 501.  The lookup is keyed on the uid itself so
+# that an account whose *gid* happens to be 500/501 cannot be picked up and
+# the result is always a single name.
+USER1=`getent passwd 500 | cut -d: -f1`
+USER2=`getent passwd 501 | cut -d: -f1`
+
+build_test_filter
+
+setup() {
+       rm -f $SETXID_CONF
+       echo 1 > $IDENTITY_FLUSH
+       $RUNAS -u 500 ls $DIR
+       $RUNAS -u 501 ls $DIR
+}
+setup
+
+# run as different user
+test_0() {     # owner and group permission checks for uid 500 / 501 on a scratch directory
+       rm -rf $DIR/d0
+       mkdir $DIR/d0
+
+       chown $USER1 $DIR/d0 || error
+       $RUNAS -u 500 ls $DIR || error
+       $RUNAS -u 500 touch $DIR/f0 && error    # must fail: success is the error case
+       $RUNAS -u 500 touch $DIR/d0/f1 || error # uid 500 now owns d0
+       $RUNAS -u 501 touch $DIR/d0/f2 && error # must fail for uid 501
+       touch $DIR/d0/f3 || error
+       chown root $DIR/d0
+       chgrp $USER1 $DIR/d0    # from here on access is granted through the group bits (mode 775)
+       chmod 775 $DIR/d0
+       $RUNAS -u 500 touch $DIR/d0/f4 || error
+       $RUNAS -u 501 touch $DIR/d0/f5 && error # must fail for uid 501
+       touch $DIR/d0/f6 || error
+
+       rm -rf $DIR/d0
+}
+run_test 0 "uid permission ============================="
+
+# setuid/gid
+test_1() {     # effective uid/gid switching is refused until enabled for uid 501 in $SETXID_CONF
+       rm -rf $DIR/d1
+       mkdir $DIR/d1
+
+       chown $USER1 $DIR/d1 || error
+       $RUNAS -u 501 -v 500 touch $DIR/d1/f0 && error  # must fail: setuid not yet permitted
+       echo "* 501 setuid" > $SETXID_CONF
+       echo "enable uid 501 setuid"
+       echo 1 > $IDENTITY_FLUSH        # written after each $SETXID_CONF change
+       $RUNAS -u 501 -v 500 touch $DIR/d1/f1 || error
+
+       chown root $DIR/d1
+       chgrp $USER1 $DIR/d1
+       chmod 770 $DIR/d1
+       $RUNAS -u 501 -g 501 touch $DIR/d1/f2 && error  # must fail: uid/gid 501 have no access to d1
+       echo "* 501 setuid,setgid" > $SETXID_CONF
+       echo "enable uid 501 setuid,setgid"
+       echo 1 > $IDENTITY_FLUSH
+       $RUNAS -u 501 -g 501 -j 500 touch $DIR/d1/f3 || error
+       $RUNAS -u 501 -v 500 -g 501 -j 500 touch $DIR/d1/f4 || error
+
+       rm -f $SETXID_CONF      # restore the state established by setup()
+       rm -rf $DIR/d1
+       echo 1 > $IDENTITY_FLUSH
+}
+run_test 1 "setuid/gid ============================="
+
+# lfs getfacl/setfacl
+test_2() {
+       rm -rf $DIR/d2
+       mkdir $DIR/d2
+       chmod 755 $DIR/d2
+       echo xxx > $DIR/d2/f0
+       chmod 644 $DIR/d2/f0
+
+       $LFS getfacl $DIR/d2/f0 || error
+       $RUNAS -u 500 cat $DIR/d2/f0 || error
+       $RUNAS -u 500 touch $DIR/d2/f0 && error
+
+       $LFS setfacl -m u:$USER1:w $DIR/d2/f0 || error
+       $LFS getfacl $DIR/d2/f0 || error
+       echo "set user $USER1 write permission on file $DIR/d2/fo"
+       $RUNAS -u 500 touch $DIR/d2/f0 || error
+       $RUNAS -u 500 cat $DIR/d2/f0 && error
+
+       rm -rf $DIR/d2
+}
+run_test 2 "lfs getfacl/setfacl ============================="
+
+# rootsquash
+test_3() {
+       [ -n "$SEC" ] && echo "ignore rootsquash test for single node" && return
+
+       $LCTL conf_param $MDS security.rootsquash.skips=none
+       while grep LNET_NID_ANY $ROOTSQUASH_SKIPS > /dev/null; do sleep 1; done
+       $LCTL conf_param $MDS security.rootsquash.uid=0
+       while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done
+       $LCTL conf_param $MDS security.rootsquash.gid=0
+       while [ "`cat $ROOTSQUASH_GID`" -ne 0 ]; do sleep 1; done
+
+       rm -rf $DIR/d3
+       mkdir $DIR/d3
+       chown $USER1 $DIR/d3
+       chmod 700 $DIR/d3
+       $LCTL conf_param $MDS security.rootsquash.uid=500
+       echo "set rootsquash uid = 500"
+       while [ "`cat $ROOTSQUASH_UID`" -ne 500 ]; do sleep 1; done
+       touch $DIR/f3_0 && error
+       touch $DIR/d3/f3_1 || error
+
+       $LCTL conf_param $MDS security.rootsquash.uid=0
+       echo "disable rootsquash"
+       while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done
+       chown root $DIR/d3
+       chgrp $USER2 $DIR/d3
+       chmod 770 $DIR/d3
+
+       $LCTL conf_param $MDS security.rootsquash.uid=500
+       echo "set rootsquash uid = 500"
+       while [ "`cat $ROOTSQUASH_UID`" -ne 500 ]; do sleep 1; done
+       touch $DIR/d3/f3_2 && error
+       $LCTL conf_param $MDS security.rootsquash.gid=501
+       echo "set rootsquash gid = 501"
+       while [ "`cat $ROOTSQUASH_GID`" -ne 501 ]; do sleep 1; done
+       touch $DIR/d3/f3_3 || error
+
+       $LCTL conf_param $MDS security.rootsquash.skips=*
+       echo "add host in rootsquash skip list"
+       while ! grep LNET_NID_ANY $ROOTSQUASH_SKIPS > /dev/null;
+               do sleep 1;
+       done
+       touch $DIR/f3_4 || error
+
+       $LCTL conf_param $MDS security.rootsquash.uid=0
+       while [ "`cat $ROOTSQUASH_UID`" -ne 0 ]; do sleep 1; done
+       $LCTL conf_param $MDS security.rootsquash.gid=0
+       while [ "`cat $ROOTSQUASH_GID`" -ne 0 ]; do sleep 1; done
+       $LCTL conf_param $MDS security.rootsquash.skips=none
+       rm -rf $DIR/d3
+       rm -f $DIR/f3_?
+}
+run_test 3 "rootsquash ============================="
+
+# bug 3285 - supplementary group should always succeed (see do_init_ucred),
+# NB: the supplementary groups are set for local client only, as for remote
+# client, the groups of the specified uid on MDS will be obtained by
+# upcall /sbin/l_getidentity and used.
+test_4() {
+        mkdir $DIR/d4
+        chmod 771 $DIR/d4
+        chgrp 500 $DIR/d4
+       $RUNAS -u 500 -G1,2,500 ls $DIR/d4 || error "setgroups failed"
+       rm -rf $DIR/d4
+}
+run_test 4 "set supplementary group ==============="
+
+log "cleanup: ======================================================"
+if [ "$I_MOUNTED" = "yes" ]; then
+       llmountcleanup.sh || error "cleanup failed"
+fi
+
+echo '=========================== finished ==============================='
index e7aed66..29974ed 100644 (file)
@@ -18,7 +18,7 @@ noinst_PROGRAMS = llog_reader lr_reader wirecheck wiretest lload obdio obdbarrie
 # mount only finds helpers in /sbin
 rootsbin_PROGRAMS = mount.lustre
 sbin_PROGRAMS = mkfs.lustre tunefs.lustre lctl \
-       l_getgroups llverfs llverdev
+       l_getidentity l_facl llverfs llverdev
 bin_PROGRAMS = lfs req_layout
 sbin_SCRIPTS = $(sbin_scripts)
 endif # UTILS
@@ -87,6 +87,14 @@ tunefs_lustre_CPPFLAGS = -DTUNEFS $(AM_CPPFLAGS)
 tunefs_lustre_LDADD := $(mkfs_lustre_LDADD)
 tunefs_lustre_DEPENDENCIES := $(mkfs_lustre_DEPENDENCIES)
 
+l_getidentity_SOURCES = l_getidentity.c
+l_getidentity_LDADD := $(LIBPTLCTL)
+l_getidentity_DEPENDENCIES := $(LIBPTLCTL)
+
+l_facl_SOURCES = l_facl.c
+l_facl_LDADD := liblustreapi.a
+l_facl_DEPENDENCIES := liblustreapi.a
+
 EXTRA_DIST = $(sbin_scripts)
 
 # NOTE: this should only be run on i386.
diff --git a/lustre/utils/l_facl.c b/lustre/utils/l_facl.c
new file mode 100644 (file)
index 0000000..fb1f023
--- /dev/null
@@ -0,0 +1,242 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <grp.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <mntent.h>
+
+#include <lustre/liblustreapi.h>
+#include <lustre/lustre_user.h>
+
+#include "obdctl.h"
+
+static char *progname;
+
+/* Print the command-line synopsis for this upcall helper to stderr. */
+static void usage(void)
+{
+        fprintf(stderr, "\nusage: %s {mdsname} {ino} {handle} {cmd}\n",
+                progname);
+        fputs("Normally invoked as an upcall from Lustre, set via:\n"
+              "  /proc/fs/lustre/mds/{mdsname}/rmtacl_upcall\n", stderr);
+}
+
+/*
+ * Dump the downcall payload (text length, blank line, text) to stdout;
+ * main() uses this instead of the /proc write when L_FACL_TEST is set.
+ */
+static inline void show_result(struct rmtacl_downcall_data *data)
+{
+        printf("buflen %d\n\n%s\n", data->add_buflen, data->add_buf);
+}
+
+#define MDS_ERR "server processing error"
+
+/*
+ * Format an error message into the downcall text buffer \a buf.
+ *
+ * Every caller passes the add_buf member of the downcall record, which
+ * main() allocates with RMTACL_SIZE_MAX bytes, so the output is bounded to
+ * that size and always NUL-terminated; an over-long message (for example
+ * one carrying a long strerror() text) is truncated rather than written
+ * past the end of the allocation.
+ */
+static void errlog(char *buf, const char *fmt, ...)
+{
+        va_list args;
+
+        va_start(args, fmt);
+        vsnprintf(buf, RMTACL_SIZE_MAX, fmt, args);
+        va_end(args);
+}
+
+/*
+ * Find the mount point of a Lustre filesystem on this node.
+ *
+ * Scans the mount table (MOUNTED) for the first entry that
+ * llapi_is_lustre_mnttype() accepts and whose device name contains
+ * ":/lustre", and copies its directory into a static buffer.
+ *
+ * Returns a pointer to that static buffer (overwritten by the next call),
+ * or NULL if the mount table cannot be opened or no entry matches.
+ */
+static char *get_lustre_mount(void)
+{
+        static char mntpath[PATH_MAX] = "";
+        struct mntent *mnt;
+        char *found = NULL;
+        FILE *fp;
+
+        fp = setmntent(MOUNTED, "r");
+        if (fp == NULL) {
+                fprintf(stderr, "setmntent %s failed: %s\n",
+                        MOUNTED, strerror(errno));
+                return NULL;
+        }
+
+        while ((mnt = getmntent(fp)) != NULL) {
+                if (!llapi_is_lustre_mnttype(mnt))
+                        continue;
+
+                if (strstr(mnt->mnt_fsname, ":/lustre") == NULL)
+                        continue;
+
+                /* save the mountpoint dir part; unlike strncpy(), snprintf()
+                 * always leaves the buffer NUL-terminated */
+                snprintf(mntpath, sizeof(mntpath), "%s", mnt->mnt_dir);
+                found = mntpath;
+                break;
+        }
+        endmntent(fp);
+
+        return found;
+}
+
+/*
+ * Append data read from \a fd to \a buf, starting at offset \a buflen, until
+ * end of file, a read error, or RMTACL_SIZE_MAX bytes are held.  A read
+ * error replaces the buffer text with an error message.  Returns the new
+ * number of bytes held.
+ */
+static int drain_pipe(int fd, char *buf, int buflen)
+{
+        int rc;
+
+        while (buflen < RMTACL_SIZE_MAX) {
+                rc = read(fd, buf + buflen, RMTACL_SIZE_MAX - buflen);
+                if (rc < 0) {
+                        errlog(buf, MDS_ERR"(read failed): %s\n",
+                               strerror(errno));
+                        break;
+                }
+                if (rc == 0)
+                        break;
+                buflen += rc;
+        }
+
+        return buflen;
+}
+
+/*
+ * Remote-ACL upcall helper.
+ *
+ * Arguments: {mdsname} {ino} {handle} {cmd}.  The command is run through
+ * "/bin/sh -c" with the Lustre mount point as working directory; its stdout
+ * (or, when stdout is empty, its stderr) is captured, truncated to
+ * RMTACL_SIZE_MAX bytes including the terminating NUL, and written as a
+ * struct rmtacl_downcall_data to /proc/fs/lustre/mds/{mdsname}/rmtacl_info.
+ * Failures before the command could run are reported to the kernel the same
+ * way, as an error text in the downcall buffer.  When L_FACL_TEST is set in
+ * the environment the result is printed to stdout instead.
+ *
+ * Returns 0 on success and 1 on a usage error, allocation failure, or
+ * failure to deliver the downcall.
+ *
+ * NOTE(review): {cmd} is interpreted by a shell; it presumably originates
+ * from the client's request -- confirm it is validated before the upcall.
+ */
+int main(int argc, char **argv)
+{
+        struct rmtacl_downcall_data *data;
+        char procname[1024], *buf, *mntpath;
+        int out_pipe[2], err_pipe[2], pid, size, buflen, fd, rc;
+
+        progname = basename(argv[0]);
+
+        if (argc != 5) {
+                usage();
+                return 1;
+        }
+
+        size = offsetof(struct rmtacl_downcall_data, add_buf[RMTACL_SIZE_MAX]);
+        data = malloc(size);
+        if (!data) {
+                fprintf(stderr, "malloc %d failed\n", size);
+                return 1;
+        }
+        memset(data, 0, size);
+        data->add_magic = RMTACL_DOWNCALL_MAGIC;
+        data->add_ino = strtoll(argv[2], NULL, 10);
+        data->add_handle = strtoul(argv[3], NULL, 10);
+        buf = data->add_buf;
+
+        mntpath = get_lustre_mount();
+        if (!mntpath) {
+                errlog(buf, MDS_ERR"(no lustre mounted on MDS)\n");
+                goto downcall;
+        }
+
+        /* create pipe */
+        if (pipe(out_pipe) < 0 || pipe(err_pipe) < 0) {
+                errlog(buf, MDS_ERR"(pipe failed): %s\n", strerror(errno));
+                goto downcall;
+        }
+
+        if ((pid = fork()) < 0) {
+                errlog(buf, MDS_ERR"(fork failed): %s\n", strerror(errno));
+                goto downcall;
+        } else if (pid == 0) {
+                /* child: route stdout/stderr into the pipes, drop stdin */
+                close(out_pipe[0]);
+                if (out_pipe[1] != STDOUT_FILENO) {
+                        dup2(out_pipe[1], STDOUT_FILENO);
+                        close(out_pipe[1]);
+                }
+                close(err_pipe[0]);
+                if (err_pipe[1] != STDERR_FILENO) {
+                        dup2(err_pipe[1], STDERR_FILENO);
+                        close(err_pipe[1]);
+                }
+                close(STDIN_FILENO);
+
+                /* the child leaves through _exit() so that it never runs
+                 * the parent's exit handlers or flushes its stdio state */
+                if (chdir(mntpath) < 0) {
+                        fprintf(stderr, "chdir %s failed: %s\n",
+                                mntpath, strerror(errno));
+                        _exit(1);
+                }
+
+                /* the argument list of a variadic exec must end with a
+                 * null pointer of type char * */
+                execl("/bin/sh", "sh", "-c", argv[4], (char *)NULL);
+                fprintf(stderr, "execl %s failed: %s\n",
+                        argv[4], strerror(errno));
+
+                _exit(1);
+        }
+
+        /* parent process handling */
+        close(out_pipe[1]);
+        close(err_pipe[1]);
+
+        /* stdout is preferred; stderr is only consulted when the command
+         * wrote nothing to stdout */
+        buflen = drain_pipe(out_pipe[0], buf, 0);
+        if (buflen == 0)
+                buflen = drain_pipe(err_pipe[0], buf, buflen);
+
+        /* Close the read ends before reaping the child: if the command
+         * produced more than fits in the buffer it may be blocked writing
+         * to a full pipe, and would otherwise never exit. */
+        close(out_pipe[0]);
+        close(err_pipe[0]);
+        wait(&rc);
+
+downcall:
+        buf[RMTACL_SIZE_MAX - 1] = 0;
+        data->add_buflen = strlen(buf) + 1;
+        if (getenv("L_FACL_TEST")) {
+                show_result(data);
+                free(data);
+                return 0;
+        }
+
+        snprintf(procname, sizeof(procname),
+                 "/proc/fs/lustre/mds/%s/rmtacl_info", argv[1]);
+        fd = open(procname, O_WRONLY);
+        if (fd < 0) {
+                fprintf(stderr, "open %s failed: %s\n",
+                        procname, strerror(errno));
+                free(data);
+                return 1;
+        }
+
+        /* only the header plus the used part of the text is written */
+        buflen = offsetof(struct rmtacl_downcall_data,
+                          add_buf[data->add_buflen]);
+        rc = write(fd, data, buflen);
+        close(fd);
+        if (rc != buflen) {
+                fprintf(stderr, "write %s len %d return %d: %s\n",
+                        procname, buflen, rc, strerror(errno));
+                free(data);
+                return 1;
+        }
+
+        free(data);
+        return 0;
+}
diff --git a/lustre/utils/l_getidentity.c b/lustre/utils/l_getidentity.c
new file mode 100644 (file)
index 0000000..b0cf680
--- /dev/null
@@ -0,0 +1,356 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <grp.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <libgen.h>
+#include <syslog.h>
+
+#include <liblustre.h>
+#include <lustre/lustre_user.h>
+#include <lustre/lustre_idl.h>
+#include <libcfs/kp30.h>
+
+/* Path of the setxid permission database that main() tries to open;
+ * a missing file (ENOENT) is silently tolerated there. */
+#define SETXID_PATHNAME "/etc/lustre/setxid.conf"
+
+/* setxid permission file format is like this:
+ * {nid} {uid} {perms}
+ * the valid values for perms are setuid/setgid/setgrp, and they can be listed
+ * together, separated by ','.
+ */
+
+/* basename of argv[0]; assigned in main(), read by usage() and errlog(). */
+static char *progname;
+
+/* Write the command synopsis and the upcall configuration hint to stderr. */
+static void usage(void)
+{
+        static const char synopsis[] =
+                "\nusage: %s {mdsname} {uid}\n"
+                "Normally invoked as an upcall from Lustre, set via:\n"
+                "  /proc/fs/lustre/mds/{mdsname}/identity_upcall\n";
+
+        fprintf(stderr, synopsis, progname);
+}
+
+/*
+ * qsort() comparator: orders two __u32 values ascending.
+ *
+ * Returns <0, 0 or >0.  The result is derived from explicit comparisons
+ * rather than from a subtraction: the unsigned difference converted to int
+ * has the wrong sign whenever the operands differ by more than INT_MAX.
+ */
+static int compare_u32(const void *v1, const void *v2)
+{
+        __u32 a = *(const __u32 *)v1;
+        __u32 b = *(const __u32 *)v2;
+
+        return (a > b) - (a < b);
+}
+
+/*
+ * Log a printf-style message at LOG_NOTICE under the LOG_AUTHPRIV facility,
+ * tagged with progname.
+ *
+ * The log is opened with LOG_PERROR, so syslog itself also copies the
+ * message to stderr.  No separate stderr write is done here: handing a
+ * va_list to fprintf() is undefined behaviour and printed garbage.
+ */
+static void errlog(const char *fmt, ...)
+{
+        va_list args;
+
+        openlog(progname, LOG_PERROR, LOG_AUTHPRIV);
+
+        va_start(args, fmt);
+        vsyslog(LOG_NOTICE, fmt, args);
+        va_end(args);
+
+        closelog();
+}
+
+/*
+ * Fill in the primary gid and the sorted group list for data->idd_uid from
+ * the local passwd/group databases.
+ *
+ * On success stores idd_gid, idd_groups[] (primary gid plus every group
+ * listing the user as a member, sorted ascending) and idd_ngroups, and
+ * returns 0.  If the uid has no passwd entry, stores an error code in
+ * idd_err (the errno from getpwuid(), or EIDRM when none was set) and
+ * returns -1.
+ */
+int get_groups_local(struct identity_downcall_data *data)
+{
+        int maxgroups;
+        gid_t *groups;
+        unsigned int ngroups = 0;
+        struct passwd *pw;
+        struct group *gr;
+        int i;
+
+        /* getpwuid() leaves errno untouched for "not found", so clear it
+         * first to tell that case apart from a real lookup failure. */
+        errno = 0;
+        pw = getpwuid(data->idd_uid);
+        if (!pw) {
+                /* capture errno before errlog() gets a chance to change it */
+                int err = errno;
+
+                errlog("no such user %u\n", data->idd_uid);
+                data->idd_err = err ? err : EIDRM;
+                return -1;
+        }
+        data->idd_gid = pw->pw_gid;
+
+        /* sysconf() may fail with -1; never let the limit fall outside
+         * [1, NGROUPS_MAX] so the loop below stays bounded. */
+        maxgroups = sysconf(_SC_NGROUPS_MAX);
+        if (maxgroups < 1 || maxgroups > NGROUPS_MAX)
+                maxgroups = NGROUPS_MAX;
+        groups = data->idd_groups;
+
+        /* the primary group always occupies one slot */
+        groups[ngroups++] = pw->pw_gid;
+        /* check the bound before each insertion rather than after it */
+        while (ngroups < (unsigned int)maxgroups && (gr = getgrent())) {
+                if (gr->gr_gid == pw->pw_gid)
+                        continue;
+                if (!gr->gr_mem)
+                        continue;
+                for (i = 0; gr->gr_mem[i]; i++) {
+                        if (!strcmp(gr->gr_mem[i], pw->pw_name)) {
+                                groups[ngroups++] = gr->gr_gid;
+                                break;
+                        }
+                }
+        }
+        endgrent();
+        qsort(groups, ngroups, sizeof(*groups), compare_u32);
+        data->idd_ngroups = ngroups;
+
+        return 0;
+}
+
+/*
+ * Return 1 if the line carries no data: after skipping leading blanks and
+ * tabs it is empty, ends at a newline, or starts with '#'.  Return 0
+ * otherwise.
+ */
+static inline int comment_line(char *line)
+{
+        const char *cur;
+
+        for (cur = line; *cur == ' ' || *cur == '\t'; cur++)
+                ;
+
+        switch (*cur) {
+        case '\0':
+        case '\n':
+        case '#':
+                return 1;
+        default:
+                return 0;
+        }
+}
+
+/*
+ * Return 1 if str is a complete numeric literal (any base accepted by
+ * strtoul() with base 0) equal to uid, 0 otherwise.
+ *
+ * Strings with no digits, trailing junk, or a value that does not fit in
+ * uid_t never match.  Without these checks "" would parse as 0 and
+ * out-of-range values would be truncated, either of which could wrongly
+ * match uid 0.
+ */
+static inline int match_uid(uid_t uid, const char *str)
+{
+        char *end;
+        unsigned long val;
+
+        errno = 0;
+        val = strtoul(str, &end, 0);
+        if (end == str || *end || errno == ERANGE)
+                return 0;
+
+        /* reject values that would change when narrowed to uid_t */
+        if ((unsigned long)(uid_t)val != val)
+                return 0;
+
+        return (uid == (uid_t)val);
+}
+
+/*
+ * Keyword -> permission bit table consulted by parse_setxid_perm().
+ * The entry whose name is NULL terminates the table.
+ */
+static struct setxid_perm_type_t {
+        char   *name;
+        __u32   bit;
+} setxid_perm_types[] =  {
+        { .name = "setuid", .bit = LUSTRE_SETUID_PERM },
+        { .name = "setgid", .bit = LUSTRE_SETGID_PERM },
+        { .name = "setgrp", .bit = LUSTRE_SETGRP_PERM },
+        { .name = NULL,     .bit = 0 },
+};
+
+/*
+ * Parse a comma-separated list of permission keywords (matched
+ * case-insensitively against setxid_perm_types[]) into a bit mask.
+ *
+ * perm: receives the OR of the bits of all recognised keywords; it is
+ *       zeroed first.
+ * str:  NUL-terminated keyword list; it is not modified.
+ *
+ * Returns 0 on success, -1 as soon as an unknown keyword is met.  Parsing
+ * stops silently at the first empty token (for example a trailing ',').
+ */
+int parse_setxid_perm(__u32 *perm, char *str)
+{
+        char *start, *end;
+        char name[64];
+        struct setxid_perm_type_t *pt;
+        size_t len;
+
+        *perm = 0;
+        start = str;
+        while (1) {
+                end = strchr(start, ',');
+                if (!end)
+                        end = start + strlen(start);
+                if (start >= end)
+                        break;
+
+                /* Bound the copy to the local buffer.  An oversized token
+                 * is truncated; it is far longer than any keyword in the
+                 * table, so it is rejected as unknown below. */
+                len = end - start;
+                if (len >= sizeof(name))
+                        len = sizeof(name) - 1;
+                memcpy(name, start, len);
+                name[len] = '\0';
+
+                for (pt = setxid_perm_types; pt->name; pt++) {
+                        if (!strcasecmp(name, pt->name)) {
+                                *perm |= pt->bit;
+                                break;
+                        }
+                }
+
+                if (!pt->name) {
+                        printf("unkown perm type: %s\n", name);
+                        return -1;
+                }
+
+                /* The last token ends at the terminating NUL; stepping past
+                 * it would make the next strchr() read outside the string. */
+                if (*end == '\0')
+                        break;
+                start = end + 1;
+        }
+        return 0;
+}
+
+/*
+ * Parse one "{nid} {uid} {perms}" line of the setxid permission file and,
+ * if its uid equals data->idd_uid, record the permission in
+ * data->idd_perms[].
+ *
+ * A nid of "*" is stored as LNET_NID_ANY.  Permissions for a nid that is
+ * already recorded are OR-ed into the existing entry; otherwise a new entry
+ * is appended and idd_nperms incremented.
+ *
+ * Returns 0 on success (including lines for other uids), -1 if the table
+ * is already full or the line cannot be parsed.
+ */
+int parse_setxid_perm_line(struct identity_downcall_data *data, char *line)
+{
+        char uid_str[256], nid_str[256], perm_str[256];
+        lnet_nid_t nid;
+        __u32 perm;
+        struct setxid_perm_downcall_data *pdd;
+        int rc, i;
+
+        if (data->idd_nperms >= N_SETXID_PERMS_MAX) {
+                errlog("setxid permission count %d > max %d\n",
+                        data->idd_nperms, N_SETXID_PERMS_MAX);
+                return -1;
+        }
+
+        /* field widths keep each token inside its 256-byte buffer; the
+         * caller's line buffer may hold far longer tokens */
+        rc = sscanf(line, "%255s %255s %255s", nid_str, uid_str, perm_str);
+        if (rc != 3) {
+                errlog("can't parse line %s\n", line);
+                return -1;
+        }
+
+        if (!match_uid(data->idd_uid, uid_str))
+                return 0;
+
+        if (!strcmp(nid_str, "*")) {
+                nid = LNET_NID_ANY;
+        } else {
+                nid = libcfs_str2nid(nid_str);
+                if (nid == LNET_NID_ANY) {
+                        errlog("can't parse nid %s\n", nid_str);
+                        return -1;
+                }
+        }
+
+        if (parse_setxid_perm(&perm, perm_str)) {
+                errlog("invalid setxid perm %s\n", perm_str);
+                return -1;
+        }
+
+        /* merge the perms with the same nid */
+        for (i = 0; i < data->idd_nperms; i++) {
+                if (data->idd_perms[i].pdd_nid == nid) {
+                        data->idd_perms[i].pdd_perm |= perm;
+                        return 0;
+                }
+        }
+
+        /* take the next free slot only now that idd_nperms is known to be
+         * in range */
+        pdd = &data->idd_perms[data->idd_nperms];
+        pdd->pdd_nid = nid;
+        pdd->pdd_perm = perm;
+        data->idd_nperms++;
+        return 0;
+}
+
+/*
+ * Read the permission file line by line, skipping blank and comment lines,
+ * and feed every other line to parse_setxid_perm_line().
+ *
+ * Returns 0 once the stream is exhausted, -1 at the first line that fails
+ * to parse (that line is logged).
+ */
+int get_setxid_perms(FILE *fp, struct identity_downcall_data *data)
+{
+        char line[1024];
+
+        while (fgets(line, sizeof(line), fp) != NULL) {
+                if (!comment_line(line) &&
+                    parse_setxid_perm_line(data, line) != 0) {
+                        errlog("parse line %s failed!\n", line);
+                        return -1;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Dump the downcall data in readable form: either the lookup error (via
+ * errlog) or, on stdout, the uid, its comma-separated group list and the
+ * table of per-nid setxid permissions.
+ */
+static void show_result(struct identity_downcall_data *data)
+{
+        const char *sep = "";
+        int i;
+
+        if (data->idd_err) {
+                errlog("failed to get identity for uid %d: %s\n",
+                       data->idd_uid, strerror(data->idd_err));
+                return;
+        }
+
+        printf("uid=%d gid=", data->idd_uid);
+        for (i = 0; i < data->idd_ngroups; i++) {
+                printf("%s%u", sep, data->idd_groups[i]);
+                /* every group after the first is preceded by a comma */
+                sep = ",";
+        }
+        printf("\n");
+
+        printf("setxid permissions:\n"
+               "  nid\t\t\tperm\n");
+        for (i = 0; i < data->idd_nperms; i++)
+                printf("  %#llx\t0x%x\n", data->idd_perms[i].pdd_nid,
+                       data->idd_perms[i].pdd_perm);
+        printf("\n");
+}
+
+/*
+ * Identity upcall helper: l_getidentity {mdsname} {uid}.
+ *
+ * Collects the group list and setxid permissions of the given uid and
+ * writes the resulting identity_downcall_data to
+ * /proc/fs/lustre/mds/{mdsname}/identity_info.  If the environment variable
+ * L_GETIDENTITY_TEST is set, the result is printed instead of written.
+ *
+ * Returns 0 on success, 1 on bad usage, allocation failure or a failed or
+ * short write.  A failed group lookup is still reported to the kernel
+ * through the idd_err field set by get_groups_local().
+ */
+int main(int argc, char **argv)
+{
+        FILE *perms_fp;
+        char *end;
+        struct identity_downcall_data *data;
+        char procname[1024];
+        unsigned long uid;
+        int fd, rc;
+        int ret = 1;
+
+        progname = basename(argv[0]);
+
+        if (argc != 3) {
+                usage();
+                return 1;
+        }
+
+        /* an empty argument parses as 0 with *end == '\0'; reject it
+         * explicitly so it is not taken for uid 0 */
+        uid = strtoul(argv[2], &end, 0);
+        if (end == argv[2] || *end) {
+                errlog("%s: invalid uid '%s'\n", progname, argv[2]);
+                usage();
+                return 1;
+        }
+
+        data = malloc(sizeof(*data));
+        if (!data) {
+                /* cast: sizeof yields size_t, the format expects int */
+                errlog("malloc identity downcall data(%d) failed!\n",
+                       (int)sizeof(*data));
+                return 1;
+        }
+        memset(data, 0, sizeof(*data));
+        data->idd_magic = IDENTITY_DOWNCALL_MAGIC;
+        data->idd_uid = uid;
+
+        /* get groups for uid */
+        rc = get_groups_local(data);
+        if (rc)
+                goto downcall;
+
+        /* read permission database */
+        perms_fp = fopen(SETXID_PATHNAME, "r");
+        if (perms_fp) {
+                /* NOTE(review): a parse failure is only logged by the
+                 * callee; entries read before it are still sent.  Confirm
+                 * this best-effort behaviour is intended. */
+                get_setxid_perms(perms_fp, data);
+                fclose(perms_fp);
+        } else if (errno != ENOENT) {
+                errlog("open %s failed: %s\n",
+                       SETXID_PATHNAME, strerror(errno));
+        }
+
+downcall:
+        if (getenv("L_GETIDENTITY_TEST")) {
+                show_result(data);
+                ret = 0;
+                goto out;
+        }
+
+        snprintf(procname, sizeof(procname),
+                 "/proc/fs/lustre/mds/%s/identity_info", argv[1]);
+        fd = open(procname, O_WRONLY);
+        if (fd < 0) {
+                errlog("can't open file %s: %s\n", procname, strerror(errno));
+                goto out;
+        }
+
+        rc = write(fd, data, sizeof(*data));
+        if (rc != sizeof(*data)) {
+                /* keep write()'s errno; close() may overwrite it */
+                int err = errno;
+
+                close(fd);
+                errlog("partial write ret %d: %s\n", rc, strerror(err));
+                goto out;
+        }
+        close(fd);
+        ret = 0;
+
+out:
+        free(data);
+        return ret;
+}
index 2e35469..8fa3500 100644 (file)
@@ -37,6 +37,7 @@
 #include <fcntl.h>
 #include <dirent.h>
 #include <time.h>
+#include <libgen.h>
 
 #include <lnet/api-support.h>
 #include <lnet/lnetctl.h>
@@ -69,6 +70,8 @@ static int lfs_quota(int argc, char **argv);
 #endif
 static int lfs_flushctx(int argc, char **argv);
 static int lfs_join(int argc, char **argv);
+static int lfs_getfacl(int argc, char **argv);
+static int lfs_setfacl(int argc, char **argv);
 
 /* all avaialable commands */
 command_t cmdlist[] = {
@@ -134,6 +137,12 @@ command_t cmdlist[] = {
 #endif
         {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n"
          "usage: flushctx [-k] [mountpoint...]"},
+        {"getfacl", lfs_getfacl, 0,
+         "Get file access control list in remote client.\n"
+         "usage: getfacl [-dRLPvh] file"},
+        {"setfacl", lfs_setfacl, 0,
+         "Set file access control list in remote client.\n"
+         "usage: setfacl [-bkndRLPvh] [{-m|-x} acl_spec] [{-M|-X} acl_file] file"},
         {"help", Parser_help, 0, "help"},
         {"exit", Parser_quit, 0, "quit"},
         {"quit", Parser_quit, 0, "quit"},
@@ -1567,6 +1576,119 @@ static int lfs_flushctx(int argc, char **argv)
         return rc;
 }
 
+/*
+ * Build the remote ACL command line for lfs getfacl/setfacl.
+ *
+ * We assume one and only one filename is supplied as the
+ * last parameter.
+ *
+ * argc/argv: the sub-command and its arguments; argv[argc - 1] is the file.
+ * fname:     out; receives a copy of argv[argc - 1].  Both callers in this
+ *            file pass a PATH_MAX-byte buffer, which is what is assumed.
+ * cmd:       in/out; argv[0..argc-2] joined by spaces, followed by the
+ *            file's path relative to the lustre mount point ("." for the
+ *            mount point itself), is appended.  Also assumed PATH_MAX bytes.
+ *
+ * Returns 0 on success.  Returns -1 if there are too few arguments, a path
+ * or the command does not fit in PATH_MAX, the directory cannot be
+ * resolved, or the file is not under a mounted lustre filesystem.
+ */
+static int acl_cmd_parse(int argc, char **argv, char *fname, char *cmd)
+{
+        char *dname, *rpath = NULL;
+        char path[PATH_MAX], cwd[PATH_MAX];
+        FILE *fp;
+        struct mntent *mnt;
+        size_t len, mlen;
+        int i, rc;
+
+        if (argc < 2)
+                return -1;
+
+        if (strlen(argv[argc - 1]) >= PATH_MAX) {
+                fprintf(stderr, "%s: file name too long\n", argv[0]);
+                return -1;
+        }
+
+        /* FIXME the premise is there is no sub-mounted filesystems under this
+         * mounted lustre tree. */
+        snprintf(fname, PATH_MAX, "%s", argv[argc - 1]);
+
+        /* get path prefix; dirname() may modify fname */
+        dname = dirname(fname);
+
+        /* try to resolve the pathname into relative to the root of the mounted
+         * lustre filesystem.
+         */
+        if (getcwd(cwd, sizeof(cwd)) == NULL) {
+                /* cwd holds nothing meaningful here, so do not print it */
+                fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
+                return -1;
+        }
+
+        if (chdir(dname) == -1) {
+                fprintf(stderr, "chdir to %s failed: %s\n",
+                        dname, strerror(errno));
+                return -1;
+        }
+
+        if (getcwd(path, sizeof(path)) == NULL) {
+                fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
+                return -1;
+        }
+
+        if (chdir(cwd) == -1) {
+                fprintf(stderr, "chdir back to %s: %s\n",
+                        cwd, strerror(errno));
+                return -1;
+        }
+
+        /* append "/<basename>" with an explicit bound on the space left;
+         * fname is refreshed first because dirname() may have altered it */
+        snprintf(fname, PATH_MAX, "%s", argv[argc - 1]);
+        len = strlen(path);
+        rc = snprintf(path + len, sizeof(path) - len, "/%s", basename(fname));
+        if (rc < 0 || (size_t)rc >= sizeof(path) - len) {
+                fprintf(stderr, "%s: resolved path too long\n", argv[0]);
+                return -1;
+        }
+
+        fp = setmntent(MOUNTED, "r");
+        if (fp == NULL) {
+                fprintf(stderr, "setmntent %s failed: %s\n",
+                        MOUNTED, strerror(errno));
+                return -1;
+        }
+
+        while (1) {
+                mnt = getmntent(fp);
+                if (!mnt)
+                        break;
+
+                if (!llapi_is_lustre_mnttype(mnt))
+                        continue;
+
+                mlen = strlen(mnt->mnt_dir);
+                if (strncmp(mnt->mnt_dir, path, mlen))
+                        continue;
+
+                /* the prefix must end on a path component boundary, or
+                 * "/mnt/lustre" would also claim "/mnt/lustre2/file" */
+                if (path[mlen] == '\0' || path[mlen] == '/' ||
+                    (mlen > 0 && mnt->mnt_dir[mlen - 1] == '/')) {
+                        rpath = path + mlen;
+                        break;
+                }
+        }
+        endmntent(fp);
+
+        /* remove char '/' from rpath to be a relative path */
+        while (rpath && *rpath == '/') rpath++;
+
+        if (!rpath) {
+                fprintf(stderr,
+                        "%s: file %s doesn't belong to a lustre file system!\n",
+                        argv[0], argv[argc - 1]);
+                return -1;
+        }
+
+        /* append to whatever cmd already holds, tracking the space left */
+        len = strlen(cmd);
+        if (len >= PATH_MAX)
+                goto toolong;
+        for (i = 0; i < argc - 1; i++) {
+                rc = snprintf(cmd + len, PATH_MAX - len, "%s ", argv[i]);
+                if (rc < 0 || (size_t)rc >= PATH_MAX - len)
+                        goto toolong;
+                len += rc;
+        }
+        rc = snprintf(cmd + len, PATH_MAX - len, "%s", *rpath ? rpath : ".");
+        if (rc < 0 || (size_t)rc >= PATH_MAX - len)
+                goto toolong;
+
+        /* basename() may have modified fname; hand back the caller's path */
+        snprintf(fname, PATH_MAX, "%s", argv[argc - 1]);
+
+        return 0;
+
+toolong:
+        fprintf(stderr, "%s: command line too long\n", argv[0]);
+        return -1;
+}
+
+/*
+ * "lfs getfacl" handler: turn the arguments into a file name and a command
+ * string with acl_cmd_parse(), then pass both to llapi_getfacl().
+ * Returns CMD_HELP if the arguments cannot be parsed, otherwise the
+ * llapi_getfacl() result.
+ */
+static int lfs_getfacl(int argc, char **argv)
+{
+        char fname[PATH_MAX] = "", cmd[PATH_MAX] = "";
+        int rc;
+
+        rc = acl_cmd_parse(argc, argv, fname, cmd);
+        if (rc != 0)
+                return CMD_HELP;
+
+        return llapi_getfacl(fname, cmd);
+}
+
+/*
+ * "lfs setfacl" handler: turn the arguments into a file name and a command
+ * string with acl_cmd_parse(), then pass both to llapi_setfacl().
+ * Returns CMD_HELP if the arguments cannot be parsed, otherwise the
+ * llapi_setfacl() result.
+ */
+static int lfs_setfacl(int argc, char **argv)
+{
+        char fname[PATH_MAX] = "", cmd[PATH_MAX] = "";
+        int parsed = acl_cmd_parse(argc, argv, fname, cmd);
+
+        return parsed ? CMD_HELP : llapi_setfacl(fname, cmd);
+}
+
 int main(int argc, char **argv)
 {
         int rc;
index 53f5c67..1499ca0 100644 (file)
@@ -56,7 +56,6 @@
 #include <lustre_lib.h>
 #include <lustre/liblustreapi.h>
 #include <obd_lov.h>
-#include <lustre/liblustreapi.h>
 
 static void err_msg(char *fmt, ...)
 {
@@ -1253,3 +1252,59 @@ out:
         find_param_fini(&param);
         return ret;
 }
+
+/*
+ * Issue LL_IOC_GETFACL on fname, passing cmd as the request and a local
+ * RMTACL_SIZE_MAX-byte buffer for the reply; the reply is printed to
+ * stdout on success.
+ *
+ * Returns -1 if fname cannot be opened, otherwise the ioctl() result.
+ */
+int llapi_getfacl(char *fname, char *cmd)
+{
+        struct rmtacl_ioctl_data data;
+        char out[RMTACL_SIZE_MAX] = "";
+        int fd, rc;
+
+        fd = open(fname, 0);
+        if (fd == -1) {
+                err_msg("open %s failed", fname);
+                return -1;
+        }
+
+        /* request: the command string, terminating NUL included */
+        data.cmd = cmd;
+        data.cmd_len = strlen(cmd) + 1;
+        /* reply buffer */
+        data.res = out;
+        data.res_len = sizeof(out);
+
+        rc = ioctl(fd, LL_IOC_GETFACL, &data);
+        close(fd);
+
+        if (rc == 0)
+                printf("%s", out);
+        else
+                err_msg("getfacl %s failed", fname);
+
+        return rc;
+}
+
+/*
+ * Issue LL_IOC_SETFACL on fname, passing cmd as the request and a local
+ * RMTACL_SIZE_MAX-byte buffer for the reply; the reply is printed to
+ * stdout on success.
+ *
+ * Returns -1 if fname cannot be opened, otherwise the ioctl() result.
+ */
+int llapi_setfacl(char *fname, char *cmd)
+{
+        struct rmtacl_ioctl_data data;
+        char out[RMTACL_SIZE_MAX] = "";
+        int fd, rc;
+
+        fd = open(fname, 0);
+        if (fd == -1) {
+                err_msg("open %s failed", fname);
+                return -1;
+        }
+
+        /* request: the command string, terminating NUL included */
+        data.cmd = cmd;
+        data.cmd_len = strlen(cmd) + 1;
+        /* reply buffer */
+        data.res = out;
+        data.res_len = sizeof(out);
+
+        rc = ioctl(fd, LL_IOC_SETFACL, &data);
+        close(fd);
+
+        if (rc == 0)
+                printf("%s", out);
+        else
+                err_msg("setfacl %s failed", fname);
+
+        return rc;
+}