Whamcloud - gitweb
LU-5564 mdt: skip permission check for close
[fs/lustre-release.git] / lustre / mdt / mdt_lib.c
index eb839a6..eb08efc 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2013, Intel Corporation.
+ * Copyright (c) 2011, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 
 #define DEBUG_SUBSYSTEM S_MDS
 
+#include <linux/user_namespace.h>
+#ifdef HAVE_UIDGID_HEADER
+# include <linux/uidgid.h>
+#endif
 #include "mdt_internal.h"
-#include <lnet/lib-lnet.h>
-
+#include <lnet/nidstr.h>
+#include <lustre_nodemap.h>
 
 typedef enum ucred_init_type {
         NONE_INIT       = 0,
@@ -80,7 +84,7 @@ void mdt_exit_ucred(struct mdt_thread_info *info)
 }
 
 static int match_nosquash_list(struct rw_semaphore *sem,
-                              cfs_list_t *nidlist,
+                              struct list_head *nidlist,
                               lnet_nid_t peernid)
 {
        int rc;
@@ -124,8 +128,20 @@ static int mdt_root_squash(struct mdt_thread_info *info, lnet_nid_t peernid)
        RETURN(0);
 }
 
+static void ucred_set_jobid(struct mdt_thread_info *info, struct lu_ucred *uc)
+{
+       struct ptlrpc_request   *req = mdt_info_req(info);
+       const char              *jobid = mdt_req_get_jobid(req);
+
+       /* set jobid if specified. */
+       if (jobid)
+               strlcpy(uc->uc_jobid, jobid, sizeof(uc->uc_jobid));
+       else
+               uc->uc_jobid[0] = '\0';
+}
+
 static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
-                          void *buf)
+                         void *buf, bool drop_fs_cap)
 {
         struct ptlrpc_request   *req = mdt_info_req(info);
         struct mdt_device       *mdt = info->mti_mdt;
@@ -133,7 +149,6 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
        struct lu_ucred         *ucred = mdt_ucred(info);
         lnet_nid_t               peernid = req->rq_peer.nid;
         __u32                    perm = 0;
-        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
         int                      setuid;
         int                      setgid;
         int                      rc = 0;
@@ -155,76 +170,48 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
        if (type == BODY_INIT) {
                struct mdt_body *body = (struct mdt_body *)buf;
 
-               ucred->uc_suppgids[0] = body->suppgid;
+               ucred->uc_suppgids[0] = body->mbo_suppgid;
                ucred->uc_suppgids[1] = -1;
        }
 
-       /* sanity check: we expect the uid which client claimed is true */
-       if (remote) {
-               if (!uid_valid(make_kuid(&init_user_ns, req->rq_auth_mapped_uid))) {
-                       CDEBUG(D_SEC, "remote user not mapped, deny access!\n");
-                       CDEBUG(D_SEC, "remote user not mapped, deny access!\n");
-                       RETURN(-EACCES);
-               }
-
-               if (ptlrpc_user_desc_do_idmap(req, pud))
-                       RETURN(-EACCES);
-
-                if (req->rq_auth_mapped_uid != pud->pud_uid) {
-                        CDEBUG(D_SEC, "remote client %s: auth/mapped uid %u/%u "
-                               "while client claims %u:%u/%u:%u\n",
-                               libcfs_nid2str(peernid), req->rq_auth_uid,
-                               req->rq_auth_mapped_uid,
-                               pud->pud_uid, pud->pud_gid,
-                               pud->pud_fsuid, pud->pud_fsgid);
-                        RETURN(-EACCES);
-                }
-        } else {
-                if (req->rq_auth_uid != pud->pud_uid) {
-                        CDEBUG(D_SEC, "local client %s: auth uid %u "
-                               "while client claims %u:%u/%u:%u\n",
-                               libcfs_nid2str(peernid), req->rq_auth_uid,
-                               pud->pud_uid, pud->pud_gid,
-                               pud->pud_fsuid, pud->pud_fsgid);
-                        RETURN(-EACCES);
-                }
-        }
+       if (!flvr_is_rootonly(req->rq_flvr.sf_rpc) &&
+           req->rq_auth_uid != pud->pud_uid) {
+               CDEBUG(D_SEC, "local client %s: auth uid %u "
+                      "while client claims %u:%u/%u:%u\n",
+                      libcfs_nid2str(peernid), req->rq_auth_uid,
+                      pud->pud_uid, pud->pud_gid,
+                      pud->pud_fsuid, pud->pud_fsgid);
+               RETURN(-EACCES);
+       }
 
-        if (is_identity_get_disabled(mdt->mdt_identity_cache)) {
-                if (remote) {
-                        CDEBUG(D_SEC, "remote client must run with identity_get "
-                               "enabled!\n");
-                        RETURN(-EACCES);
-                } else {
-                       ucred->uc_identity = NULL;
-                        perm = CFS_SETUID_PERM | CFS_SETGID_PERM |
-                               CFS_SETGRP_PERM;
-                }
-        } else {
-                struct md_identity *identity;
+       if (is_identity_get_disabled(mdt->mdt_identity_cache)) {
+               ucred->uc_identity = NULL;
+               perm = CFS_SETUID_PERM | CFS_SETGID_PERM | CFS_SETGRP_PERM;
+       } else {
+               struct md_identity *identity;
 
-                identity = mdt_identity_get(mdt->mdt_identity_cache,
-                                            pud->pud_uid);
-                if (IS_ERR(identity)) {
-                        if (unlikely(PTR_ERR(identity) == -EREMCHG &&
-                                     !remote)) {
+               identity = mdt_identity_get(mdt->mdt_identity_cache,
+                                           pud->pud_uid);
+               if (IS_ERR(identity)) {
+                       if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
                                ucred->uc_identity = NULL;
-                                perm = CFS_SETUID_PERM | CFS_SETGID_PERM |
-                                       CFS_SETGRP_PERM;
-                        } else {
-                                CDEBUG(D_SEC, "Deny access without identity: uid %u\n",
-                                       pud->pud_uid);
-                                RETURN(-EACCES);
-                        }
-                } else {
+                               perm = CFS_SETUID_PERM | CFS_SETGID_PERM |
+                                      CFS_SETGRP_PERM;
+                       } else {
+                               CDEBUG(D_SEC,
+                                      "Deny access without identity: uid %u\n",
+                                      pud->pud_uid);
+                               RETURN(-EACCES);
+                       }
+               } else {
                        ucred->uc_identity = identity;
                        perm = mdt_identity_get_perm(ucred->uc_identity,
-                                                    remote, peernid);
-                }
-        }
+                                                    peernid);
+               }
+       }
 
-        /* find out the setuid/setgid attempt */
-        setuid = (pud->pud_uid != pud->pud_fsuid);
+       /* find out the setuid/setgid attempt */
+       setuid = (pud->pud_uid != pud->pud_fsuid);
        setgid = ((pud->pud_gid != pud->pud_fsgid) ||
                  (ucred->uc_identity &&
                   (pud->pud_gid != ucred->uc_identity->mi_gid)));
@@ -245,10 +232,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
                GOTO(out, rc = -EACCES);
         }
 
-        /*
-         * NB: remote client not allowed to setgroups anyway.
-         */
-       if (!remote && perm & CFS_SETGRP_PERM) {
+       if (perm & CFS_SETGRP_PERM) {
                if (pud->pud_ngroups) {
                        /* setgroups for local client */
                        ucred->uc_ginfo = groups_alloc(pud->pud_ngroups);
@@ -279,14 +263,12 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
        mdt_root_squash(info, peernid);
 
        /* remove fs privilege for non-root user. */
-       if (ucred->uc_fsuid)
+       if (ucred->uc_fsuid && drop_fs_cap)
                ucred->uc_cap = pud->pud_cap & ~CFS_CAP_FS_MASK;
        else
                ucred->uc_cap = pud->pud_cap;
-       if (remote && !(perm & CFS_RMTOWN_PERM))
-               ucred->uc_cap &= ~(CFS_CAP_SYS_RESOURCE_MASK |
-                                  CFS_CAP_CHOWN_MASK);
        ucred->uc_valid = UCRED_NEW;
+       ucred_set_jobid(info, ucred);
 
        EXIT;
 
@@ -306,6 +288,38 @@ out:
        return rc;
 }
 
+/**
+ * Check whether the client is allowed to set supplementary group IDs.
+ *
+ * \param[in] info     pointer to the thread context
+ * \param[in] uc       pointer to the RPC user descriptor
+ *
+ * \retval             true if setting supplementary group IDs is allowed
+ * \retval             false for other cases
+ */
+bool allow_client_chgrp(struct mdt_thread_info *info, struct lu_ucred *uc)
+{
+       __u32 perm;
+
+       /* 1. If identity_upcall is disabled,
+        *    permit local client to do anything. */
+       if (is_identity_get_disabled(info->mti_mdt->mdt_identity_cache))
+               return true;
+
+       /* 2. If we failed to get the related identity, forbid any client
+        *    from setting supplementary group IDs. */
+       if (uc->uc_identity == NULL)
+               return false;
+
+       /* 3. Check the permission in the identities. */
+       perm = mdt_identity_get_perm(uc->uc_identity,
+                                    mdt_info_req(info)->rq_peer.nid);
+       if (perm & CFS_SETGRP_PERM)
+               return true;
+
+       return false;
+}
+
 int mdt_check_ucred(struct mdt_thread_info *info)
 {
         struct ptlrpc_request   *req = mdt_info_req(info);
@@ -315,7 +329,6 @@ int mdt_check_ucred(struct mdt_thread_info *info)
         struct md_identity      *identity = NULL;
         lnet_nid_t               peernid = req->rq_peer.nid;
         __u32                    perm = 0;
-        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
         int                      setuid;
         int                      setgid;
         int                      rc = 0;
@@ -326,62 +339,36 @@ int mdt_check_ucred(struct mdt_thread_info *info)
        if ((ucred->uc_valid == UCRED_OLD) || (ucred->uc_valid == UCRED_NEW))
                RETURN(0);
 
-        if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
-                RETURN(0);
-
-        /* sanity check: if we use strong authentication, we expect the
-         * uid which client claimed is true */
-        if (remote) {
-               if (!uid_valid(make_kuid(&init_user_ns, req->rq_auth_mapped_uid))) {
-                        CDEBUG(D_SEC, "remote user not mapped, deny access!\n");
-                        RETURN(-EACCES);
-                }
+       if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
+               RETURN(0);
 
-                if (ptlrpc_user_desc_do_idmap(req, pud))
-                        RETURN(-EACCES);
-
-                if (req->rq_auth_mapped_uid != pud->pud_uid) {
-                        CDEBUG(D_SEC, "remote client %s: auth/mapped uid %u/%u "
-                               "while client claims %u:%u/%u:%u\n",
-                               libcfs_nid2str(peernid), req->rq_auth_uid,
-                               req->rq_auth_mapped_uid,
-                               pud->pud_uid, pud->pud_gid,
-                               pud->pud_fsuid, pud->pud_fsgid);
-                        RETURN(-EACCES);
-                }
-        } else {
-                if (req->rq_auth_uid != pud->pud_uid) {
-                        CDEBUG(D_SEC, "local client %s: auth uid %u "
-                               "while client claims %u:%u/%u:%u\n",
-                               libcfs_nid2str(peernid), req->rq_auth_uid,
-                               pud->pud_uid, pud->pud_gid,
-                               pud->pud_fsuid, pud->pud_fsgid);
-                        RETURN(-EACCES);
-                }
-        }
+       /* sanity check: if we use strong authentication, we expect the
+        * uid which client claimed is true */
+       if (!flvr_is_rootonly(req->rq_flvr.sf_rpc) &&
+           req->rq_auth_uid != pud->pud_uid) {
+               CDEBUG(D_SEC, "local client %s: auth uid %u "
+                      "while client claims %u:%u/%u:%u\n",
+                      libcfs_nid2str(peernid), req->rq_auth_uid,
+                      pud->pud_uid, pud->pud_gid,
+                      pud->pud_fsuid, pud->pud_fsgid);
+               RETURN(-EACCES);
+       }
 
-        if (is_identity_get_disabled(mdt->mdt_identity_cache)) {
-                if (remote) {
-                        CDEBUG(D_SEC, "remote client must run with identity_get "
-                               "enabled!\n");
-                        RETURN(-EACCES);
-                }
-                RETURN(0);
-        }
+       if (is_identity_get_disabled(mdt->mdt_identity_cache))
+               RETURN(0);
 
-        identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid);
-        if (IS_ERR(identity)) {
-                if (unlikely(PTR_ERR(identity) == -EREMCHG &&
-                             !remote)) {
-                        RETURN(0);
-                } else {
-                        CDEBUG(D_SEC, "Deny access without identity: uid %u\n",
-                               pud->pud_uid);
-                        RETURN(-EACCES);
-               }
-        }
+       identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid);
+       if (IS_ERR(identity)) {
+               if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
+                       RETURN(0);
+               } else {
+                       CDEBUG(D_SEC, "Deny access without identity: uid %u\n",
+                              pud->pud_uid);
+                       RETURN(-EACCES);
+               }
+       }
 
-        perm = mdt_identity_get_perm(identity, remote, peernid);
+       perm = mdt_identity_get_perm(identity, peernid);
         /* find out the setuid/setgid attempt */
         setuid = (pud->pud_uid != pud->pud_fsuid);
         setgid = (pud->pud_gid != pud->pud_fsgid ||
@@ -410,29 +397,20 @@ out:
         return rc;
 }
 
-static int old_init_ucred(struct mdt_thread_info *info,
-                         struct mdt_body *body)
+static int old_init_ucred_common(struct mdt_thread_info *info,
+                                struct lu_nodemap *nodemap,
+                                bool drop_fs_cap)
 {
-       struct lu_ucred *uc = mdt_ucred(info);
-       struct mdt_device  *mdt = info->mti_mdt;
-       struct md_identity *identity = NULL;
-
-       ENTRY;
+       struct lu_ucred         *uc = mdt_ucred(info);
+       struct mdt_device       *mdt = info->mti_mdt;
+       struct md_identity      *identity = NULL;
 
-       LASSERT(uc != NULL);
-       uc->uc_valid = UCRED_INVALID;
-       uc->uc_o_uid = uc->uc_uid = body->uid;
-       uc->uc_o_gid = uc->uc_gid = body->gid;
-       uc->uc_o_fsuid = uc->uc_fsuid = body->fsuid;
-       uc->uc_o_fsgid = uc->uc_fsgid = body->fsgid;
-       uc->uc_suppgids[0] = body->suppgid;
-       uc->uc_suppgids[1] = -1;
-       uc->uc_ginfo = NULL;
        if (!is_identity_get_disabled(mdt->mdt_identity_cache)) {
                identity = mdt_identity_get(mdt->mdt_identity_cache,
                                            uc->uc_fsuid);
                if (IS_ERR(identity)) {
-                       if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
+                       if (unlikely(PTR_ERR(identity) == -EREMCHG ||
+                                    uc->uc_cap & CFS_CAP_FS_MASK)) {
                                identity = NULL;
                        } else {
                                CDEBUG(D_SEC, "Deny access without identity: "
@@ -443,90 +421,149 @@ static int old_init_ucred(struct mdt_thread_info *info,
        }
        uc->uc_identity = identity;
 
+       if (nodemap && uc->uc_o_uid == nodemap->nm_squash_uid) {
+               uc->uc_fsuid = nodemap->nm_squash_uid;
+               uc->uc_fsgid = nodemap->nm_squash_gid;
+               uc->uc_cap = 0;
+               uc->uc_suppgids[0] = -1;
+               uc->uc_suppgids[1] = -1;
+       }
+
        /* process root_squash here. */
        mdt_root_squash(info, mdt_info_req(info)->rq_peer.nid);
 
        /* remove fs privilege for non-root user. */
-       if (uc->uc_fsuid)
-               uc->uc_cap = body->capability & ~CFS_CAP_FS_MASK;
-       else
-               uc->uc_cap = body->capability;
+       if (uc->uc_fsuid && drop_fs_cap)
+               uc->uc_cap &= ~CFS_CAP_FS_MASK;
        uc->uc_valid = UCRED_OLD;
+       ucred_set_jobid(info, uc);
 
-       RETURN(0);
+       return 0;
 }
 
-static int old_init_ucred_reint(struct mdt_thread_info *info)
+static int old_init_ucred(struct mdt_thread_info *info,
+                         struct mdt_body *body, bool drop_fs_cap)
 {
        struct lu_ucred *uc = mdt_ucred(info);
-       struct mdt_device  *mdt = info->mti_mdt;
-       struct md_identity *identity = NULL;
+       struct lu_nodemap *nodemap;
+       int rc;
+       ENTRY;
+
+       nodemap = nodemap_get_from_exp(info->mti_exp);
+       if (IS_ERR(nodemap))
+               RETURN(PTR_ERR(nodemap));
 
+       body->mbo_uid = nodemap_map_id(nodemap, NODEMAP_UID,
+                                      NODEMAP_CLIENT_TO_FS, body->mbo_uid);
+       body->mbo_gid = nodemap_map_id(nodemap, NODEMAP_GID,
+                                      NODEMAP_CLIENT_TO_FS, body->mbo_gid);
+       body->mbo_fsuid = nodemap_map_id(nodemap, NODEMAP_UID,
+                                      NODEMAP_CLIENT_TO_FS, body->mbo_fsuid);
+       body->mbo_fsgid = nodemap_map_id(nodemap, NODEMAP_GID,
+                                      NODEMAP_CLIENT_TO_FS, body->mbo_fsgid);
+
+       LASSERT(uc != NULL);
+       uc->uc_valid = UCRED_INVALID;
+       uc->uc_o_uid = uc->uc_uid = body->mbo_uid;
+       uc->uc_o_gid = uc->uc_gid = body->mbo_gid;
+       uc->uc_o_fsuid = uc->uc_fsuid = body->mbo_fsuid;
+       uc->uc_o_fsgid = uc->uc_fsgid = body->mbo_fsgid;
+       uc->uc_suppgids[0] = body->mbo_suppgid;
+       uc->uc_suppgids[1] = -1;
+       uc->uc_ginfo = NULL;
+       uc->uc_cap = body->mbo_capability;
+
+       rc = old_init_ucred_common(info, nodemap, drop_fs_cap);
+       nodemap_putref(nodemap);
+
+       RETURN(rc);
+}
+
+static int old_init_ucred_reint(struct mdt_thread_info *info)
+{
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct lu_nodemap *nodemap;
+       int rc;
        ENTRY;
 
+       nodemap = nodemap_get_from_exp(info->mti_exp);
+       if (IS_ERR(nodemap))
+               RETURN(PTR_ERR(nodemap));
+
        LASSERT(uc != NULL);
+
+       uc->uc_fsuid = nodemap_map_id(nodemap, NODEMAP_UID,
+                                     NODEMAP_CLIENT_TO_FS, uc->uc_fsuid);
+       uc->uc_fsgid = nodemap_map_id(nodemap, NODEMAP_GID,
+                                     NODEMAP_CLIENT_TO_FS, uc->uc_fsgid);
+
        uc->uc_valid = UCRED_INVALID;
        uc->uc_o_uid = uc->uc_o_fsuid = uc->uc_uid = uc->uc_fsuid;
        uc->uc_o_gid = uc->uc_o_fsgid = uc->uc_gid = uc->uc_fsgid;
        uc->uc_ginfo = NULL;
-       if (!is_identity_get_disabled(mdt->mdt_identity_cache)) {
-               identity = mdt_identity_get(mdt->mdt_identity_cache,
-                                           uc->uc_fsuid);
-               if (IS_ERR(identity)) {
-                       if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
-                               identity = NULL;
-                       } else {
-                               CDEBUG(D_SEC, "Deny access without identity: "
-                                      "uid %u\n", uc->uc_fsuid);
-                               RETURN(-EACCES);
-                       }
-               }
-       }
-       uc->uc_identity = identity;
-
-       /* process root_squash here. */
-       mdt_root_squash(info, mdt_info_req(info)->rq_peer.nid);
 
-       /* remove fs privilege for non-root user. */
-       if (uc->uc_fsuid)
-               uc->uc_cap &= ~CFS_CAP_FS_MASK;
-       uc->uc_valid = UCRED_OLD;
+       rc = old_init_ucred_common(info, nodemap, true); /* drop_fs_cap=true */
+       nodemap_putref(nodemap);
 
-       RETURN(0);
+       RETURN(rc);
 }
 
-int mdt_init_ucred(struct mdt_thread_info *info, struct mdt_body *body)
+static inline int __mdt_init_ucred(struct mdt_thread_info *info,
+                                  struct mdt_body *body,
+                                  bool drop_fs_cap)
 {
-        struct ptlrpc_request *req = mdt_info_req(info);
-       struct lu_ucred       *uc  = mdt_ucred(info);
+       struct ptlrpc_request   *req = mdt_info_req(info);
+       struct lu_ucred         *uc  = mdt_ucred(info);
 
        LASSERT(uc != NULL);
        if ((uc->uc_valid == UCRED_OLD) || (uc->uc_valid == UCRED_NEW))
                return 0;
 
-        mdt_exit_ucred(info);
+       mdt_exit_ucred(info);
 
-        if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
-                return old_init_ucred(info, body);
-        else
-                return new_init_ucred(info, BODY_INIT, body);
+       if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
+               return old_init_ucred(info, body, drop_fs_cap);
+       else
+               return new_init_ucred(info, BODY_INIT, body, drop_fs_cap);
+}
+
+int mdt_init_ucred(struct mdt_thread_info *info, struct mdt_body *body)
+{
+       return __mdt_init_ucred(info, body, true);
+}
+
+/* LU-6528 when "no_subtree_check" is set for NFS export, nfsd_set_fh_dentry()
+ * doesn't set correct fsuid explicitly, but raises capability to allow
+ * exportfs_decode_fh() to reconnect disconnected dentry into dcache. So for
+ * lookup (i.e. intent_getattr), we should keep FS capability, otherwise it
+ * will fail permission check. */
+int mdt_init_ucred_intent_getattr(struct mdt_thread_info *info,
+                                 struct mdt_body *body)
+{
+       return __mdt_init_ucred(info, body, false);
 }
 
 int mdt_init_ucred_reint(struct mdt_thread_info *info)
 {
-        struct ptlrpc_request *req = mdt_info_req(info);
+       struct ptlrpc_request *req = mdt_info_req(info);
        struct lu_ucred       *uc  = mdt_ucred(info);
+       struct md_attr        *ma  = &info->mti_attr;
 
        LASSERT(uc != NULL);
        if ((uc->uc_valid == UCRED_OLD) || (uc->uc_valid == UCRED_NEW))
                return 0;
 
-        mdt_exit_ucred(info);
+       /* LU-5564: for normal close request, skip permission check */
+       if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE &&
+           !(ma->ma_attr_flags & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)))
+               uc->uc_cap |= CFS_CAP_FS_MASK;
 
-        if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
-                return old_init_ucred_reint(info);
-        else
-                return new_init_ucred(info, REC_INIT, NULL);
+       mdt_exit_ucred(info);
+
+       if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
+               return old_init_ucred_reint(info);
+       else
+               return new_init_ucred(info, REC_INIT, NULL, true);
 }
 
 /* copied from lov/lov_ea.c, just for debugging, will be removed later */
@@ -601,12 +638,13 @@ int mdt_fix_reply(struct mdt_thread_info *info)
         body = req_capsule_server_get(pill, &RMF_MDT_BODY);
         LASSERT(body != NULL);
 
-        if (body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE | OBD_MD_LINKNAME))
-                md_size = body->eadatasize;
-        else
-                md_size = 0;
+       if (body->mbo_valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE |
+                              OBD_MD_LINKNAME))
+               md_size = body->mbo_eadatasize;
+       else
+               md_size = 0;
 
-        acl_size = body->aclsize;
+       acl_size = body->mbo_aclsize;
 
         /* this replay - not send info to client */
        if (info->mti_spec.no_create) {
@@ -614,11 +652,8 @@ int mdt_fix_reply(struct mdt_thread_info *info)
                acl_size = 0;
        }
 
-        CDEBUG(D_INFO, "Shrink to md_size = %d cookie/acl_size = %d"
-                        " MDSCAPA = %llx, OSSCAPA = %llx\n",
-                        md_size, acl_size,
-                        (unsigned long long)(body->valid & OBD_MD_FLMDSCAPA),
-                        (unsigned long long)(body->valid & OBD_MD_FLOSSCAPA));
+       CDEBUG(D_INFO, "Shrink to md_size = %d cookie/acl_size = %d\n",
+              md_size, acl_size);
 /*
             &RMF_MDT_BODY,
             &RMF_MDT_MD,
@@ -631,15 +666,16 @@ int mdt_fix_reply(struct mdt_thread_info *info)
         /* MDT_MD buffer may be bigger than packed value, let's shrink all
          * buffers before growing it */
        if (info->mti_big_lmm_used) {
-                LASSERT(req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER));
+               /* big_lmm buffer may be used even without packing the result
+                * into reply, just for internal server needs */
+               if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER))
+                       md_packed = req_capsule_get_size(pill, &RMF_MDT_MD,
+                                                        RCL_SERVER);
 
-                /* free big lmm if md_size is not needed */
-               if (md_size == 0) {
+               /* free big lmm if md_size is not needed */
+               if (md_size == 0 || md_packed == 0) {
                        info->mti_big_lmm_used = 0;
                } else {
-                       md_packed = req_capsule_get_size(pill, &RMF_MDT_MD,
-                                                        RCL_SERVER);
-                       LASSERT(md_packed > 0);
                        /* buffer must be allocated separately */
                        LASSERT(info->mti_attr.ma_lmm !=
                                req_capsule_server_get(pill, &RMF_MDT_MD));
@@ -656,12 +692,12 @@ int mdt_fix_reply(struct mdt_thread_info *info)
                                    acl_size, RCL_SERVER);
 
         if (req_capsule_has_field(pill, &RMF_CAPA1, RCL_SERVER) &&
-            !(body->valid & OBD_MD_FLMDSCAPA))
-                req_capsule_shrink(pill, &RMF_CAPA1, 0, RCL_SERVER);
+           !(body->mbo_valid & OBD_MD_FLMDSCAPA))
+               req_capsule_shrink(pill, &RMF_CAPA1, 0, RCL_SERVER);
 
-        if (req_capsule_has_field(pill, &RMF_CAPA2, RCL_SERVER) &&
-            !(body->valid & OBD_MD_FLOSSCAPA))
-                req_capsule_shrink(pill, &RMF_CAPA2, 0, RCL_SERVER);
+       if (req_capsule_has_field(pill, &RMF_CAPA2, RCL_SERVER) &&
+           !(body->mbo_valid & OBD_MD_FLOSSCAPA))
+               req_capsule_shrink(pill, &RMF_CAPA2, 0, RCL_SERVER);
 
         /*
          * Some more fields should be shrunk if needed.
@@ -680,7 +716,7 @@ int mdt_fix_reply(struct mdt_thread_info *info)
                         /* we can't answer with proper LOV EA, drop flags,
                          * the rc is also returned so this request is
                          * considered as failed */
-                        body->valid &= ~(OBD_MD_FLDIREA | OBD_MD_FLEASIZE);
+                       body->mbo_valid &= ~(OBD_MD_FLDIREA | OBD_MD_FLEASIZE);
                         /* don't return transno along with error */
                         lustre_msg_set_transno(pill->rc_req->rq_repmsg, 0);
                 } else {
@@ -717,7 +753,6 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
 {
         struct mdt_body       *repbody;
         const struct lu_attr *la = &ma->ma_attr;
-        int rc;
         ENTRY;
 
         repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
@@ -730,22 +765,7 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
                CERROR("No need in LOV EA upon unlink\n");
                dump_stack();
         }
-       repbody->eadatasize = 0;
-
-       if (info->mti_mdt->mdt_lut.lut_oss_capa &&
-           exp_connect_flags(info->mti_exp) & OBD_CONNECT_OSS_CAPA &&
-           repbody->valid & OBD_MD_FLEASIZE) {
-                struct lustre_capa *capa;
-
-                capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2);
-                LASSERT(capa);
-                capa->lc_opc = CAPA_OPC_OSS_DESTROY;
-                rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0);
-                if (rc)
-                        RETURN(rc);
-
-                repbody->valid |= OBD_MD_FLOSSCAPA;
-        }
+       repbody->mbo_eadatasize = 0;
 
         RETURN(0);
 }
@@ -797,47 +817,6 @@ static __u64 mdt_attr_valid_xlate(__u64 in, struct mdt_reint_record *rr,
        return out;
 }
 
-void mdt_set_capainfo(struct mdt_thread_info *info, int offset,
-                     const struct lu_fid *fid, struct lustre_capa *capa)
-{
-       struct lu_capainfo *lci;
-
-       LASSERT(offset >= 0 && offset < LU_CAPAINFO_MAX);
-       if (!info->mti_mdt->mdt_lut.lut_mds_capa ||
-           !(exp_connect_flags(info->mti_exp) & OBD_CONNECT_MDS_CAPA))
-               return;
-
-       lci = lu_capainfo_get(info->mti_env);
-       LASSERT(lci);
-       lci->lci_fid[offset]  = *fid;
-       lci->lci_capa[offset] = capa;
-}
-
-#ifdef DEBUG_CAPA
-void mdt_dump_capainfo(struct mdt_thread_info *info)
-{
-       struct lu_capainfo *lci = lu_capainfo_get(info->mti_env);
-       int i;
-
-       if (lci == NULL)
-               return;
-
-       for (i = 0; i < LU_CAPAINFO_MAX; i++) {
-               if (lci->lci_capa[i] == NULL) {
-                       CERROR("no capa for index %d "DFID"\n",
-                              i, PFID(&lci->lci_fid[i]));
-                       continue;
-               }
-               if (lci->lci_capa[i] == BYPASS_CAPA) {
-                       CERROR("bypass for index %d "DFID"\n",
-                              i, PFID(&lci->lci_fid[i]));
-                       continue;
-               }
-               DEBUG_CAPA(D_ERROR, lci->lci_capa[i], "index %d", i);
-       }
-}
-#endif /* DEBUG_CAPA */
-
 /* unpacking */
 
 int mdt_name_unpack(struct req_capsule *pill,
@@ -869,12 +848,13 @@ int mdt_name_unpack(struct req_capsule *pill,
 
 static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
 {
-       struct lu_ucred         *uc  = mdt_ucred(info);
-        struct md_attr          *ma = &info->mti_attr;
-        struct lu_attr          *la = &ma->ma_attr;
-        struct req_capsule      *pill = info->mti_pill;
-        struct mdt_reint_record *rr = &info->mti_rr;
-        struct mdt_rec_setattr  *rec;
+       struct lu_ucred         *uc = mdt_ucred(info);
+       struct md_attr          *ma = &info->mti_attr;
+       struct lu_attr          *la = &ma->ma_attr;
+       struct req_capsule      *pill = info->mti_pill;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct mdt_rec_setattr  *rec;
+       struct lu_nodemap       *nodemap;
         ENTRY;
 
         CLASSERT(sizeof(struct mdt_rec_setattr)== sizeof(struct mdt_rec_reint));
@@ -891,30 +871,25 @@ static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
 
         rr->rr_fid1 = &rec->sa_fid;
        la->la_valid = mdt_attr_valid_xlate(rec->sa_valid, rr, ma);
-       /*  If MDS_ATTR_xTIME is set without MDS_ATTR_xTIME_SET and
-        *  the client does not have OBD_CONNECT_FULL20, convert it
-        *  to LA_xTIME. LU-3036 */
-       if (!(exp_connect_flags(info->mti_exp) & OBD_CONNECT_FULL20)) {
-               if (!(rec->sa_valid & MDS_ATTR_ATIME_SET) &&
-                    (rec->sa_valid & MDS_ATTR_ATIME))
-                       la->la_valid |= LA_ATIME;
-               if (!(rec->sa_valid & MDS_ATTR_MTIME_SET) &&
-                    (rec->sa_valid & MDS_ATTR_MTIME))
-                       la->la_valid |= LA_MTIME;
-               if (!(rec->sa_valid & MDS_ATTR_CTIME_SET) &&
-                    (rec->sa_valid & MDS_ATTR_CTIME))
-                       la->la_valid |= LA_CTIME;
-       }
-        la->la_mode  = rec->sa_mode;
-        la->la_flags = rec->sa_attr_flags;
-        la->la_uid   = rec->sa_uid;
-        la->la_gid   = rec->sa_gid;
-        la->la_size  = rec->sa_size;
-        la->la_blocks = rec->sa_blocks;
-        la->la_ctime = rec->sa_ctime;
-        la->la_atime = rec->sa_atime;
-        la->la_mtime = rec->sa_mtime;
-        ma->ma_valid = MA_INODE;
+       la->la_mode  = rec->sa_mode;
+       la->la_flags = rec->sa_attr_flags;
+
+       nodemap = nodemap_get_from_exp(info->mti_exp);
+       if (IS_ERR(nodemap))
+               RETURN(PTR_ERR(nodemap));
+
+       la->la_uid   = nodemap_map_id(nodemap, NODEMAP_UID,
+                                     NODEMAP_CLIENT_TO_FS, rec->sa_uid);
+       la->la_gid   = nodemap_map_id(nodemap, NODEMAP_GID,
+                                     NODEMAP_CLIENT_TO_FS, rec->sa_gid);
+       nodemap_putref(nodemap);
+
+       la->la_size  = rec->sa_size;
+       la->la_blocks = rec->sa_blocks;
+       la->la_ctime = rec->sa_ctime;
+       la->la_atime = rec->sa_atime;
+       la->la_mtime = rec->sa_mtime;
+       ma->ma_valid = MA_INODE;
 
        if (rec->sa_bias & MDS_DATA_MODIFIED)
                ma->ma_attr_flags |= MDS_DATA_MODIFIED;
@@ -926,24 +901,31 @@ static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
        else
                ma->ma_attr_flags &= ~MDS_HSM_RELEASE;
 
-       if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-               mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                req_capsule_client_get(pill, &RMF_CAPA1));
+       if (rec->sa_bias & MDS_CLOSE_LAYOUT_SWAP)
+               ma->ma_attr_flags |= MDS_CLOSE_LAYOUT_SWAP;
+       else
+               ma->ma_attr_flags &= ~MDS_CLOSE_LAYOUT_SWAP;
 
        RETURN(0);
 }
 
-static int mdt_ioepoch_unpack(struct mdt_thread_info *info)
+static int mdt_close_handle_unpack(struct mdt_thread_info *info)
 {
-        struct req_capsule *pill = info->mti_pill;
-        ENTRY;
+       struct req_capsule *pill = info->mti_pill;
+       struct mdt_ioepoch *ioepoch;
+       ENTRY;
 
-        if (req_capsule_get_size(pill, &RMF_MDT_EPOCH, RCL_CLIENT))
-                info->mti_ioepoch =
-                        req_capsule_client_get(pill, &RMF_MDT_EPOCH);
-        else
-                info->mti_ioepoch = NULL;
-        RETURN(info->mti_ioepoch == NULL ? -EFAULT : 0);
+       if (req_capsule_get_size(pill, &RMF_MDT_EPOCH, RCL_CLIENT))
+               ioepoch = req_capsule_client_get(pill, &RMF_MDT_EPOCH);
+       else
+               ioepoch = NULL;
+
+       if (ioepoch == NULL)
+               RETURN(-EPROTO);
+
+       info->mti_close_handle = ioepoch->mio_handle;
+
+       RETURN(0);
 }
 
 static inline int mdt_dlmreq_unpack(struct mdt_thread_info *info) {
@@ -970,9 +952,6 @@ static int mdt_setattr_unpack(struct mdt_thread_info *info)
         if (rc)
                 RETURN(rc);
 
-        /* Epoch may be absent */
-        mdt_ioepoch_unpack(info);
-
         if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
                 rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA);
                 rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
@@ -999,16 +978,16 @@ static int mdt_setattr_unpack(struct mdt_thread_info *info)
        RETURN(rc);
 }
 
-static int mdt_hsm_release_unpack(struct mdt_thread_info *info)
+static int mdt_intent_close_unpack(struct mdt_thread_info *info)
 {
        struct md_attr          *ma = &info->mti_attr;
-       struct req_capsule      *pill = info->mti_pill;
+       struct req_capsule      *pill = info->mti_pill;
        ENTRY;
 
-       if (!(ma->ma_attr_flags & MDS_HSM_RELEASE))
+       if (!(ma->ma_attr_flags & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)))
                RETURN(0);
 
-       req_capsule_extend(pill, &RQF_MDS_RELEASE_CLOSE);
+       req_capsule_extend(pill, &RQF_MDS_INTENT_CLOSE);
 
        if (!(req_capsule_has_field(pill, &RMF_CLOSE_DATA, RCL_CLIENT) &&
            req_capsule_field_present(pill, &RMF_CLOSE_DATA, RCL_CLIENT)))
@@ -1019,10 +998,10 @@ static int mdt_hsm_release_unpack(struct mdt_thread_info *info)
 
 int mdt_close_unpack(struct mdt_thread_info *info)
 {
-        int rc;
-        ENTRY;
+       int rc;
+       ENTRY;
 
-        rc = mdt_ioepoch_unpack(info);
+       rc = mdt_close_handle_unpack(info);
         if (rc)
                 RETURN(rc);
 
@@ -1030,7 +1009,7 @@ int mdt_close_unpack(struct mdt_thread_info *info)
        if (rc)
                RETURN(rc);
 
-       rc = mdt_hsm_release_unpack(info);
+       rc = mdt_intent_close_unpack(info);
        if (rc)
                RETURN(rc);
 
@@ -1075,11 +1054,6 @@ static int mdt_create_unpack(struct mdt_thread_info *info)
         memset(&sp->u, 0, sizeof(sp->u));
         sp->sp_cr_flags = get_mrc_cr_flags(rec);
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        mdt_set_capainfo(info, 1, rr->rr_fid2, BYPASS_CAPA);
-
        rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
        if (rc < 0)
                RETURN(rc);
@@ -1095,7 +1069,7 @@ static int mdt_create_unpack(struct mdt_thread_info *info)
                 if (tgt == NULL)
                         RETURN(-EFAULT);
         } else {
-                req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_RMT_ACL);
+               req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_ACL);
                if (S_ISDIR(attr->la_mode) &&
                    req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT) > 0) {
                        sp->u.sp_ea.eadata =
@@ -1141,13 +1115,6 @@ static int mdt_link_unpack(struct mdt_thread_info *info)
         attr->la_mtime = rec->lk_time;
         attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME;
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
-                mdt_set_capainfo(info, 1, rr->rr_fid2,
-                                 req_capsule_client_get(pill, &RMF_CAPA2));
-
        rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
        if (rc < 0)
                RETURN(rc);
@@ -1189,10 +1156,6 @@ static int mdt_unlink_unpack(struct mdt_thread_info *info)
         attr->la_mode  = rec->ul_mode;
         attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-
        rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
        if (rc < 0)
                RETURN(rc);
@@ -1247,13 +1210,6 @@ static int mdt_rename_unpack(struct mdt_thread_info *info)
         attr->la_mode = rec->rn_mode;
         attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
-                mdt_set_capainfo(info, 1, rr->rr_fid2,
-                                 req_capsule_client_get(pill, &RMF_CAPA2));
-
        rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
        if (rc < 0)
                RETURN(rc);
@@ -1267,6 +1223,14 @@ static int mdt_rename_unpack(struct mdt_thread_info *info)
         else
                 ma->ma_attr_flags &= ~MDS_VTX_BYPASS;
 
+       if (rec->rn_bias & MDS_RENAME_MIGRATE) {
+               req_capsule_extend(info->mti_pill, &RQF_MDS_REINT_MIGRATE);
+               rc = mdt_close_handle_unpack(info);
+               if (rc < 0)
+                       RETURN(rc);
+               info->mti_spec.sp_migrate_close = 1;
+       }
+
         info->mti_spec.no_create = !!req_is_replay(mdt_info_req(info));
 
 
@@ -1340,28 +1304,9 @@ static int mdt_open_unpack(struct mdt_thread_info *info)
         /* Do not trigger ASSERTION if client miss to set such flags. */
         if (unlikely(info->mti_spec.sp_cr_flags == 0))
                 RETURN(-EPROTO);
-        info->mti_replayepoch = rec->cr_ioepoch;
 
         info->mti_cross_ref = !!(rec->cr_bias & MDS_CROSS_REF);
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        if (req_is_replay(req) &&
-            req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT)) {
-#if 0
-                mdt_set_capainfo(info, 1, rr->rr_fid2,
-                                 req_capsule_client_get(pill, &RMF_CAPA2));
-#else
-                /*
-                 * FIXME: capa in replay open request might have expired,
-                 * bypass capa check. Security hole?
-                 */
-                mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA);
-                mdt_set_capainfo(info, 1, rr->rr_fid2, BYPASS_CAPA);
-#endif
-        }
-
        mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, MNF_FIX_ANON);
 
         if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
@@ -1421,12 +1366,6 @@ static int mdt_setxattr_unpack(struct mdt_thread_info *info)
         attr->la_size = rec->sx_size;
         attr->la_flags = rec->sx_flags;
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        else
-                mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA);
-
        rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
        if (rc < 0)
                RETURN(rc);