Whamcloud - gitweb
LU-12212 mdt: fix SECCTX reply buffer handling
[fs/lustre-release.git] / lustre / mdt / mdt_lib.c
index f5736a1..a7d5130 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2011, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 
 #define DEBUG_SUBSYSTEM S_MDS
 
+#include <linux/user_namespace.h>
+#ifdef HAVE_UIDGID_HEADER
+# include <linux/uidgid.h>
+#endif
 #include "mdt_internal.h"
-#include <lnet/lib-lnet.h>
-
+#include <uapi/linux/lnet/nidstr.h>
+#include <lustre_nodemap.h>
 
 typedef enum ucred_init_type {
         NONE_INIT       = 0,
@@ -58,28 +58,34 @@ typedef enum ucred_init_type {
         REC_INIT        = 2
 } ucred_init_type_t;
 
+static __u64 get_mrc_cr_flags(struct mdt_rec_create *mrc)
+{
+       return (__u64)(mrc->cr_flags_l) | ((__u64)mrc->cr_flags_h << 32);
+}
+
 void mdt_exit_ucred(struct mdt_thread_info *info)
 {
-        struct md_ucred   *uc  = mdt_ucred(info);
-        struct mdt_device *mdt = info->mti_mdt;
-
-        if (uc->mu_valid != UCRED_INIT) {
-                uc->mu_suppgids[0] = uc->mu_suppgids[1] = -1;
-                if (uc->mu_ginfo) {
-                        cfs_put_group_info(uc->mu_ginfo);
-                        uc->mu_ginfo = NULL;
-                }
-                if (uc->mu_identity) {
-                        mdt_identity_put(mdt->mdt_identity_cache,
-                                         uc->mu_identity);
-                        uc->mu_identity = NULL;
-                }
-                uc->mu_valid = UCRED_INIT;
-        }
+       struct lu_ucred   *uc  = mdt_ucred(info);
+       struct mdt_device *mdt = info->mti_mdt;
+
+       LASSERT(uc != NULL);
+       if (uc->uc_valid != UCRED_INIT) {
+               uc->uc_suppgids[0] = uc->uc_suppgids[1] = -1;
+               if (uc->uc_ginfo) {
+                       put_group_info(uc->uc_ginfo);
+                       uc->uc_ginfo = NULL;
+               }
+               if (uc->uc_identity) {
+                       mdt_identity_put(mdt->mdt_identity_cache,
+                                        uc->uc_identity);
+                       uc->uc_identity = NULL;
+               }
+               uc->uc_valid = UCRED_INIT;
+       }
 }
 
 static int match_nosquash_list(struct rw_semaphore *sem,
-                              cfs_list_t *nidlist,
+                              struct list_head *nidlist,
                               lnet_nid_t peernid)
 {
        int rc;
@@ -93,213 +99,290 @@ static int match_nosquash_list(struct rw_semaphore *sem,
 /* root_squash for inter-MDS operations */
 static int mdt_root_squash(struct mdt_thread_info *info, lnet_nid_t peernid)
 {
-        struct md_ucred *ucred = mdt_ucred(info);
-        ENTRY;
+       struct lu_ucred *ucred = mdt_ucred(info);
+       struct root_squash_info *squash = &info->mti_mdt->mdt_squash;
+       ENTRY;
 
-        if (!info->mti_mdt->mdt_squash_uid || ucred->mu_fsuid)
-                RETURN(0);
+       LASSERT(ucred != NULL);
+       if (!squash->rsi_uid || ucred->uc_fsuid)
+               RETURN(0);
 
-        if (match_nosquash_list(&info->mti_mdt->mdt_squash_sem,
-                                &info->mti_mdt->mdt_nosquash_nids,
-                                peernid)) {
-                CDEBUG(D_OTHER, "%s is in nosquash_nids list\n",
-                       libcfs_nid2str(peernid));
-                RETURN(0);
-        }
+       if (match_nosquash_list(&squash->rsi_sem,
+                               &squash->rsi_nosquash_nids,
+                               peernid)) {
+               CDEBUG(D_OTHER, "%s is in nosquash_nids list\n",
+                      libcfs_nid2str(peernid));
+               RETURN(0);
+       }
 
-        CDEBUG(D_OTHER, "squash req from %s, (%d:%d/%x)=>(%d:%d/%x)\n",
-               libcfs_nid2str(peernid),
-               ucred->mu_fsuid, ucred->mu_fsgid, ucred->mu_cap,
-               info->mti_mdt->mdt_squash_uid, info->mti_mdt->mdt_squash_gid,
-               0);
+       CDEBUG(D_OTHER, "squash req from %s, (%d:%d/%x)=>(%d:%d/%x)\n",
+              libcfs_nid2str(peernid),
+              ucred->uc_fsuid, ucred->uc_fsgid, ucred->uc_cap,
+              squash->rsi_uid, squash->rsi_gid, 0);
 
-        ucred->mu_fsuid = info->mti_mdt->mdt_squash_uid;
-        ucred->mu_fsgid = info->mti_mdt->mdt_squash_gid;
-        ucred->mu_cap = 0;
-        ucred->mu_suppgids[0] = -1;
-        ucred->mu_suppgids[1] = -1;
+       ucred->uc_fsuid = squash->rsi_uid;
+       ucred->uc_fsgid = squash->rsi_gid;
+       ucred->uc_cap = 0;
+       ucred->uc_suppgids[0] = -1;
+       ucred->uc_suppgids[1] = -1;
 
-        RETURN(0);
+       RETURN(0);
 }
 
-static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
-                          void *buf)
+static void ucred_set_jobid(struct mdt_thread_info *info, struct lu_ucred *uc)
 {
-        struct ptlrpc_request   *req = mdt_info_req(info);
-        struct mdt_device       *mdt = info->mti_mdt;
-        struct ptlrpc_user_desc *pud = req->rq_user_desc;
-        struct md_ucred         *ucred = mdt_ucred(info);
-        lnet_nid_t               peernid = req->rq_peer.nid;
-        __u32                    perm = 0;
-        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
-        int                      setuid;
-        int                      setgid;
-        int                      rc = 0;
+       struct ptlrpc_request   *req = mdt_info_req(info);
+       const char              *jobid = mdt_req_get_jobid(req);
+
+       /* set jobid if specified. */
+       if (jobid)
+               strlcpy(uc->uc_jobid, jobid, sizeof(uc->uc_jobid));
+       else
+               uc->uc_jobid[0] = '\0';
+}
 
-        ENTRY;
+static void ucred_set_nid(struct mdt_thread_info *info, struct lu_ucred *uc)
+{
+       if (info && info->mti_exp && info->mti_exp->exp_connection)
+               uc->uc_nid = info->mti_exp->exp_connection->c_peer.nid;
+       else
+               uc->uc_nid = LNET_NID_ANY;
+}
 
-        LASSERT(req->rq_auth_gss);
-        LASSERT(!req->rq_auth_usr_mdt);
-        LASSERT(req->rq_user_desc);
+static void ucred_set_audit_enabled(struct mdt_thread_info *info,
+                                   struct lu_ucred *uc)
+{
+       struct lu_nodemap *nodemap = NULL;
+       bool audit = true;
+
+       if (info && info->mti_exp) {
+               nodemap = nodemap_get_from_exp(info->mti_exp);
+               if (nodemap && !IS_ERR(nodemap)) {
+                       audit = nodemap->nmf_enable_audit;
+                       nodemap_putref(nodemap);
+               }
+       }
 
-        ucred->mu_valid = UCRED_INVALID;
+       uc->uc_enable_audit = audit;
+}
 
-        ucred->mu_o_uid   = pud->pud_uid;
-        ucred->mu_o_gid   = pud->pud_gid;
-        ucred->mu_o_fsuid = pud->pud_fsuid;
-        ucred->mu_o_fsgid = pud->pud_fsgid;
+static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
+                         void *buf, bool drop_fs_cap)
+{
+       struct ptlrpc_request *req = mdt_info_req(info);
+       struct mdt_device *mdt = info->mti_mdt;
+       struct ptlrpc_user_desc *pud = req->rq_user_desc;
+       struct lu_ucred *ucred = mdt_ucred(info);
+       struct lu_nodemap *nodemap;
+       lnet_nid_t peernid = req->rq_peer.nid;
+       __u32 perm = 0;
+       int setuid;
+       int setgid;
+       bool is_nm_gid_squashed = false;
+       int rc = 0;
 
-        if (type == BODY_INIT) {
-                struct mdt_body *body = (struct mdt_body *)buf;
+       ENTRY;
 
-                ucred->mu_suppgids[0] = body->suppgid;
-                ucred->mu_suppgids[1] = -1;
-        }
+       LASSERT(req->rq_auth_gss);
+       LASSERT(!req->rq_auth_usr_mdt);
+       LASSERT(req->rq_user_desc);
+       LASSERT(ucred != NULL);
+
+       ucred->uc_valid = UCRED_INVALID;
+
+       nodemap = nodemap_get_from_exp(info->mti_exp);
+       if (IS_ERR(nodemap))
+               RETURN(PTR_ERR(nodemap));
+
+       pud->pud_uid = nodemap_map_id(nodemap, NODEMAP_UID,
+                                      NODEMAP_CLIENT_TO_FS, pud->pud_uid);
+       pud->pud_gid = nodemap_map_id(nodemap, NODEMAP_GID,
+                                      NODEMAP_CLIENT_TO_FS, pud->pud_gid);
+       pud->pud_fsuid = nodemap_map_id(nodemap, NODEMAP_UID,
+                                      NODEMAP_CLIENT_TO_FS, pud->pud_fsuid);
+       pud->pud_fsgid = nodemap_map_id(nodemap, NODEMAP_GID,
+                                      NODEMAP_CLIENT_TO_FS, pud->pud_fsgid);
+
+       ucred->uc_o_uid = pud->pud_uid;
+       ucred->uc_o_gid = pud->pud_gid;
+       ucred->uc_o_fsuid = pud->pud_fsuid;
+       ucred->uc_o_fsgid = pud->pud_fsgid;
+
+       if (nodemap && ucred->uc_o_uid == nodemap->nm_squash_uid) {
+               /* deny access before we get identity ref */
+               if (nodemap->nmf_deny_unknown) {
+                       nodemap_putref(nodemap);
+                       RETURN(-EACCES);
+               }
 
-        /* sanity check: we expect the uid which client claimed is true */
-        if (remote) {
-                if (req->rq_auth_mapped_uid == INVALID_UID) {
-                        CDEBUG(D_SEC, "remote user not mapped, deny access!\n");
-                        RETURN(-EACCES);
-                }
+               ucred->uc_fsuid = nodemap->nm_squash_uid;
+               ucred->uc_fsgid = nodemap->nm_squash_gid;
+               ucred->uc_cap = 0;
+               ucred->uc_suppgids[0] = -1;
+               ucred->uc_suppgids[1] = -1;
+       }
 
-                if (ptlrpc_user_desc_do_idmap(req, pud))
-                        RETURN(-EACCES);
-
-                if (req->rq_auth_mapped_uid != pud->pud_uid) {
-                        CDEBUG(D_SEC, "remote client %s: auth/mapped uid %u/%u "
-                               "while client claims %u:%u/%u:%u\n",
-                               libcfs_nid2str(peernid), req->rq_auth_uid,
-                               req->rq_auth_mapped_uid,
-                               pud->pud_uid, pud->pud_gid,
-                               pud->pud_fsuid, pud->pud_fsgid);
-                        RETURN(-EACCES);
-                }
-        } else {
-                if (req->rq_auth_uid != pud->pud_uid) {
-                        CDEBUG(D_SEC, "local client %s: auth uid %u "
-                               "while client claims %u:%u/%u:%u\n",
-                               libcfs_nid2str(peernid), req->rq_auth_uid,
-                               pud->pud_uid, pud->pud_gid,
-                               pud->pud_fsuid, pud->pud_fsgid);
-                        RETURN(-EACCES);
-                }
-        }
+       if (nodemap && ucred->uc_o_gid == nodemap->nm_squash_gid)
+               is_nm_gid_squashed = true;
 
-        if (is_identity_get_disabled(mdt->mdt_identity_cache)) {
-                if (remote) {
-                        CDEBUG(D_SEC, "remote client must run with identity_get "
-                               "enabled!\n");
-                        RETURN(-EACCES);
-                } else {
-                        ucred->mu_identity = NULL;
-                        perm = CFS_SETUID_PERM | CFS_SETGID_PERM |
-                               CFS_SETGRP_PERM;
-                }
-        } else {
-                struct md_identity *identity;
-
-                identity = mdt_identity_get(mdt->mdt_identity_cache,
-                                            pud->pud_uid);
-                if (IS_ERR(identity)) {
-                        if (unlikely(PTR_ERR(identity) == -EREMCHG &&
-                                     !remote)) {
-                                ucred->mu_identity = NULL;
-                                perm = CFS_SETUID_PERM | CFS_SETGID_PERM |
-                                       CFS_SETGRP_PERM;
-                        } else {
-                                CDEBUG(D_SEC, "Deny access without identity: uid %u\n",
-                                       pud->pud_uid);
-                                RETURN(-EACCES);
-                        }
-                } else {
-                        ucred->mu_identity = identity;
-                        perm = mdt_identity_get_perm(ucred->mu_identity,
-                                                     remote, peernid);
-                }
-        }
+       nodemap_putref(nodemap);
 
-        /* find out the setuid/setgid attempt */
-        setuid = (pud->pud_uid != pud->pud_fsuid);
-        setgid = ((pud->pud_gid != pud->pud_fsgid) ||
-                  (ucred->mu_identity &&
-                  (pud->pud_gid != ucred->mu_identity->mi_gid)));
+       if (type == BODY_INIT) {
+               struct mdt_body *body = (struct mdt_body *)buf;
 
-        /* check permission of setuid */
-        if (setuid && !(perm & CFS_SETUID_PERM)) {
-                CDEBUG(D_SEC, "mdt blocked setuid attempt (%u -> %u) from %s\n",
-                       pud->pud_uid, pud->pud_fsuid, libcfs_nid2str(peernid));
-                GOTO(out, rc = -EACCES);
-        }
+               ucred->uc_suppgids[0] = body->mbo_suppgid;
+               ucred->uc_suppgids[1] = -1;
+       }
 
-        /* check permission of setgid */
-        if (setgid && !(perm & CFS_SETGID_PERM)) {
-                CDEBUG(D_SEC, "mdt blocked setgid attempt (%u:%u/%u:%u -> %u) "
-                       "from %s\n", pud->pud_uid, pud->pud_gid,
-                       pud->pud_fsuid, pud->pud_fsgid,
-                       ucred->mu_identity->mi_gid, libcfs_nid2str(peernid));
-                GOTO(out, rc = -EACCES);
-        }
+       if (!flvr_is_rootonly(req->rq_flvr.sf_rpc) &&
+           req->rq_auth_uid != pud->pud_uid) {
+               CDEBUG(D_SEC, "local client %s: auth uid %u "
+                      "while client claims %u:%u/%u:%u\n",
+                      libcfs_nid2str(peernid), req->rq_auth_uid,
+                      pud->pud_uid, pud->pud_gid,
+                      pud->pud_fsuid, pud->pud_fsgid);
+               RETURN(-EACCES);
+       }
 
-        /*
-         * NB: remote client not allowed to setgroups anyway.
-         */
-        if (!remote && perm & CFS_SETGRP_PERM) {
-                if (pud->pud_ngroups) {
-                        /* setgroups for local client */
-                        ucred->mu_ginfo = cfs_groups_alloc(pud->pud_ngroups);
-                        if (!ucred->mu_ginfo) {
-                                CERROR("failed to alloc %d groups\n",
-                                       pud->pud_ngroups);
-                                GOTO(out, rc = -ENOMEM);
-                        }
-
-                        lustre_groups_from_list(ucred->mu_ginfo,
-                                                pud->pud_groups);
-                        lustre_groups_sort(ucred->mu_ginfo);
-                } else {
-                        ucred->mu_ginfo = NULL;
-                }
-        } else {
-                ucred->mu_suppgids[0] = -1;
-                ucred->mu_suppgids[1] = -1;
-                ucred->mu_ginfo = NULL;
-        }
+       if (is_identity_get_disabled(mdt->mdt_identity_cache)) {
+               ucred->uc_identity = NULL;
+               perm = CFS_SETUID_PERM | CFS_SETGID_PERM | CFS_SETGRP_PERM;
+       } else {
+               struct md_identity *identity;
+
+               identity = mdt_identity_get(mdt->mdt_identity_cache,
+                                           pud->pud_uid);
+               if (IS_ERR(identity)) {
+                       if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
+                               ucred->uc_identity = NULL;
+                               perm = CFS_SETUID_PERM | CFS_SETGID_PERM |
+                                      CFS_SETGRP_PERM;
+                       } else {
+                               CDEBUG(D_SEC,
+                                      "Deny access without identity: uid %u\n",
+                                      pud->pud_uid);
+                               RETURN(-EACCES);
+                       }
+               } else {
+                       ucred->uc_identity = identity;
+                       perm = mdt_identity_get_perm(ucred->uc_identity,
+                                                    peernid);
+               }
+       }
 
-        ucred->mu_uid   = pud->pud_uid;
-        ucred->mu_gid   = pud->pud_gid;
-        ucred->mu_fsuid = pud->pud_fsuid;
-        ucred->mu_fsgid = pud->pud_fsgid;
+       /* find out the setuid/setgid attempt */
+       setuid = (pud->pud_uid != pud->pud_fsuid);
+       setgid = ((pud->pud_gid != pud->pud_fsgid) ||
+                 (ucred->uc_identity &&
+                  (pud->pud_gid != ucred->uc_identity->mi_gid)));
+
+       /* check permission of setuid */
+       if (setuid && !(perm & CFS_SETUID_PERM)) {
+               CDEBUG(D_SEC, "mdt blocked setuid attempt (%u -> %u) from %s\n",
+                      pud->pud_uid, pud->pud_fsuid, libcfs_nid2str(peernid));
+               GOTO(out, rc = -EACCES);
+       }
+
+       /* check permission of setgid */
+       if (setgid && !(perm & CFS_SETGID_PERM)) {
+               CDEBUG(D_SEC, "mdt blocked setgid attempt (%u:%u/%u:%u -> %u) "
+                      "from %s\n", pud->pud_uid, pud->pud_gid,
+                      pud->pud_fsuid, pud->pud_fsgid,
+                      ucred->uc_identity->mi_gid, libcfs_nid2str(peernid));
+               GOTO(out, rc = -EACCES);
+       }
 
-        /* process root_squash here. */
-        mdt_root_squash(info, peernid);
+       if (perm & CFS_SETGRP_PERM) {
+               /* only set groups if GID is not squashed */
+               if (pud->pud_ngroups && !is_nm_gid_squashed) {
+                       /* setgroups for local client */
+                       ucred->uc_ginfo = groups_alloc(pud->pud_ngroups);
+                       if (!ucred->uc_ginfo) {
+                               CERROR("failed to alloc %d groups\n",
+                                      pud->pud_ngroups);
+                               GOTO(out, rc = -ENOMEM);
+                       }
+
+                       lustre_groups_from_list(ucred->uc_ginfo,
+                                               pud->pud_groups);
+                       lustre_groups_sort(ucred->uc_ginfo);
+               } else {
+                       ucred->uc_suppgids[0] = -1;
+                       ucred->uc_suppgids[1] = -1;
+                       ucred->uc_ginfo = NULL;
+               }
+       } else {
+               ucred->uc_suppgids[0] = -1;
+               ucred->uc_suppgids[1] = -1;
+               ucred->uc_ginfo = NULL;
+       }
 
-        /* remove fs privilege for non-root user. */
-        if (ucred->mu_fsuid)
-                ucred->mu_cap = pud->pud_cap & ~CFS_CAP_FS_MASK;
-        else
-                ucred->mu_cap = pud->pud_cap;
-        if (remote && !(perm & CFS_RMTOWN_PERM))
-                ucred->mu_cap &= ~(CFS_CAP_SYS_RESOURCE_MASK |
-                                   CFS_CAP_CHOWN_MASK);
-        ucred->mu_valid = UCRED_NEW;
+       ucred->uc_uid = pud->pud_uid;
+       ucred->uc_gid = pud->pud_gid;
+       ucred->uc_fsuid = pud->pud_fsuid;
+       ucred->uc_fsgid = pud->pud_fsgid;
 
-        EXIT;
+       /* process root_squash here. */
+       mdt_root_squash(info, peernid);
+
+       /* remove fs privilege for non-root user. */
+       if (ucred->uc_fsuid && drop_fs_cap)
+               ucred->uc_cap = pud->pud_cap & ~CFS_CAP_FS_MASK;
+       else
+               ucred->uc_cap = pud->pud_cap;
+       ucred->uc_valid = UCRED_NEW;
+       ucred_set_jobid(info, ucred);
+       ucred_set_nid(info, ucred);
+       ucred_set_audit_enabled(info, ucred);
+
+       EXIT;
 
 out:
-        if (rc) {
-                if (ucred->mu_ginfo) {
-                        cfs_put_group_info(ucred->mu_ginfo);
-                        ucred->mu_ginfo = NULL;
-                }
-                if (ucred->mu_identity) {
-                        mdt_identity_put(mdt->mdt_identity_cache,
-                                         ucred->mu_identity);
-                        ucred->mu_identity = NULL;
-                }
-        }
+       if (rc) {
+               if (ucred->uc_ginfo) {
+                       put_group_info(ucred->uc_ginfo);
+                       ucred->uc_ginfo = NULL;
+               }
+               if (ucred->uc_identity) {
+                       mdt_identity_put(mdt->mdt_identity_cache,
+                                        ucred->uc_identity);
+                       ucred->uc_identity = NULL;
+               }
+       }
 
-        return rc;
+       return rc;
+}
+
+/**
+ * Check whether allow the client to set supplementary group IDs or not.
+ *
+ * \param[in] info     pointer to the thread context
+ * \param[in] uc       pointer to the RPC user descriptor
+ *
+ * \retval             true if allow to set supplementary group IDs
+ * \retval             false for other cases
+ */
+bool allow_client_chgrp(struct mdt_thread_info *info, struct lu_ucred *uc)
+{
+       __u32 perm;
+
+       /* 1. If identity_upcall is disabled,
+        *    permit local client to do anything. */
+       if (is_identity_get_disabled(info->mti_mdt->mdt_identity_cache))
+               return true;
+
+       /* 2. If fail to get related identities, then forbid any client to
+        *    set supplementary group IDs. */
+       if (uc->uc_identity == NULL)
+               return false;
+
+       /* 3. Check the permission in the identities. */
+       perm = mdt_identity_get_perm(uc->uc_identity,
+                                    mdt_info_req(info)->rq_peer.nid);
+       if (perm & CFS_SETGRP_PERM)
+               return true;
+
+       return false;
 }
 
 int mdt_check_ucred(struct mdt_thread_info *info)
@@ -307,76 +390,50 @@ int mdt_check_ucred(struct mdt_thread_info *info)
         struct ptlrpc_request   *req = mdt_info_req(info);
         struct mdt_device       *mdt = info->mti_mdt;
         struct ptlrpc_user_desc *pud = req->rq_user_desc;
-        struct md_ucred         *ucred = mdt_ucred(info);
+       struct lu_ucred         *ucred = mdt_ucred(info);
         struct md_identity      *identity = NULL;
         lnet_nid_t               peernid = req->rq_peer.nid;
         __u32                    perm = 0;
-        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
         int                      setuid;
         int                      setgid;
         int                      rc = 0;
 
         ENTRY;
 
-        if ((ucred->mu_valid == UCRED_OLD) || (ucred->mu_valid == UCRED_NEW))
-                RETURN(0);
-
-        if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
-                RETURN(0);
-
-        /* sanity check: if we use strong authentication, we expect the
-         * uid which client claimed is true */
-        if (remote) {
-                if (req->rq_auth_mapped_uid == INVALID_UID) {
-                        CDEBUG(D_SEC, "remote user not mapped, deny access!\n");
-                        RETURN(-EACCES);
-                }
-
-                if (ptlrpc_user_desc_do_idmap(req, pud))
-                        RETURN(-EACCES);
-
-                if (req->rq_auth_mapped_uid != pud->pud_uid) {
-                        CDEBUG(D_SEC, "remote client %s: auth/mapped uid %u/%u "
-                               "while client claims %u:%u/%u:%u\n",
-                               libcfs_nid2str(peernid), req->rq_auth_uid,
-                               req->rq_auth_mapped_uid,
-                               pud->pud_uid, pud->pud_gid,
-                               pud->pud_fsuid, pud->pud_fsgid);
-                        RETURN(-EACCES);
-                }
-        } else {
-                if (req->rq_auth_uid != pud->pud_uid) {
-                        CDEBUG(D_SEC, "local client %s: auth uid %u "
-                               "while client claims %u:%u/%u:%u\n",
-                               libcfs_nid2str(peernid), req->rq_auth_uid,
-                               pud->pud_uid, pud->pud_gid,
-                               pud->pud_fsuid, pud->pud_fsgid);
-                        RETURN(-EACCES);
-                }
-        }
-
-        if (is_identity_get_disabled(mdt->mdt_identity_cache)) {
-                if (remote) {
-                        CDEBUG(D_SEC, "remote client must run with identity_get "
-                               "enabled!\n");
-                        RETURN(-EACCES);
-                }
-                RETURN(0);
-        }
+       LASSERT(ucred != NULL);
+       if ((ucred->uc_valid == UCRED_OLD) || (ucred->uc_valid == UCRED_NEW))
+               RETURN(0);
+
+       if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
+               RETURN(0);
+
+       /* sanity check: if we use strong authentication, we expect the
+        * uid which client claimed is true */
+       if (!flvr_is_rootonly(req->rq_flvr.sf_rpc) &&
+           req->rq_auth_uid != pud->pud_uid) {
+               CDEBUG(D_SEC, "local client %s: auth uid %u "
+                      "while client claims %u:%u/%u:%u\n",
+                      libcfs_nid2str(peernid), req->rq_auth_uid,
+                      pud->pud_uid, pud->pud_gid,
+                      pud->pud_fsuid, pud->pud_fsgid);
+               RETURN(-EACCES);
+       }
 
-        identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid);
-        if (IS_ERR(identity)) {
-                if (unlikely(PTR_ERR(identity) == -EREMCHG &&
-                             !remote)) {
-                        RETURN(0);
-                } else {
-                        CDEBUG(D_SEC, "Deny access without identity: uid %u\n",
-                               pud->pud_uid);
-                        RETURN(-EACCES);
-               }
-        }
+       if (is_identity_get_disabled(mdt->mdt_identity_cache))
+               RETURN(0);
+
+       identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid);
+       if (IS_ERR(identity)) {
+               if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
+                       RETURN(0);
+               } else {
+                       CDEBUG(D_SEC, "Deny access without identity: uid %u\n",
+                              pud->pud_uid);
+                       RETURN(-EACCES);
+               }
+       }
 
-        perm = mdt_identity_get_perm(identity, remote, peernid);
+       perm = mdt_identity_get_perm(identity, peernid);
         /* find out the setuid/setgid attempt */
         setuid = (pud->pud_uid != pud->pud_fsuid);
         setgid = (pud->pud_gid != pud->pud_fsgid ||
@@ -405,143 +462,261 @@ out:
         return rc;
 }
 
-static int old_init_ucred(struct mdt_thread_info *info,
-                          struct mdt_body *body)
+static int old_init_ucred_common(struct mdt_thread_info *info,
+                                struct lu_nodemap *nodemap,
+                                bool drop_fs_cap)
 {
-        struct md_ucred *uc = mdt_ucred(info);
-        struct mdt_device  *mdt = info->mti_mdt;
-        struct md_identity *identity = NULL;
+       struct lu_ucred         *uc = mdt_ucred(info);
+       struct mdt_device       *mdt = info->mti_mdt;
+       struct md_identity      *identity = NULL;
+
+       if (nodemap && uc->uc_o_uid == nodemap->nm_squash_uid) {
+               /* deny access before we get identity ref */
+               if (nodemap->nmf_deny_unknown)
+                       RETURN(-EACCES);
+
+               uc->uc_fsuid = nodemap->nm_squash_uid;
+               uc->uc_fsgid = nodemap->nm_squash_gid;
+               uc->uc_cap = 0;
+               uc->uc_suppgids[0] = -1;
+               uc->uc_suppgids[1] = -1;
+       }
 
-        ENTRY;
+       if (!is_identity_get_disabled(mdt->mdt_identity_cache)) {
+               identity = mdt_identity_get(mdt->mdt_identity_cache,
+                                           uc->uc_fsuid);
+               if (IS_ERR(identity)) {
+                       if (unlikely(PTR_ERR(identity) == -EREMCHG ||
+                                    uc->uc_cap & CFS_CAP_FS_MASK)) {
+                               identity = NULL;
+                       } else {
+                               CDEBUG(D_SEC, "Deny access without identity: "
+                                      "uid %u\n", uc->uc_fsuid);
+                               RETURN(-EACCES);
+                       }
+               }
+       }
+       uc->uc_identity = identity;
 
-        uc->mu_valid = UCRED_INVALID;
-        uc->mu_o_uid = uc->mu_uid = body->uid;
-        uc->mu_o_gid = uc->mu_gid = body->gid;
-        uc->mu_o_fsuid = uc->mu_fsuid = body->fsuid;
-        uc->mu_o_fsgid = uc->mu_fsgid = body->fsgid;
-        uc->mu_suppgids[0] = body->suppgid;
-        uc->mu_suppgids[1] = -1;
-        uc->mu_ginfo = NULL;
-        if (!is_identity_get_disabled(mdt->mdt_identity_cache)) {
-                identity = mdt_identity_get(mdt->mdt_identity_cache,
-                                            uc->mu_fsuid);
-                if (IS_ERR(identity)) {
-                        if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
-                                identity = NULL;
-                        } else {
-                                CDEBUG(D_SEC, "Deny access without identity: "
-                                       "uid %u\n", uc->mu_fsuid);
-                                RETURN(-EACCES);
-                        }
-                }
-        }
-        uc->mu_identity = identity;
+       /* process root_squash here. */
+       mdt_root_squash(info, mdt_info_req(info)->rq_peer.nid);
 
-        /* process root_squash here. */
-        mdt_root_squash(info, mdt_info_req(info)->rq_peer.nid);
+       /* remove fs privilege for non-root user. */
+       if (uc->uc_fsuid && drop_fs_cap)
+               uc->uc_cap &= ~CFS_CAP_FS_MASK;
+       uc->uc_valid = UCRED_OLD;
+       ucred_set_jobid(info, uc);
+       ucred_set_nid(info, uc);
+       ucred_set_audit_enabled(info, uc);
 
-        /* remove fs privilege for non-root user. */
-        if (uc->mu_fsuid)
-                uc->mu_cap = body->capability & ~CFS_CAP_FS_MASK;
-        else
-                uc->mu_cap = body->capability;
-        uc->mu_valid = UCRED_OLD;
+       EXIT;
 
-        RETURN(0);
+       return 0;
+}
+
+static int old_init_ucred(struct mdt_thread_info *info,
+                         struct mdt_body *body, bool drop_fs_cap)
+{
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct lu_nodemap *nodemap;
+       int rc;
+       ENTRY;
+
+       nodemap = nodemap_get_from_exp(info->mti_exp);
+       if (IS_ERR(nodemap))
+               RETURN(PTR_ERR(nodemap));
+
+       body->mbo_uid = nodemap_map_id(nodemap, NODEMAP_UID,
+                                      NODEMAP_CLIENT_TO_FS, body->mbo_uid);
+       body->mbo_gid = nodemap_map_id(nodemap, NODEMAP_GID,
+                                      NODEMAP_CLIENT_TO_FS, body->mbo_gid);
+       body->mbo_fsuid = nodemap_map_id(nodemap, NODEMAP_UID,
+                                      NODEMAP_CLIENT_TO_FS, body->mbo_fsuid);
+       body->mbo_fsgid = nodemap_map_id(nodemap, NODEMAP_GID,
+                                      NODEMAP_CLIENT_TO_FS, body->mbo_fsgid);
+
+       LASSERT(uc != NULL);
+       uc->uc_valid = UCRED_INVALID;
+       uc->uc_o_uid = uc->uc_uid = body->mbo_uid;
+       uc->uc_o_gid = uc->uc_gid = body->mbo_gid;
+       uc->uc_o_fsuid = uc->uc_fsuid = body->mbo_fsuid;
+       uc->uc_o_fsgid = uc->uc_fsgid = body->mbo_fsgid;
+       uc->uc_suppgids[0] = body->mbo_suppgid;
+       uc->uc_suppgids[1] = -1;
+       uc->uc_ginfo = NULL;
+       uc->uc_cap = body->mbo_capability;
+
+       rc = old_init_ucred_common(info, nodemap, drop_fs_cap);
+       nodemap_putref(nodemap);
+
+       RETURN(rc);
 }
 
 static int old_init_ucred_reint(struct mdt_thread_info *info)
 {
-        struct md_ucred *uc = mdt_ucred(info);
-        struct mdt_device  *mdt = info->mti_mdt;
-        struct md_identity *identity = NULL;
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct lu_nodemap *nodemap;
+       int rc;
+       ENTRY;
 
-        ENTRY;
+       nodemap = nodemap_get_from_exp(info->mti_exp);
+       if (IS_ERR(nodemap))
+               RETURN(PTR_ERR(nodemap));
 
-        uc->mu_valid = UCRED_INVALID;
-        uc->mu_o_uid = uc->mu_o_fsuid = uc->mu_uid = uc->mu_fsuid;
-        uc->mu_o_gid = uc->mu_o_fsgid = uc->mu_gid = uc->mu_fsgid;
-        uc->mu_ginfo = NULL;
-        if (!is_identity_get_disabled(mdt->mdt_identity_cache)) {
-                identity = mdt_identity_get(mdt->mdt_identity_cache,
-                                            uc->mu_fsuid);
-                if (IS_ERR(identity)) {
-                        if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
-                                identity = NULL;
-                        } else {
-                                CDEBUG(D_SEC, "Deny access without identity: "
-                                       "uid %u\n", uc->mu_fsuid);
-                                RETURN(-EACCES);
-                        }
-                }
-        }
-        uc->mu_identity = identity;
+       LASSERT(uc != NULL);
 
-        /* process root_squash here. */
-        mdt_root_squash(info, mdt_info_req(info)->rq_peer.nid);
+       uc->uc_fsuid = nodemap_map_id(nodemap, NODEMAP_UID,
+                                     NODEMAP_CLIENT_TO_FS, uc->uc_fsuid);
+       uc->uc_fsgid = nodemap_map_id(nodemap, NODEMAP_GID,
+                                     NODEMAP_CLIENT_TO_FS, uc->uc_fsgid);
 
-        /* remove fs privilege for non-root user. */
-        if (uc->mu_fsuid)
-                uc->mu_cap &= ~CFS_CAP_FS_MASK;
-        uc->mu_valid = UCRED_OLD;
+       uc->uc_valid = UCRED_INVALID;
+       uc->uc_o_uid = uc->uc_o_fsuid = uc->uc_uid = uc->uc_fsuid;
+       uc->uc_o_gid = uc->uc_o_fsgid = uc->uc_gid = uc->uc_fsgid;
+       uc->uc_ginfo = NULL;
 
-        RETURN(0);
+       rc = old_init_ucred_common(info, nodemap, true); /* drop_fs_cap=true */
+       nodemap_putref(nodemap);
+
+       RETURN(rc);
 }
 
-int mdt_init_ucred(struct mdt_thread_info *info, struct mdt_body *body)
+static inline int __mdt_init_ucred(struct mdt_thread_info *info,
+                                  struct mdt_body *body,
+                                  bool drop_fs_cap)
 {
-        struct ptlrpc_request *req = mdt_info_req(info);
-        struct md_ucred       *uc  = mdt_ucred(info);
+       struct ptlrpc_request   *req = mdt_info_req(info);
+       struct lu_ucred         *uc  = mdt_ucred(info);
 
-        if ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))
-                return 0;
+       LASSERT(uc != NULL);
+       if ((uc->uc_valid == UCRED_OLD) || (uc->uc_valid == UCRED_NEW))
+               return 0;
 
-        mdt_exit_ucred(info);
+       mdt_exit_ucred(info);
+
+       if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
+               return old_init_ucred(info, body, drop_fs_cap);
+       else
+               return new_init_ucred(info, BODY_INIT, body, drop_fs_cap);
+}
+
+int mdt_init_ucred(struct mdt_thread_info *info, struct mdt_body *body)
+{
+       return __mdt_init_ucred(info, body, true);
+}
 
-        if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
-                return old_init_ucred(info, body);
-        else
-                return new_init_ucred(info, BODY_INIT, body);
+/* LU-6528 when "no_subtree_check" is set for NFS export, nfsd_set_fh_dentry()
+ * doesn't set correct fsuid explicitely, but raise capability to allow
+ * exportfs_decode_fh() to reconnect disconnected dentry into dcache. So for
+ * lookup (i.e. intent_getattr), we should keep FS capability, otherwise it
+ * will fail permission check. */
+int mdt_init_ucred_intent_getattr(struct mdt_thread_info *info,
+                                 struct mdt_body *body)
+{
+       return __mdt_init_ucred(info, body, false);
 }
 
 int mdt_init_ucred_reint(struct mdt_thread_info *info)
 {
-        struct ptlrpc_request *req = mdt_info_req(info);
-        struct md_ucred       *uc  = mdt_ucred(info);
+       struct ptlrpc_request *req = mdt_info_req(info);
+       struct lu_ucred       *uc  = mdt_ucred(info);
+       struct md_attr        *ma  = &info->mti_attr;
+
+       LASSERT(uc != NULL);
+       if ((uc->uc_valid == UCRED_OLD) || (uc->uc_valid == UCRED_NEW))
+               return 0;
 
-        if ((uc->mu_valid == UCRED_OLD) || (uc->mu_valid == UCRED_NEW))
-                return 0;
+       /* LU-5564: for normal close request, skip permission check */
+       if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE &&
+           !(ma->ma_attr_flags & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)))
+               uc->uc_cap |= CFS_CAP_FS_MASK;
 
-        mdt_exit_ucred(info);
+       mdt_exit_ucred(info);
 
-        if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
-                return old_init_ucred_reint(info);
-        else
-                return new_init_ucred(info, REC_INIT, NULL);
+       if (!req->rq_auth_gss || req->rq_auth_usr_mdt || !req->rq_user_desc)
+               return old_init_ucred_reint(info);
+       else
+               return new_init_ucred(info, REC_INIT, NULL, true);
 }
 
 /* copied from lov/lov_ea.c, just for debugging, will be removed later */
-void mdt_dump_lmm(int level, const struct lov_mds_md *lmm)
+void mdt_dump_lmm(int level, const struct lov_mds_md *lmm, __u64 valid)
 {
-        const struct lov_ost_data_v1 *lod;
-        int                           i;
-        __u16                         count;
-
-        count = le16_to_cpu(((struct lov_user_md*)lmm)->lmm_stripe_count);
-
-        CDEBUG(level, "objid "LPX64", magic 0x%08X, pattern %#X\n",
-               le64_to_cpu(lmm->lmm_object_id), le32_to_cpu(lmm->lmm_magic),
-               le32_to_cpu(lmm->lmm_pattern));
-        CDEBUG(level,"stripe_size=0x%x, stripe_count=0x%x\n",
-               le32_to_cpu(lmm->lmm_stripe_size), count);
-        if (count == LOV_ALL_STRIPES)
-                return;
-        LASSERT(count <= LOV_MAX_STRIPE_COUNT);
-        for (i = 0, lod = lmm->lmm_objects; i < count; i++, lod++)
-                CDEBUG(level, "stripe %u idx %u subobj "LPX64"/"LPX64"\n",
-                       i, le32_to_cpu(lod->l_ost_idx),
-                       le64_to_cpu(lod->l_object_seq),
-                       le64_to_cpu(lod->l_object_id));
+       const struct lov_ost_data_v1 *lod;
+       __u32 lmm_magic = le32_to_cpu(lmm->lmm_magic);
+       __u16 count;
+       int i;
+
+       if (likely(!cfs_cdebug_show(level, DEBUG_SUBSYSTEM)))
+               return;
+
+       CDEBUG(level, "objid "DOSTID", magic 0x%08X, pattern %#X\n",
+              POSTID(&lmm->lmm_oi), lmm_magic,
+              le32_to_cpu(lmm->lmm_pattern));
+
+       /* No support for compount layouts yet */
+       if (lmm_magic != LOV_MAGIC_V1 && lmm_magic != LOV_MAGIC_V3)
+               return;
+
+       count = le16_to_cpu(((struct lov_user_md *)lmm)->lmm_stripe_count);
+       CDEBUG(level, "stripe_size=0x%x, stripe_count=0x%x\n",
+              le32_to_cpu(lmm->lmm_stripe_size), count);
+
+       /* If it's a directory or a released file, then there are
+        * no actual objects to print, so bail out. */
+       if (valid & OBD_MD_FLDIREA ||
+           le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
+               return;
+
+       LASSERT(count <= LOV_MAX_STRIPE_COUNT);
+       for (i = 0, lod = lmm->lmm_objects; i < count; i++, lod++) {
+               struct ost_id oi;
+
+               ostid_le_to_cpu(&lod->l_ost_oi, &oi);
+               CDEBUG(level, "stripe %u idx %u subobj "DOSTID"\n",
+                      i, le32_to_cpu(lod->l_ost_idx), POSTID(&oi));
+       }
+}
+
+void mdt_dump_lmv(unsigned int level, const union lmv_mds_md *lmv)
+{
+       const struct lmv_mds_md_v1 *lmm1;
+       const struct lmv_foreign_md *lfm;
+       int                        i;
+
+       if (likely(!cfs_cdebug_show(level, DEBUG_SUBSYSTEM)))
+               return;
+
+       /* foreign LMV case */
+       lfm = &lmv->lmv_foreign_md;
+       if (le32_to_cpu(lfm->lfm_magic) == LMV_MAGIC_FOREIGN) {
+               CDEBUG_LIMIT(level,
+                            "foreign magic 0x%08X, length %u, type %u, flags %u, value '%.*s'\n",
+                            le32_to_cpu(lfm->lfm_magic),
+                            le32_to_cpu(lfm->lfm_length),
+                            le32_to_cpu(lfm->lfm_type),
+                            le32_to_cpu(lfm->lfm_flags),
+                            le32_to_cpu(lfm->lfm_length), lfm->lfm_value);
+               return;
+       }
+
+       lmm1 = &lmv->lmv_md_v1;
+       CDEBUG(level,
+              "magic 0x%08X, master %#X stripe_count %#x hash_type %#x\n",
+              le32_to_cpu(lmm1->lmv_magic),
+              le32_to_cpu(lmm1->lmv_master_mdt_index),
+              le32_to_cpu(lmm1->lmv_stripe_count),
+              le32_to_cpu(lmm1->lmv_hash_type));
+
+       if (le32_to_cpu(lmm1->lmv_magic) == LMV_MAGIC_STRIPE)
+               return;
+
+       for (i = 0; i < le32_to_cpu(lmm1->lmv_stripe_count); i++) {
+               struct lu_fid fid;
+
+               fid_le_to_cpu(&fid, &lmm1->lmv_stripe_fids[i]);
+               CDEBUG(level, "idx %u subobj "DFID"\n", i, PFID(&fid));
+       }
 }
 
 /* Shrink and/or grow reply buffers */
@@ -557,24 +732,22 @@ int mdt_fix_reply(struct mdt_thread_info *info)
         body = req_capsule_server_get(pill, &RMF_MDT_BODY);
         LASSERT(body != NULL);
 
-        if (body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE | OBD_MD_LINKNAME))
-                md_size = body->eadatasize;
-        else
-                md_size = 0;
+       if (body->mbo_valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE |
+                              OBD_MD_LINKNAME))
+               md_size = body->mbo_eadatasize;
+       else
+               md_size = 0;
 
-        acl_size = body->aclsize;
+       acl_size = body->mbo_aclsize;
 
-        /* this replay - not send info to client */
-        if (info->mti_spec.no_create == 1) {
-                md_size = 0;
-                acl_size = 0;
-        }
+       /* this replay - not send info to client */
+       if (info->mti_spec.no_create) {
+               md_size = 0;
+               acl_size = 0;
+       }
 
-        CDEBUG(D_INFO, "Shrink to md_size = %d cookie/acl_size = %d"
-                        " MDSCAPA = %llx, OSSCAPA = %llx\n",
-                        md_size, acl_size,
-                        (unsigned long long)(body->valid & OBD_MD_FLMDSCAPA),
-                        (unsigned long long)(body->valid & OBD_MD_FLOSSCAPA));
+       CDEBUG(D_INFO, "Shrink to md_size = %d cookie/acl_size = %d\n",
+              md_size, acl_size);
 /*
             &RMF_MDT_BODY,
             &RMF_MDT_MD,
@@ -587,41 +760,48 @@ int mdt_fix_reply(struct mdt_thread_info *info)
         /* MDT_MD buffer may be bigger than packed value, let's shrink all
          * buffers before growing it */
        if (info->mti_big_lmm_used) {
-                LASSERT(req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER));
-                md_packed = req_capsule_get_size(pill, &RMF_MDT_MD,
-                                                 RCL_SERVER);
-                LASSERT(md_packed > 0);
-                /* buffer must be allocated separately */
-                LASSERT(info->mti_attr.ma_lmm !=
-                        req_capsule_server_get(pill, &RMF_MDT_MD));
-                req_capsule_shrink(pill, &RMF_MDT_MD, 0, RCL_SERVER);
-                /* free big lmm if md_size is not needed */
-                if (md_size == 0)
+               /* big_lmm buffer may be used even without packing the result
+                * into reply, just for internal server needs */
+               if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER))
+                       md_packed = req_capsule_get_size(pill, &RMF_MDT_MD,
+                                                        RCL_SERVER);
+
+               /* free big lmm if md_size is not needed */
+               if (md_size == 0 || md_packed == 0) {
                        info->mti_big_lmm_used = 0;
-        } else if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) {
-                req_capsule_shrink(pill, &RMF_MDT_MD, md_size, RCL_SERVER);
-        }
-
-        if (req_capsule_has_field(pill, &RMF_ACL, RCL_SERVER))
-                req_capsule_shrink(pill, &RMF_ACL, acl_size, RCL_SERVER);
-        else if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER))
-                req_capsule_shrink(pill, &RMF_LOGCOOKIES,
-                                   acl_size, RCL_SERVER);
+               } else {
+                       /* buffer must be allocated separately */
+                       LASSERT(info->mti_attr.ma_lmm !=
+                               req_capsule_server_get(pill, &RMF_MDT_MD));
+                       req_capsule_shrink(pill, &RMF_MDT_MD, 0, RCL_SERVER);
+               }
+       } else if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) {
+               req_capsule_shrink(pill, &RMF_MDT_MD, md_size, RCL_SERVER);
+       }
 
-        if (req_capsule_has_field(pill, &RMF_CAPA1, RCL_SERVER) &&
-            !(body->valid & OBD_MD_FLMDSCAPA))
-                req_capsule_shrink(pill, &RMF_CAPA1, 0, RCL_SERVER);
+       if (info->mti_big_acl_used) {
+               if (acl_size == 0)
+                       info->mti_big_acl_used = 0;
+               else
+                       req_capsule_shrink(pill, &RMF_ACL, 0, RCL_SERVER);
+       } else if (req_capsule_has_field(pill, &RMF_ACL, RCL_SERVER)) {
+               req_capsule_shrink(pill, &RMF_ACL, acl_size, RCL_SERVER);
+       } else if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER)) {
+               req_capsule_shrink(pill, &RMF_LOGCOOKIES, acl_size, RCL_SERVER);
+       }
 
-        if (req_capsule_has_field(pill, &RMF_CAPA2, RCL_SERVER) &&
-            !(body->valid & OBD_MD_FLOSSCAPA))
-                req_capsule_shrink(pill, &RMF_CAPA2, 0, RCL_SERVER);
+       /* Shrink optional SECCTX buffer if it is not used */
+       if (req_capsule_has_field(pill, &RMF_FILE_SECCTX, RCL_SERVER) &&
+           req_capsule_get_size(pill, &RMF_FILE_SECCTX, RCL_SERVER) != 0 &&
+           !(body->mbo_valid & OBD_MD_SECCTX))
+               req_capsule_shrink(pill, &RMF_FILE_SECCTX, 0, RCL_SERVER);
 
-        /*
-         * Some more field should be shrinked if needed.
-         * This should be done by those who added fields to reply message.
-         */
+       /*
+        * Some more field should be shrinked if needed.
+        * This should be done by those who added fields to reply message.
+        */
 
-        /* Grow MD buffer if needed finally */
+       /* Grow MD buffer if needed finally */
        if (info->mti_big_lmm_used) {
                 void *lmm;
 
@@ -633,221 +813,345 @@ int mdt_fix_reply(struct mdt_thread_info *info)
                         /* we can't answer with proper LOV EA, drop flags,
                          * the rc is also returned so this request is
                          * considered as failed */
-                        body->valid &= ~(OBD_MD_FLDIREA | OBD_MD_FLEASIZE);
+                       body->mbo_valid &= ~(OBD_MD_FLDIREA | OBD_MD_FLEASIZE);
                         /* don't return transno along with error */
                         lustre_msg_set_transno(pill->rc_req->rq_repmsg, 0);
                 } else {
-                        /* now we need to pack right LOV EA */
-                        lmm = req_capsule_server_get(pill, &RMF_MDT_MD);
-                        LASSERT(req_capsule_get_size(pill, &RMF_MDT_MD,
-                                                     RCL_SERVER) ==
-                                info->mti_attr.ma_lmm_size);
-                        memcpy(lmm, info->mti_attr.ma_lmm,
-                               info->mti_attr.ma_lmm_size);
-                }
-                /* update mdt_max_mdsize so clients will be aware about that */
-                if (info->mti_mdt->mdt_max_mdsize < info->mti_attr.ma_lmm_size)
-                        info->mti_mdt->mdt_max_mdsize =
-                                                    info->mti_attr.ma_lmm_size;
+                       /* now we need to pack right LOV/LMV EA */
+                       lmm = req_capsule_server_get(pill, &RMF_MDT_MD);
+                       if (info->mti_attr.ma_valid & MA_LOV) {
+                               LASSERT(req_capsule_get_size(pill, &RMF_MDT_MD,
+                                                            RCL_SERVER) ==
+                                               info->mti_attr.ma_lmm_size);
+                               memcpy(lmm, info->mti_attr.ma_lmm,
+                                      info->mti_attr.ma_lmm_size);
+                       } else if (info->mti_attr.ma_valid & MA_LMV) {
+                               LASSERT(req_capsule_get_size(pill, &RMF_MDT_MD,
+                                                            RCL_SERVER) ==
+                                               info->mti_attr.ma_lmv_size);
+                               memcpy(lmm, info->mti_attr.ma_lmv,
+                                      info->mti_attr.ma_lmv_size);
+                       }
+               }
+
+               /* update mdt_max_mdsize so clients will be aware about that */
+               if (info->mti_mdt->mdt_max_mdsize < info->mti_attr.ma_lmm_size)
+                       info->mti_mdt->mdt_max_mdsize =
+                                               info->mti_attr.ma_lmm_size;
                info->mti_big_lmm_used = 0;
-        }
-        RETURN(rc);
+       }
+
+       if (info->mti_big_acl_used) {
+               CDEBUG(D_INFO, "Enlarge reply ACL buffer to %d bytes\n",
+                      acl_size);
+
+               rc = req_capsule_server_grow(pill, &RMF_ACL, acl_size);
+               if (rc) {
+                       body->mbo_valid &= ~OBD_MD_FLACL;
+               } else {
+                       void *acl = req_capsule_server_get(pill, &RMF_ACL);
+
+                       memcpy(acl, info->mti_big_acl, acl_size);
+               }
+
+               info->mti_big_acl_used = 0;
+       }
+
+       RETURN(rc);
 }
 
 
 /* if object is dying, pack the lov/llog data,
- * parameter info->mti_attr should be valid at this point! */
+ * parameter info->mti_attr should be valid at this point!
+ * Also implements RAoLU policy */
 int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
-                           const struct md_attr *ma)
+                          struct md_attr *ma)
 {
-        struct mdt_body       *repbody;
+       struct mdt_body *repbody = NULL;
         const struct lu_attr *la = &ma->ma_attr;
-        int rc;
+       struct coordinator *cdt = &info->mti_mdt->mdt_coordinator;
+       int rc;
+       __u64 need = 0;
+       struct hsm_action_item hai = {
+               .hai_len = sizeof(hai),
+               .hai_action = HSMA_REMOVE,
+               .hai_extent.length = -1,
+               .hai_cookie = 0,
+               .hai_gid = 0,
+       };
+       int archive_id;
+
         ENTRY;
 
-        repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
-        LASSERT(repbody != NULL);
+       if (mdt_info_req(info) != NULL) {
+               repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
+               LASSERT(repbody != NULL);
+       } else {
+               CDEBUG(D_INFO, "not running in a request/reply context\n");
+       }
 
-        if (ma->ma_valid & MA_INODE)
+       if ((ma->ma_valid & MA_INODE) && repbody != NULL)
                 mdt_pack_attr2body(info, repbody, la, mdt_object_fid(mo));
 
-        if (ma->ma_valid & MA_LOV) {
+       if (ma->ma_valid & MA_LOV) {
                CERROR("No need in LOV EA upon unlink\n");
                dump_stack();
-        }
-       repbody->eadatasize = 0;
+       }
+       if (repbody != NULL)
+               repbody->mbo_eadatasize = 0;
+
+       /* Only check unlinked and archived if RAoLU and upon last close */
+       if (!cdt->cdt_remove_archive_on_last_unlink ||
+           atomic_read(&mo->mot_open_count) != 0)
+               RETURN(0);
+
+       /* mdt_attr_get_complex will clear ma_valid, so check here first */
+       if ((ma->ma_valid & MA_INODE) && (ma->ma_attr.la_nlink != 0))
+               RETURN(0);
+
+       if ((ma->ma_valid & MA_HSM) && (!(ma->ma_hsm.mh_flags & HS_EXISTS)))
+               RETURN(0);
+
+       need |= (MA_INODE | MA_HSM) & ~ma->ma_valid;
+       if (need != 0) {
+               /* ma->ma_valid is missing either MA_INODE, MA_HSM, or both,
+                * try setting them */
+               ma->ma_need |= need;
+               rc = mdt_attr_get_complex(info, mo, ma);
+               if (rc) {
+                       CERROR("%s: unable to fetch missing attributes of"
+                              DFID": rc=%d\n", mdt_obd_name(info->mti_mdt),
+                              PFID(mdt_object_fid(mo)), rc);
+                       RETURN(0);
+               }
 
-        if (ma->ma_cookie_size && (ma->ma_valid & MA_COOKIE)) {
-                repbody->aclsize = ma->ma_cookie_size;
-                repbody->valid |= OBD_MD_FLCOOKIE;
-        }
+               if (need & MA_INODE) {
+                       if (ma->ma_valid & MA_INODE) {
+                               if (ma->ma_attr.la_nlink != 0)
+                                       RETURN(0);
+                       } else {
+                               RETURN(0);
+                       }
+               }
+
+               if (need & MA_HSM) {
+                       if (ma->ma_valid & MA_HSM) {
+                               if (!(ma->ma_hsm.mh_flags & HS_EXISTS))
+                                       RETURN(0);
+                       } else {
+                               RETURN(0);
+                       }
+               }
+       }
 
-        if (info->mti_mdt->mdt_opts.mo_oss_capa &&
-            info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA &&
-            repbody->valid & OBD_MD_FLEASIZE) {
-                struct lustre_capa *capa;
+       /* RAoLU policy is active, last close on file has occured,
+        * file is unlinked, file is archived, so create remove request
+        * for copytool!
+        * If CDT is not running, requests will be logged for later. */
+       if (ma->ma_hsm.mh_arch_id != 0)
+               archive_id = ma->ma_hsm.mh_arch_id;
+       else
+               archive_id = cdt->cdt_default_archive_id;
 
-                capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2);
-                LASSERT(capa);
-                capa->lc_opc = CAPA_OPC_OSS_DESTROY;
-                rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0);
-                if (rc)
-                        RETURN(rc);
+       hai.hai_fid = *mdt_object_fid(mo);
 
-                repbody->valid |= OBD_MD_FLOSSCAPA;
-        }
+       rc = mdt_agent_record_add(info->mti_env, info->mti_mdt, archive_id, 0,
+                                 &hai);
+       if (rc)
+               CERROR("%s: unable to add HSM remove request for "DFID
+                      ": rc=%d\n", mdt_obd_name(info->mti_mdt),
+                      PFID(mdt_object_fid(mo)), rc);
 
         RETURN(0);
 }
 
-static inline unsigned int attr_unpack(__u64 sa_valid) {
-        unsigned int ia_valid = 0;
-
-        if (sa_valid & MDS_ATTR_MODE)
-                ia_valid |= ATTR_MODE;
-        if (sa_valid & MDS_ATTR_UID)
-                ia_valid |= ATTR_UID;
-        if (sa_valid & MDS_ATTR_GID)
-                ia_valid |= ATTR_GID;
-        if (sa_valid & MDS_ATTR_SIZE)
-                ia_valid |= ATTR_SIZE;
-        if (sa_valid & MDS_ATTR_ATIME)
-                ia_valid |= ATTR_ATIME;
-        if (sa_valid & MDS_ATTR_MTIME)
-                ia_valid |= ATTR_MTIME;
-        if (sa_valid & MDS_ATTR_CTIME)
-                ia_valid |= ATTR_CTIME;
-        if (sa_valid & MDS_ATTR_ATIME_SET)
-                ia_valid |= ATTR_ATIME_SET;
-        if (sa_valid & MDS_ATTR_MTIME_SET)
-                ia_valid |= ATTR_MTIME_SET;
-        if (sa_valid & MDS_ATTR_FORCE)
-                ia_valid |= ATTR_FORCE;
-        if (sa_valid & MDS_ATTR_ATTR_FLAG)
-                ia_valid |= ATTR_ATTR_FLAG;
-        if (sa_valid & MDS_ATTR_KILL_SUID)
-                ia_valid |=  ATTR_KILL_SUID;
-        if (sa_valid & MDS_ATTR_KILL_SGID)
-                ia_valid |= ATTR_KILL_SGID;
-        if (sa_valid & MDS_ATTR_CTIME_SET)
-                ia_valid |= ATTR_CTIME_SET;
-        if (sa_valid & MDS_ATTR_FROM_OPEN)
-                ia_valid |= ATTR_FROM_OPEN;
-        if (sa_valid & MDS_ATTR_BLOCKS)
-                ia_valid |= ATTR_BLOCKS;
-        if (sa_valid & MDS_OPEN_OWNEROVERRIDE)
-                ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-        return ia_valid;
-}
-
 static __u64 mdt_attr_valid_xlate(__u64 in, struct mdt_reint_record *rr,
                                   struct md_attr *ma)
 {
-        __u64 out;
-
-        out = 0;
-        if (in & ATTR_MODE)
-                out |= LA_MODE;
-        if (in & ATTR_UID)
-                out |= LA_UID;
-        if (in & ATTR_GID)
-                out |= LA_GID;
-        if (in & ATTR_SIZE)
-                out |= LA_SIZE;
-        if (in & ATTR_BLOCKS)
-                out |= LA_BLOCKS;
-
-        if (in & ATTR_FROM_OPEN)
-                rr->rr_flags |= MRF_OPEN_TRUNC;
-
-        if (in & ATTR_ATIME_SET)
-                out |= LA_ATIME;
-
-        if (in & ATTR_CTIME_SET)
-                out |= LA_CTIME;
-
-        if (in & ATTR_MTIME_SET)
-                out |= LA_MTIME;
-
-        if (in & ATTR_ATTR_FLAG)
-                out |= LA_FLAGS;
-
-        if (in & ATTR_KILL_SUID)
-                out |= LA_KILL_SUID;
-
-        if (in & ATTR_KILL_SGID)
-                out |= LA_KILL_SGID;
-
-        if (in & MDS_OPEN_OWNEROVERRIDE)
-                ma->ma_attr_flags |= MDS_OPEN_OWNEROVERRIDE;
-
-        if (in & ATTR_FORCE)
-                ma->ma_attr_flags |= MDS_PERM_BYPASS;
-
-        /*XXX need ATTR_RAW?*/
-        in &= ~(ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_BLOCKS|
-                ATTR_ATIME|ATTR_MTIME|ATTR_CTIME|ATTR_FROM_OPEN|
-                ATTR_ATIME_SET|ATTR_CTIME_SET|ATTR_MTIME_SET|
-                ATTR_ATTR_FLAG|ATTR_RAW|MDS_OPEN_OWNEROVERRIDE|
-                ATTR_FORCE|ATTR_KILL_SUID|ATTR_KILL_SGID);
-        if (in != 0)
-                CERROR("Unknown attr bits: "LPX64"\n", in);
-        return out;
+       __u64 out;
+
+       out = 0;
+       if (in & MDS_ATTR_MODE)
+               out |= LA_MODE;
+       if (in & MDS_ATTR_UID)
+               out |= LA_UID;
+       if (in & MDS_ATTR_GID)
+               out |= LA_GID;
+       if (in & MDS_ATTR_SIZE)
+               out |= LA_SIZE;
+       if (in & MDS_ATTR_BLOCKS)
+               out |= LA_BLOCKS;
+       if (in & MDS_ATTR_ATIME_SET)
+               out |= LA_ATIME;
+       if (in & MDS_ATTR_CTIME_SET)
+               out |= LA_CTIME;
+       if (in & MDS_ATTR_MTIME_SET)
+               out |= LA_MTIME;
+       if (in & MDS_ATTR_ATTR_FLAG)
+               out |= LA_FLAGS;
+       if (in & MDS_ATTR_KILL_SUID)
+               out |= LA_KILL_SUID;
+       if (in & MDS_ATTR_KILL_SGID)
+               out |= LA_KILL_SGID;
+       if (in & MDS_ATTR_PROJID)
+               out |= LA_PROJID;
+       if (in & MDS_ATTR_LSIZE)
+               out |= LA_LSIZE;
+       if (in & MDS_ATTR_LBLOCKS)
+               out |= LA_LBLOCKS;
+
+       if (in & MDS_ATTR_FROM_OPEN)
+               rr->rr_flags |= MRF_OPEN_TRUNC;
+       if (in & MDS_ATTR_OVERRIDE)
+               ma->ma_attr_flags |= MDS_OWNEROVERRIDE;
+       if (in & MDS_ATTR_FORCE)
+               ma->ma_attr_flags |= MDS_PERM_BYPASS;
+
+       in &= ~(MDS_ATTR_MODE | MDS_ATTR_UID | MDS_ATTR_GID | MDS_ATTR_PROJID |
+               MDS_ATTR_ATIME | MDS_ATTR_MTIME | MDS_ATTR_CTIME |
+               MDS_ATTR_ATIME_SET | MDS_ATTR_CTIME_SET | MDS_ATTR_MTIME_SET |
+               MDS_ATTR_SIZE | MDS_ATTR_BLOCKS | MDS_ATTR_ATTR_FLAG |
+               MDS_ATTR_FORCE | MDS_ATTR_KILL_SUID | MDS_ATTR_KILL_SGID |
+               MDS_ATTR_FROM_OPEN | MDS_ATTR_LSIZE | MDS_ATTR_LBLOCKS |
+               MDS_ATTR_OVERRIDE);
+       if (in != 0)
+               CDEBUG(D_INFO, "Unknown attr bits: %#llx\n", in);
+       return out;
 }
+
 /* unpacking */
 
+int mdt_name_unpack(struct req_capsule *pill,
+                   const struct req_msg_field *field,
+                   struct lu_name *ln,
+                   enum mdt_name_flags flags)
+{
+       ln->ln_name = req_capsule_client_get(pill, field);
+       ln->ln_namelen = req_capsule_get_size(pill, field, RCL_CLIENT) - 1;
+
+       if (!lu_name_is_valid(ln)) {
+               ln->ln_name = NULL;
+               ln->ln_namelen = 0;
+
+               return -EPROTO;
+       }
+
+       if ((flags & MNF_FIX_ANON) &&
+           ln->ln_namelen == 1 && ln->ln_name[0] == '/') {
+               /* Newer (3.x) kernels use a name of "/" for the
+                * "anonymous" disconnected dentries from NFS
+                * filehandle conversion. See d_obtain_alias(). */
+               ln->ln_name = NULL;
+               ln->ln_namelen = 0;
+       }
+
+       return 0;
+}
+
+static int mdt_file_secctx_unpack(struct req_capsule *pill,
+                                 const char **secctx_name,
+                                 void **secctx, size_t *secctx_size)
+{
+       const char *name;
+       size_t name_size;
+
+       *secctx_name = NULL;
+       *secctx = NULL;
+       *secctx_size = 0;
+
+       if (!req_capsule_has_field(pill, &RMF_FILE_SECCTX_NAME, RCL_CLIENT) ||
+           !req_capsule_field_present(pill, &RMF_FILE_SECCTX_NAME, RCL_CLIENT))
+               return 0;
+
+       name_size = req_capsule_get_size(pill, &RMF_FILE_SECCTX_NAME,
+                                        RCL_CLIENT);
+       if (name_size == 0)
+               return 0;
+
+       name = req_capsule_client_get(pill, &RMF_FILE_SECCTX_NAME);
+       if (strnlen(name, name_size) != name_size - 1)
+               return -EPROTO;
+
+       if (!req_capsule_has_field(pill, &RMF_FILE_SECCTX, RCL_CLIENT) ||
+           !req_capsule_field_present(pill, &RMF_FILE_SECCTX, RCL_CLIENT))
+               return -EPROTO;
+
+       *secctx_name = name;
+       *secctx = req_capsule_client_get(pill, &RMF_FILE_SECCTX);
+       *secctx_size = req_capsule_get_size(pill, &RMF_FILE_SECCTX, RCL_CLIENT);
+
+       return 0;
+}
+
 static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
 {
-        struct md_ucred         *uc  = mdt_ucred(info);
-        struct md_attr          *ma = &info->mti_attr;
-        struct lu_attr          *la = &ma->ma_attr;
-        struct req_capsule      *pill = info->mti_pill;
-        struct mdt_reint_record *rr = &info->mti_rr;
-        struct mdt_rec_setattr  *rec;
-        ENTRY;
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct md_attr *ma = &info->mti_attr;
+       struct lu_attr *la = &ma->ma_attr;
+       struct req_capsule *pill = info->mti_pill;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct mdt_rec_setattr *rec;
+       struct lu_nodemap *nodemap;
 
-        CLASSERT(sizeof(struct mdt_rec_setattr)== sizeof(struct mdt_rec_reint));
-        rec = req_capsule_client_get(pill, &RMF_REC_REINT);
-        if (rec == NULL)
-                RETURN(-EFAULT);
+       ENTRY;
 
-        uc->mu_fsuid = rec->sa_fsuid;
-        uc->mu_fsgid = rec->sa_fsgid;
-        uc->mu_cap   = rec->sa_cap;
-        uc->mu_suppgids[0] = rec->sa_suppgid;
-        uc->mu_suppgids[1] = -1;
+       CLASSERT(sizeof(*rec) == sizeof(struct mdt_rec_reint));
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
+               RETURN(-EFAULT);
 
-        rr->rr_fid1 = &rec->sa_fid;
-        la->la_valid = mdt_attr_valid_xlate(attr_unpack(rec->sa_valid), rr, ma);
-        la->la_mode  = rec->sa_mode;
-        la->la_flags = rec->sa_attr_flags;
-        la->la_uid   = rec->sa_uid;
-        la->la_gid   = rec->sa_gid;
-        la->la_size  = rec->sa_size;
-        la->la_blocks = rec->sa_blocks;
-        la->la_ctime = rec->sa_ctime;
-        la->la_atime = rec->sa_atime;
-        la->la_mtime = rec->sa_mtime;
-        ma->ma_valid = MA_INODE;
-
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid = rec->sa_fsuid;
+       uc->uc_fsgid = rec->sa_fsgid;
+       uc->uc_cap   = rec->sa_cap;
+       uc->uc_suppgids[0] = rec->sa_suppgid;
+       uc->uc_suppgids[1] = -1;
 
-        RETURN(0);
+        rr->rr_fid1 = &rec->sa_fid;
+       la->la_valid = mdt_attr_valid_xlate(rec->sa_valid, rr, ma);
+       la->la_mode  = rec->sa_mode;
+       la->la_flags = rec->sa_attr_flags;
+
+       nodemap = nodemap_get_from_exp(info->mti_exp);
+       if (IS_ERR(nodemap))
+               RETURN(PTR_ERR(nodemap));
+
+       la->la_uid   = nodemap_map_id(nodemap, NODEMAP_UID,
+                                     NODEMAP_CLIENT_TO_FS, rec->sa_uid);
+       la->la_gid   = nodemap_map_id(nodemap, NODEMAP_GID,
+                                     NODEMAP_CLIENT_TO_FS, rec->sa_gid);
+       la->la_projid = rec->sa_projid;
+       nodemap_putref(nodemap);
+
+       la->la_size  = rec->sa_size;
+       la->la_blocks = rec->sa_blocks;
+       la->la_ctime = rec->sa_ctime;
+       la->la_atime = rec->sa_atime;
+       la->la_mtime = rec->sa_mtime;
+       ma->ma_valid = MA_INODE;
+
+       ma->ma_attr_flags |= rec->sa_bias & (MDS_CLOSE_INTENT |
+                               MDS_DATA_MODIFIED | MDS_TRUNC_KEEP_LEASE);
+       RETURN(0);
 }
 
-static int mdt_ioepoch_unpack(struct mdt_thread_info *info)
+static int mdt_close_handle_unpack(struct mdt_thread_info *info)
 {
-        struct req_capsule *pill = info->mti_pill;
-        ENTRY;
+       struct req_capsule *pill = info->mti_pill;
+       struct mdt_ioepoch *ioepoch;
+       ENTRY;
+
+       if (req_capsule_get_size(pill, &RMF_MDT_EPOCH, RCL_CLIENT))
+               ioepoch = req_capsule_client_get(pill, &RMF_MDT_EPOCH);
+       else
+               ioepoch = NULL;
 
-        if (req_capsule_get_size(pill, &RMF_MDT_EPOCH, RCL_CLIENT))
-                info->mti_ioepoch =
-                        req_capsule_client_get(pill, &RMF_MDT_EPOCH);
-        else
-                info->mti_ioepoch = NULL;
-        RETURN(info->mti_ioepoch == NULL ? -EFAULT : 0);
+       if (ioepoch == NULL)
+               RETURN(-EPROTO);
+
+       info->mti_open_handle = ioepoch->mio_open_handle;
+
+       RETURN(0);
 }
 
 static inline int mdt_dlmreq_unpack(struct mdt_thread_info *info) {
@@ -874,58 +1178,95 @@ static int mdt_setattr_unpack(struct mdt_thread_info *info)
         if (rc)
                 RETURN(rc);
 
-        /* Epoch may be absent */
-        mdt_ioepoch_unpack(info);
-
         if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
                 rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA);
                 rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
                                                         RCL_CLIENT);
-                ma->ma_lmm_size = rr->rr_eadatalen;
-                if (ma->ma_lmm_size > 0) {
-                        ma->ma_lmm = (void *)rr->rr_eadata;
-                        ma->ma_valid |= MA_LOV;
-                }
-        }
 
-        rc = mdt_dlmreq_unpack(info);
-        RETURN(rc);
+               if (rr->rr_eadatalen > 0) {
+                       const struct lmv_user_md        *lum;
+
+                       lum = rr->rr_eadata;
+                       /* Sigh ma_valid(from req) does not indicate whether
+                        * it will set LOV/LMV EA, so we have to check magic */
+                       if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
+                               ma->ma_valid |= MA_LMV;
+                               ma->ma_lmv = (void *)rr->rr_eadata;
+                               ma->ma_lmv_size = rr->rr_eadatalen;
+                       } else {
+                               ma->ma_valid |= MA_LOV;
+                               ma->ma_lmm = (void *)rr->rr_eadata;
+                               ma->ma_lmm_size = rr->rr_eadatalen;
+                       }
+               }
+       }
+
+       rc = mdt_dlmreq_unpack(info);
+       RETURN(rc);
+}
+
+static int mdt_close_intent_unpack(struct mdt_thread_info *info)
+{
+       struct md_attr          *ma = &info->mti_attr;
+       struct req_capsule      *pill = info->mti_pill;
+       ENTRY;
+
+       if (!(ma->ma_attr_flags & MDS_CLOSE_INTENT))
+               RETURN(0);
+
+       req_capsule_extend(pill, &RQF_MDS_CLOSE_INTENT);
+
+       if (!(req_capsule_has_field(pill, &RMF_CLOSE_DATA, RCL_CLIENT) &&
+           req_capsule_field_present(pill, &RMF_CLOSE_DATA, RCL_CLIENT)))
+               RETURN(-EFAULT);
+
+       RETURN(0);
 }
 
 int mdt_close_unpack(struct mdt_thread_info *info)
 {
-        int rc;
-        ENTRY;
+       int rc;
+       ENTRY;
 
-        rc = mdt_ioepoch_unpack(info);
+       rc = mdt_close_handle_unpack(info);
         if (rc)
                 RETURN(rc);
 
-        RETURN(mdt_setattr_unpack_rec(info));
+       rc = mdt_setattr_unpack_rec(info);
+       if (rc)
+               RETURN(rc);
+
+       rc = mdt_close_intent_unpack(info);
+       if (rc)
+               RETURN(rc);
+
+       RETURN(mdt_init_ucred_reint(info));
 }
 
 static int mdt_create_unpack(struct mdt_thread_info *info)
 {
-        struct md_ucred         *uc  = mdt_ucred(info);
-        struct mdt_rec_create   *rec;
-        struct lu_attr          *attr = &info->mti_attr.ma_attr;
-        struct mdt_reint_record *rr = &info->mti_rr;
-        struct req_capsule      *pill = info->mti_pill;
-        struct md_op_spec       *sp = &info->mti_spec;
-        int rc;
-        ENTRY;
+       struct lu_ucred *uc  = mdt_ucred(info);
+       struct mdt_rec_create *rec;
+       struct lu_attr *attr = &info->mti_attr.ma_attr;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct req_capsule      *pill = info->mti_pill;
+       struct md_op_spec       *sp = &info->mti_spec;
+       int rc;
 
-        CLASSERT(sizeof(struct mdt_rec_create) == sizeof(struct mdt_rec_reint));
-        rec = req_capsule_client_get(pill, &RMF_REC_REINT);
-        if (rec == NULL)
-                RETURN(-EFAULT);
+       ENTRY;
+
+       CLASSERT(sizeof(*rec) == sizeof(struct mdt_rec_reint));
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
+               RETURN(-EFAULT);
 
-        uc->mu_fsuid = rec->cr_fsuid;
-        uc->mu_fsgid = rec->cr_fsgid;
-        uc->mu_cap   = rec->cr_cap;
-        uc->mu_suppgids[0] = rec->cr_suppgid1;
-        uc->mu_suppgids[1] = -1;
-        uc->mu_umask = rec->cr_umask;
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid = rec->cr_fsuid;
+       uc->uc_fsgid = rec->cr_fsgid;
+       uc->uc_cap   = rec->cr_cap;
+       uc->uc_suppgids[0] = rec->cr_suppgid1;
+       uc->uc_suppgids[1] = -1;
+       uc->uc_umask = rec->cr_umask;
 
         rr->rr_fid1 = &rec->cr_fid1;
         rr->rr_fid2 = &rec->cr_fid2;
@@ -936,20 +1277,14 @@ static int mdt_create_unpack(struct mdt_thread_info *info)
         attr->la_ctime = rec->cr_time;
         attr->la_mtime = rec->cr_time;
         attr->la_atime = rec->cr_time;
-        attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID |
-                         LA_CTIME | LA_MTIME | LA_ATIME;
+       attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID | LA_TYPE |
+                        LA_CTIME | LA_MTIME | LA_ATIME;
         memset(&sp->u, 0, sizeof(sp->u));
         sp->sp_cr_flags = get_mrc_cr_flags(rec);
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        mdt_set_capainfo(info, 1, rr->rr_fid2, BYPASS_CAPA);
-
-       rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-       rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME,
-                                             RCL_CLIENT) - 1;
-       LASSERT(rr->rr_name && rr->rr_namelen > 0);
+       rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
+       if (rc < 0)
+               RETURN(rc);
 
        if (S_ISLNK(attr->la_mode)) {
                 const char *tgt = NULL;
@@ -962,33 +1297,54 @@ static int mdt_create_unpack(struct mdt_thread_info *info)
                 if (tgt == NULL)
                         RETURN(-EFAULT);
         } else {
-                req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_RMT_ACL);
-        }
+               req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_ACL);
+               if (S_ISDIR(attr->la_mode) &&
+                   req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT) > 0) {
+                       sp->u.sp_ea.eadata =
+                               req_capsule_client_get(pill, &RMF_EADATA);
+                       sp->u.sp_ea.eadatalen =
+                               req_capsule_get_size(pill, &RMF_EADATA,
+                                                    RCL_CLIENT);
+                       sp->sp_cr_flags |= MDS_OPEN_HAS_EA;
+               }
+       }
 
-        rc = mdt_dlmreq_unpack(info);
-        RETURN(rc);
+       rc = mdt_file_secctx_unpack(pill, &sp->sp_cr_file_secctx_name,
+                                   &sp->sp_cr_file_secctx,
+                                   &sp->sp_cr_file_secctx_size);
+       if (rc < 0)
+               RETURN(rc);
+
+       rc = req_check_sepol(pill);
+       if (rc)
+               RETURN(rc);
+
+       rc = mdt_dlmreq_unpack(info);
+       RETURN(rc);
 }
 
 static int mdt_link_unpack(struct mdt_thread_info *info)
 {
-        struct md_ucred         *uc  = mdt_ucred(info);
-        struct mdt_rec_link     *rec;
-        struct lu_attr          *attr = &info->mti_attr.ma_attr;
-        struct mdt_reint_record *rr = &info->mti_rr;
-        struct req_capsule      *pill = info->mti_pill;
-        int rc;
-        ENTRY;
+       struct lu_ucred *uc  = mdt_ucred(info);
+       struct mdt_rec_link *rec;
+       struct lu_attr *attr = &info->mti_attr.ma_attr;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct req_capsule *pill = info->mti_pill;
+       int rc;
 
-        CLASSERT(sizeof(struct mdt_rec_link) == sizeof(struct mdt_rec_reint));
-        rec = req_capsule_client_get(pill, &RMF_REC_REINT);
-        if (rec == NULL)
-                RETURN(-EFAULT);
+       ENTRY;
+
+       CLASSERT(sizeof(*rec) == sizeof(struct mdt_rec_reint));
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
+               RETURN(-EFAULT);
 
-        uc->mu_fsuid = rec->lk_fsuid;
-        uc->mu_fsgid = rec->lk_fsgid;
-        uc->mu_cap   = rec->lk_cap;
-        uc->mu_suppgids[0] = rec->lk_suppgid1;
-        uc->mu_suppgids[1] = rec->lk_suppgid2;
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid = rec->lk_fsuid;
+       uc->uc_fsgid = rec->lk_fsgid;
+       uc->uc_cap   = rec->lk_cap;
+       uc->uc_suppgids[0] = rec->lk_suppgid1;
+       uc->uc_suppgids[1] = rec->lk_suppgid2;
 
         attr->la_uid = rec->lk_fsuid;
         attr->la_gid = rec->lk_fsgid;
@@ -998,234 +1354,273 @@ static int mdt_link_unpack(struct mdt_thread_info *info)
         attr->la_mtime = rec->lk_time;
         attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME;
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
-                mdt_set_capainfo(info, 1, rr->rr_fid2,
-                                 req_capsule_client_get(pill, &RMF_CAPA2));
+       rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
+       if (rc < 0)
+               RETURN(rc);
 
-        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-        if (rr->rr_name == NULL)
-                RETURN(-EFAULT);
-        rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1;
+       rc = req_check_sepol(pill);
+       if (rc)
+               RETURN(rc);
 
-       LASSERT(rr->rr_namelen > 0);
+       rc = mdt_dlmreq_unpack(info);
 
-        rc = mdt_dlmreq_unpack(info);
-        RETURN(rc);
+       RETURN(rc);
 }
 
 static int mdt_unlink_unpack(struct mdt_thread_info *info)
 {
-        struct md_ucred         *uc  = mdt_ucred(info);
-        struct mdt_rec_unlink   *rec;
-        struct md_attr          *ma = &info->mti_attr;
-        struct lu_attr          *attr = &info->mti_attr.ma_attr;
-        struct mdt_reint_record *rr = &info->mti_rr;
-        struct req_capsule      *pill = info->mti_pill;
-        int rc;
-        ENTRY;
+       struct lu_ucred *uc  = mdt_ucred(info);
+       struct mdt_rec_unlink *rec;
+       struct lu_attr *attr = &info->mti_attr.ma_attr;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct req_capsule      *pill = info->mti_pill;
+       int rc;
 
-        CLASSERT(sizeof(struct mdt_rec_unlink) == sizeof(struct mdt_rec_reint));
-        rec = req_capsule_client_get(pill, &RMF_REC_REINT);
-        if (rec == NULL)
-                RETURN(-EFAULT);
+       ENTRY;
 
-        uc->mu_fsuid = rec->ul_fsuid;
-        uc->mu_fsgid = rec->ul_fsgid;
-        uc->mu_cap   = rec->ul_cap;
-        uc->mu_suppgids[0] = rec->ul_suppgid1;
-        uc->mu_suppgids[1] = -1;
-
-        attr->la_uid = rec->ul_fsuid;
-        attr->la_gid = rec->ul_fsgid;
-        rr->rr_fid1 = &rec->ul_fid1;
-        rr->rr_fid2 = &rec->ul_fid2;
-        attr->la_ctime = rec->ul_time;
-        attr->la_mtime = rec->ul_time;
-        attr->la_mode  = rec->ul_mode;
-        attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
-
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-
-       rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-       rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1;
-       if (rr->rr_name == NULL || rr->rr_namelen == 0)
+       CLASSERT(sizeof(*rec) == sizeof(struct mdt_rec_reint));
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
                RETURN(-EFAULT);
 
-        if (rec->ul_bias & MDS_VTX_BYPASS)
-                ma->ma_attr_flags |= MDS_VTX_BYPASS;
-        else
-                ma->ma_attr_flags &= ~MDS_VTX_BYPASS;
-
-        info->mti_spec.no_create = !!req_is_replay(mdt_info_req(info));
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid = rec->ul_fsuid;
+       uc->uc_fsgid = rec->ul_fsgid;
+       uc->uc_cap   = rec->ul_cap;
+       uc->uc_suppgids[0] = rec->ul_suppgid1;
+       uc->uc_suppgids[1] = -1;
+
+       attr->la_uid = rec->ul_fsuid;
+       attr->la_gid = rec->ul_fsgid;
+       rr->rr_fid1 = &rec->ul_fid1;
+       rr->rr_fid2 = &rec->ul_fid2;
+       attr->la_ctime = rec->ul_time;
+       attr->la_mtime = rec->ul_time;
+       attr->la_mode  = rec->ul_mode;
+       attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
+
+       rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
+       if (rc < 0)
+               RETURN(rc);
+
+       info->mti_spec.no_create = !!req_is_replay(mdt_info_req(info));
+
+       rc = req_check_sepol(pill);
+       if (rc)
+               RETURN(rc);
+
+       rc = mdt_dlmreq_unpack(info);
+       RETURN(rc);
+}
 
-        rc = mdt_dlmreq_unpack(info);
-        RETURN(rc);
+static int mdt_rmentry_unpack(struct mdt_thread_info *info)
+{
+       info->mti_spec.sp_rm_entry = 1;
+       return mdt_unlink_unpack(info);
 }
 
 static int mdt_rename_unpack(struct mdt_thread_info *info)
 {
-        struct md_ucred         *uc = mdt_ucred(info);
-        struct mdt_rec_rename   *rec;
-        struct md_attr          *ma = &info->mti_attr;
-        struct lu_attr          *attr = &info->mti_attr.ma_attr;
-        struct mdt_reint_record *rr = &info->mti_rr;
-        struct req_capsule      *pill = info->mti_pill;
-        int rc;
-        ENTRY;
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct mdt_rec_rename *rec;
+       struct lu_attr *attr = &info->mti_attr.ma_attr;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct req_capsule *pill = info->mti_pill;
+       struct md_op_spec *spec = &info->mti_spec;
+       int rc;
 
-        CLASSERT(sizeof(struct mdt_rec_rename) == sizeof(struct mdt_rec_reint));
-        rec = req_capsule_client_get(pill, &RMF_REC_REINT);
-        if (rec == NULL)
-                RETURN(-EFAULT);
+       ENTRY;
 
-        uc->mu_fsuid = rec->rn_fsuid;
-        uc->mu_fsgid = rec->rn_fsgid;
-        uc->mu_cap   = rec->rn_cap;
-        uc->mu_suppgids[0] = rec->rn_suppgid1;
-        uc->mu_suppgids[1] = rec->rn_suppgid2;
-
-        attr->la_uid = rec->rn_fsuid;
-        attr->la_gid = rec->rn_fsgid;
-        rr->rr_fid1 = &rec->rn_fid1;
-        rr->rr_fid2 = &rec->rn_fid2;
-        attr->la_ctime = rec->rn_time;
-        attr->la_mtime = rec->rn_time;
-        /* rename_tgt contains the mode already */
-        attr->la_mode = rec->rn_mode;
-        attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
-
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
-                mdt_set_capainfo(info, 1, rr->rr_fid2,
-                                 req_capsule_client_get(pill, &RMF_CAPA2));
-
-        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-        rr->rr_tgt = req_capsule_client_get(pill, &RMF_SYMTGT);
-        if (rr->rr_name == NULL || rr->rr_tgt == NULL)
-                RETURN(-EFAULT);
-        rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1;
-        rr->rr_tgtlen = req_capsule_get_size(pill, &RMF_SYMTGT, RCL_CLIENT) - 1;
-       LASSERT(rr->rr_namelen > 0 && rr->rr_tgtlen > 0);
+       CLASSERT(sizeof(*rec) == sizeof(struct mdt_rec_reint));
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
+               RETURN(-EFAULT);
 
-        if (rec->rn_bias & MDS_VTX_BYPASS)
-                ma->ma_attr_flags |= MDS_VTX_BYPASS;
-        else
-                ma->ma_attr_flags &= ~MDS_VTX_BYPASS;
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid = rec->rn_fsuid;
+       uc->uc_fsgid = rec->rn_fsgid;
+       uc->uc_cap   = rec->rn_cap;
+       uc->uc_suppgids[0] = rec->rn_suppgid1;
+       uc->uc_suppgids[1] = rec->rn_suppgid2;
 
-        info->mti_spec.no_create = !!req_is_replay(mdt_info_req(info));
+       attr->la_uid = rec->rn_fsuid;
+       attr->la_gid = rec->rn_fsgid;
+       rr->rr_fid1 = &rec->rn_fid1;
+       rr->rr_fid2 = &rec->rn_fid2;
+       attr->la_ctime = rec->rn_time;
+       attr->la_mtime = rec->rn_time;
+       /* rename_tgt contains the mode already */
+       attr->la_mode = rec->rn_mode;
+       attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
 
-        rc = mdt_dlmreq_unpack(info);
-        RETURN(rc);
+       rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
+       if (rc < 0)
+               RETURN(rc);
+
+       rc = mdt_name_unpack(pill, &RMF_SYMTGT, &rr->rr_tgt_name, 0);
+       if (rc < 0)
+               RETURN(rc);
+
+       spec->no_create = !!req_is_replay(mdt_info_req(info));
+
+       rc = req_check_sepol(pill);
+       if (rc)
+               RETURN(rc);
+
+       rc = mdt_dlmreq_unpack(info);
+
+       RETURN(rc);
+}
+
+static int mdt_migrate_unpack(struct mdt_thread_info *info)
+{
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct mdt_rec_rename *rec;
+       struct lu_attr *attr = &info->mti_attr.ma_attr;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct req_capsule *pill = info->mti_pill;
+       struct md_op_spec *spec = &info->mti_spec;
+       int rc;
+
+       ENTRY;
+
+       CLASSERT(sizeof(*rec) == sizeof(struct mdt_rec_reint));
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
+               RETURN(-EFAULT);
+
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid = rec->rn_fsuid;
+       uc->uc_fsgid = rec->rn_fsgid;
+       uc->uc_cap   = rec->rn_cap;
+       uc->uc_suppgids[0] = rec->rn_suppgid1;
+       uc->uc_suppgids[1] = rec->rn_suppgid2;
+
+       attr->la_uid = rec->rn_fsuid;
+       attr->la_gid = rec->rn_fsgid;
+       rr->rr_fid1 = &rec->rn_fid1;
+       rr->rr_fid2 = &rec->rn_fid2;
+       attr->la_ctime = rec->rn_time;
+       attr->la_mtime = rec->rn_time;
+       /* rename_tgt contains the mode already */
+       attr->la_mode = rec->rn_mode;
+       attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
+
+       rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
+       if (rc < 0)
+               RETURN(rc);
+
+       if (rec->rn_bias & MDS_CLOSE_MIGRATE) {
+               rc = mdt_close_handle_unpack(info);
+               if (rc)
+                       RETURN(rc);
+
+               spec->sp_migrate_close = 1;
+       }
+
+       /* lustre version > 2.11 migration packs lum */
+       if (req_capsule_has_field(pill, &RMF_EADATA, RCL_CLIENT)) {
+               if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
+                       rr->rr_eadatalen = req_capsule_get_size(pill,
+                                                               &RMF_EADATA,
+                                                               RCL_CLIENT);
+
+                       if (rr->rr_eadatalen > 0) {
+                               rr->rr_eadata = req_capsule_client_get(pill,
+                                                               &RMF_EADATA);
+                               spec->u.sp_ea.eadatalen = rr->rr_eadatalen;
+                               spec->u.sp_ea.eadata = rr->rr_eadata;
+                               spec->sp_cr_flags |= MDS_OPEN_HAS_EA;
+                       }
+               } else {
+                       /* old client doesn't provide lum. */
+                       RETURN(-EOPNOTSUPP);
+               }
+       }
+
+       spec->no_create = !!req_is_replay(mdt_info_req(info));
+
+       rc = mdt_dlmreq_unpack(info);
+
+       RETURN(rc);
 }
 
 /*
- * please see comment above LOV_MAGIC_V1_DEF
+ * please see comment above LOV_MAGIC_V1_DEFINED
  */
-static void mdt_fix_lov_magic(struct mdt_thread_info *info)
+void mdt_fix_lov_magic(struct mdt_thread_info *info, void *eadata)
 {
-       struct mdt_reint_record *rr = &info->mti_rr;
-       struct lov_user_md_v1   *v1;
+       struct lov_user_md_v1   *v1 = eadata;
 
-       v1 = (void *)rr->rr_eadata;
        LASSERT(v1);
 
        if (unlikely(req_is_replay(mdt_info_req(info)))) {
-               if (v1->lmm_magic == LOV_USER_MAGIC_V1) {
-                       v1->lmm_magic = LOV_MAGIC_V1_DEF;
-               } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V1)) {
-                       v1->lmm_magic = __swab32(LOV_MAGIC_V1_DEF);
-               } else if (v1->lmm_magic == LOV_USER_MAGIC_V3) {
-                       v1->lmm_magic = LOV_MAGIC_V3_DEF;
-               } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
-                       v1->lmm_magic = __swab32(LOV_MAGIC_V3_DEF);
-               }
+               if ((v1->lmm_magic & LOV_MAGIC_MASK) == LOV_MAGIC_MAGIC)
+                       v1->lmm_magic |= LOV_MAGIC_DEFINED;
+               else if ((v1->lmm_magic & __swab32(LOV_MAGIC_MAGIC)) ==
+                        __swab32(LOV_MAGIC_MAGIC))
+                       v1->lmm_magic |= __swab32(LOV_MAGIC_DEFINED);
        }
 }
 
 static int mdt_open_unpack(struct mdt_thread_info *info)
 {
-        struct md_ucred         *uc = mdt_ucred(info);
-        struct mdt_rec_create   *rec;
-        struct lu_attr          *attr = &info->mti_attr.ma_attr;
-        struct req_capsule      *pill = info->mti_pill;
-        struct mdt_reint_record *rr   = &info->mti_rr;
-        struct ptlrpc_request   *req  = mdt_info_req(info);
-        struct md_op_spec       *sp   = &info->mti_spec;
-        ENTRY;
-
-        CLASSERT(sizeof(struct mdt_rec_create) == sizeof(struct mdt_rec_reint));
-        rec = req_capsule_client_get(pill, &RMF_REC_REINT);
-        if (rec == NULL)
-                RETURN(-EFAULT);
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct mdt_rec_create *rec;
+       struct lu_attr *attr = &info->mti_attr.ma_attr;
+       struct req_capsule *pill = info->mti_pill;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct ptlrpc_request *req = mdt_info_req(info);
+       struct md_op_spec *sp = &info->mti_spec;
+       int rc;
+       ENTRY;
 
-        uc->mu_fsuid = rec->cr_fsuid;
-        uc->mu_fsgid = rec->cr_fsgid;
-        uc->mu_cap   = rec->cr_cap;
-        uc->mu_suppgids[0] = rec->cr_suppgid1;
-        uc->mu_suppgids[1] = rec->cr_suppgid2;
-        uc->mu_umask = rec->cr_umask;
+       CLASSERT(sizeof(struct mdt_rec_create) == sizeof(struct mdt_rec_reint));
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
+               RETURN(-EFAULT);
 
-        rr->rr_fid1   = &rec->cr_fid1;
-        rr->rr_fid2   = &rec->cr_fid2;
-        rr->rr_handle = &rec->cr_old_handle;
-        attr->la_mode = rec->cr_mode;
-        attr->la_rdev  = rec->cr_rdev;
-        attr->la_uid   = rec->cr_fsuid;
-        attr->la_gid   = rec->cr_fsgid;
-        attr->la_ctime = rec->cr_time;
-        attr->la_mtime = rec->cr_time;
-        attr->la_atime = rec->cr_time;
-        attr->la_valid = LA_MODE  | LA_RDEV  | LA_UID   | LA_GID |
-                         LA_CTIME | LA_MTIME | LA_ATIME;
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid = rec->cr_fsuid;
+       uc->uc_fsgid = rec->cr_fsgid;
+       uc->uc_cap   = rec->cr_cap;
+       uc->uc_suppgids[0] = rec->cr_suppgid1;
+       uc->uc_suppgids[1] = rec->cr_suppgid2;
+       uc->uc_umask = rec->cr_umask;
+
+       rr->rr_fid1   = &rec->cr_fid1;
+       rr->rr_fid2   = &rec->cr_fid2;
+       rr->rr_open_handle = &rec->cr_open_handle_old;
+       attr->la_mode = rec->cr_mode;
+       attr->la_rdev  = rec->cr_rdev;
+       attr->la_uid   = rec->cr_fsuid;
+       attr->la_gid   = rec->cr_fsgid;
+       attr->la_ctime = rec->cr_time;
+       attr->la_mtime = rec->cr_time;
+       attr->la_atime = rec->cr_time;
+       attr->la_valid = LA_MODE  | LA_RDEV  | LA_UID   | LA_GID |
+                        LA_CTIME | LA_MTIME | LA_ATIME;
         memset(&info->mti_spec.u, 0, sizeof(info->mti_spec.u));
         info->mti_spec.sp_cr_flags = get_mrc_cr_flags(rec);
         /* Do not trigger ASSERTION if client miss to set such flags. */
         if (unlikely(info->mti_spec.sp_cr_flags == 0))
                 RETURN(-EPROTO);
-        info->mti_replayepoch = rec->cr_ioepoch;
 
         info->mti_cross_ref = !!(rec->cr_bias & MDS_CROSS_REF);
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        if (req_is_replay(req) &&
-            req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT)) {
-#if 0
-                mdt_set_capainfo(info, 1, rr->rr_fid2,
-                                 req_capsule_client_get(pill, &RMF_CAPA2));
-#else
-                /*
-                 * FIXME: capa in replay open request might have expired,
-                 * bypass capa check. Security hole?
-                 */
-                mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA);
-                mdt_set_capainfo(info, 1, rr->rr_fid2, BYPASS_CAPA);
-#endif
-        }
-
-        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-        if (rr->rr_name == NULL)
-                RETURN(-EFAULT);
-        rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1;
+       mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, MNF_FIX_ANON);
 
         if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
                 rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
                                                         RCL_CLIENT);
+
                 if (rr->rr_eadatalen > 0) {
                         rr->rr_eadata = req_capsule_client_get(pill,
                                                                &RMF_EADATA);
                         sp->u.sp_ea.eadatalen = rr->rr_eadatalen;
                         sp->u.sp_ea.eadata = rr->rr_eadata;
                         sp->no_create = !!req_is_replay(req);
-                       mdt_fix_lov_magic(info);
+                       mdt_fix_lov_magic(info, rr->rr_eadata);
                 }
 
                 /*
@@ -1238,31 +1633,43 @@ static int mdt_open_unpack(struct mdt_thread_info *info)
                        rr->rr_eadatalen = MIN_MD_SIZE;
        }
 
-        RETURN(0);
+       rc = mdt_file_secctx_unpack(pill, &sp->sp_cr_file_secctx_name,
+                                   &sp->sp_cr_file_secctx,
+                                   &sp->sp_cr_file_secctx_size);
+       if (rc < 0)
+               RETURN(rc);
+
+       rc = req_check_sepol(pill);
+       if (rc)
+               RETURN(rc);
+
+       RETURN(rc);
 }
 
 static int mdt_setxattr_unpack(struct mdt_thread_info *info)
 {
-        struct mdt_reint_record   *rr   = &info->mti_rr;
-        struct md_ucred           *uc   = mdt_ucred(info);
-        struct lu_attr            *attr = &info->mti_attr.ma_attr;
-        struct req_capsule        *pill = info->mti_pill;
-        struct mdt_rec_setxattr   *rec;
-        ENTRY;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct lu_attr *attr = &info->mti_attr.ma_attr;
+       struct req_capsule *pill = info->mti_pill;
+       struct mdt_rec_setxattr *rec;
+       int rc;
+       ENTRY;
 
 
-        CLASSERT(sizeof(struct mdt_rec_setxattr) ==
-                         sizeof(struct mdt_rec_reint));
+       CLASSERT(sizeof(struct mdt_rec_setxattr) ==
+                sizeof(struct mdt_rec_reint));
 
-        rec = req_capsule_client_get(pill, &RMF_REC_REINT);
-        if (rec == NULL)
-                RETURN(-EFAULT);
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
+               RETURN(-EFAULT);
 
-        uc->mu_fsuid  = rec->sx_fsuid;
-        uc->mu_fsgid  = rec->sx_fsgid;
-        uc->mu_cap    = rec->sx_cap;
-        uc->mu_suppgids[0] = rec->sx_suppgid1;
-        uc->mu_suppgids[1] = -1;
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid  = rec->sx_fsuid;
+       uc->uc_fsgid  = rec->sx_fsgid;
+       uc->uc_cap    = rec->sx_cap;
+       uc->uc_suppgids[0] = rec->sx_suppgid1;
+       uc->uc_suppgids[1] = -1;
 
         rr->rr_opcode = rec->sx_opcode;
         rr->rr_fid1   = &rec->sx_fid;
@@ -1271,21 +1678,17 @@ static int mdt_setxattr_unpack(struct mdt_thread_info *info)
         attr->la_size = rec->sx_size;
         attr->la_flags = rec->sx_flags;
 
-        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
-                mdt_set_capainfo(info, 0, rr->rr_fid1,
-                                 req_capsule_client_get(pill, &RMF_CAPA1));
-        else
-                mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA);
-
-        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-        if (rr->rr_name == NULL)
-                RETURN(-EFAULT);
-        rr->rr_namelen = req_capsule_get_size(pill, &RMF_NAME, RCL_CLIENT) - 1;
-        LASSERT(rr->rr_namelen > 0);
+       rc = mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, 0);
+       if (rc < 0)
+               RETURN(rc);
 
         if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
                 rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA,
                                                         RCL_CLIENT);
+
+               if (rr->rr_eadatalen > info->mti_mdt->mdt_max_ea_size)
+                       RETURN(-E2BIG);
+
                 if (rr->rr_eadatalen > 0) {
                         rr->rr_eadata = req_capsule_client_get(pill,
                                                                &RMF_EADATA);
@@ -1299,20 +1702,60 @@ static int mdt_setxattr_unpack(struct mdt_thread_info *info)
                 RETURN(-EFAULT);
         }
 
+       rc = req_check_sepol(pill);
+       if (rc)
+               RETURN(rc);
+
+       if (mdt_dlmreq_unpack(info) < 0)
+               RETURN(-EPROTO);
+
         RETURN(0);
 }
 
+static int mdt_resync_unpack(struct mdt_thread_info *info)
+{
+       struct req_capsule      *pill = info->mti_pill;
+       struct mdt_reint_record *rr   = &info->mti_rr;
+       struct lu_ucred         *uc     = mdt_ucred(info);
+       struct mdt_rec_resync   *rec;
+       ENTRY;
+
+       CLASSERT(sizeof(*rec) == sizeof(struct mdt_rec_reint));
+       rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+       if (rec == NULL)
+               RETURN(-EFAULT);
+
+       /* This prior initialization is needed for old_init_ucred_reint() */
+       uc->uc_fsuid = rec->rs_fsuid;
+       uc->uc_fsgid = rec->rs_fsgid;
+       uc->uc_cap   = rec->rs_cap;
+
+       rr->rr_fid1   = &rec->rs_fid;
+       rr->rr_mirror_id = rec->rs_mirror_id;
+
+       /* cookie doesn't need to be swapped but it has been swapped
+        * in lustre_swab_mdt_rec_reint() as rr_mtime, so here it needs
+        * restoring. */
+       if (ptlrpc_req_need_swab(mdt_info_req(info)))
+               __swab64s(&rec->rs_lease_handle.cookie);
+       rr->rr_lease_handle = &rec->rs_lease_handle;
+
+       RETURN(mdt_dlmreq_unpack(info));
+}
 
 typedef int (*reint_unpacker)(struct mdt_thread_info *info);
 
 static reint_unpacker mdt_reint_unpackers[REINT_MAX] = {
-        [REINT_SETATTR]  = mdt_setattr_unpack,
-        [REINT_CREATE]   = mdt_create_unpack,
-        [REINT_LINK]     = mdt_link_unpack,
-        [REINT_UNLINK]   = mdt_unlink_unpack,
-        [REINT_RENAME]   = mdt_rename_unpack,
-        [REINT_OPEN]     = mdt_open_unpack,
-        [REINT_SETXATTR] = mdt_setxattr_unpack
+       [REINT_SETATTR]  = mdt_setattr_unpack,
+       [REINT_CREATE]   = mdt_create_unpack,
+       [REINT_LINK]     = mdt_link_unpack,
+       [REINT_UNLINK]   = mdt_unlink_unpack,
+       [REINT_RENAME]   = mdt_rename_unpack,
+       [REINT_OPEN]     = mdt_open_unpack,
+       [REINT_SETXATTR] = mdt_setxattr_unpack,
+       [REINT_RMENTRY]  = mdt_rmentry_unpack,
+       [REINT_MIGRATE]  = mdt_migrate_unpack,
+       [REINT_RESYNC]   = mdt_resync_unpack,
 };
 
 int mdt_reint_unpack(struct mdt_thread_info *info, __u32 op)
@@ -1330,3 +1773,45 @@ int mdt_reint_unpack(struct mdt_thread_info *info, __u32 op)
         }
         RETURN(rc);
 }
+
+void mdt_pack_secctx_in_reply(struct mdt_thread_info *info,
+                             struct mdt_object *child)
+{
+       char *secctx_name;
+       struct lu_buf *buffer;
+       struct mdt_body *repbody;
+       struct req_capsule *pill = info->mti_pill;
+       int rc;
+
+       if (req_capsule_has_field(pill, &RMF_FILE_SECCTX, RCL_SERVER) &&
+           req_capsule_get_size(pill, &RMF_FILE_SECCTX, RCL_SERVER) != 0) {
+               secctx_name =
+                       req_capsule_client_get(pill, &RMF_FILE_SECCTX_NAME);
+               buffer = &info->mti_buf;
+
+               /* fill reply buffer with security context now */
+               buffer->lb_len = req_capsule_get_size(pill, &RMF_FILE_SECCTX,
+                                                     RCL_SERVER);
+               buffer->lb_buf = req_capsule_server_get(info->mti_pill,
+                                                       &RMF_FILE_SECCTX);
+               rc = mo_xattr_get(info->mti_env, mdt_object_child(child),
+                                 buffer, secctx_name);
+               if (rc >= 0) {
+                       CDEBUG(D_SEC,
+                              "found security context of size %d for "DFID"\n",
+                              rc, PFID(mdt_object_fid(child)));
+
+                       repbody = req_capsule_server_get(pill, &RMF_MDT_BODY);
+                       repbody->mbo_valid |= OBD_MD_SECCTX;
+                       if (rc < buffer->lb_len)
+                               req_capsule_shrink(pill, &RMF_FILE_SECCTX, rc,
+                                                  RCL_SERVER);
+               } else {
+                       CDEBUG(D_SEC,
+                            "security context not found for "DFID": rc = %d\n",
+                            PFID(mdt_object_fid(child)), rc);
+                       req_capsule_shrink(pill, &RMF_FILE_SECCTX, 0,
+                                          RCL_SERVER);
+               }
+       }
+}