Whamcloud - gitweb
LU-11642 mdt: revoke remote LOOKUP lock in dir layout shrink
[fs/lustre-release.git] / lustre / mdt / mdt_xattr.c
index c1eae8d..e5f70e3 100644 (file)
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
  *
- *  linux/mdt/mdt_xattr.c
- *  Lustre Metadata Target (mdt) extended attributes management.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *  Copyright (C) 2002-2006 Cluster File Systems, Inc.
- *   Author: Peter Braam <braam@clusterfs.com>
- *   Author: Andreas Dilger <adilger@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Huang Hua <huanghua@clusterfs.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   You may have signed or agreed to another license before downloading
- *   this software.  If so, you are bound by the terms and conditions
- *   of that agreement, and the following does not apply to you.  See the
- *   LICENSE file included with this distribution for more information.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
- *   If you did not agree to a different license, then this copy of Lustre
- *   is open source software; you can redistribute it and/or modify it
- *   under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
  *
- *   In either case, Lustre is distributed in the hope that it will be
- *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   license text for more details.
+ * Copyright (c) 2011, 2017, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mdt/mdt_xattr.c
+ *
+ * Lustre Metadata Target (mdt) extended attributes management.
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Huang Hua <huanghua@clusterfs.com>
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #define DEBUG_SUBSYSTEM S_MDS
 
-/* prerequisite for linux/xattr.h */
-#include <linux/types.h>
-/* prerequisite for linux/xattr.h */
-#include <linux/fs.h>
-/* XATTR_{REPLACE,CREATE} */
 #include <linux/xattr.h>
-
+#include <obd_class.h>
+#include <lustre_nodemap.h>
+#include <lustre_acl.h>
 #include "mdt_internal.h"
 
 
 /* return EADATA length to the caller; a negative value means an error */
 static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
 {
-        struct req_capsule     *pill = info->mti_pill ;
-        struct ptlrpc_request  *req = mdt_info_req(info);
-        char                   *xattr_name;
-        __u64                   valid = info->mti_body->valid;
-        static const char       user_string[] = "user.";
-        int                     size, rc;
-        ENTRY;
-        
-        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK))
-                RETURN(-ENOMEM);
-
-        /* Determine how many bytes we need */
-        if (valid & OBD_MD_FLXATTR) {
-                xattr_name = req_capsule_client_get(pill, &RMF_NAME);
-                if (!xattr_name)
-                        RETURN(-EFAULT);
-
-                if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) &&
-                    !strncmp(xattr_name, user_string, sizeof(user_string) - 1))
-                        RETURN(-EOPNOTSUPP);
-                
-                size = mo_xattr_get(info->mti_env,
-                                    mdt_object_child(info->mti_object),
-                                    &LU_BUF_NULL, xattr_name);
-        } else if (valid & OBD_MD_FLXATTRLS) {
-                size = mo_xattr_list(info->mti_env,
-                                     mdt_object_child(info->mti_object),
-                                     &LU_BUF_NULL);
-        } else {
-                CDEBUG(D_INFO, "Valid bits: "LPX64"\n", info->mti_body->valid);
-                RETURN(-EINVAL);
-        }
-
-        if (size < 0) {
-                if (size == -ENODATA)
-                        size = 0;
-                else if (size != -EOPNOTSUPP) {
-                        CDEBUG(D_INFO, "Error geting EA size: %d\n", size);
-                        RETURN(size);
-                }
-        }
-
-        if (info->mti_body->eadatasize != 0 &&
-            info->mti_body->eadatasize < size)
-                RETURN(-ERANGE);
-
-        req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER,
-                             min_t(int, size, info->mti_body->eadatasize));
+       struct req_capsule *pill = info->mti_pill;
+       struct ptlrpc_request *req = mdt_info_req(info);
+       const char *xattr_name;
+       u64 valid;
+       static const char user_string[] = "user.";
+       int size;
+       int rc = 0;
+       int rc2;
+       ENTRY;
+
+       valid = info->mti_body->mbo_valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS);
+
+       /* Determine how many bytes we need */
+        if (valid == OBD_MD_FLXATTR) {
+               xattr_name = req_capsule_client_get(pill, &RMF_NAME);
+               if (!xattr_name)
+                       RETURN(-EFAULT);
+
+               if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_XATTR) &&
+                   !strncmp(xattr_name, user_string, sizeof(user_string) - 1))
+                       RETURN(-EOPNOTSUPP);
+
+               size = mo_xattr_get(info->mti_env,
+                                   mdt_object_child(info->mti_object),
+                                   &LU_BUF_NULL, xattr_name);
+               if (size == -ENODATA) {
+                       /* XXX: Some client code will not handle -ENODATA
+                        * for XATTR_NAME_LOV (trusted.lov) properly. */
+                       if (strcmp(xattr_name, XATTR_NAME_LOV) == 0)
+                               rc = 0;
+                       else
+                               rc = -ENODATA;
+
+                       size = 0;
+               }
+       } else if (valid == OBD_MD_FLXATTRLS) {
+               xattr_name = "list";
+               size = mo_xattr_list(info->mti_env,
+                                    mdt_object_child(info->mti_object),
+                                    &LU_BUF_NULL);
+       } else if (valid == OBD_MD_FLXATTRALL) {
+               xattr_name = "all";
+               /* N.B. eadatasize = 0 is not valid for FLXATTRALL */
+               /* We could calculate accurate sizes, but this would
+                * introduce a lot of overhead, let's do it later... */
+               size = info->mti_body->mbo_eadatasize;
+               req_capsule_set_size(pill, &RMF_EAVALS, RCL_SERVER, size);
+               req_capsule_set_size(pill, &RMF_EAVALS_LENS, RCL_SERVER, size);
+       } else {
+               CDEBUG(D_INFO, "Valid bits: %#llx\n",
+                      info->mti_body->mbo_valid);
+               RETURN(-EINVAL);
+       }
+
+       if (size < 0) {
+               if (size != -EOPNOTSUPP && size != -ENOENT)
+                       CERROR("%s: error geting EA size for '%s': rc = %d\n",
+                              mdt_obd_name(info->mti_mdt), xattr_name, size);
+               RETURN(size);
+       }
+
+       if (req_capsule_has_field(pill, &RMF_ACL, RCL_SERVER))
+               req_capsule_set_size(pill, &RMF_ACL, RCL_SERVER,
+                                    LUSTRE_POSIX_ACL_MAX_SIZE_OLD);
+
+       req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER,
+                            info->mti_body->mbo_eadatasize == 0 ? 0 : size);
+
+       rc2 = req_capsule_server_pack(pill);
+       if (rc2 < 0)
+               RETURN(rc2);
+
+       if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK))
+               RETURN(-ENOMEM);
+
+       RETURN(rc < 0 ? rc : size);
+}
 
-        rc = req_capsule_server_pack(pill);
-        if (rc) {
-                LASSERT(rc < 0);
-                RETURN(rc);
-        }
+static int mdt_nodemap_map_acl(struct mdt_thread_info *info, void *buf,
+                              size_t size, const char *name,
+                              enum nodemap_tree_type tree_type)
+{
+       struct lu_nodemap      *nodemap;
+       struct obd_export      *exp = info->mti_exp;
+       int                     rc = size;
+
+       ENTRY;
+
+       if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0 ||
+           strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
+               if (size > info->mti_mdt->mdt_max_ea_size ||
+                    (!exp_connect_large_acl(exp) &&
+                     size > LUSTRE_POSIX_ACL_MAX_SIZE_OLD))
+                       GOTO(out, rc = -ERANGE);
+
+               nodemap = nodemap_get_from_exp(exp);
+               if (IS_ERR(nodemap))
+                       GOTO(out, rc = PTR_ERR(nodemap));
+
+               rc = nodemap_map_acl(nodemap, buf, size, tree_type);
+               nodemap_putref(nodemap);
+               if (rc < 0)
+                       GOTO(out, rc);
+       }
+out:
+       RETURN(rc);
+}
 
-        RETURN(size);
+static int mdt_getxattr_all(struct mdt_thread_info *info,
+                           struct mdt_body *reqbody, struct mdt_body *repbody,
+                           struct lu_buf *buf, struct md_object *next)
+{
+       const struct lu_env *env = info->mti_env;
+       char *v, *b, *eadatahead, *eadatatail;
+       __u32 *sizes;
+       int eadatasize, eavallen = 0, eavallens = 0, rc;
+
+       ENTRY;
+
+       /*
+        * The format of the pill is the following:
+        * EADATA:      attr1\0attr2\0...attrn\0
+        * EAVALS:      val1val2...valn
+        * EAVALS_LENS: 4,4,...4
+        */
+
+       eadatahead = buf->lb_buf;
+
+       /* Fill out EADATA first */
+       rc = mo_xattr_list(env, next, buf);
+       if (rc < 0)
+               GOTO(out_shrink, rc);
+
+       eadatasize = rc;
+       eadatatail = eadatahead + eadatasize;
+
+       v = req_capsule_server_get(info->mti_pill, &RMF_EAVALS);
+       sizes = req_capsule_server_get(info->mti_pill, &RMF_EAVALS_LENS);
+
+       /* Fill out EAVALS and EAVALS_LENS */
+       for (b = eadatahead; b < eadatatail; b += strlen(b) + 1, v += rc) {
+               buf->lb_buf = v;
+               buf->lb_len = reqbody->mbo_eadatasize - eavallen;
+               rc = mo_xattr_get(env, next, buf, b);
+               if (rc < 0)
+                       GOTO(out_shrink, rc);
+               rc = mdt_nodemap_map_acl(info, buf->lb_buf, rc, b,
+                                        NODEMAP_FS_TO_CLIENT);
+               if (rc < 0)
+                       GOTO(out_shrink, rc);
+               sizes[eavallens] = rc;
+               eavallens++;
+               eavallen += rc;
+       }
+
+out_shrink:
+       if (rc < 0) {
+               eadatasize = 0;
+               eavallens = 0;
+               eavallen = 0;
+       }
+       repbody->mbo_aclsize = eavallen;
+       repbody->mbo_max_mdsize = eavallens;
+
+       req_capsule_shrink(info->mti_pill, &RMF_EAVALS, eavallen, RCL_SERVER);
+       req_capsule_shrink(info->mti_pill, &RMF_EAVALS_LENS,
+                          eavallens * sizeof(__u32), RCL_SERVER);
+       req_capsule_shrink(info->mti_pill, &RMF_EADATA, eadatasize, RCL_SERVER);
+
+       if (rc >= 0)
+               RETURN(eadatasize);
+       return rc;
 }
 
 int mdt_getxattr(struct mdt_thread_info *info)
 {
-        struct ptlrpc_request  *req = mdt_info_req(info);
-        struct mdt_export_data *med = mdt_req2med(req);
-        struct md_ucred        *uc  = mdt_ucred(info);
+       struct ptlrpc_request  *req = mdt_info_req(info);
         struct mdt_body        *reqbody;
         struct mdt_body        *repbody = NULL;
         struct md_object       *next;
         struct lu_buf          *buf;
         int                     easize, rc;
+       u64                     valid;
         ENTRY;
 
         LASSERT(info->mti_object != NULL);
-        LASSERT(lu_object_assert_exists(&info->mti_object->mot_obj.mo_lu));
+       LASSERT(lu_object_assert_exists(&info->mti_object->mot_obj));
 
-        CDEBUG(D_INODE, "getxattr "DFID"\n", PFID(&info->mti_body->fid1));
+       CDEBUG(D_INODE, "getxattr "DFID"\n", PFID(&info->mti_body->mbo_fid1));
 
         reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
         if (reqbody == NULL)
                 RETURN(err_serious(-EFAULT));
 
-        rc = mdt_init_ucred(info, reqbody);
+       rc = mdt_init_ucred(info, reqbody);
         if (rc)
                 RETURN(err_serious(rc));
 
         next = mdt_object_child(info->mti_object);
-
-        if (info->mti_body->valid & OBD_MD_FLRMTRGETFACL) {
-                __u32 perm = mdt_identity_get_perm(uc->mu_identity,
-                                                   med->med_rmtclient,
-                                                   req->rq_peer.nid);
-
-                LASSERT(med->med_rmtclient);
-                if (!(perm & CFS_RMTACL_PERM))
-                        GOTO(out, rc = err_serious(-EPERM));
-
-                rc = mo_permission(info->mti_env, NULL, next, NULL,
-                                   MAY_RGETFACL);
-                if (rc)
-                        GOTO(out, rc = err_serious(rc));
-        }
-
         easize = mdt_getxattr_pack_reply(info);
-        if (easize < 0)
-                GOTO(out, rc = err_serious(easize));
+       if (easize == -ENODATA)
+               GOTO(out, rc = easize);
+       else if (easize < 0)
+               GOTO(out, rc = err_serious(easize));
 
         repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
         LASSERT(repbody != NULL);
 
         /* No further getxattr is needed. */
-        if (easize == 0 || reqbody->eadatasize == 0)
-                GOTO(out, rc = easize);
-
-        buf = &info->mti_buf;
-        buf->lb_buf = req_capsule_server_get(info->mti_pill, &RMF_EADATA);
-        buf->lb_len = easize;
-
-        if (info->mti_body->valid & OBD_MD_FLXATTR) {
-                int flags = CFS_IC_NOTHING;
-                char *xattr_name = req_capsule_client_get(info->mti_pill,
-                                                          &RMF_NAME);
-                CDEBUG(D_INODE, "getxattr %s\n", xattr_name);
-
-                rc = mo_xattr_get(info->mti_env, next, buf, xattr_name);
-                if (rc < 0) {
-                        CERROR("getxattr failed: %d\n", rc);
-                        GOTO(out, rc);
-                }
-
-                if (info->mti_body->valid &
-                    (OBD_MD_FLRMTLSETFACL | OBD_MD_FLRMTLGETFACL))
-                        flags = CFS_IC_ALL;
-                else if (info->mti_body->valid & OBD_MD_FLRMTRGETFACL)
-                        flags = CFS_IC_MAPPED;
-
-                if (rc > 0 && flags != CFS_IC_NOTHING) {
-                        int rc1;
-
-                        LASSERT(med->med_rmtclient);
-                        rc1 = lustre_posix_acl_xattr_id2client(uc,
-                                        med->med_idmap,
-                                        (posix_acl_xattr_header *)(buf->lb_buf),
-                                        rc, flags);
-                        if (unlikely(rc1 < 0))
-                                rc = rc1;
-                }
-        } else if (info->mti_body->valid & OBD_MD_FLXATTRLS) {
-                CDEBUG(D_INODE, "listxattr\n");
-
-                rc = mo_xattr_list(info->mti_env, next, buf);
-                if (rc < 0)
-                        CDEBUG(D_INFO, "listxattr failed: %d\n", rc);
-        } else
-                LBUG();
-
-        EXIT;
+       if (easize == 0 || reqbody->mbo_eadatasize == 0)
+               GOTO(out, rc = easize);
+
+       buf = &info->mti_buf;
+       buf->lb_buf = req_capsule_server_get(info->mti_pill, &RMF_EADATA);
+       buf->lb_len = easize;
+
+       valid = info->mti_body->mbo_valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS);
+
+       if (valid == OBD_MD_FLXATTR) {
+               const char *xattr_name = req_capsule_client_get(info->mti_pill,
+                                                               &RMF_NAME);
+               rc = mo_xattr_get(info->mti_env, next, buf, xattr_name);
+               if (rc < 0)
+                       GOTO(out, rc);
+
+               rc = mdt_nodemap_map_acl(info, buf->lb_buf, rc, xattr_name,
+                                        NODEMAP_FS_TO_CLIENT);
+       } else if (valid == OBD_MD_FLXATTRLS) {
+               CDEBUG(D_INODE, "listxattr\n");
+
+               rc = mo_xattr_list(info->mti_env, next, buf);
+               if (rc < 0)
+                       CDEBUG(D_INFO, "listxattr failed: %d\n", rc);
+       } else if (valid == OBD_MD_FLXATTRALL) {
+               rc = mdt_getxattr_all(info, reqbody, repbody,
+                                     buf, next);
+       } else
+               LBUG();
+
+       EXIT;
 out:
-        if (rc >= 0) {
-                repbody->eadatasize = rc;
-                rc = 0;
-        }
-        mdt_exit_ucred(info);
-        return rc;
+       if (rc >= 0) {
+               mdt_counter_incr(req, LPROC_MDT_GETXATTR);
+               /* LU-11109: Set OBD_MD_FLXATTR on success so that
+                * newer clients can distinguish between nonexistent
+                * xattrs and zero length values. */
+               repbody->mbo_valid |= OBD_MD_FLXATTR;
+               repbody->mbo_eadatasize = rc;
+               rc = 0;
+       }
+       mdt_exit_ucred(info);
+       return rc;
 }
 
-static int mdt_rmtlsetfacl(struct mdt_thread_info *info,
-                           struct md_object *next,
-                           const char *xattr_name,
-                           ext_acl_xattr_header *header,
-                           posix_acl_xattr_header **out)
+/* shrink dir layout after migration */
+static int mdt_dir_layout_shrink(struct mdt_thread_info *info)
 {
-        struct ptlrpc_request  *req = mdt_info_req(info);
-        struct mdt_export_data *med = mdt_req2med(req);
-        struct md_ucred        *uc = mdt_ucred(info);
-        struct lu_buf          *buf = &info->mti_buf;
-        int                     rc;
-        ENTRY;
-
-        rc = lustre_ext_acl_xattr_id2server(uc, med->med_idmap, header);
-        if (rc)
-                RETURN(rc);
-        rc = mo_xattr_get(info->mti_env, next, &LU_BUF_NULL, xattr_name);
-        if (rc == -ENODATA)
-                rc = 0;
-        else if (rc < 0)
-                RETURN(rc);
-
-        buf->lb_len = rc;
-        if (buf->lb_len > 0) {
-                OBD_ALLOC(buf->lb_buf, buf->lb_len);
-                if (unlikely(buf->lb_buf == NULL))
-                        RETURN(-ENOMEM);
-
-                rc = mo_xattr_get(info->mti_env, next, buf, xattr_name);
-                if (rc < 0) {
-                        CERROR("getxattr failed: %d\n", rc);
-                        GOTO(_out, rc);
-                }
-        } else
-                buf->lb_buf = NULL;
-
-        rc = lustre_acl_xattr_merge2posix((posix_acl_xattr_header *)(buf->lb_buf),
-                                          buf->lb_len, header, out);
-        EXIT;
-
-_out:
-        if (rc <= 0 && buf->lb_buf != NULL)
-                OBD_FREE(buf->lb_buf, buf->lb_len);
-        return rc;
+       const struct lu_env *env = info->mti_env;
+       struct mdt_device *mdt = info->mti_mdt;
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct lmv_user_md *lmu = rr->rr_eadata;
+       __u32 lum_stripe_count = lmu->lum_stripe_count;
+       struct lu_buf *buf = &info->mti_buf;
+       struct lmv_mds_md_v1 *lmv;
+       struct md_attr *ma = &info->mti_attr;
+       struct ldlm_enqueue_info *einfo = &info->mti_einfo[0];
+       struct mdt_object *pobj = NULL;
+       struct mdt_object *obj;
+       struct mdt_lock_handle *lhp = NULL;
+       struct mdt_lock_handle *lhc;
+       int rc;
+
+       ENTRY;
+
+       if (!mdt->mdt_enable_dir_migration)
+               RETURN(-EPERM);
+
+       if (!md_capable(uc, CFS_CAP_SYS_ADMIN) &&
+           uc->uc_gid != mdt->mdt_enable_remote_dir_gid &&
+           mdt->mdt_enable_remote_dir_gid != -1)
+               RETURN(-EPERM);
+
+       /* mti_big_lmm is used to save LMV, but it may be uninitialized. */
+       if (unlikely(!info->mti_big_lmm)) {
+               info->mti_big_lmmsize = lmv_mds_md_size(64, LMV_MAGIC);
+               OBD_ALLOC(info->mti_big_lmm, info->mti_big_lmmsize);
+               if (!info->mti_big_lmm)
+                       RETURN(-ENOMEM);
+       }
+
+       obj = mdt_object_find(env, mdt, rr->rr_fid1);
+       if (IS_ERR(obj))
+               RETURN(PTR_ERR(obj));
+
+       /* get parent from PFID */
+       rc = mdt_attr_get_pfid(info, obj, &ma->ma_pfid);
+       if (rc)
+               GOTO(put_obj, rc);
+
+       pobj = mdt_object_find(env, mdt, &ma->ma_pfid);
+       if (IS_ERR(pobj))
+               GOTO(put_obj, rc = PTR_ERR(pobj));
+
+       /* revoke object remote LOOKUP lock */
+       if (mdt_object_remote(pobj)) {
+               rc = mdt_revoke_remote_lookup_lock(info, pobj, obj);
+               if (rc)
+                       GOTO(put_pobj, rc);
+       }
+
+       /*
+        * lock parent if dir will be shrunk to 1 stripe, because dir will be
+        * converted to a normal directory, which will change the dir fid and
+        * update the namespace of the parent.
+        */
+       lhp = &info->mti_lh[MDT_LH_PARENT];
+       mdt_lock_reg_init(lhp, LCK_PW);
+
+       if (le32_to_cpu(lmu->lum_stripe_count) < 2) {
+               rc = mdt_reint_object_lock(info, pobj, lhp,
+                                          MDS_INODELOCK_UPDATE, true);
+               if (rc)
+                       GOTO(put_pobj, rc);
+       }
+
+       /* lock object */
+       lhc = &info->mti_lh[MDT_LH_CHILD];
+       mdt_lock_reg_init(lhc, LCK_EX);
+       rc = mdt_reint_striped_lock(info, obj, lhc, MDS_INODELOCK_FULL, einfo,
+                                   true);
+       if (rc)
+               GOTO(unlock_pobj, rc);
+
+       ma->ma_lmv = info->mti_big_lmm;
+       ma->ma_lmv_size = info->mti_big_lmmsize;
+       ma->ma_valid = 0;
+       rc = mdt_stripe_get(info, obj, ma, XATTR_NAME_LMV);
+       if (rc)
+               GOTO(unlock_obj, rc);
+
+       /* user may run 'lfs migrate' multiple times, so dir may be shrunk already */
+       if (!(ma->ma_valid & MA_LMV))
+               GOTO(unlock_obj, rc = -EALREADY);
+
+       lmv = &ma->ma_lmv->lmv_md_v1;
+
+       /* ditto */
+       if (!(le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION))
+               GOTO(unlock_obj, rc = -EALREADY);
+
+       lum_stripe_count = lmu->lum_stripe_count;
+       if (!lum_stripe_count)
+               lum_stripe_count = cpu_to_le32(1);
+
+       if (lmv->lmv_migrate_offset != lum_stripe_count) {
+               CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       lmv->lmv_migrate_offset, lmu->lum_stripe_count);
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       if (lmv->lmv_master_mdt_index != lmu->lum_stripe_offset) {
+               CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       lmv->lmv_master_mdt_index, lmu->lum_stripe_offset);
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       if (lum_stripe_count > 1 &&
+           (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) !=
+           lmu->lum_hash_type) {
+               CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       lmv->lmv_hash_type, lmu->lum_hash_type);
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       buf->lb_buf = rr->rr_eadata;
+       buf->lb_len = rr->rr_eadatalen;
+       rc = mo_xattr_set(env, mdt_object_child(obj), buf, XATTR_NAME_LMV, 0);
+       GOTO(unlock_obj, rc);
+
+unlock_obj:
+       mdt_reint_striped_unlock(info, obj, lhc, einfo, rc);
+unlock_pobj:
+       mdt_object_unlock(info, pobj, lhp, rc);
+put_pobj:
+       mdt_object_put(env, pobj);
+put_obj:
+       mdt_object_put(env, obj);
+
+       return rc;
 }
 
 int mdt_reint_setxattr(struct mdt_thread_info *info,
                        struct mdt_lock_handle *unused)
 {
-        struct ptlrpc_request   *req = mdt_info_req(info);
-        struct mdt_export_data  *med = mdt_req2med(req);
-        struct md_ucred         *uc  = mdt_ucred(info);
-        const char               user_string[] = "user.";
-        const char               trust_string[] = "trusted.";
-        struct mdt_lock_handle  *lh;
-        struct req_capsule      *pill = info->mti_pill;
-        const struct lu_env     *env  = info->mti_env;
-        struct lu_buf           *buf  = &info->mti_buf;
-        struct mdt_reint_record *rr   = &info->mti_rr;
-        struct lu_attr          *attr = &info->mti_attr.ma_attr;
-        struct mdt_object       *obj; 
-        struct md_object        *child;
-        __u64                    valid = attr->la_valid;
-        const char              *xattr_name;
-        int                      xattr_len = 0;
-        __u64                    lockpart;
-        int                      rc;
-        posix_acl_xattr_header  *new_xattr = NULL;
-        ENTRY;
-
-        CDEBUG(D_INODE, "setxattr for "DFID"\n", PFID(rr->rr_fid1));
+       struct ptlrpc_request   *req = mdt_info_req(info);
+       struct mdt_lock_handle  *lh;
+       const struct lu_env     *env  = info->mti_env;
+       struct lu_buf           *buf  = &info->mti_buf;
+       struct mdt_reint_record *rr   = &info->mti_rr;
+       struct md_attr          *ma = &info->mti_attr;
+       struct lu_attr          *attr = &info->mti_attr.ma_attr;
+       struct mdt_object       *obj;
+       struct md_object        *child;
+       __u64                    valid = attr->la_valid;
+       const char              *xattr_name = rr->rr_name.ln_name;
+       int                      xattr_len = rr->rr_eadatalen;
+       __u64                    lockpart = MDS_INODELOCK_UPDATE;
+       int                      rc;
+       ENTRY;
+
+       CDEBUG(D_INODE, "setxattr for "DFID": %s %s\n", PFID(rr->rr_fid1),
+              valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name);
+
+       if (info->mti_dlm_req)
+               ldlm_request_cancel(req, info->mti_dlm_req, 0, LATF_SKIP);
 
         if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
                 RETURN(err_serious(-ENOMEM));
 
-        xattr_name = rr->rr_name;
-
-        CDEBUG(D_INODE, "%s xattr %s\n",
-               valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name);
-
-        rc = mdt_init_ucred_reint(info);
+       rc = mdt_init_ucred_reint(info);
         if (rc != 0)
                 RETURN(rc);
 
-        if (valid & OBD_MD_FLRMTRSETFACL) {
-                __u32 perm = mdt_identity_get_perm(uc->mu_identity,
-                                                   med->med_rmtclient,
-                                                   req->rq_peer.nid);
-
-                LASSERT(med->med_rmtclient);
-                if (!(perm & CFS_RMTACL_PERM))
-                        GOTO(out, rc = err_serious(-EPERM));
-        }
-
-        /* various sanity check for xattr name */
-        xattr_name = req_capsule_client_get(pill, &RMF_NAME);
-        if (!xattr_name)
-                GOTO(out, rc = err_serious(-EFAULT));
-
-        if (strncmp(xattr_name, trust_string, sizeof(trust_string) - 1) == 0) {
-                if (strcmp(xattr_name + 8, XATTR_NAME_LOV) == 0)
-                        GOTO(out, rc = -EACCES);
-        }
-
-        if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) &&
-            (strncmp(xattr_name, user_string, sizeof(user_string) - 1) == 0)) {
-                GOTO(out, rc = -EOPNOTSUPP);
-        }
-
-        lockpart = MDS_INODELOCK_UPDATE;
+       if (strncmp(xattr_name, XATTR_USER_PREFIX,
+                   sizeof(XATTR_USER_PREFIX) - 1) == 0) {
+               if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_XATTR))
+                       GOTO(out, rc = -EOPNOTSUPP);
+       } else if (strncmp(xattr_name, XATTR_TRUSTED_PREFIX,
+                   sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0) {
+
+               /* setxattr(LMV) with lum is used to shrink dir layout */
+               if (strcmp(xattr_name, XATTR_NAME_LMV) == 0) {
+                       __u32 *magic = rr->rr_eadata;
+
+                       /* we don't allow removing LMV? */
+                       if (!rr->rr_eadata)
+                               GOTO(out, rc = 0);
+
+                       if (le32_to_cpu(*magic) == LMV_USER_MAGIC ||
+                           le32_to_cpu(*magic) == LMV_USER_MAGIC_SPECIFIC) {
+                               rc = mdt_dir_layout_shrink(info);
+                               GOTO(out, rc);
+                       }
+               }
+
+               if (!md_capable(mdt_ucred(info), CFS_CAP_SYS_ADMIN))
+                       GOTO(out, rc = -EPERM);
+
+               if (strcmp(xattr_name, XATTR_NAME_LOV) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_LMA) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_LMV) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_LINK) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_FID) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_VERSION) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_SOM) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_HSM) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_LFSCK_NAMESPACE) == 0)
+                       GOTO(out, rc = 0);
+       } else if ((valid & OBD_MD_FLXATTR) &&
+                  (strcmp(xattr_name, XATTR_NAME_ACL_ACCESS) == 0 ||
+                   strcmp(xattr_name, XATTR_NAME_ACL_DEFAULT) == 0)) {
+               rc = mdt_nodemap_map_acl(info, rr->rr_eadata, xattr_len,
+                                        xattr_name, NODEMAP_CLIENT_TO_FS);
+               if (rc < 0)
+                       GOTO(out, rc);
+               /* ACLs were mapped out, return an error so the user knows */
+               if (rc != xattr_len)
+                       GOTO(out, rc = -EPERM);
+       } else if ((strlen(xattr_name) > strlen(XATTR_LUSTRE_LOV) + 1) &&
+                  strncmp(xattr_name, XATTR_LUSTRE_LOV,
+                          strlen(XATTR_LUSTRE_LOV)) == 0) {
+
+               if (strncmp(xattr_name, XATTR_LUSTRE_LOV".add",
+                           strlen(XATTR_LUSTRE_LOV".add")) &&
+                   strncmp(xattr_name, XATTR_LUSTRE_LOV".set",
+                           strlen(XATTR_LUSTRE_LOV".set")) &&
+                   strncmp(xattr_name, XATTR_LUSTRE_LOV".del",
+                           strlen(XATTR_LUSTRE_LOV".del"))) {
+                       CERROR("%s: invalid xattr name: %s\n",
+                              mdt_obd_name(info->mti_mdt), xattr_name);
+                       GOTO(out, rc = -EINVAL);
+               }
+
+               lockpart |= MDS_INODELOCK_LAYOUT;
+       }
+
+        /* Revoke all clients' lookup lock, since the access
+         * permissions for this inode are changed when ACL_ACCESS is
+         * set. This isn't needed for ACL_DEFAULT, since that does
+         * not change the access permissions of this inode, nor any
+         * other existing inodes. It is setting the ACLs inherited
+         * by new directories/files at create time. */
+       /* We need to revoke both LOOKUP|PERM locks here, see mdt_attr_set. */
         if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
-                lockpart |= MDS_INODELOCK_LOOKUP;
+               lockpart |= MDS_INODELOCK_PERM | MDS_INODELOCK_LOOKUP;
+       /* We need to take the lock on behalf of old clients so that newer
+        * clients flush their xattr caches */
+       else
+               lockpart |= MDS_INODELOCK_XATTR;
 
         lh = &info->mti_lh[MDT_LH_PARENT];
-        mdt_lock_reg_init(lh, LCK_PW);
+        /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
+         * to cancel them. */
+        mdt_lock_reg_init(lh, LCK_EX);
         obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
-        if (IS_ERR(obj))
-                GOTO(out, rc =  PTR_ERR(obj));
-
-        if (unlikely(!(valid & OBD_MD_FLCTIME))) {
-                CWARN("client miss to set OBD_MD_FLCTIME when "
-                      "setxattr: [object "DFID"] [valid %llu]\n",
-                      PFID(rr->rr_fid1), valid);
-                attr->la_ctime = cfs_time_current_sec();
-        }
-        attr->la_valid = LA_CTIME;
-        child = mdt_object_child(obj);
-        if (valid & OBD_MD_FLXATTR) {
-                char * xattr;
-
-                if (!req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
-                        CDEBUG(D_INFO, "no xattr data supplied\n");
-                        GOTO(out_unlock, rc = -EFAULT);
+       if (IS_ERR(obj))
+               GOTO(out, rc = PTR_ERR(obj));
+
+       tgt_vbr_obj_set(env, mdt_obj2dt(obj));
+       rc = mdt_version_get_check_save(info, obj, 0);
+       if (rc)
+               GOTO(out_unlock, rc);
+
+       if (unlikely(!(valid & OBD_MD_FLCTIME))) {
+               /* This isn't strictly an error, but all current clients
+                * should set OBD_MD_FLCTIME when setting attributes. */
+               CWARN("%s: client miss to set OBD_MD_FLCTIME when "
+                     "setxattr %s: [object "DFID"] [valid %llu]\n",
+                     mdt_obd_name(info->mti_mdt), xattr_name,
+                     PFID(rr->rr_fid1), valid);
+               attr->la_ctime = ktime_get_real_seconds();
+       }
+       attr->la_valid = LA_CTIME;
+       child = mdt_object_child(obj);
+       if (valid & OBD_MD_FLXATTR) {
+               int     flags = 0;
+
+               if (attr->la_flags & XATTR_REPLACE)
+                       flags |= LU_XATTR_REPLACE;
+
+               if (attr->la_flags & XATTR_CREATE)
+                       flags |= LU_XATTR_CREATE;
+
+               mdt_fail_write(env, info->mti_mdt->mdt_bottom,
+                              OBD_FAIL_MDS_SETXATTR_WRITE);
+
+               buf->lb_buf = rr->rr_eadata;
+               buf->lb_len = xattr_len;
+               rc = mo_xattr_set(env, child, buf, xattr_name, flags);
+               /* update ctime after xattr changed */
+               if (rc == 0) {
+                       ma->ma_attr_flags |= MDS_PERM_BYPASS;
+                       mo_attr_set(env, child, ma);
+               }
+        } else if (valid & OBD_MD_FLXATTRRM) {
+                rc = mo_xattr_del(env, child, xattr_name);
+                /* update ctime after xattr changed */
+                if (rc == 0) {
+                        ma->ma_attr_flags |= MDS_PERM_BYPASS;
+                        mo_attr_set(env, child, ma);
                 }
+       } else {
+               CDEBUG(D_INFO, "valid bits: %#llx\n", valid);
+               rc = -EINVAL;
+       }
 
-                xattr_len = req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT);
-                if (xattr_len) {
-                        int flags = 0;
-
-                        xattr = req_capsule_client_get(pill, &RMF_EADATA);
-
-                        if (valid & OBD_MD_FLRMTLSETFACL) {
-                                LASSERT(med->med_rmtclient);
-                                xattr_len = mdt_rmtlsetfacl(info, child,
-                                                xattr_name,
-                                                (ext_acl_xattr_header *)xattr,
-                                                &new_xattr);
-                                if (xattr_len < 0)
-                                        GOTO(out_unlock, rc = xattr_len);
+       if (rc == 0)
+               mdt_counter_incr(req, LPROC_MDT_SETXATTR);
 
-                                xattr = (char *)new_xattr;
-                        }
-
-                        if (attr->la_flags & XATTR_REPLACE)
-                                flags |= LU_XATTR_REPLACE;
-
-                        if (attr->la_flags & XATTR_CREATE)
-                                flags |= LU_XATTR_CREATE;
-
-                        mdt_fail_write(env, info->mti_mdt->mdt_bottom,
-                                       OBD_FAIL_MDS_SETXATTR_WRITE);
-
-                        buf->lb_buf = xattr;
-                        buf->lb_len = xattr_len;
-                        rc = mo_xattr_set(env, child, buf, xattr_name, flags, attr);
-                }
-        } else if (valid & OBD_MD_FLXATTRRM) {
-                rc = mo_xattr_del(env, child, xattr_name, attr);
-        } else {
-                CDEBUG(D_INFO, "valid bits: "LPX64"\n", valid);
-                rc = -EINVAL;
-        }
         EXIT;
 out_unlock:
         mdt_object_unlock_put(info, obj, lh, rc);
-        if (unlikely(new_xattr != NULL))
-                lustre_posix_acl_xattr_free(new_xattr, xattr_len);
 out:
-        mdt_exit_ucred(info);
-        return rc;
+       mdt_exit_ucred(info);
+       return rc;
 }