Whamcloud - gitweb
LU-11642 mdt: revoke remote LOOKUP lock in dir layout shrink
[fs/lustre-release.git] / lustre / mdt / mdt_xattr.c
index 481883c..e5f70e3 100644 (file)
 /* return EADATA length to the caller. negative value means error */
 static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
 {
-        struct req_capsule     *pill = info->mti_pill ;
-        struct ptlrpc_request  *req = mdt_info_req(info);
-        char                   *xattr_name;
-        __u64                   valid;
-        static const char       user_string[] = "user.";
-        int                     size, rc;
-        ENTRY;
+       struct req_capsule *pill = info->mti_pill;
+       struct ptlrpc_request *req = mdt_info_req(info);
+       const char *xattr_name;
+       u64 valid;
+       static const char user_string[] = "user.";
+       int size;
+       int rc = 0;
+       int rc2;
+       ENTRY;
 
        valid = info->mti_body->mbo_valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS);
 
-        /* Determine how many bytes we need */
+       /* Determine how many bytes we need */
         if (valid == OBD_MD_FLXATTR) {
-                xattr_name = req_capsule_client_get(pill, &RMF_NAME);
-                if (!xattr_name)
-                        RETURN(-EFAULT);
+               xattr_name = req_capsule_client_get(pill, &RMF_NAME);
+               if (!xattr_name)
+                       RETURN(-EFAULT);
 
                if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_XATTR) &&
                    !strncmp(xattr_name, user_string, sizeof(user_string) - 1))
@@ -74,6 +76,16 @@ static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
                size = mo_xattr_get(info->mti_env,
                                    mdt_object_child(info->mti_object),
                                    &LU_BUF_NULL, xattr_name);
+               if (size == -ENODATA) {
+                       /* XXX: Some client code will not handle -ENODATA
+                        * for XATTR_NAME_LOV (trusted.lov) properly. */
+                       if (strcmp(xattr_name, XATTR_NAME_LOV) == 0)
+                               rc = 0;
+                       else
+                               rc = -ENODATA;
+
+                       size = 0;
+               }
        } else if (valid == OBD_MD_FLXATTRLS) {
                xattr_name = "list";
                size = mo_xattr_list(info->mti_env,
@@ -93,9 +105,7 @@ static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
                RETURN(-EINVAL);
        }
 
-       if (size == -ENODATA) {
-               size = 0;
-       } else if (size < 0) {
+       if (size < 0) {
                if (size != -EOPNOTSUPP && size != -ENOENT)
                        CERROR("%s: error geting EA size for '%s': rc = %d\n",
                               mdt_obd_name(info->mti_mdt), xattr_name, size);
@@ -106,18 +116,17 @@ static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
                req_capsule_set_size(pill, &RMF_ACL, RCL_SERVER,
                                     LUSTRE_POSIX_ACL_MAX_SIZE_OLD);
 
-        req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER,
+       req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER,
                             info->mti_body->mbo_eadatasize == 0 ? 0 : size);
-        rc = req_capsule_server_pack(pill);
-        if (rc) {
-                LASSERT(rc < 0);
-                RETURN(rc);
-        }
 
-        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK))
-                RETURN(-ENOMEM);
+       rc2 = req_capsule_server_pack(pill);
+       if (rc2 < 0)
+               RETURN(rc2);
 
-        RETURN(size);
+       if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK))
+               RETURN(-ENOMEM);
+
+       RETURN(rc < 0 ? rc : size);
 }
 
 static int mdt_nodemap_map_acl(struct mdt_thread_info *info, void *buf,
@@ -242,8 +251,10 @@ int mdt_getxattr(struct mdt_thread_info *info)
 
         next = mdt_object_child(info->mti_object);
         easize = mdt_getxattr_pack_reply(info);
-        if (easize < 0)
-                GOTO(out, rc = err_serious(easize));
+       if (easize == -ENODATA)
+               GOTO(out, rc = easize);
+       else if (easize < 0)
+               GOTO(out, rc = err_serious(easize));
 
         repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
         LASSERT(repbody != NULL);
@@ -283,6 +294,10 @@ int mdt_getxattr(struct mdt_thread_info *info)
 out:
        if (rc >= 0) {
                mdt_counter_incr(req, LPROC_MDT_GETXATTR);
+               /* LU-11109: Set OBD_MD_FLXATTR on success so that
+                * newer clients can distinguish between nonexistent
+                * xattrs and zero length values. */
+               repbody->mbo_valid |= OBD_MD_FLXATTR;
                repbody->mbo_eadatasize = rc;
                rc = 0;
        }
@@ -290,6 +305,147 @@ out:
        return rc;
 }
 
+/**
+ * Shrink a directory layout after migration.
+ *
+ * Handles setxattr(XATTR_NAME_LMV) whose value is a struct lmv_user_md
+ * (taken from info->mti_rr.rr_eadata).  The requested stripe count, master
+ * MDT index and (for more than one stripe) hash type are checked against
+ * the LMV currently stored on the directory, and if they agree the user
+ * buffer is written as the new XATTR_NAME_LMV value.
+ *
+ * \param[in] info     thread info; the request is described by info->mti_rr
+ *
+ * \retval 0           the new LMV was stored
+ * \retval -EPERM      dir migration is disabled, or the caller is neither
+ *                     CFS_CAP_SYS_ADMIN capable nor in the remote-dir group
+ *                     (and that group is not -1)
+ * \retval -ENOMEM     the LMV scratch buffer could not be allocated
+ * \retval -EALREADY   the directory has no LMV, or its LMV does not carry
+ *                     LMV_HASH_FLAG_MIGRATION (nothing left to shrink)
+ * \retval -EINVAL     the user buffer is too short, or it does not match
+ *                     the on-disk migration state
+ * \retval negative    errno returned by one of the called helpers
+ */
+static int mdt_dir_layout_shrink(struct mdt_thread_info *info)
+{
+       const struct lu_env *env = info->mti_env;
+       struct mdt_device *mdt = info->mti_mdt;
+       struct lu_ucred *uc = mdt_ucred(info);
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct lmv_user_md *lmu = rr->rr_eadata;
+       struct lu_buf *buf = &info->mti_buf;
+       struct lmv_mds_md_v1 *lmv;
+       struct md_attr *ma = &info->mti_attr;
+       struct ldlm_enqueue_info *einfo = &info->mti_einfo[0];
+       struct mdt_object *pobj = NULL;
+       struct mdt_object *obj;
+       struct mdt_lock_handle *lhp = NULL;
+       struct mdt_lock_handle *lhc;
+       /* requested stripe count, in CPU byte order */
+       __u32 stripe_count;
+       int rc;
+
+       ENTRY;
+
+       if (!mdt->mdt_enable_dir_migration)
+               RETURN(-EPERM);
+
+       if (!md_capable(uc, CFS_CAP_SYS_ADMIN) &&
+           uc->uc_gid != mdt->mdt_enable_remote_dir_gid &&
+           mdt->mdt_enable_remote_dir_gid != -1)
+               RETURN(-EPERM);
+
+       /*
+        * The buffer comes from the client: make sure it is large enough to
+        * hold the lmv_user_md fields read below before touching them.
+        */
+       if (!lmu || rr->rr_eadatalen < (int)sizeof(*lmu))
+               RETURN(-EINVAL);
+
+       /*
+        * lmv_user_md fields are little-endian on the wire; convert once and
+        * do every comparison on the stripe count in CPU order so that the
+        * result does not depend on the server's endianness.
+        */
+       stripe_count = le32_to_cpu(lmu->lum_stripe_count);
+
+       /* mti_big_lmm is used to save LMV, but it may be uninitialized. */
+       if (unlikely(!info->mti_big_lmm)) {
+               info->mti_big_lmmsize = lmv_mds_md_size(64, LMV_MAGIC);
+               OBD_ALLOC(info->mti_big_lmm, info->mti_big_lmmsize);
+               if (!info->mti_big_lmm)
+                       RETURN(-ENOMEM);
+       }
+
+       obj = mdt_object_find(env, mdt, rr->rr_fid1);
+       if (IS_ERR(obj))
+               RETURN(PTR_ERR(obj));
+
+       /* get parent from PFID */
+       rc = mdt_attr_get_pfid(info, obj, &ma->ma_pfid);
+       if (rc)
+               GOTO(put_obj, rc);
+
+       pobj = mdt_object_find(env, mdt, &ma->ma_pfid);
+       if (IS_ERR(pobj))
+               GOTO(put_obj, rc = PTR_ERR(pobj));
+
+       /* revoke object remote LOOKUP lock */
+       if (mdt_object_remote(pobj)) {
+               rc = mdt_revoke_remote_lookup_lock(info, pobj, obj);
+               if (rc)
+                       GOTO(put_pobj, rc);
+       }
+
+       /*
+        * lock parent if dir will be shrunk to 1 stripe, because dir will be
+        * converted to normal directory, as will change dir fid and update
+        * namespace of parent.
+        */
+       lhp = &info->mti_lh[MDT_LH_PARENT];
+       mdt_lock_reg_init(lhp, LCK_PW);
+
+       if (stripe_count < 2) {
+               rc = mdt_reint_object_lock(info, pobj, lhp,
+                                          MDS_INODELOCK_UPDATE, true);
+               if (rc)
+                       GOTO(put_pobj, rc);
+       }
+
+       /* lock object */
+       lhc = &info->mti_lh[MDT_LH_CHILD];
+       mdt_lock_reg_init(lhc, LCK_EX);
+       rc = mdt_reint_striped_lock(info, obj, lhc, MDS_INODELOCK_FULL, einfo,
+                                   true);
+       if (rc)
+               GOTO(unlock_pobj, rc);
+
+       ma->ma_lmv = info->mti_big_lmm;
+       ma->ma_lmv_size = info->mti_big_lmmsize;
+       ma->ma_valid = 0;
+       rc = mdt_stripe_get(info, obj, ma, XATTR_NAME_LMV);
+       if (rc)
+               GOTO(unlock_obj, rc);
+
+       /* user may run 'lfs migrate' multiple times, so it's shrunk already */
+       if (!(ma->ma_valid & MA_LMV))
+               GOTO(unlock_obj, rc = -EALREADY);
+
+       lmv = &ma->ma_lmv->lmv_md_v1;
+
+       /* ditto */
+       if (!(le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION))
+               GOTO(unlock_obj, rc = -EALREADY);
+
+       /* a stripe count of 0 in the request is treated as 1 */
+       if (!stripe_count)
+               stripe_count = 1;
+
+       if (le32_to_cpu(lmv->lmv_migrate_offset) != stripe_count) {
+               CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       le32_to_cpu(lmv->lmv_migrate_offset),
+                       le32_to_cpu(lmu->lum_stripe_count));
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       /* both sides are little-endian, so raw equality is byte-order safe */
+       if (lmv->lmv_master_mdt_index != lmu->lum_stripe_offset) {
+               CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       le32_to_cpu(lmv->lmv_master_mdt_index),
+                       le32_to_cpu(lmu->lum_stripe_offset));
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       /* the hash type only has to match when more than one stripe remains */
+       if (stripe_count > 1 &&
+           (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) !=
+           lmu->lum_hash_type) {
+               CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       le32_to_cpu(lmv->lmv_hash_type),
+                       le32_to_cpu(lmu->lum_hash_type));
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       /* store the user-supplied LMV as the new directory layout */
+       buf->lb_buf = rr->rr_eadata;
+       buf->lb_len = rr->rr_eadatalen;
+       rc = mo_xattr_set(env, mdt_object_child(obj), buf, XATTR_NAME_LMV, 0);
+       GOTO(unlock_obj, rc);
+
+unlock_obj:
+       mdt_reint_striped_unlock(info, obj, lhc, einfo, rc);
+unlock_pobj:
+       /*
+        * NOTE(review): reached even when the parent lock was not taken
+        * (stripe count >= 2); lhp was still set up by mdt_lock_reg_init(),
+        * presumably making this unlock a no-op - confirm.
+        */
+       mdt_object_unlock(info, pobj, lhp, rc);
+put_pobj:
+       mdt_object_put(env, pobj);
+put_obj:
+       mdt_object_put(env, obj);
+
+       return rc;
+}
+
+
 int mdt_reint_setxattr(struct mdt_thread_info *info,
                        struct mdt_lock_handle *unused)
 {
@@ -329,6 +485,21 @@ int mdt_reint_setxattr(struct mdt_thread_info *info,
        } else if (strncmp(xattr_name, XATTR_TRUSTED_PREFIX,
                    sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0) {
 
+               /* setxattr(LMV) with lum is used to shrink dir layout */
+               if (strcmp(xattr_name, XATTR_NAME_LMV) == 0) {
+                       __u32 *magic = rr->rr_eadata;
+
+                       /* removing LMV is not allowed: do nothing, return 0 */
+                       if (!rr->rr_eadata)
+                               GOTO(out, rc = 0);
+
+                       if (le32_to_cpu(*magic) == LMV_USER_MAGIC ||
+                           le32_to_cpu(*magic) == LMV_USER_MAGIC_SPECIFIC) {
+                               rc = mdt_dir_layout_shrink(info);
+                               GOTO(out, rc);
+                       }
+               }
+
                if (!md_capable(mdt_ucred(info), CFS_CAP_SYS_ADMIN))
                        GOTO(out, rc = -EPERM);