*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lustre/mdt/mdt_xattr.c
*
#include <obd_class.h>
#include <lustre_nodemap.h>
#include <lustre_acl.h>
+#include <lustre_lmv.h>
#include "mdt_internal.h"
/* return EADATA length to the caller. negative value means error */
-static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
+static int mdt_getxattr_pack_reply(struct mdt_thread_info *info)
{
struct req_capsule *pill = info->mti_pill;
struct ptlrpc_request *req = mdt_info_req(info);
valid = info->mti_body->mbo_valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS);
/* Determine how many bytes we need */
- if (valid == OBD_MD_FLXATTR) {
+ if (valid == OBD_MD_FLXATTR) {
xattr_name = req_capsule_client_get(pill, &RMF_NAME);
if (!xattr_name)
RETURN(-EFAULT);
&LU_BUF_NULL, xattr_name);
if (size == -ENODATA) {
/* XXX: Some client code will not handle -ENODATA
- * for XATTR_NAME_LOV (trusted.lov) properly. */
+ * for XATTR_NAME_LOV (trusted.lov) properly.
+ */
if (strcmp(xattr_name, XATTR_NAME_LOV) == 0)
rc = 0;
else
xattr_name = "all";
/* N.B. eadatasize = 0 is not valid for FLXATTRALL */
/* We could calculate accurate sizes, but this would
- * introduce a lot of overhead, let's do it later... */
+ * introduce a lot of overhead, let's do it later...
+ */
size = info->mti_body->mbo_eadatasize;
+ if (size <= 0 || size > info->mti_mdt->mdt_max_ea_size ||
+ size & (sizeof(__u32) - 1)) {
+ DEBUG_REQ(D_ERROR, req,
+ "%s: invalid EA size(%d) for FLXATTRALL\n",
+ mdt_obd_name(info->mti_mdt), size);
+ RETURN(-EINVAL);
+ }
req_capsule_set_size(pill, &RMF_EAVALS, RCL_SERVER, size);
req_capsule_set_size(pill, &RMF_EAVALS_LENS, RCL_SERVER, size);
} else {
int mdt_getxattr(struct mdt_thread_info *info)
{
struct ptlrpc_request *req = mdt_info_req(info);
- struct mdt_body *reqbody;
- struct mdt_body *repbody = NULL;
- struct md_object *next;
- struct lu_buf *buf;
- int easize, rc;
+ struct mdt_body *reqbody;
+ struct mdt_body *repbody = NULL;
+ struct md_object *next;
+ struct lu_buf *buf;
+ int easize, rc;
u64 valid;
- ENTRY;
+ ktime_t kstart = ktime_get();
+ ENTRY;
- LASSERT(info->mti_object != NULL);
+ LASSERT(info->mti_object != NULL);
LASSERT(lu_object_assert_exists(&info->mti_object->mot_obj));
CDEBUG(D_INODE, "getxattr "DFID"\n", PFID(&info->mti_body->mbo_fid1));
- reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
- if (reqbody == NULL)
- RETURN(err_serious(-EFAULT));
+ rc = req_check_sepol(info->mti_pill);
+ if (rc)
+ RETURN(err_serious(rc));
+
+ reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
+ if (reqbody == NULL)
+ RETURN(err_serious(-EFAULT));
rc = mdt_init_ucred(info, reqbody);
- if (rc)
- RETURN(err_serious(rc));
+ if (rc)
+ RETURN(err_serious(rc));
- next = mdt_object_child(info->mti_object);
- easize = mdt_getxattr_pack_reply(info);
+ next = mdt_object_child(info->mti_object);
+ easize = mdt_getxattr_pack_reply(info);
if (easize == -ENODATA)
GOTO(out, rc = easize);
else if (easize < 0)
GOTO(out, rc = err_serious(easize));
- repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
- LASSERT(repbody != NULL);
+ repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
+ LASSERT(repbody != NULL);
- /* No need further getxattr. */
+ /* No further getxattr is needed. */
if (easize == 0 || reqbody->mbo_eadatasize == 0)
GOTO(out, rc = easize);
EXIT;
out:
if (rc >= 0) {
- mdt_counter_incr(req, LPROC_MDT_GETXATTR);
+ mdt_counter_incr(req, LPROC_MDT_GETXATTR,
+ ktime_us_delta(ktime_get(), kstart));
/* LU-11109: Set OBD_MD_FLXATTR on success so that
* newer clients can distinguish between nonexistent
- * xattrs and zero length values. */
+ * xattrs and zero length values.
+ */
repbody->mbo_valid |= OBD_MD_FLXATTR;
repbody->mbo_eadatasize = rc;
rc = 0;
return rc;
}
+/*
+ * Update dir layout after migration/restripe.
+ *
+ * The request in info->mti_rr carries a struct lmv_user_md (rr_eadata) that
+ * is checked against the LMV xattr currently stored on directory rr_fid1.
+ * When they agree, the pending layout change is finished in one of two ways:
+ *  - shrink (LMV is migrating, or LMV is merging and the requested stripe
+ *    count is below lmv_stripe_count): mo_layout_change() is called with
+ *    MD_LAYOUT_SHRINK and the raw request EA as its buffer;
+ *  - otherwise: LMV_HASH_FLAG_LAYOUT_CHANGE is cleared, the layout version
+ *    is incremented, migrate offset and hash are zeroed, and the LMV xattr
+ *    is rewritten with LU_XATTR_REPLACE.
+ *
+ * Returns 0 on success, or a negative errno:
+ *  -EPERM    mdt_enable_dir_migration is off, or the caller has neither
+ *            CAP_SYS_ADMIN nor a gid equal to mdt_enable_remote_dir_gid
+ *            (a setting of -1 lets any caller through);
+ *  -EALREADY the directory has no LMV (MA_LMV not set), or
+ *            lmv_is_layout_changing() is false;
+ *  -EBADF    the stored LMV fails lmv_is_sane();
+ *  -EINVAL   stripe count, stripe offset or hash type in the request does
+ *            not match the stored LMV;
+ *  or whatever mdt_object_find(), mdt_attr_get_pfid(),
+ *  mdt_revoke_remote_lookup_lock(), the lock calls, mdt_stripe_get(),
+ *  mo_layout_change() or mo_xattr_set() return on failure.
+ *
+ * NOTE(review): lmu fields are dereferenced here without comparing
+ * rr->rr_eadatalen to sizeof(*lmu) - confirm the caller validates it.
+ * NOTE(review): lum_stripe_count is kept unconverted (it is defaulted with
+ * cpu_to_le32(1) below) yet is used in the ordered comparisons "> 1" and
+ * "< lmv->lmv_stripe_count" - confirm this is intended on big-endian hosts.
+ * NOTE(review): the unlock_pobj label is reached even when the parent lock
+ * was not taken; presumably mdt_object_unlock() tolerates a handle that
+ * was only initialised - confirm.
+ */
+int mdt_dir_layout_update(struct mdt_thread_info *info)
+{
+ const struct lu_env *env = info->mti_env;
+ struct mdt_device *mdt = info->mti_mdt;
+ struct lu_ucred *uc = mdt_ucred(info);
+ struct mdt_reint_record *rr = &info->mti_rr;
+ struct lmv_user_md *lmu = rr->rr_eadata;
+ __u32 lum_stripe_count = lmu->lum_stripe_count; /* reassigned below before first use */
+ struct md_layout_change *mlc = &info->mti_mlc;
+ struct lmv_mds_md_v1 *lmv;
+ struct md_attr *ma = &info->mti_attr;
+ struct ldlm_enqueue_info *einfo = &info->mti_einfo[0];
+ struct mdt_object *pobj = NULL;
+ struct mdt_object *obj;
+ struct mdt_lock_handle *lhp = NULL;
+ struct mdt_lock_handle *lhc;
+ bool shrink = false; /* true selects the MD_LAYOUT_SHRINK path at the end */
+ int rc;
+
+ ENTRY;
+
+ if (!mdt->mdt_enable_dir_migration)
+ RETURN(-EPERM);
+
+ if (!md_capable(uc, CAP_SYS_ADMIN) &&
+ uc->uc_gid != mdt->mdt_enable_remote_dir_gid &&
+ mdt->mdt_enable_remote_dir_gid != -1)
+ RETURN(-EPERM);
+
+ obj = mdt_object_find(env, mdt, rr->rr_fid1);
+ if (IS_ERR(obj))
+ RETURN(PTR_ERR(obj));
+
+ /* get parent from PFID; the parent FID is stored in ma->ma_pfid */
+ rc = mdt_attr_get_pfid(info, obj, &ma->ma_pfid);
+ if (rc)
+ GOTO(put_obj, rc);
+
+ pobj = mdt_object_find(env, mdt, &ma->ma_pfid);
+ if (IS_ERR(pobj))
+ GOTO(put_obj, rc = PTR_ERR(pobj));
+
+ /* revoke object remote LOOKUP lock */
+ if (mdt_object_remote(pobj)) {
+ rc = mdt_revoke_remote_lookup_lock(info, pobj, obj);
+ if (rc)
+ GOTO(put_pobj, rc);
+ }
+
+ /*
+ * lock parent if dir will be shrunk to 1 stripe, because dir will be
+ * converted to normal directory, as will change dir FID and update
+ * namespace of parent.
+ */
+ lhp = &info->mti_lh[MDT_LH_PARENT];
+ mdt_lock_reg_init(lhp, LCK_PW); /* initialised even when the lock below is skipped */
+
+ if (le32_to_cpu(lmu->lum_stripe_count) < 2) {
+ rc = mdt_reint_object_lock(info, pobj, lhp,
+ MDS_INODELOCK_UPDATE, true);
+ if (rc)
+ GOTO(put_pobj, rc);
+ }
+
+ /* lock object with LCK_EX on MDS_INODELOCK_FULL */
+ lhc = &info->mti_lh[MDT_LH_CHILD];
+ mdt_lock_reg_init(lhc, LCK_EX);
+ rc = mdt_reint_striped_lock(info, obj, lhc, MDS_INODELOCK_FULL, einfo,
+ true);
+ if (rc)
+ GOTO(unlock_pobj, rc);
+
+ rc = mdt_stripe_get(info, obj, ma, XATTR_NAME_LMV);
+ if (rc)
+ GOTO(unlock_obj, rc);
+
+ /* user may run 'lfs migrate' multiple times, so it's shrunk already */
+ if (!(ma->ma_valid & MA_LMV))
+ GOTO(unlock_obj, rc = -EALREADY);
+
+ lmv = &ma->ma_lmv->lmv_md_v1;
+ if (!lmv_is_sane(lmv))
+ GOTO(unlock_obj, rc = -EBADF);
+
+ /* ditto: lmv_is_layout_changing() reports no change in progress */
+ if (!lmv_is_layout_changing(lmv))
+ GOTO(unlock_obj, rc = -EALREADY);
+
+ lum_stripe_count = lmu->lum_stripe_count;
+ if (!lum_stripe_count)
+ lum_stripe_count = cpu_to_le32(1); /* a zero count in the request is treated as 1 */
+
+ if (lmv_is_migrating(lmv)) {
+ if (lmv->lmv_migrate_offset != lum_stripe_count) {
+ CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_migrate_offset, lmu->lum_stripe_count);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lmu->lum_stripe_offset != lmv->lmv_master_mdt_index) {
+ CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_master_mdt_index,
+ lmu->lum_stripe_offset);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lum_stripe_count > 1 && lmu->lum_hash_type &&
+ lmu->lum_hash_type !=
+ (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK))) {
+ CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_hash_type, lmu->lum_hash_type);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ shrink = true; /* a migrating dir always takes the shrink path */
+ } else if (lmv_is_splitting(lmv)) {
+ if (lmv->lmv_stripe_count != lum_stripe_count) {
+ CERROR("%s: "DFID" stripe count mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_stripe_count, lmu->lum_stripe_count);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lmu->lum_stripe_offset != LMV_OFFSET_DEFAULT) {
+ CERROR("%s: "DFID" dir split offset %u != -1\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmu->lum_stripe_offset);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lmu->lum_hash_type &&
+ lmu->lum_hash_type !=
+ (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK))) {
+ CERROR("%s: "DFID" split hash mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_hash_type, lmu->lum_hash_type);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+ } else if (lmv_is_merging(lmv)) {
+ if (lmv->lmv_merge_offset != lum_stripe_count) {
+ CERROR("%s: "DFID" stripe count mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_merge_offset, lmu->lum_stripe_count);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lmu->lum_stripe_offset != LMV_OFFSET_DEFAULT) {
+ CERROR("%s: "DFID" dir merge offset %u != -1\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmu->lum_stripe_offset);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lmu->lum_hash_type &&
+ (lmu->lum_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) !=
+ (lmv->lmv_merge_hash & cpu_to_le32(LMV_HASH_TYPE_MASK))) {
+ CERROR("%s: "DFID" merge hash mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_merge_hash, lmu->lum_hash_type);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lum_stripe_count < lmv->lmv_stripe_count)
+ shrink = true; /* merge target has fewer stripes than the stored LMV */
+ }
+
+ if (shrink) { /* hand the request EA unchanged to the layer below */
+ mlc->mlc_opc = MD_LAYOUT_SHRINK;
+ mlc->mlc_buf.lb_buf = rr->rr_eadata;
+ mlc->mlc_buf.lb_len = rr->rr_eadatalen;
+ rc = mo_layout_change(env, mdt_object_child(obj), mlc);
+ } else { /* no shrink: clear the change flag and rewrite the stored LMV */
+ struct lu_buf *buf = &info->mti_buf;
+ u32 version = le32_to_cpu(lmv->lmv_layout_version);
+
+ lmv->lmv_hash_type &= ~LMV_HASH_FLAG_LAYOUT_CHANGE;
+ lmv->lmv_layout_version = cpu_to_le32(++version);
+ lmv->lmv_migrate_offset = 0;
+ lmv->lmv_migrate_hash = 0;
+ buf->lb_buf = lmv;
+ buf->lb_len = sizeof(*lmv);
+ rc = mo_xattr_set(env, mdt_object_child(obj), buf,
+ XATTR_NAME_LMV, LU_XATTR_REPLACE);
+ }
+ GOTO(unlock_obj, rc);
+
+unlock_obj:
+ mdt_reint_striped_unlock(info, obj, lhc, einfo, rc);
+unlock_pobj:
+ mdt_object_unlock(info, pobj, lhp, rc);
+put_pobj:
+ mdt_object_put(env, pobj);
+put_obj:
+ mdt_object_put(env, obj);
+
+ return rc;
+}
+
int mdt_reint_setxattr(struct mdt_thread_info *info,
- struct mdt_lock_handle *unused)
+ struct mdt_lock_handle *unused)
{
struct ptlrpc_request *req = mdt_info_req(info);
struct mdt_lock_handle *lh;
const char *xattr_name = rr->rr_name.ln_name;
int xattr_len = rr->rr_eadatalen;
__u64 lockpart = MDS_INODELOCK_UPDATE;
+ ktime_t kstart = ktime_get();
int rc;
ENTRY;
if (info->mti_dlm_req)
ldlm_request_cancel(req, info->mti_dlm_req, 0, LATF_SKIP);
- if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
- RETURN(err_serious(-ENOMEM));
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
+ RETURN(err_serious(-ENOMEM));
rc = mdt_init_ucred_reint(info);
- if (rc != 0)
- RETURN(rc);
+ if (rc != 0)
+ RETURN(rc);
if (strncmp(xattr_name, XATTR_USER_PREFIX,
sizeof(XATTR_USER_PREFIX) - 1) == 0) {
} else if (strncmp(xattr_name, XATTR_TRUSTED_PREFIX,
sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0) {
- if (!md_capable(mdt_ucred(info), CFS_CAP_SYS_ADMIN))
+ /* setxattr(LMV) with lum is used to shrink dir layout */
+ if (strcmp(xattr_name, XATTR_NAME_LMV) == 0) {
+ __u32 *magic = rr->rr_eadata;
+
+ /* we don't allow removing LMV? */
+ if (!rr->rr_eadata)
+ GOTO(out, rc = 0);
+
+ if (le32_to_cpu(*magic) == LMV_USER_MAGIC ||
+ le32_to_cpu(*magic) == LMV_USER_MAGIC_SPECIFIC) {
+ rc = mdt_dir_layout_update(info);
+ GOTO(out, rc);
+ }
+ }
+
+ if (!md_capable(mdt_ucred(info), CAP_SYS_ADMIN))
GOTO(out, rc = -EPERM);
if (strcmp(xattr_name, XATTR_NAME_LOV) == 0 ||
/* ACLs were mapped out, return an error so the user knows */
if (rc != xattr_len)
GOTO(out, rc = -EPERM);
- } else if ((strlen(xattr_name) > strlen(XATTR_LUSTRE_LOV) + 1) &&
+ } else if ((strlen(xattr_name) > sizeof(XATTR_LUSTRE_LOV)) &&
strncmp(xattr_name, XATTR_LUSTRE_LOV,
strlen(XATTR_LUSTRE_LOV)) == 0) {
- if (strncmp(xattr_name, XATTR_LUSTRE_LOV".add",
- strlen(XATTR_LUSTRE_LOV".add")) &&
- strncmp(xattr_name, XATTR_LUSTRE_LOV".set",
- strlen(XATTR_LUSTRE_LOV".set")) &&
- strncmp(xattr_name, XATTR_LUSTRE_LOV".del",
- strlen(XATTR_LUSTRE_LOV".del"))) {
+ if (!allowed_lustre_lov(xattr_name)) {
CERROR("%s: invalid xattr name: %s\n",
mdt_obd_name(info->mti_mdt), xattr_name);
GOTO(out, rc = -EINVAL);
lockpart |= MDS_INODELOCK_LAYOUT;
}
- /* Revoke all clients' lookup lock, since the access
- * permissions for this inode is changed when ACL_ACCESS is
- * set. This isn't needed for ACL_DEFAULT, since that does
- * not change the access permissions of this inode, nor any
- * other existing inodes. It is setting the ACLs inherited
- * by new directories/files at create time. */
+ /* Revoke all clients' lookup lock, since the access
+ * permissions for this inode is changed when ACL_ACCESS is
+ * set. This isn't needed for ACL_DEFAULT, since that does
+ * not change the access permissions of this inode, nor any
+ * other existing inodes. It is setting the ACLs inherited
+ * by new directories/files at create time.
+ */
/* We need revoke both LOOKUP|PERM lock here, see mdt_attr_set. */
- if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
+ if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
lockpart |= MDS_INODELOCK_PERM | MDS_INODELOCK_LOOKUP;
/* We need to take the lock on behalf of old clients so that newer
- * clients flush their xattr caches */
+ * clients flush their xattr caches
+ */
else
lockpart |= MDS_INODELOCK_XATTR;
- lh = &info->mti_lh[MDT_LH_PARENT];
- /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
- * to cancel them. */
- mdt_lock_reg_init(lh, LCK_EX);
- obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
+ lh = &info->mti_lh[MDT_LH_PARENT];
+ /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
+ * to cancel them.
+ */
+ mdt_lock_reg_init(lh, LCK_EX);
+ obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
if (IS_ERR(obj))
GOTO(out, rc = PTR_ERR(obj));
if (unlikely(!(valid & OBD_MD_FLCTIME))) {
/* This isn't strictly an error, but all current clients
- * should set OBD_MD_FLCTIME when setting attributes. */
- CWARN("%s: client miss to set OBD_MD_FLCTIME when "
- "setxattr %s: [object "DFID"] [valid %llu]\n",
+ * should set OBD_MD_FLCTIME when setting attributes.
+ */
+ CWARN("%s: client miss to set OBD_MD_FLCTIME when setxattr %s: [object "DFID"] [valid %llu]\n",
mdt_obd_name(info->mti_mdt), xattr_name,
PFID(rr->rr_fid1), valid);
attr->la_ctime = ktime_get_real_seconds();
ma->ma_attr_flags |= MDS_PERM_BYPASS;
mo_attr_set(env, child, ma);
}
- } else if (valid & OBD_MD_FLXATTRRM) {
- rc = mo_xattr_del(env, child, xattr_name);
- /* update ctime after xattr changed */
- if (rc == 0) {
- ma->ma_attr_flags |= MDS_PERM_BYPASS;
- mo_attr_set(env, child, ma);
- }
+ } else if (valid & OBD_MD_FLXATTRRM) {
+ rc = mo_xattr_del(env, child, xattr_name);
+ /* update ctime after xattr changed */
+ if (rc == 0) {
+ ma->ma_attr_flags |= MDS_PERM_BYPASS;
+ mo_attr_set(env, child, ma);
+ }
} else {
CDEBUG(D_INFO, "valid bits: %#llx\n", valid);
rc = -EINVAL;
}
if (rc == 0)
- mdt_counter_incr(req, LPROC_MDT_SETXATTR);
+ mdt_counter_incr(req, LPROC_MDT_SETXATTR,
+ ktime_us_delta(ktime_get(), kstart));
- EXIT;
+ EXIT;
out_unlock:
- mdt_object_unlock_put(info, obj, lh, rc);
+ mdt_object_unlock_put(info, obj, lh, rc);
out:
mdt_exit_ucred(info);
return rc;