*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lustre/mdt/mdt_xattr.c
*
#include <obd_class.h>
#include <lustre_nodemap.h>
#include <lustre_acl.h>
+#include <lustre_lmv.h>
#include "mdt_internal.h"
/* return EADATA length to the caller. negative value means error */
-static int mdt_getxattr_pack_reply(struct mdt_thread_info * info)
+static int mdt_getxattr_pack_reply(struct mdt_thread_info *info)
{
struct req_capsule *pill = info->mti_pill;
struct ptlrpc_request *req = mdt_info_req(info);
valid = info->mti_body->mbo_valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS);
/* Determine how many bytes we need */
- if (valid == OBD_MD_FLXATTR) {
+ if (valid == OBD_MD_FLXATTR) {
xattr_name = req_capsule_client_get(pill, &RMF_NAME);
if (!xattr_name)
RETURN(-EFAULT);
&LU_BUF_NULL, xattr_name);
if (size == -ENODATA) {
/* XXX: Some client code will not handle -ENODATA
- * for XATTR_NAME_LOV (trusted.lov) properly. */
+ * for XATTR_NAME_LOV (trusted.lov) properly.
+ */
if (strcmp(xattr_name, XATTR_NAME_LOV) == 0)
rc = 0;
else
xattr_name = "all";
/* N.B. eadatasize = 0 is not valid for FLXATTRALL */
/* We could calculate accurate sizes, but this would
- * introduce a lot of overhead, let's do it later... */
+ * introduce a lot of overhead, let's do it later...
+ */
size = info->mti_body->mbo_eadatasize;
if (size <= 0 || size > info->mti_mdt->mdt_max_ea_size ||
size & (sizeof(__u32) - 1)) {
int mdt_getxattr(struct mdt_thread_info *info)
{
struct ptlrpc_request *req = mdt_info_req(info);
- struct mdt_body *reqbody;
- struct mdt_body *repbody = NULL;
- struct md_object *next;
- struct lu_buf *buf;
- int easize, rc;
+ struct mdt_body *reqbody;
+ struct mdt_body *repbody = NULL;
+ struct md_object *next;
+ struct lu_buf *buf;
+ int easize, rc;
u64 valid;
- ENTRY;
+ ktime_t kstart = ktime_get();
+ ENTRY;
- LASSERT(info->mti_object != NULL);
+ LASSERT(info->mti_object != NULL);
LASSERT(lu_object_assert_exists(&info->mti_object->mot_obj));
CDEBUG(D_INODE, "getxattr "DFID"\n", PFID(&info->mti_body->mbo_fid1));
if (rc)
RETURN(err_serious(rc));
- reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
- if (reqbody == NULL)
- RETURN(err_serious(-EFAULT));
+ reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
+ if (reqbody == NULL)
+ RETURN(err_serious(-EFAULT));
rc = mdt_init_ucred(info, reqbody);
- if (rc)
- RETURN(err_serious(rc));
+ if (rc)
+ RETURN(err_serious(rc));
- next = mdt_object_child(info->mti_object);
- easize = mdt_getxattr_pack_reply(info);
+ next = mdt_object_child(info->mti_object);
+ easize = mdt_getxattr_pack_reply(info);
if (easize == -ENODATA)
GOTO(out, rc = easize);
else if (easize < 0)
GOTO(out, rc = err_serious(easize));
- repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
- LASSERT(repbody != NULL);
+ repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
+ LASSERT(repbody != NULL);
- /* No need further getxattr. */
+	/* No need for further getxattr. */
if (easize == 0 || reqbody->mbo_eadatasize == 0)
GOTO(out, rc = easize);
EXIT;
out:
if (rc >= 0) {
- mdt_counter_incr(req, LPROC_MDT_GETXATTR);
+ mdt_counter_incr(req, LPROC_MDT_GETXATTR,
+ ktime_us_delta(ktime_get(), kstart));
/* LU-11109: Set OBD_MD_FLXATTR on success so that
* newer clients can distinguish between nonexistent
- * xattrs and zero length values. */
+ * xattrs and zero length values.
+ */
repbody->mbo_valid |= OBD_MD_FLXATTR;
repbody->mbo_eadatasize = rc;
rc = 0;
return rc;
}
-/* shrink dir layout after migration */
-static int mdt_dir_layout_shrink(struct mdt_thread_info *info)
+/* update dir layout after migration/restripe */
+int mdt_dir_layout_update(struct mdt_thread_info *info)
{
const struct lu_env *env = info->mti_env;
struct mdt_device *mdt = info->mti_mdt;
struct mdt_reint_record *rr = &info->mti_rr;
struct lmv_user_md *lmu = rr->rr_eadata;
__u32 lum_stripe_count = lmu->lum_stripe_count;
- struct lu_buf *buf = &info->mti_buf;
+ struct md_layout_change *mlc = &info->mti_mlc;
struct lmv_mds_md_v1 *lmv;
struct md_attr *ma = &info->mti_attr;
struct ldlm_enqueue_info *einfo = &info->mti_einfo[0];
struct mdt_object *obj;
struct mdt_lock_handle *lhp = NULL;
struct mdt_lock_handle *lhc;
+ bool shrink = false;
int rc;
ENTRY;
if (!mdt->mdt_enable_dir_migration)
RETURN(-EPERM);
- if (!md_capable(uc, CFS_CAP_SYS_ADMIN) &&
+ if (!md_capable(uc, CAP_SYS_ADMIN) &&
uc->uc_gid != mdt->mdt_enable_remote_dir_gid &&
mdt->mdt_enable_remote_dir_gid != -1)
RETURN(-EPERM);
- /* mti_big_lmm is used to save LMV, but it may be uninitialized. */
- if (unlikely(!info->mti_big_lmm)) {
- info->mti_big_lmmsize = lmv_mds_md_size(64, LMV_MAGIC);
- OBD_ALLOC(info->mti_big_lmm, info->mti_big_lmmsize);
- if (!info->mti_big_lmm)
- RETURN(-ENOMEM);
- }
-
obj = mdt_object_find(env, mdt, rr->rr_fid1);
if (IS_ERR(obj))
RETURN(PTR_ERR(obj));
/*
* lock parent if dir will be shrunk to 1 stripe, because dir will be
- * converted to normal directory, as will change dir fid and update
+	 * converted to a normal directory, as this will change the dir FID and update
* namespace of parent.
*/
lhp = &info->mti_lh[MDT_LH_PARENT];
if (rc)
GOTO(unlock_pobj, rc);
- ma->ma_lmv = info->mti_big_lmm;
- ma->ma_lmv_size = info->mti_big_lmmsize;
- ma->ma_valid = 0;
rc = mdt_stripe_get(info, obj, ma, XATTR_NAME_LMV);
if (rc)
GOTO(unlock_obj, rc);
GOTO(unlock_obj, rc = -EALREADY);
lmv = &ma->ma_lmv->lmv_md_v1;
+ if (!lmv_is_sane(lmv))
+ GOTO(unlock_obj, rc = -EBADF);
/* ditto */
- if (!(le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION))
+ if (!lmv_is_layout_changing(lmv))
GOTO(unlock_obj, rc = -EALREADY);
lum_stripe_count = lmu->lum_stripe_count;
if (!lum_stripe_count)
lum_stripe_count = cpu_to_le32(1);
- if (lmv->lmv_migrate_offset != lum_stripe_count) {
- CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n",
- mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
- lmv->lmv_migrate_offset, lmu->lum_stripe_count);
- GOTO(unlock_obj, rc = -EINVAL);
- }
+ if (lmv_is_migrating(lmv)) {
+ if (lmv->lmv_migrate_offset != lum_stripe_count) {
+ CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_migrate_offset, lmu->lum_stripe_count);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
- if (lmv->lmv_master_mdt_index != lmu->lum_stripe_offset) {
- CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n",
- mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
- lmv->lmv_master_mdt_index, lmu->lum_stripe_offset);
- GOTO(unlock_obj, rc = -EINVAL);
- }
+ if (lmu->lum_stripe_offset != lmv->lmv_master_mdt_index) {
+ CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_master_mdt_index,
+ lmu->lum_stripe_offset);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lum_stripe_count > 1 && lmu->lum_hash_type &&
+ lmu->lum_hash_type !=
+ (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK))) {
+ CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_hash_type, lmu->lum_hash_type);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ shrink = true;
+ } else if (lmv_is_splitting(lmv)) {
+ if (lmv->lmv_stripe_count != lum_stripe_count) {
+ CERROR("%s: "DFID" stripe count mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_stripe_count, lmu->lum_stripe_count);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lmu->lum_stripe_offset != LMV_OFFSET_DEFAULT) {
+ CERROR("%s: "DFID" dir split offset %u != -1\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmu->lum_stripe_offset);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lmu->lum_hash_type &&
+ lmu->lum_hash_type !=
+ (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK))) {
+ CERROR("%s: "DFID" split hash mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_hash_type, lmu->lum_hash_type);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+ } else if (lmv_is_merging(lmv)) {
+ if (lmv->lmv_merge_offset != lum_stripe_count) {
+ CERROR("%s: "DFID" stripe count mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_merge_offset, lmu->lum_stripe_count);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
- if (lum_stripe_count > 1 &&
- (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) !=
- lmu->lum_hash_type) {
- CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n",
- mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
- lmv->lmv_hash_type, lmu->lum_hash_type);
- GOTO(unlock_obj, rc = -EINVAL);
+ if (lmu->lum_stripe_offset != LMV_OFFSET_DEFAULT) {
+ CERROR("%s: "DFID" dir merge offset %u != -1\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmu->lum_stripe_offset);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lmu->lum_hash_type &&
+ (lmu->lum_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) !=
+ (lmv->lmv_merge_hash & cpu_to_le32(LMV_HASH_TYPE_MASK))) {
+ CERROR("%s: "DFID" merge hash mismatch %u != %u\n",
+ mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+ lmv->lmv_merge_hash, lmu->lum_hash_type);
+ GOTO(unlock_obj, rc = -EINVAL);
+ }
+
+ if (lum_stripe_count < lmv->lmv_stripe_count)
+ shrink = true;
}
- buf->lb_buf = rr->rr_eadata;
- buf->lb_len = rr->rr_eadatalen;
- rc = mo_xattr_set(env, mdt_object_child(obj), buf, XATTR_NAME_LMV, 0);
+ if (shrink) {
+ mlc->mlc_opc = MD_LAYOUT_SHRINK;
+ mlc->mlc_buf.lb_buf = rr->rr_eadata;
+ mlc->mlc_buf.lb_len = rr->rr_eadatalen;
+ rc = mo_layout_change(env, mdt_object_child(obj), mlc);
+ } else {
+ struct lu_buf *buf = &info->mti_buf;
+ u32 version = le32_to_cpu(lmv->lmv_layout_version);
+
+ lmv->lmv_hash_type &= ~LMV_HASH_FLAG_LAYOUT_CHANGE;
+ lmv->lmv_layout_version = cpu_to_le32(++version);
+ lmv->lmv_migrate_offset = 0;
+ lmv->lmv_migrate_hash = 0;
+ buf->lb_buf = lmv;
+ buf->lb_len = sizeof(*lmv);
+ rc = mo_xattr_set(env, mdt_object_child(obj), buf,
+ XATTR_NAME_LMV, LU_XATTR_REPLACE);
+ }
GOTO(unlock_obj, rc);
unlock_obj:
}
int mdt_reint_setxattr(struct mdt_thread_info *info,
- struct mdt_lock_handle *unused)
+ struct mdt_lock_handle *unused)
{
struct ptlrpc_request *req = mdt_info_req(info);
struct mdt_lock_handle *lh;
const char *xattr_name = rr->rr_name.ln_name;
int xattr_len = rr->rr_eadatalen;
__u64 lockpart = MDS_INODELOCK_UPDATE;
+ ktime_t kstart = ktime_get();
int rc;
ENTRY;
if (info->mti_dlm_req)
ldlm_request_cancel(req, info->mti_dlm_req, 0, LATF_SKIP);
- if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
- RETURN(err_serious(-ENOMEM));
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
+ RETURN(err_serious(-ENOMEM));
rc = mdt_init_ucred_reint(info);
- if (rc != 0)
- RETURN(rc);
+ if (rc != 0)
+ RETURN(rc);
if (strncmp(xattr_name, XATTR_USER_PREFIX,
sizeof(XATTR_USER_PREFIX) - 1) == 0) {
if (le32_to_cpu(*magic) == LMV_USER_MAGIC ||
le32_to_cpu(*magic) == LMV_USER_MAGIC_SPECIFIC) {
- rc = mdt_dir_layout_shrink(info);
+ rc = mdt_dir_layout_update(info);
GOTO(out, rc);
}
}
- if (!md_capable(mdt_ucred(info), CFS_CAP_SYS_ADMIN))
+ if (!md_capable(mdt_ucred(info), CAP_SYS_ADMIN))
GOTO(out, rc = -EPERM);
if (strcmp(xattr_name, XATTR_NAME_LOV) == 0 ||
lockpart |= MDS_INODELOCK_LAYOUT;
}
- /* Revoke all clients' lookup lock, since the access
- * permissions for this inode is changed when ACL_ACCESS is
- * set. This isn't needed for ACL_DEFAULT, since that does
- * not change the access permissions of this inode, nor any
- * other existing inodes. It is setting the ACLs inherited
- * by new directories/files at create time. */
+		/* Revoke all clients' lookup locks, since the access
+		 * permissions for this inode are changed when ACL_ACCESS is
+		 * set. This isn't needed for ACL_DEFAULT, since that does
+		 * not change the access permissions of this inode, nor any
+		 * other existing inodes. It is setting the ACLs inherited
+		 * by new directories/files at create time.
+		 */
/* We need revoke both LOOKUP|PERM lock here, see mdt_attr_set. */
- if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
+ if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
lockpart |= MDS_INODELOCK_PERM | MDS_INODELOCK_LOOKUP;
/* We need to take the lock on behalf of old clients so that newer
- * clients flush their xattr caches */
+ * clients flush their xattr caches
+ */
else
lockpart |= MDS_INODELOCK_XATTR;
- lh = &info->mti_lh[MDT_LH_PARENT];
- /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
- * to cancel them. */
- mdt_lock_reg_init(lh, LCK_EX);
- obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
+ lh = &info->mti_lh[MDT_LH_PARENT];
+ /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
+ * to cancel them.
+ */
+ mdt_lock_reg_init(lh, LCK_EX);
+ obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
if (IS_ERR(obj))
GOTO(out, rc = PTR_ERR(obj));
if (unlikely(!(valid & OBD_MD_FLCTIME))) {
/* This isn't strictly an error, but all current clients
- * should set OBD_MD_FLCTIME when setting attributes. */
- CWARN("%s: client miss to set OBD_MD_FLCTIME when "
- "setxattr %s: [object "DFID"] [valid %llu]\n",
+ * should set OBD_MD_FLCTIME when setting attributes.
+ */
+ CWARN("%s: client miss to set OBD_MD_FLCTIME when setxattr %s: [object "DFID"] [valid %llu]\n",
mdt_obd_name(info->mti_mdt), xattr_name,
PFID(rr->rr_fid1), valid);
attr->la_ctime = ktime_get_real_seconds();
ma->ma_attr_flags |= MDS_PERM_BYPASS;
mo_attr_set(env, child, ma);
}
- } else if (valid & OBD_MD_FLXATTRRM) {
- rc = mo_xattr_del(env, child, xattr_name);
- /* update ctime after xattr changed */
- if (rc == 0) {
- ma->ma_attr_flags |= MDS_PERM_BYPASS;
- mo_attr_set(env, child, ma);
- }
+ } else if (valid & OBD_MD_FLXATTRRM) {
+ rc = mo_xattr_del(env, child, xattr_name);
+ /* update ctime after xattr changed */
+ if (rc == 0) {
+ ma->ma_attr_flags |= MDS_PERM_BYPASS;
+ mo_attr_set(env, child, ma);
+ }
} else {
CDEBUG(D_INFO, "valid bits: %#llx\n", valid);
rc = -EINVAL;
}
if (rc == 0)
- mdt_counter_incr(req, LPROC_MDT_SETXATTR);
+ mdt_counter_incr(req, LPROC_MDT_SETXATTR,
+ ktime_us_delta(ktime_get(), kstart));
- EXIT;
+ EXIT;
out_unlock:
- mdt_object_unlock_put(info, obj, lh, rc);
+ mdt_object_unlock_put(info, obj, lh, rc);
out:
mdt_exit_ucred(info);
return rc;