#include <lustre_linkea.h>
-#define PFID_STRIPE_IDX_BITS 16
-#define PFID_STRIPE_COUNT_MASK ((1 << PFID_STRIPE_IDX_BITS) - 1)
-
int ldiskfs_pdo = 1;
module_param(ldiskfs_pdo, int, 0644);
MODULE_PARM_DESC(ldiskfs_pdo, "ldiskfs with parallel directory operations");
rc = -EOPNOTSUPP;
} else {
fid = &lma->lma_self_fid;
+ if (lma->lma_compat & LMAC_STRIPE_INFO &&
+ osd->od_is_ost)
+ obj->oo_pfid_in_lma = 1;
}
}
bool remote = false;
bool trusted = true;
bool updated = false;
+ bool checked = false;
ENTRY;
LINVRNT(osd_invariant(obj));
check_lma:
result = osd_check_lma(env, obj);
+ checked = true;
if (!result)
goto found;
goto iget;
found:
+ if (!checked) {
+ struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
+ struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
+
+ result = osd_get_lma(info, inode, &info->oti_obj_dentry, loa);
+ if (!result) {
+ if (lma->lma_compat & LMAC_STRIPE_INFO &&
+ dev->od_is_ost)
+ obj->oo_pfid_in_lma = 1;
+ } else if (result != -ENODATA) {
+ GOTO(out, result);
+ }
+ }
+
obj->oo_compat_dot_created = 1;
obj->oo_compat_dotdot_created = 1;
struct osd_thread_info *info = osd_oti_get(env);
struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
+ LASSERT(!obj->oo_pfid_in_lma);
+
rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
&info->oti_ost_attrs);
if (rc)
return rc;
}
- rc = __osd_xattr_get(inode, dentry, name, buf->lb_buf, buf->lb_len);
- if (rc == -ENODATA && strcmp(name, XATTR_NAME_FID) == 0) {
+ if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma) {
struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
struct lustre_mdt_attrs *lma = &loa->loa_lma;
struct filter_fid *ff;
struct ost_layout *ol;
- if (!osd_dev(dt->do_lu.lo_dev)->od_is_ost)
- goto cache;
-
rc = osd_get_lma(info, inode, &info->oti_obj_dentry, loa);
if (rc)
return rc;
- if (!(lma->lma_compat & LMAC_STRIPE_INFO)) {
- rc = -ENODATA;
- goto cache;
- }
+ LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
rc = sizeof(*ff);
if (buf->lb_len == 0 || !buf->lb_buf)
ol->ol_comp_end = 0;
ol->ol_comp_id = 0;
}
+ } else {
+ rc = __osd_xattr_get(inode, dentry, name,
+ buf->lb_buf, buf->lb_len);
}
-cache:
if (cache_xattr) {
if (rc == -ENOENT || rc == -ENODATA)
osd_oxc_add(obj, name, NULL, 0);
return 0;
}
+/**
+ * Store the PFID EA (XATTR_NAME_FID) of an object, packing it into the
+ * LMA EA where possible.
+ *
+ * The caller's buffer holds either a bare struct lu_fid or a complete
+ * struct filter_fid, both in little-endian (on-wire) byte order.
+ *
+ * - If the LMA EA already carries stripe information and the inode size is
+ *   larger than 256 bytes, the stripe information is stripped from the LMA
+ *   EA and LU_XATTR_CREATE is returned, so the caller stores the PFID EA
+ *   as a separate xattr.
+ * - If the LMA EA carries no stripe information and either the inode size
+ *   is larger than 256 bytes or the buffer has no stripe information
+ *   (bare lu_fid, or ol_stripe_size == 0), \a fl is returned unchanged and
+ *   the caller stores the PFID EA as a separate xattr.
+ * - Otherwise the parent FID (and layout, when supplied) is written into
+ *   the LMA EA and oo_pfid_in_lma is set.
+ *
+ * NOTE(review): a caller passing fl == 0 cannot tell "store it separately
+ * with no flags" from "already stored inside LMA", because both are
+ * reported as 0 -- confirm all callers pass a non-zero LU_XATTR_* flag.
+ *
+ * \param[in] env	execution environment
+ * \param[in] obj	object whose PFID EA is being set
+ * \param[in] buf	new PFID EA value (struct lu_fid or struct filter_fid)
+ * \param[in] fl	LU_XATTR_CREATE / LU_XATTR_REPLACE flags from the caller
+ * \param[in] handle	transaction handle (not used by this function)
+ *
+ * \retval		positive LU_XATTR_* flags if the caller must store the
+ *			PFID EA as a separate xattr using these flags
+ * \retval		0 if the PFID EA has been stored inside the LMA EA
+ * \retval		negative errno on failure
+ */
+static int osd_xattr_set_pfid(const struct lu_env *env, struct osd_object *obj,
+			      const struct lu_buf *buf, int fl,
+			      struct thandle *handle)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct dentry *dentry = &info->oti_obj_dentry;
+	struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
+	struct lustre_mdt_attrs *lma = &loa->loa_lma;
+	struct inode *inode = obj->oo_inode;
+	struct filter_fid *ff = buf->lb_buf;
+	struct ost_layout *ol = &ff->ff_layout;
+	bool has_layout;
+	int flags = XATTR_REPLACE;
+	int rc;
+	ENTRY;
+
+	if (buf->lb_len != sizeof(*ff) && buf->lb_len != sizeof(struct lu_fid))
+		RETURN(-EINVAL);
+
+	/* The layout part only exists when a full filter_fid was passed in.
+	 * Never dereference @ol for a bare lu_fid buffer: that would read
+	 * past the end of the caller's buffer. */
+	has_layout = buf->lb_len == sizeof(*ff) && ol->ol_stripe_size != 0;
+
+	rc = osd_get_lma(info, inode, dentry, loa);
+	if (rc == -ENODATA) {
+		/* Usually for upgrading from old device */
+		lustre_loa_init(loa, lu_object_fid(&obj->oo_dt.do_lu),
+				LMAC_FID_ON_OST, 0);
+		flags = XATTR_CREATE;
+	} else if (rc) {
+		RETURN(rc);
+	}
+
+	if (!rc && lma->lma_compat & LMAC_STRIPE_INFO) {
+		/* PFID already lives inside LMA: a pure create must fail. */
+		if ((fl & LU_XATTR_CREATE) && !(fl & LU_XATTR_REPLACE))
+			RETURN(-EEXIST);
+
+		if (LDISKFS_INODE_SIZE(inode->i_sb) > 256) {
+			/* Separate PFID EA from LMA */
+			lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
+			lustre_lma_swab(lma);
+			rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma,
+					     sizeof(*lma), XATTR_REPLACE);
+			if (!rc) {
+				obj->oo_pfid_in_lma = 0;
+				/* Tell the caller to create the
+				 * stand-alone PFID EA. */
+				rc = LU_XATTR_CREATE;
+			}
+
+			RETURN(rc);
+		}
+	} else {
+		if (LDISKFS_INODE_SIZE(inode->i_sb) > 256)
+			RETURN(fl);
+
+		/* Old client does not send stripe information,
+		 * then store the PFID EA on disk separately. */
+		if (unlikely(!has_layout))
+			RETURN(fl);
+
+		/* Remove old PFID EA entry firstly. */
+		ll_vfs_dq_init(inode);
+		rc = inode->i_op->removexattr(dentry, XATTR_NAME_FID);
+		if (rc == -ENODATA) {
+			/* A pure replace needs an existing PFID EA. */
+			if ((fl & LU_XATTR_REPLACE) && !(fl & LU_XATTR_CREATE))
+				RETURN(rc);
+		} else if (rc) {
+			RETURN(rc);
+		}
+	}
+
+	fid_le_to_cpu(&loa->loa_parent_fid, &ff->ff_parent);
+	if (likely(has_layout)) {
+		/* The stripe count shares f_ver with the stripe index. */
+		loa->loa_parent_fid.f_ver |= le32_to_cpu(ol->ol_stripe_count) <<
+					     PFID_STRIPE_IDX_BITS;
+		loa->loa_stripe_size = le32_to_cpu(ol->ol_stripe_size);
+		lma->lma_compat |= LMAC_STRIPE_INFO;
+		if (ol->ol_comp_id != 0) {
+			loa->loa_comp_id = le32_to_cpu(ol->ol_comp_id);
+			loa->loa_comp_start = le64_to_cpu(ol->ol_comp_start);
+			loa->loa_comp_end = le64_to_cpu(ol->ol_comp_end);
+			lma->lma_compat |= LMAC_COMP_INFO;
+		}
+	}
+
+	lustre_loa_swab(loa, false);
+
+	/* Store the PFID EA inside LMA. */
+	rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, loa, sizeof(*loa),
+			     flags);
+	if (!rc)
+		obj->oo_pfid_in_lma = 1;
+
+	RETURN(rc);
+}
+
/*
* Concurrency: @dt is write locked.
*/
struct thandle *handle)
{
struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *osd = osd_obj2dev(obj);
struct inode *inode = obj->oo_inode;
struct osd_thread_info *info = osd_oti_get(env);
- struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
- struct lustre_mdt_attrs *lma = &loa->loa_lma;
int fs_flags = 0;
int len;
int rc;
len = buf->lb_len;
osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
- if (fl & LU_XATTR_REPLACE)
- fs_flags |= XATTR_REPLACE;
-
- if (fl & LU_XATTR_CREATE)
- fs_flags |= XATTR_CREATE;
/* For the OST device with 256 bytes inode size by default,
* the PFID EA will be stored together with LMA EA to avoid
* performance trouble. Otherwise the PFID EA can be stored
* independently. LU-8998 */
- if (strcmp(name, XATTR_NAME_FID) == 0 &&
- LDISKFS_INODE_SIZE(inode->i_sb) <= 256) {
- struct dentry *dentry = &info->oti_obj_dentry;
- struct filter_fid *ff;
- struct ost_layout *ol;
- int fl;
-
- LASSERT(osd_dev(dt->do_lu.lo_dev)->od_is_ost);
-
- ff = buf->lb_buf;
- ol = &ff->ff_layout;
- /* Old client does not send stripe information, store
- * the PFID EA on disk directly. */
- if (buf->lb_len == sizeof(struct lu_fid) ||
- ol->ol_stripe_size == 0) {
- len = sizeof(struct lu_fid);
- goto set;
- }
-
- if (buf->lb_len != sizeof(*ff))
- RETURN(-EINVAL);
-
- rc = osd_get_lma(info, inode, dentry, loa);
- if (unlikely(rc == -ENODATA)) {
- /* Usually for upgarding from old device */
- lustre_loa_init(loa, lu_object_fid(&dt->do_lu),
- LMAC_FID_ON_OST, 0);
- fl = XATTR_CREATE;
- } else if (rc) {
- RETURN(rc);
- } else {
- fl = XATTR_REPLACE;
- }
-
- fid_le_to_cpu(&loa->loa_parent_fid, &ff->ff_parent);
- loa->loa_parent_fid.f_ver |= le32_to_cpu(ol->ol_stripe_count) <<
- PFID_STRIPE_IDX_BITS;
- loa->loa_stripe_size = le32_to_cpu(ol->ol_stripe_size);
- lma->lma_compat |= LMAC_STRIPE_INFO;
- if (ol->ol_comp_id != 0) {
- loa->loa_comp_id = le32_to_cpu(ol->ol_comp_id);
- loa->loa_comp_start = le64_to_cpu(ol->ol_comp_start);
- loa->loa_comp_end = le64_to_cpu(ol->ol_comp_end);
- lma->lma_compat |= LMAC_COMP_INFO;
- }
-
- lustre_loa_swab(loa, false);
-
- /* Remove old PFID EA entry firstly. */
- ll_vfs_dq_init(inode);
- rc = inode->i_op->removexattr(dentry, name);
- if (rc && rc != -ENODATA)
- RETURN(rc);
-
- /* Store the PFID EA inside the LMA EA. */
- rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, loa,
- sizeof(*loa), fl);
+ if (strcmp(name, XATTR_NAME_FID) == 0 && osd->od_is_ost &&
+ (LDISKFS_INODE_SIZE(inode->i_sb) <= 256 || obj->oo_pfid_in_lma)) {
+ LASSERT(buf->lb_buf);
- RETURN(rc);
+ fl = osd_xattr_set_pfid(env, obj, buf, fl, handle);
+ if (fl <= 0)
+ RETURN(fl);
} else if (strcmp(name, XATTR_NAME_LMV) == 0) {
+ struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
+ struct lustre_mdt_attrs *lma = &loa->loa_lma;
+
rc = osd_get_lma(info, inode, &info->oti_obj_dentry, loa);
if (rc)
RETURN(rc);
RETURN(rc);
}
-set:
+ if (fl & LU_XATTR_REPLACE)
+ fs_flags |= XATTR_REPLACE;
+
+ if (fl & LU_XATTR_CREATE)
+ fs_flags |= XATTR_CREATE;
+
rc = __osd_xattr_set(info, inode, name, buf->lb_buf, len, fs_flags);
osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
- ll_vfs_dq_init(inode);
- dentry->d_inode = inode;
- dentry->d_sb = inode->i_sb;
- rc = inode->i_op->removexattr(dentry, name);
- if (rc == -ENODATA && strcmp(name, XATTR_NAME_FID) == 0) {
+ if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma) {
struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
- LASSERT(osd_dev(dt->do_lu.lo_dev)->od_is_ost);
-
rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
&info->oti_ost_attrs);
if (!rc) {
- if (!(lma->lma_compat & LMAC_STRIPE_INFO)) {
- rc = -ENODATA;
- goto out;
- }
+ LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
lustre_lma_swab(lma);
rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma,
sizeof(*lma), XATTR_REPLACE);
+ if (!rc)
+ obj->oo_pfid_in_lma = 0;
}
+ } else {
+ ll_vfs_dq_init(inode);
+ dentry->d_inode = inode;
+ dentry->d_sb = inode->i_sb;
+ rc = inode->i_op->removexattr(dentry, name);
}
-out:
osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
if (rc == 0 &&
#include <sys/txg.h>
#include <linux/posix_acl_xattr.h>
-
+#include <lustre_scrub.h>
int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
{
* \retval 0 on success
* \retval negative negated errno on failure
*/
-int __osd_xattr_get(const struct lu_env *env, struct osd_object *obj,
- struct lu_buf *buf, const char *name, int *sizep)
+int osd_xattr_get_internal(const struct lu_env *env, struct osd_object *obj,
+ struct lu_buf *buf, const char *name, int *sizep)
{
int rc;
buf, name, sizep);
}
+/**
+ * Rebuild the PFID EA (struct filter_fid) from the stripe information
+ * kept inside the LMA EA.
+ *
+ * The LMA EA is loaded into the per-thread oti_buf, converted to CPU byte
+ * order, and its parent FID / layout fields are repacked in little-endian
+ * form into the caller's buffer.
+ *
+ * \param[in]  env	execution environment
+ * \param[in]  obj	object to read from
+ * \param[out] buf	receives a struct filter_fid; when it has no buffer
+ *			or a zero length only \a sizep is filled in
+ * \param[out] sizep	set to sizeof(struct filter_fid) once the LMA EA
+ *			has been loaded
+ *
+ * \retval		0 on success
+ * \retval		-ERANGE if \a buf is too small for a filter_fid
+ * \retval		other negative value from osd_xattr_get_internal()
+ */
+static int osd_get_pfid_from_lma(const struct lu_env *env,
+				 struct osd_object *obj,
+				 struct lu_buf *buf, int *sizep)
+{
+	struct osd_thread_info *oti = osd_oti_get(env);
+	struct lustre_ost_attrs *ost_attrs =
+		(struct lustre_ost_attrs *)&oti->oti_buf;
+	struct lustre_mdt_attrs *mdt_attrs = &ost_attrs->loa_lma;
+	struct lu_buf lma_buf = {
+		.lb_buf = ost_attrs,
+		.lb_len = sizeof(oti->oti_buf),
+	};
+	struct filter_fid *pfid;
+	struct ost_layout *layout;
+	int ret;
+	ENTRY;
+
+	/* The scratch buffer must be able to hold a whole LOA. */
+	CLASSERT(sizeof(oti->oti_buf) >= sizeof(*ost_attrs));
+	ret = osd_xattr_get_internal(env, obj, &lma_buf,
+				     XATTR_NAME_LMA, sizep);
+	if (ret != 0)
+		RETURN(ret);
+
+	lustre_loa_swab(ost_attrs, true);
+	LASSERT(mdt_attrs->lma_compat & LMAC_STRIPE_INFO);
+
+	/* Always report the size of the synthesized PFID EA. */
+	*sizep = sizeof(*pfid);
+	if (!buf->lb_buf || buf->lb_len == 0)
+		RETURN(0);
+
+	if (buf->lb_len < sizeof(*pfid))
+		RETURN(-ERANGE);
+
+	pfid = buf->lb_buf;
+	layout = &pfid->ff_layout;
+
+	/* The stripe count lives in the upper bits of f_ver: extract it
+	 * first, then mask it off before exporting the parent FID. */
+	layout->ol_stripe_count =
+		cpu_to_le32(ost_attrs->loa_parent_fid.f_ver >>
+			    PFID_STRIPE_IDX_BITS);
+	layout->ol_stripe_size = cpu_to_le32(ost_attrs->loa_stripe_size);
+	ost_attrs->loa_parent_fid.f_ver &= PFID_STRIPE_COUNT_MASK;
+	fid_cpu_to_le(&pfid->ff_parent, &ost_attrs->loa_parent_fid);
+
+	if (!(mdt_attrs->lma_compat & LMAC_COMP_INFO)) {
+		/* No component information stored in the LMA EA. */
+		layout->ol_comp_id = 0;
+		layout->ol_comp_start = 0;
+		layout->ol_comp_end = 0;
+	} else {
+		layout->ol_comp_id = cpu_to_le32(ost_attrs->loa_comp_id);
+		layout->ol_comp_start = cpu_to_le64(ost_attrs->loa_comp_start);
+		layout->ol_comp_end = cpu_to_le64(ost_attrs->loa_comp_end);
+	}
+
+	RETURN(0);
+}
+
int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
struct lu_buf *buf, const char *name)
{
RETURN(-EOPNOTSUPP);
down_read(&obj->oo_guard);
- rc = __osd_xattr_get(env, obj, buf, name, &size);
+ /* For the OST migrated from ldiskfs, the PFID EA may
+ * be stored in LMA because of ldiskfs inode size. */
+ if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma)
+ rc = osd_get_pfid_from_lma(env, obj, buf, &size);
+ else
+ rc = osd_xattr_get_internal(env, obj, buf, name, &size);
up_read(&obj->oo_guard);
if (rc == -ENOENT)
return rc;
}
+/**
+ * Drop the PFID information that is embedded in the LMA EA.
+ *
+ * The LMA EA is loaded into the per-thread oti_buf, the stripe/component
+ * compat flags are cleared, and only the plain struct lustre_mdt_attrs
+ * part is written back with LU_XATTR_REPLACE. On success the object's
+ * oo_pfid_in_lma flag is cleared.
+ *
+ * \param[in] env	execution environment
+ * \param[in] obj	object whose LMA EA is rewritten
+ * \param[in] oh	transaction handle passed to osd_xattr_set_internal()
+ *
+ * \retval		0 on success
+ * \retval		negative value from osd_xattr_get_internal() or
+ *			osd_xattr_set_internal() on failure
+ */
+static int osd_xattr_split_pfid(const struct lu_env *env,
+				struct osd_object *obj, struct osd_thandle *oh)
+{
+	struct osd_thread_info *oti = osd_oti_get(env);
+	struct lustre_ost_attrs *ost_attrs =
+		(struct lustre_ost_attrs *)&oti->oti_buf;
+	struct lustre_mdt_attrs *mdt_attrs = &ost_attrs->loa_lma;
+	struct lu_buf lma_buf = {
+		.lb_buf = ost_attrs,
+		.lb_len = sizeof(oti->oti_buf),
+	};
+	int lma_size;
+	int ret;
+	ENTRY;
+
+	/* The scratch buffer must be able to hold a whole LOA. */
+	CLASSERT(sizeof(oti->oti_buf) >= sizeof(*ost_attrs));
+	ret = osd_xattr_get_internal(env, obj, &lma_buf, XATTR_NAME_LMA,
+				     &lma_size);
+	if (ret != 0)
+		RETURN(ret);
+
+	lustre_loa_swab(ost_attrs, true);
+	LASSERT(mdt_attrs->lma_compat & LMAC_STRIPE_INFO);
+
+	/* Clear the "PFID inside LMA" markers and convert the LMA part
+	 * back before it is written out. */
+	mdt_attrs->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
+	lustre_lma_swab(mdt_attrs);
+
+	/* Write back only the plain LMA, without the trailing LOA fields. */
+	lma_buf.lb_buf = mdt_attrs;
+	lma_buf.lb_len = sizeof(*mdt_attrs);
+	ret = osd_xattr_set_internal(env, obj, &lma_buf, XATTR_NAME_LMA,
+				     LU_XATTR_REPLACE, oh);
+	if (ret != 0)
+		RETURN(ret);
+
+	obj->oo_pfid_in_lma = 0;
+	RETURN(0);
+}
+
int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
const struct lu_buf *buf, const char *name, int fl,
struct thandle *handle)
down_write(&obj->oo_guard);
CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
name, (int)buf->lb_len);
- rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
+ /* For the OST migrated from ldiskfs, the PFID EA may
+ * be stored in LMA because of ldiskfs inode size. */
+ if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 &&
+ obj->oo_pfid_in_lma)) {
+ rc = osd_xattr_split_pfid(env, obj, oh);
+ if (!rc)
+ fl = LU_XATTR_CREATE;
+ }
+
+ if (!rc)
+ rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
up_write(&obj->oo_guard);
RETURN(rc);
RETURN(-EOPNOTSUPP);
down_write(&obj->oo_guard);
- rc = __osd_xattr_del(env, obj, name, oh);
+ /* For the OST migrated from ldiskfs, the PFID EA may
+ * be stored in LMA because of ldiskfs inode size. */
+ if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma))
+ rc = osd_xattr_split_pfid(env, obj, oh);
+ else
+ rc = __osd_xattr_del(env, obj, name, oh);
up_write(&obj->oo_guard);
RETURN(rc);