Whamcloud - gitweb
LU-10189 osd: handle PFID EA in LMA properly 96/29696/13
authorFan Yong <fan.yong@intel.com>
Wed, 29 Nov 2017 04:25:51 +0000 (12:25 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Sun, 17 Dec 2017 06:19:43 +0000 (06:19 +0000)
Originally, the issue was caused by old ldiskfs OST devices
with 256-byte inodes. Because the inode inline space was
very limited, we had to store the PFID EA inside the LMA
EA for stripe and PFL component information.

When we restore the OST from such old OST via server side
file level backup, then such composite LMA will be on the
new OST even if the new OST inode has enough inline space
to hold separated PFID EA.

Furthermore, if we migrate the old OST from ldiskfs to ZFS,
then such composite LMA will also be on the ZFS based OST
although the PFID EA can be stored independently on ZFS.

So the OSD logic, regardless of whether the backend is ldiskfs
or ZFS, needs to understand the composite LMA EA and handle it
properly.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I2b66787e725e13da7984f1bc2df45760dfbe4c4d
Reviewed-on: https://review.whamcloud.com/29696
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre_scrub.h
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-zfs/osd_index.c
lustre/osd-zfs/osd_internal.h
lustre/osd-zfs/osd_object.c
lustre/osd-zfs/osd_xattr.c

index 3c4561c..eb066f3 100644 (file)
@@ -39,6 +39,8 @@
 #define OSD_OI_FID_OID_BITS_MAX        10
 #define OSD_OI_FID_NR_MAX      (1UL << OSD_OI_FID_OID_BITS_MAX)
 #define SCRUB_OI_BITMAP_SIZE   (OSD_OI_FID_NR_MAX >> 3)
 #define OSD_OI_FID_OID_BITS_MAX        10
 #define OSD_OI_FID_NR_MAX      (1UL << OSD_OI_FID_OID_BITS_MAX)
 #define SCRUB_OI_BITMAP_SIZE   (OSD_OI_FID_NR_MAX >> 3)
+#define PFID_STRIPE_IDX_BITS   16
+#define PFID_STRIPE_COUNT_MASK ((1 << PFID_STRIPE_IDX_BITS) - 1)
 
 #define SCRUB_MAGIC_V1                 0x4C5FD252
 #define SCRUB_CHECKPOINT_INTERVAL      60
 
 #define SCRUB_MAGIC_V1                 0x4C5FD252
 #define SCRUB_CHECKPOINT_INTERVAL      60
index 5a5374c..90f07c7 100644 (file)
@@ -76,9 +76,6 @@
 
 #include <lustre_linkea.h>
 
 
 #include <lustre_linkea.h>
 
-#define PFID_STRIPE_IDX_BITS   16
-#define PFID_STRIPE_COUNT_MASK ((1 << PFID_STRIPE_IDX_BITS) - 1)
-
 int ldiskfs_pdo = 1;
 module_param(ldiskfs_pdo, int, 0644);
 MODULE_PARM_DESC(ldiskfs_pdo, "ldiskfs with parallel directory operations");
 int ldiskfs_pdo = 1;
 module_param(ldiskfs_pdo, int, 0644);
 MODULE_PARM_DESC(ldiskfs_pdo, "ldiskfs with parallel directory operations");
@@ -768,6 +765,9 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
                        rc = -EOPNOTSUPP;
                } else {
                        fid = &lma->lma_self_fid;
                        rc = -EOPNOTSUPP;
                } else {
                        fid = &lma->lma_self_fid;
+                       if (lma->lma_compat & LMAC_STRIPE_INFO &&
+                           osd->od_is_ost)
+                               obj->oo_pfid_in_lma = 1;
                }
        }
 
                }
        }
 
@@ -999,6 +999,7 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
        bool remote = false;
        bool trusted = true;
        bool updated = false;
        bool remote = false;
        bool trusted = true;
        bool updated = false;
+       bool checked = false;
        ENTRY;
 
        LINVRNT(osd_invariant(obj));
        ENTRY;
 
        LINVRNT(osd_invariant(obj));
@@ -1151,6 +1152,7 @@ join:
 
 check_lma:
        result = osd_check_lma(env, obj);
 
 check_lma:
        result = osd_check_lma(env, obj);
+       checked = true;
        if (!result)
                goto found;
 
        if (!result)
                goto found;
 
@@ -1226,6 +1228,20 @@ check_lma:
        goto iget;
 
 found:
        goto iget;
 
 found:
+       if (!checked) {
+               struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
+               struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
+
+               result = osd_get_lma(info, inode, &info->oti_obj_dentry, loa);
+               if (!result) {
+                       if (lma->lma_compat & LMAC_STRIPE_INFO &&
+                           dev->od_is_ost)
+                               obj->oo_pfid_in_lma = 1;
+               } else if (result != -ENODATA) {
+                       GOTO(out, result);
+               }
+       }
+
        obj->oo_compat_dot_created = 1;
        obj->oo_compat_dotdot_created = 1;
 
        obj->oo_compat_dot_created = 1;
        obj->oo_compat_dotdot_created = 1;
 
@@ -2731,6 +2747,8 @@ static int osd_attr_set(const struct lu_env *env,
                struct osd_thread_info *info = osd_oti_get(env);
                struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
 
                struct osd_thread_info *info = osd_oti_get(env);
                struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
 
+               LASSERT(!obj->oo_pfid_in_lma);
+
                rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
                                 &info->oti_ost_attrs);
                if (rc)
                rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
                                 &info->oti_ost_attrs);
                if (rc)
@@ -3833,24 +3851,17 @@ static int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
                        return rc;
        }
 
                        return rc;
        }
 
-       rc = __osd_xattr_get(inode, dentry, name, buf->lb_buf, buf->lb_len);
-       if (rc == -ENODATA && strcmp(name, XATTR_NAME_FID) == 0) {
+       if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma) {
                struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
                struct lustre_mdt_attrs *lma = &loa->loa_lma;
                struct filter_fid *ff;
                struct ost_layout *ol;
 
                struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
                struct lustre_mdt_attrs *lma = &loa->loa_lma;
                struct filter_fid *ff;
                struct ost_layout *ol;
 
-               if (!osd_dev(dt->do_lu.lo_dev)->od_is_ost)
-                       goto cache;
-
                rc = osd_get_lma(info, inode, &info->oti_obj_dentry, loa);
                if (rc)
                        return rc;
 
                rc = osd_get_lma(info, inode, &info->oti_obj_dentry, loa);
                if (rc)
                        return rc;
 
-               if (!(lma->lma_compat & LMAC_STRIPE_INFO)) {
-                       rc = -ENODATA;
-                       goto cache;
-               }
+               LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
 
                rc = sizeof(*ff);
                if (buf->lb_len == 0 || !buf->lb_buf)
 
                rc = sizeof(*ff);
                if (buf->lb_len == 0 || !buf->lb_buf)
@@ -3875,9 +3886,11 @@ static int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
                        ol->ol_comp_end = 0;
                        ol->ol_comp_id = 0;
                }
                        ol->ol_comp_end = 0;
                        ol->ol_comp_id = 0;
                }
+       } else {
+               rc = __osd_xattr_get(inode, dentry, name,
+                                    buf->lb_buf, buf->lb_len);
        }
 
        }
 
-cache:
        if (cache_xattr) {
                if (rc == -ENOENT || rc == -ENODATA)
                        osd_oxc_add(obj, name, NULL, 0);
        if (cache_xattr) {
                if (rc == -ENOENT || rc == -ENODATA)
                        osd_oxc_add(obj, name, NULL, 0);
@@ -3961,6 +3974,97 @@ upgrade:
        return 0;
 }
 
        return 0;
 }
 
+static int osd_xattr_set_pfid(const struct lu_env *env, struct osd_object *obj,
+                             const struct lu_buf *buf, int fl,
+                             struct thandle *handle)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct dentry *dentry = &info->oti_obj_dentry;
+       struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
+       struct lustre_mdt_attrs *lma = &loa->loa_lma;
+       struct inode *inode = obj->oo_inode;
+       struct filter_fid *ff = buf->lb_buf;
+       struct ost_layout *ol = &ff->ff_layout;
+       int flags = XATTR_REPLACE;
+       int rc;
+       ENTRY;
+
+       if (buf->lb_len != sizeof(*ff) && buf->lb_len != sizeof(struct lu_fid))
+               RETURN(-EINVAL);
+
+       rc = osd_get_lma(info, inode, dentry, loa);
+       if (rc == -ENODATA) {
+               /* Usually for upgrading from old device */
+               lustre_loa_init(loa, lu_object_fid(&obj->oo_dt.do_lu),
+                               LMAC_FID_ON_OST, 0);
+               flags = XATTR_CREATE;
+       } else if (rc) {
+               RETURN(rc);
+       }
+
+       if (!rc && lma->lma_compat & LMAC_STRIPE_INFO) {
+               if ((fl & LU_XATTR_CREATE) && !(fl & LU_XATTR_REPLACE))
+                       RETURN(-EEXIST);
+
+               if (LDISKFS_INODE_SIZE(inode->i_sb) > 256) {
+                       /* Separate PFID EA from LMA */
+                       lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
+                       lustre_lma_swab(lma);
+                       rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma,
+                                            sizeof(*lma), XATTR_REPLACE);
+                       if (!rc) {
+                               obj->oo_pfid_in_lma = 0;
+                               rc = LU_XATTR_CREATE;
+                       }
+
+                       RETURN(rc);
+               }
+       } else {
+               if (LDISKFS_INODE_SIZE(inode->i_sb) > 256)
+                       RETURN(fl);
+
+               /* Old client does not send stripe information,
+                * then store the PFID EA on disk separately. */
+               if (unlikely(buf->lb_len == sizeof(struct lu_fid) ||
+                            ol->ol_stripe_size == 0))
+                       RETURN(fl);
+
+               /* Remove old PFID EA entry firstly. */
+               ll_vfs_dq_init(inode);
+               rc = inode->i_op->removexattr(dentry, XATTR_NAME_FID);
+               if (rc == -ENODATA) {
+                       if ((fl & LU_XATTR_REPLACE) && !(fl & LU_XATTR_CREATE))
+                               RETURN(rc);
+               } else if (rc) {
+                       RETURN(rc);
+               }
+       }
+
+       fid_le_to_cpu(&loa->loa_parent_fid, &ff->ff_parent);
+       if (likely(ol->ol_stripe_size != 0)) {
+               loa->loa_parent_fid.f_ver |= le32_to_cpu(ol->ol_stripe_count) <<
+                                            PFID_STRIPE_IDX_BITS;
+               loa->loa_stripe_size = le32_to_cpu(ol->ol_stripe_size);
+               lma->lma_compat |= LMAC_STRIPE_INFO;
+               if (ol->ol_comp_id != 0) {
+                       loa->loa_comp_id = le32_to_cpu(ol->ol_comp_id);
+                       loa->loa_comp_start = le64_to_cpu(ol->ol_comp_start);
+                       loa->loa_comp_end = le64_to_cpu(ol->ol_comp_end);
+                       lma->lma_compat |= LMAC_COMP_INFO;
+               }
+       }
+
+       lustre_loa_swab(loa, false);
+
+       /* Store the PFID EA inside LMA. */
+       rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, loa, sizeof(*loa),
+                            flags);
+       if (!rc)
+               obj->oo_pfid_in_lma = 1;
+
+       RETURN(rc);
+}
+
 /*
  * Concurrency: @dt is write locked.
  */
 /*
  * Concurrency: @dt is write locked.
  */
@@ -3969,10 +4073,9 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
                         struct thandle *handle)
 {
        struct osd_object *obj = osd_dt_obj(dt);
                         struct thandle *handle)
 {
        struct osd_object *obj = osd_dt_obj(dt);
+       struct osd_device *osd = osd_obj2dev(obj);
        struct inode *inode = obj->oo_inode;
        struct osd_thread_info *info = osd_oti_get(env);
        struct inode *inode = obj->oo_inode;
        struct osd_thread_info *info = osd_oti_get(env);
-       struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
-       struct lustre_mdt_attrs *lma = &loa->loa_lma;
        int fs_flags = 0;
        int len;
        int rc;
        int fs_flags = 0;
        int len;
        int rc;
@@ -4005,76 +4108,22 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
 
        len = buf->lb_len;
        osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
 
        len = buf->lb_len;
        osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
-       if (fl & LU_XATTR_REPLACE)
-               fs_flags |= XATTR_REPLACE;
-
-       if (fl & LU_XATTR_CREATE)
-               fs_flags |= XATTR_CREATE;
 
        /* For the OST device with 256 bytes inode size by default,
         * the PFID EA will be stored together with LMA EA to avoid
         * performance trouble. Otherwise the PFID EA can be stored
         * independently. LU-8998 */
 
        /* For the OST device with 256 bytes inode size by default,
         * the PFID EA will be stored together with LMA EA to avoid
         * performance trouble. Otherwise the PFID EA can be stored
         * independently. LU-8998 */
-       if (strcmp(name, XATTR_NAME_FID) == 0 &&
-           LDISKFS_INODE_SIZE(inode->i_sb) <= 256) {
-               struct dentry *dentry = &info->oti_obj_dentry;
-               struct filter_fid *ff;
-               struct ost_layout *ol;
-               int fl;
-
-               LASSERT(osd_dev(dt->do_lu.lo_dev)->od_is_ost);
-
-               ff = buf->lb_buf;
-               ol = &ff->ff_layout;
-               /* Old client does not send stripe information, store
-                * the PFID EA on disk directly. */
-               if (buf->lb_len == sizeof(struct lu_fid) ||
-                   ol->ol_stripe_size == 0) {
-                       len = sizeof(struct lu_fid);
-                       goto set;
-               }
-
-               if (buf->lb_len != sizeof(*ff))
-                       RETURN(-EINVAL);
-
-               rc = osd_get_lma(info, inode, dentry, loa);
-               if (unlikely(rc == -ENODATA)) {
-                       /* Usually for upgarding from old device */
-                       lustre_loa_init(loa, lu_object_fid(&dt->do_lu),
-                                       LMAC_FID_ON_OST, 0);
-                       fl = XATTR_CREATE;
-               } else if (rc) {
-                       RETURN(rc);
-               } else {
-                       fl = XATTR_REPLACE;
-               }
-
-               fid_le_to_cpu(&loa->loa_parent_fid, &ff->ff_parent);
-               loa->loa_parent_fid.f_ver |= le32_to_cpu(ol->ol_stripe_count) <<
-                                            PFID_STRIPE_IDX_BITS;
-               loa->loa_stripe_size = le32_to_cpu(ol->ol_stripe_size);
-               lma->lma_compat |= LMAC_STRIPE_INFO;
-               if (ol->ol_comp_id != 0) {
-                       loa->loa_comp_id = le32_to_cpu(ol->ol_comp_id);
-                       loa->loa_comp_start = le64_to_cpu(ol->ol_comp_start);
-                       loa->loa_comp_end = le64_to_cpu(ol->ol_comp_end);
-                       lma->lma_compat |= LMAC_COMP_INFO;
-               }
-
-               lustre_loa_swab(loa, false);
-
-               /* Remove old PFID EA entry firstly. */
-               ll_vfs_dq_init(inode);
-               rc = inode->i_op->removexattr(dentry, name);
-               if (rc && rc != -ENODATA)
-                       RETURN(rc);
-
-               /* Store the PFID EA inside the LMA EA. */
-               rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, loa,
-                                    sizeof(*loa), fl);
+       if (strcmp(name, XATTR_NAME_FID) == 0 && osd->od_is_ost &&
+           (LDISKFS_INODE_SIZE(inode->i_sb) <= 256 || obj->oo_pfid_in_lma)) {
+               LASSERT(buf->lb_buf);
 
 
-               RETURN(rc);
+               fl = osd_xattr_set_pfid(env, obj, buf, fl, handle);
+               if (fl <= 0)
+                       RETURN(fl);
        } else if (strcmp(name, XATTR_NAME_LMV) == 0) {
        } else if (strcmp(name, XATTR_NAME_LMV) == 0) {
+               struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
+               struct lustre_mdt_attrs *lma = &loa->loa_lma;
+
                rc = osd_get_lma(info, inode, &info->oti_obj_dentry, loa);
                if (rc)
                        RETURN(rc);
                rc = osd_get_lma(info, inode, &info->oti_obj_dentry, loa);
                if (rc)
                        RETURN(rc);
@@ -4087,7 +4136,12 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
                        RETURN(rc);
        }
 
                        RETURN(rc);
        }
 
-set:
+       if (fl & LU_XATTR_REPLACE)
+               fs_flags |= XATTR_REPLACE;
+
+       if (fl & LU_XATTR_CREATE)
+               fs_flags |= XATTR_CREATE;
+
        rc = __osd_xattr_set(info, inode, name, buf->lb_buf, len, fs_flags);
        osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
 
        rc = __osd_xattr_set(info, inode, name, buf->lb_buf, len, fs_flags);
        osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
 
@@ -4168,31 +4222,28 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
 
        osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
 
 
        osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
 
-       ll_vfs_dq_init(inode);
-       dentry->d_inode = inode;
-       dentry->d_sb = inode->i_sb;
-       rc = inode->i_op->removexattr(dentry, name);
-       if (rc == -ENODATA && strcmp(name, XATTR_NAME_FID) == 0) {
+       if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma) {
                struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
 
                struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
 
-               LASSERT(osd_dev(dt->do_lu.lo_dev)->od_is_ost);
-
                rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
                                 &info->oti_ost_attrs);
                if (!rc) {
                rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
                                 &info->oti_ost_attrs);
                if (!rc) {
-                       if (!(lma->lma_compat & LMAC_STRIPE_INFO)) {
-                               rc = -ENODATA;
-                               goto out;
-                       }
+                       LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
 
                        lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
                        lustre_lma_swab(lma);
                        rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma,
                                             sizeof(*lma), XATTR_REPLACE);
 
                        lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
                        lustre_lma_swab(lma);
                        rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma,
                                             sizeof(*lma), XATTR_REPLACE);
+                       if (!rc)
+                               obj->oo_pfid_in_lma = 0;
                }
                }
+       } else {
+               ll_vfs_dq_init(inode);
+               dentry->d_inode = inode;
+               dentry->d_sb = inode->i_sb;
+               rc = inode->i_op->removexattr(dentry, name);
        }
 
        }
 
-out:
        osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
 
        if (rc == 0 &&
        osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
 
        if (rc == 0 &&
index c794bcf..e21cce8 100644 (file)
@@ -135,7 +135,8 @@ struct osd_object {
        /** protects inode attributes. */
        spinlock_t              oo_guard;
 
        /** protects inode attributes. */
        spinlock_t              oo_guard;
 
-       __u32                   oo_destroyed:1;
+       __u32                   oo_destroyed:1,
+                               oo_pfid_in_lma:1;
 
        /* the i_flags in LMA */
        __u32                   oo_lma_flags;
 
        /* the i_flags in LMA */
        __u32                   oo_lma_flags;
index b9db936..6d607c8 100644 (file)
@@ -326,7 +326,7 @@ out:
 
 /*
  * As we don't know FID, we can't use LU object, so this function
 
 /*
  * As we don't know FID, we can't use LU object, so this function
- * partially duplicate __osd_xattr_get() which is built around
+ * partially duplicate osd_xattr_get_internal() which is built around
  * LU-object and uses it to cache data like regular EA dnode, etc
  */
 static int osd_find_parent_by_dnode(const struct lu_env *env,
  * LU-object and uses it to cache data like regular EA dnode, etc
  */
 static int osd_find_parent_by_dnode(const struct lu_env *env,
index f94e6aa..fbc7c99 100644 (file)
@@ -372,7 +372,8 @@ struct osd_object {
 #ifdef ZFS_PROJINHERIT
                                 oo_with_projid:1,
 #endif
 #ifdef ZFS_PROJINHERIT
                                 oo_with_projid:1,
 #endif
-                                oo_late_attr_set:1;
+                                oo_late_attr_set:1,
+                                oo_pfid_in_lma:1;
 
        /* the i_flags in LMA */
        __u32                    oo_lma_flags;
 
        /* the i_flags in LMA */
        __u32                    oo_lma_flags;
index e9a8dbe..d3f7363 100644 (file)
@@ -454,6 +454,10 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
                              lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
                              PFID(lu_object_fid(&obj->oo_dt.do_lu)));
                        rc = -EOPNOTSUPP;
                              lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
                              PFID(lu_object_fid(&obj->oo_dt.do_lu)));
                        rc = -EOPNOTSUPP;
+               } else {
+                       if (lma->lma_compat & LMAC_STRIPE_INFO &&
+                           osd_obj2dev(obj)->od_is_ost)
+                               obj->oo_pfid_in_lma = 1;
                }
        } else if (rc == -ENODATA) {
                /* haven't initialize LMA xattr */
                }
        } else if (rc == -ENODATA) {
                /* haven't initialize LMA xattr */
@@ -1116,6 +1120,7 @@ static int osd_attr_set(const struct lu_env *env, struct dt_object *dt,
                struct lu_buf buf;
 
                if (la->la_flags & LUSTRE_LMA_FL_MASKS) {
                struct lu_buf buf;
 
                if (la->la_flags & LUSTRE_LMA_FL_MASKS) {
+                       LASSERT(!obj->oo_pfid_in_lma);
                        CLASSERT(sizeof(info->oti_buf) >= sizeof(*lma));
                        lma = (struct lustre_mdt_attrs *)&info->oti_buf;
                        buf.lb_buf = lma;
                        CLASSERT(sizeof(info->oti_buf) >= sizeof(*lma));
                        lma = (struct lustre_mdt_attrs *)&info->oti_buf;
                        buf.lb_buf = lma;
index aaa94ab..d6bed9a 100644 (file)
@@ -62,7 +62,7 @@
 #include <sys/txg.h>
 
 #include <linux/posix_acl_xattr.h>
 #include <sys/txg.h>
 
 #include <linux/posix_acl_xattr.h>
-
+#include <lustre_scrub.h>
 
 int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
 {
 
 int __osd_xattr_load(struct osd_device *osd, sa_handle_t *hdl, nvlist_t **sa)
 {
@@ -205,8 +205,8 @@ out_rele:
  * \retval 0           on success
  * \retval negative    negated errno on failure
  */
  * \retval 0           on success
  * \retval negative    negated errno on failure
  */
-int __osd_xattr_get(const struct lu_env *env, struct osd_object *obj,
-                   struct lu_buf *buf, const char *name, int *sizep)
+int osd_xattr_get_internal(const struct lu_env *env, struct osd_object *obj,
+                          struct lu_buf *buf, const char *name, int *sizep)
 {
        int rc;
 
 {
        int rc;
 
@@ -222,6 +222,59 @@ int __osd_xattr_get(const struct lu_env *env, struct osd_object *obj,
                                     buf, name, sizep);
 }
 
                                     buf, name, sizep);
 }
 
+static int osd_get_pfid_from_lma(const struct lu_env *env,
+                                struct osd_object *obj,
+                                struct lu_buf *buf, int *sizep)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct lustre_ost_attrs *loa =
+               (struct lustre_ost_attrs *)&info->oti_buf;
+       struct lustre_mdt_attrs *lma = &loa->loa_lma;
+       struct filter_fid *ff;
+       struct ost_layout *ol;
+       struct lu_buf tbuf = {
+               .lb_buf = loa,
+               .lb_len = sizeof(info->oti_buf),
+       };
+       int rc;
+       ENTRY;
+
+       CLASSERT(sizeof(info->oti_buf) >= sizeof(*loa));
+       rc = osd_xattr_get_internal(env, obj, &tbuf,
+                                   XATTR_NAME_LMA, sizep);
+       if (rc)
+               RETURN(rc);
+
+       lustre_loa_swab(loa, true);
+       LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
+
+       *sizep = sizeof(*ff);
+       if (buf->lb_len == 0 || !buf->lb_buf)
+               RETURN(0);
+
+       if (buf->lb_len < *sizep)
+               RETURN(-ERANGE);
+
+       ff = buf->lb_buf;
+       ol = &ff->ff_layout;
+       ol->ol_stripe_count = cpu_to_le32(loa->loa_parent_fid.f_ver >>
+                                         PFID_STRIPE_IDX_BITS);
+       ol->ol_stripe_size = cpu_to_le32(loa->loa_stripe_size);
+       loa->loa_parent_fid.f_ver &= PFID_STRIPE_COUNT_MASK;
+       fid_cpu_to_le(&ff->ff_parent, &loa->loa_parent_fid);
+       if (lma->lma_compat & LMAC_COMP_INFO) {
+               ol->ol_comp_start = cpu_to_le64(loa->loa_comp_start);
+               ol->ol_comp_end = cpu_to_le64(loa->loa_comp_end);
+               ol->ol_comp_id = cpu_to_le32(loa->loa_comp_id);
+       } else {
+               ol->ol_comp_start = 0;
+               ol->ol_comp_end = 0;
+               ol->ol_comp_id = 0;
+       }
+
+       RETURN(0);
+}
+
 int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
                  struct lu_buf *buf, const char *name)
 {
 int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
                  struct lu_buf *buf, const char *name)
 {
@@ -238,7 +291,12 @@ int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
                RETURN(-EOPNOTSUPP);
 
        down_read(&obj->oo_guard);
                RETURN(-EOPNOTSUPP);
 
        down_read(&obj->oo_guard);
-       rc = __osd_xattr_get(env, obj, buf, name, &size);
+       /* For the OST migrated from ldiskfs, the PFID EA may
+        * be stored in LMA because of ldiskfs inode size. */
+       if (strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma)
+               rc = osd_get_pfid_from_lma(env, obj, buf, &size);
+       else
+               rc = osd_xattr_get_internal(env, obj, buf, name, &size);
        up_read(&obj->oo_guard);
 
        if (rc == -ENOENT)
        up_read(&obj->oo_guard);
 
        if (rc == -ENOENT)
@@ -684,6 +742,41 @@ out:
        return rc;
 }
 
        return rc;
 }
 
+static int osd_xattr_split_pfid(const struct lu_env *env,
+                               struct osd_object *obj, struct osd_thandle *oh)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct lustre_ost_attrs *loa =
+               (struct lustre_ost_attrs *)&info->oti_buf;
+       struct lustre_mdt_attrs *lma = &loa->loa_lma;
+       struct lu_buf buf = {
+               .lb_buf = loa,
+               .lb_len = sizeof(info->oti_buf),
+       };
+       int size;
+       int rc;
+       ENTRY;
+
+       CLASSERT(sizeof(info->oti_buf) >= sizeof(*loa));
+       rc = osd_xattr_get_internal(env, obj, &buf, XATTR_NAME_LMA, &size);
+       if (rc)
+               RETURN(rc);
+
+       lustre_loa_swab(loa, true);
+       LASSERT(lma->lma_compat & LMAC_STRIPE_INFO);
+
+       lma->lma_compat &= ~(LMAC_STRIPE_INFO | LMAC_COMP_INFO);
+       lustre_lma_swab(lma);
+       buf.lb_buf = lma;
+       buf.lb_len = sizeof(*lma);
+       rc = osd_xattr_set_internal(env, obj, &buf, XATTR_NAME_LMA,
+                                   LU_XATTR_REPLACE, oh);
+       if (!rc)
+               obj->oo_pfid_in_lma = 0;
+
+       RETURN(rc);
+}
+
 int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
                  const struct lu_buf *buf, const char *name, int fl,
                  struct thandle *handle)
 int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
                  const struct lu_buf *buf, const char *name, int fl,
                  struct thandle *handle)
@@ -706,7 +799,17 @@ int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
        down_write(&obj->oo_guard);
        CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
                name, (int)buf->lb_len);
        down_write(&obj->oo_guard);
        CDEBUG(D_INODE, "Setting xattr %s with size %d\n",
                name, (int)buf->lb_len);
-       rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
+       /* For the OST migrated from ldiskfs, the PFID EA may
+        * be stored in LMA because of ldiskfs inode size. */
+       if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 &&
+                    obj->oo_pfid_in_lma)) {
+               rc = osd_xattr_split_pfid(env, obj, oh);
+               if (!rc)
+                       fl = LU_XATTR_CREATE;
+       }
+
+       if (!rc)
+               rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh);
        up_write(&obj->oo_guard);
 
        RETURN(rc);
        up_write(&obj->oo_guard);
 
        RETURN(rc);
@@ -843,7 +946,12 @@ int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
                RETURN(-EOPNOTSUPP);
 
        down_write(&obj->oo_guard);
                RETURN(-EOPNOTSUPP);
 
        down_write(&obj->oo_guard);
-       rc = __osd_xattr_del(env, obj, name, oh);
+       /* For the OST migrated from ldiskfs, the PFID EA may
+        * be stored in LMA because of ldiskfs inode size. */
+       if (unlikely(strcmp(name, XATTR_NAME_FID) == 0 && obj->oo_pfid_in_lma))
+               rc = osd_xattr_split_pfid(env, obj, oh);
+       else
+               rc = __osd_xattr_del(env, obj, name, oh);
        up_write(&obj->oo_guard);
 
        RETURN(rc);
        up_write(&obj->oo_guard);
 
        RETURN(rc);