Whamcloud - gitweb
LU-11625 ofd: handle upgraded filter_fid properly 27/33627/5
author Andreas Dilger <adilger@whamcloud.com>
Wed, 7 Nov 2018 02:40:18 +0000 (19:40 -0700)
committer Oleg Drokin <green@whamcloud.com>
Wed, 23 Jan 2019 09:18:37 +0000 (09:18 +0000)
Several iterations of struct filter_fid have been stored on disk over
time, but the current code wasn't checking for all of the possible
cases when deciding what action to take when accessing and upgrading
the xattr for new capabilities.

Properly check for the various different struct filter_fid sizes and
handle them appropriately.  Add a more verbose description of the
various cases so that this is more clear to others in the future.

Add decoding of filter_fid fields added for FLR in 2.11.

We should already be testing for upgrading the filter_fid xattr
from different OST versions in conf-sanity test_32d.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Ifef2292296236cb06ff7e8cd50caff4b133ebbe5
Reviewed-on: https://review.whamcloud.com/33627
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_fid.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lod/lod_internal.h
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_scrub.c
lustre/utils/ll_decode_filter_fid.c

index cbf3868..ea6d743 100644 (file)
@@ -339,7 +339,6 @@ static inline void ost_layout_le_to_cpu(struct ost_layout *dst,
        dst->ol_comp_id = __le32_to_cpu(src->ol_comp_id);
 }
 
-/* Both filter_fid_*cpu* functions not currently used */
 static inline void filter_fid_cpu_to_le(struct filter_fid *dst,
                                        const struct filter_fid *src, int size)
 {
index 7997d39..a747e45 100644 (file)
@@ -201,15 +201,47 @@ struct ost_layout {
        __u32   ol_comp_id;
 } __attribute__((packed));
 
-/* keep this one for compatibility */
-struct filter_fid_old {
-       struct lu_fid   ff_parent;
-       __u64           ff_objid;
-       __u64           ff_seq;
+/* The filter_fid structure has changed several times over its lifetime.
+ * For a long time "trusted.fid" held the MDT inode parent FID/IGIF and
+ * stripe_index and the "self FID" (objid/seq) to be able to recover the
+ * OST objects in case of corruption.  With the move to 2.4 and OSD-API for
+ * the OST, the "trusted.lma" xattr was added to the OST objects to store
+ * the "self FID" to be consistent with the MDT on-disk format, and the
+ * filter_fid only stored the MDT inode parent FID and stripe index.
+ *
+ * In 2.10, the addition of PFL composite layouts required more information
+ * to be stored into the filter_fid in order to be able to identify which
+ * component the OST object belonged to.  As well, the stripe size may vary
+ * between components, so it was no longer safe to assume the stripe size
+ * or stripe_count of a file.  This is also more robust for plain layouts.
+ *
+ * For ldiskfs OSTs that were formatted with 256-byte inodes, there is not
+ * enough space to store both the filter_fid and LMA in the inode, so they
+ * are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid
+ * an extra seek for every OST object access.
+ *
+ * In 2.11, FLR mirror layouts also need to store the layout version and
+ * range so that writes to old versions of the layout are not allowed.
+ * That ensures that mirrored objects are not modified by evicted clients,
+ * and ensures that the components are correctly marked stale on the MDT.
+ */
+struct filter_fid_18_23 {
+       struct lu_fid           ff_parent;      /* stripe_idx in f_ver */
+       __u64                   ff_objid;
+       __u64                   ff_seq;
+};
+
+struct filter_fid_24_29 {
+       struct lu_fid           ff_parent;      /* stripe_idx in f_ver */
+};
+
+struct filter_fid_210 {
+       struct lu_fid           ff_parent;      /* stripe_idx in f_ver */
+       struct ost_layout       ff_layout;
 };
 
 struct filter_fid {
-       struct lu_fid           ff_parent;
+       struct lu_fid           ff_parent;      /* stripe_idx in f_ver */
        struct ost_layout       ff_layout;
        __u32                   ff_layout_version;
        __u32                   ff_range; /* range of layout version that
index 1039942..a240b72 100644 (file)
@@ -439,7 +439,7 @@ struct lod_thread_info {
        struct lustre_cfg               lti_lustre_cfg;
        /* used to store parent default striping in create */
        struct lod_default_striping     lti_def_striping;
-       struct filter_fid lti_ff;
+       struct filter_fid               lti_ff;
        __u32                           *lti_comp_idx;
        size_t                          lti_comp_size;
        size_t                          lti_count;
index f46bf1a..d03fb6b 100644 (file)
@@ -706,29 +706,30 @@ put:
 }
 
 /**
- * \retval +v: new filter_fid, does not contain self-fid
- * \retval 0:  filter_fid_old, contains self-fid
+ * \retval +v: new filter_fid does not contain self-fid
+ * \retval 0:  filter_fid_18_23, contains self-fid
  * \retval -v: other failure cases
  */
 int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
                 struct dentry *dentry, struct lu_fid *fid)
 {
-       struct filter_fid_old *ff = &info->oti_ff;
+       struct filter_fid *ff = &info->oti_ff;
        struct ost_id *ostid = &info->oti_ostid;
        int rc;
 
        rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff));
-       if (rc == sizeof(*ff)) {
-               rc = 0;
-               ostid_set_seq(ostid, le64_to_cpu(ff->ff_seq));
-               rc = ostid_set_id(ostid, le64_to_cpu(ff->ff_objid));
+       if (rc == sizeof(struct filter_fid_18_23)) {
+               struct filter_fid_18_23 *ff_old = (void *)ff;
+
+               ostid_set_seq(ostid, le64_to_cpu(ff_old->ff_seq));
+               rc = ostid_set_id(ostid, le64_to_cpu(ff_old->ff_objid));
                /*
                 * XXX: use 0 as the index for compatibility, the caller will
                 * handle index related issues when necessary.
                 */
                if (!rc)
                        ostid_to_fid(fid, ostid, 0);
-       } else if (rc == sizeof(struct filter_fid)) {
+       } else if (rc >= (int)sizeof(struct filter_fid_24_29)) {
                rc = 1;
        } else if (rc >= 0) {
                rc = -EINVAL;
@@ -782,7 +783,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        if (rc == -ENODATA && !fid_is_igif(rfid) && osd->od_check_ff) {
                fid = &lma->lma_self_fid;
                rc = osd_get_idif(info, inode, dentry, fid);
-               if ((rc > 0) || (rc == -ENODATA && osd->od_index_in_idif)) {
+               if (rc > 0 || (rc == -ENODATA && osd->od_index_in_idif)) {
                        /*
                         * For the given OST-object, if it has neither LMA nor
                         * FID in XATTR_NAME_FID, then the given FID (which is
index 9afd83b..aa8a293 100644 (file)
@@ -619,31 +619,31 @@ struct osd_thread_info {
        struct osd_idmap_cache oti_cache;
 
        /* dedicated OI cache for insert (which needs inum) */
-       struct osd_idmap_cache *oti_ins_cache;
-       int                    oti_ins_cache_size;
-       int                    oti_ins_cache_used;
+       struct osd_idmap_cache          *oti_ins_cache;
+       int                             oti_ins_cache_size;
+       int                             oti_ins_cache_used;
        /* inc by osd_trans_create and dec by osd_trans_stop */
-       int                    oti_ins_cache_depth;
-
-        int                    oti_r_locks;
-        int                    oti_w_locks;
-        int                    oti_txns;
-        /** used in osd_fid_set() to put xattr */
-        struct lu_buf          oti_buf;
-       struct lu_buf          oti_big_buf;
-        /** used in osd_ea_fid_set() to set fid into common ea */
+       int                             oti_ins_cache_depth;
+
+       int                             oti_r_locks;
+       int                             oti_w_locks;
+       int                             oti_txns;
+       /** used in osd_fid_set() to put xattr */
+       struct lu_buf                   oti_buf;
+       struct lu_buf                   oti_big_buf;
+       /** used in osd_ea_fid_set() to set fid into common ea */
        union {
                struct lustre_ost_attrs oti_ost_attrs;
-               struct filter_fid_old   oti_ff;
-               struct filter_fid       oti_ff_new;
+               struct filter_fid_18_23 oti_ff_old;
+               struct filter_fid       oti_ff;
        };
        /** 0-copy IO */
-       struct osd_iobuf       oti_iobuf;
+       struct osd_iobuf                oti_iobuf;
        /* used to access objects in /O */
-       struct inode          *oti_inode;
+       struct inode                    *oti_inode;
 #define OSD_FID_REC_SZ 32
-       char                   oti_ldp[OSD_FID_REC_SZ];
-       char                   oti_ldp2[OSD_FID_REC_SZ];
+       char                            oti_ldp[OSD_FID_REC_SZ];
+       char                            oti_ldp2[OSD_FID_REC_SZ];
 
        /* used by quota code */
        union {
index 6c47f8d..9281756 100644 (file)
@@ -163,13 +163,13 @@ static int
 osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev,
                     struct inode *inode, const struct lu_fid *fid)
 {
-       struct filter_fid_old   *ff      = &info->oti_ff;
-       struct dentry           *dentry  = &info->oti_obj_dentry;
-       struct lu_fid           *tfid    = &info->oti_fid;
-       handle_t                *jh;
-       int                      size    = 0;
-       int                      rc;
-       bool                     reset   = false;
+       struct filter_fid_18_23 *ff = &info->oti_ff_old;
+       struct dentry *dentry = &info->oti_obj_dentry;
+       struct lu_fid *tfid = &info->oti_fid;
+       bool fid_18_23 = false;
+       handle_t *jh;
+       int size = 0;
+       int rc;
        ENTRY;
 
        if (dev->od_scrub.os_scrub.os_file.sf_param & SP_DRYRUN)
@@ -212,14 +212,14 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev,
                if (rc)
                        GOTO(stop, rc);
 
-               reset = true;
-       } else if (rc != -ENODATA && rc != sizeof(struct filter_fid)) {
+               fid_18_23 = true;
+       } else if (rc != -ENODATA && rc < (int)sizeof(struct filter_fid_24_29)) {
                GOTO(stop, rc = -EINVAL);
        }
 
        /* 3) make new LMA and add it */
        rc = osd_ea_fid_set(info, inode, tfid, LMAC_FID_ON_OST, 0);
-       if (reset) {
+       if (fid_18_23) {
                if (rc)
                        /* If failed, we should try to add the old back. */
                        size = sizeof(*ff);
@@ -674,7 +674,7 @@ static int osd_scrub_get_fid(struct osd_thread_info *info,
                rc = osd_get_idif(info, inode, &info->oti_obj_dentry, fid);
                if (rc == 0) {
                        if (scrub)
-                               /* It is old 2.x (x <= 3) or 1.8 OST-object. */
+                               /* It is 2.3 or older OST-object. */
                                rc = SCRUB_NEXT_OSTOBJ_OLD;
                        return rc;
                }
@@ -685,7 +685,7 @@ static int osd_scrub_get_fid(struct osd_thread_info *info,
                                 * to generate its FID, ignore it directly. */
                                rc = SCRUB_NEXT_CONTINUE;
                        else
-                               /* It is 2.4 OST-object. */
+                               /* It is 2.4 or newer OST-object. */
                                rc = SCRUB_NEXT_OSTOBJ_OLD;
                        return rc;
                }
@@ -2014,7 +2014,7 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev,
 
 /**
  * It scans the /lost+found, and for the OST-object (with filter_fid
- * or filter_fid_old), move them back to its proper /O/<seq>/d<x>.
+ * or filter_fid_18_23), move them back to its proper /O/<seq>/d<x>.
  */
 #ifdef HAVE_FILLDIR_USE_CTX
 static int osd_ios_lf_fill(struct dir_context *buf,
@@ -2649,7 +2649,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
                 * an old device, it can be found and cleared later.
                 *
                 * For the system with "SIF_NO_HANDLE_OLD_FID", we do not
-                * need to check "filter_fid_old" and to convert it to
+                * need to check "filter_fid_18_23" and to convert it to
                 * "filter_fid" for each object, and all the IGIF should
                 * have their FID mapping in OI files already. */
                if (dev->od_maybe_new && rc == -ENOENT)
index 975868a..cc648ff 100644 (file)
@@ -87,6 +87,7 @@ int main(int argc, char *argv[])
        for (i = 1; i < argc; i++) {
                char buf[1024]; /* allow xattr that may be larger */
                struct filter_fid *ff = (void *)buf;
+               static int printed;
                int size;
 
                size = getxattr(argv[i], "trusted.fid", buf,
@@ -146,16 +147,21 @@ int main(int argc, char *argv[])
                }
 
                if (size != sizeof(struct filter_fid) &&
-                   size != sizeof(struct filter_fid_old) &&
-                   size != sizeof(struct lu_fid)) {
-                       fprintf(stderr, "%s: warning: fid larger than expected"
-                               " (%d bytes), recompile?\n", argv[i], size);
-                       continue;
+                   size != sizeof(struct filter_fid_18_23) &&
+                   size != sizeof(struct filter_fid_24_29) &&
+                   size != sizeof(struct filter_fid_210) && !printed) {
+                       fprintf(stderr,
+                               "%s: warning: ffid size is unexpected (%d bytes), recompile?\n",
+                               argv[i], size);
+                       printed = 1;
+
+                       if (size < sizeof(struct filter_fid_24_29))
+                               continue;
                }
 
                printf("%s: ", argv[i]);
-               if (size == sizeof(struct filter_fid_old)) {
-                       struct filter_fid_old *ffo = (void *)buf;
+               if (size == sizeof(struct filter_fid_18_23)) {
+                       struct filter_fid_18_23 *ffo = (void *)buf;
 
                        printf("objid=%llu seq=%llu ",
                               (unsigned long long)__le64_to_cpu(ffo->ff_objid),
@@ -168,7 +174,7 @@ int main(int argc, char *argv[])
                       /* this is stripe_nr actually */
                       __le32_to_cpu(ff->ff_parent.f_stripe_idx));
 
-               if (size >= sizeof(struct filter_fid)) {
+               if (size >= sizeof(struct filter_fid_210)) {
                        struct ost_layout *ol = &ff->ff_layout;
 
                        /* new filter_fid, support PFL */
@@ -183,6 +189,10 @@ int main(int argc, char *argv[])
                                       __le64_to_cpu(ol->ol_comp_start),
                                       __le64_to_cpu(ol->ol_comp_end));
                }
+               if (size >= sizeof(struct filter_fid))
+                       printf(" layout_version=%u range=%u",
+                              __le32_to_cpu(ff->ff_layout_version),
+                              __le32_to_cpu(ff->ff_range));
 
                printf("\n");
        }