Whamcloud - gitweb
LU-10288 lfsck: layout LFSCK for mirrored file 05/32705/5
authorFan Yong <fan.yong@intel.com>
Sat, 14 Jul 2018 21:15:21 +0000 (05:15 +0800)
committerOleg Drokin <green@whamcloud.com>
Mon, 6 Aug 2018 14:34:41 +0000 (14:34 +0000)
This patch makes the layout LFSCK support mirrored files
as follows:

1. Verify a mirrored file's LOV EA and PFID EA, covering all the
   kinds of inconsistency that a non-mirrored file may hit.

2. Rebuild a mirrored file's LOV EA from orphan OST-objects and
   recover each component's status/flags as they were before the
   crash: init, stale, and so on.

3. For a mirrored file with a dangling reference (OST-object),
   it does NOT rebuild the lost OST-object from another replica;
   instead, it either reports the corruption or re-creates an empty
   OST-object, following the same rules as the non-mirrored case.

Some code cleanup and new test cases for LFSCK against mirrored files.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I560746fc2aae40101dcb0e8513b6c7ed54902ec6
Reviewed-on: https://review.whamcloud.com/32705
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
12 files changed:
lustre/include/lustre_swab.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_layout.c
lustre/obdclass/dt_object.c
lustre/osp/osp_object.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanity-lfsck.sh
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index f553fa9..cece542 100644 (file)
@@ -52,6 +52,7 @@
 
 void lustre_swab_orphan_ent(struct lu_orphan_ent *ent);
 void lustre_swab_orphan_ent_v2(struct lu_orphan_ent_v2 *ent);
+void lustre_swab_orphan_ent_v3(struct lu_orphan_ent_v3 *ent);
 void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 void lustre_swab_connect(struct obd_connect_data *ocd);
 void lustre_swab_hsm_user_state(struct hsm_user_state *hus);
index cf39abb..9e5d4f0 100644 (file)
@@ -384,6 +384,23 @@ struct lu_orphan_ent_v2 {
        struct lu_orphan_rec_v2 loe_rec;
 };
 
+struct lu_orphan_rec_v3 {
+       struct lu_orphan_rec    lor_rec;
+       struct ost_layout       lor_layout;
+       /* The OST-object declared layout version in PFID EA.*/
+       __u32                   lor_layout_version;
+       /* The OST-object declared layout range (of version) in PFID EA.*/
+       __u32                   lor_range;
+       __u32                   lor_padding_1;
+       __u64                   lor_padding_2;
+};
+
+struct lu_orphan_ent_v3 {
+       /* The orphan OST-object's FID */
+       struct lu_fid           loe_key;
+       struct lu_orphan_rec_v3 loe_rec;
+};
+
 /** @} lu_fid */
 
 /** \defgroup lu_dir lu_dir
index 3316e4d..cd1b2af 100644 (file)
@@ -624,7 +624,9 @@ struct lov_comp_md_entry_v1 {
        __u32                   lcme_offset;    /* offset of component blob,
                                                   start from lov_comp_md_v1 */
        __u32                   lcme_size;      /* size of component blob */
-       __u64                   lcme_padding[2];
+       __u32                   lcme_layout_gen;
+       __u32                   lcme_padding_1;
+       __u64                   lcme_padding_2;
 } __attribute__((packed));
 
 #define SEQ_ID_MAX             0x0000FFFF
index ac94f8f..a92181f 100644 (file)
@@ -903,7 +903,7 @@ struct lfsck_thread_info {
        struct ldlm_res_id      lti_resid;
        struct filter_fid       lti_ff;
        struct dt_allocation_hint lti_hint;
-       struct lu_orphan_rec_v2 lti_rec;
+       struct lu_orphan_rec_v3 lti_rec;
        struct lov_user_md      lti_lum;
        struct dt_insert_rec    lti_dt_rec;
        struct lu_object_conf   lti_conf;
index d7c938d..cdb46d8 100644 (file)
@@ -328,7 +328,8 @@ out:
 }
 
 static int lfsck_layout_verify_header_v1v3(struct dt_object *obj,
-                                          struct lov_mds_md_v1 *lmm)
+                                          struct lov_mds_md_v1 *lmm,
+                                          __u64 start, __u32 comp_id)
 {
        __u32 magic;
        __u32 pattern;
@@ -353,10 +354,24 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj,
        }
 
        pattern = le32_to_cpu(lmm->lmm_pattern);
-       /* XXX: currently, we only support LOV_PATTERN_RAID0. */
+
+#if 0
+       /* XXX: DoM file verification will be supportted via LU-11081. */
+       if (lov_pattern(pattern) == LOV_PATTERN_MDT) {
+               if (start != 0) {
+                       CDEBUG(D_LFSCK, "The DoM entry for "DFID" is not "
+                              "the first component in the mirror %x/%llu\n",
+                              PFID(lfsck_dto2fid(obj)), comp_id, start);
+
+                       return -EINVAL;
+               }
+       }
+#endif
+
        if (lov_pattern(pattern) != LOV_PATTERN_RAID0) {
                CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u for the file "
-                      DFID"\n", pattern, PFID(lfsck_dto2fid(obj)));
+                      DFID" in the component %x\n",
+                      pattern, PFID(lfsck_dto2fid(obj)), comp_id);
 
                return -EOPNOTSUPP;
        }
@@ -382,7 +397,7 @@ static int lfsck_layout_verify_header(struct dt_object *obj,
                        return -EINVAL;
                }
 
-               for (i = 0; i < count; i++) {
+               for (i = 0; i < count && !rc; i++) {
                        struct lov_comp_md_entry_v1 *lcme =
                                                &lcm->lcm_entries[i];
                        __u64 start = le64_to_cpu(lcme->lcme_extent.e_start);
@@ -411,13 +426,12 @@ static int lfsck_layout_verify_header(struct dt_object *obj,
                        }
 
                        rc = lfsck_layout_verify_header_v1v3(obj,
-                               (struct lov_mds_md_v1 *)((char *)lmm +
-                               le32_to_cpu(lcme->lcme_offset)));
-                       if (rc)
-                               return rc;
+                                       (struct lov_mds_md_v1 *)((char *)lmm +
+                                       le32_to_cpu(lcme->lcme_offset)), start,
+                                       comp_id);
                }
        } else {
-               rc = lfsck_layout_verify_header_v1v3(obj, lmm);
+               rc = lfsck_layout_verify_header_v1v3(obj, lmm, 1, 0);
        }
 
        return rc;
@@ -434,7 +448,7 @@ again:
        if (rc == -ERANGE) {
                rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV);
                if (rc <= 0)
-                       return rc;
+                       return !rc ? -ENODATA : rc;
 
                lu_buf_realloc(buf, rc);
                if (buf->lb_buf == NULL)
@@ -443,11 +457,8 @@ again:
                goto again;
        }
 
-       if (rc == -ENODATA)
-               rc = 0;
-
        if (rc <= 0)
-               return rc;
+               return !rc ? -ENODATA : rc;
 
        if (unlikely(buf->lb_buf == NULL)) {
                lu_buf_alloc(buf, rc);
@@ -1788,12 +1799,13 @@ static int lfsck_layout_new_v1_lovea(const struct lu_env *env,
 }
 
 static int lfsck_layout_new_comp_lovea(const struct lu_env *env,
-                                     struct ost_layout *ol,
-                                     struct dt_object *parent,
-                                     struct lu_buf *buf, __u32 ea_off,
-                                     struct lov_mds_md_v1 **lmm,
-                                     struct lov_ost_data_v1 **objs)
+                                      struct lu_orphan_rec_v3 *rec,
+                                      struct dt_object *parent,
+                                      struct lu_buf *buf, __u32 ea_off,
+                                      struct lov_mds_md_v1 **lmm,
+                                      struct lov_ost_data_v1 **objs)
 {
+       struct ost_layout *ol = &rec->lor_layout;
        struct lov_comp_md_v1 *lcm;
        struct lov_comp_md_entry_v1 *lcme;
        __u32 pattern = LOV_PATTERN_RAID0;
@@ -1808,9 +1820,22 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env,
        lcm = buf->lb_buf;
        lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1);
        lcm->lcm_size = cpu_to_le32(size);
-       lcm->lcm_layout_gen = cpu_to_le32(1);
-       lcm->lcm_flags = 0;
+       if (rec->lor_range) {
+               lcm->lcm_layout_gen = cpu_to_le32(rec->lor_layout_version +
+                                                 rec->lor_range);
+               lcm->lcm_flags = cpu_to_le16(LCM_FL_WRITE_PENDING);
+       } else if (rec->lor_layout_version) {
+               lcm->lcm_layout_gen = cpu_to_le32(rec->lor_layout_version +
+                                                 rec->lor_range);
+               lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE);
+       } else {
+               lcm->lcm_layout_gen = cpu_to_le32(1);
+               lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE);
+       }
        lcm->lcm_entry_count = cpu_to_le16(1);
+       /* Currently, we do not know how many mirrors will be, set it as zero
+        * at the beginning. It will be updated when more mirrors are found. */
+       lcm->lcm_mirror_count = 0;
 
        lcme = &lcm->lcm_entries[0];
        lcme->lcme_id = cpu_to_le32(ol->ol_comp_id);
@@ -1819,6 +1844,7 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env,
        lcme->lcme_extent.e_end = cpu_to_le64(ol->ol_comp_end);
        lcme->lcme_offset = cpu_to_le32(offset);
        lcme->lcme_size = cpu_to_le32(lcme_size);
+       lcme->lcme_layout_gen = lcm->lcm_layout_gen;
        if (ol->ol_stripe_count > 1)
                pattern |= LOV_PATTERN_F_HOLE;
 
@@ -1830,15 +1856,66 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env,
        return size;
 }
 
-static int lfsck_layout_add_comp_comp(const struct lu_env *env,
-                                    struct lfsck_instance *lfsck,
-                                    struct thandle *handle,
-                                    struct ost_layout *ol,
-                                    struct dt_object *parent,
-                                    const struct lu_fid *cfid,
-                                    struct lu_buf *buf, __u32 ost_idx,
-                                    __u32 ea_off, int pos)
+static void lfsck_layout_update_lcm(struct lov_comp_md_v1 *lcm,
+                                   struct lov_comp_md_entry_v1 *lcme,
+                                   __u32 version, __u32 range)
+{
+       struct lov_comp_md_entry_v1 *tmp;
+       __u64 start = le64_to_cpu(lcme->lcme_extent.e_start);
+       __u64 end = le64_to_cpu(lcme->lcme_extent.e_end);
+       __u32 gen = version + range;
+       __u32 tmp_gen;
+       int i;
+       __u16 count = le16_to_cpu(lcm->lcm_entry_count);
+       __u16 flags = le16_to_cpu(lcm->lcm_flags);
+
+       if (!gen)
+               gen = 1;
+       lcme->lcme_layout_gen = cpu_to_le32(gen);
+       if (le32_to_cpu(lcm->lcm_layout_gen) < gen)
+               lcm->lcm_layout_gen = cpu_to_le32(gen);
+
+       if (range)
+               lcm->lcm_flags = cpu_to_le16(LCM_FL_WRITE_PENDING);
+       else if (flags == LCM_FL_NONE && le16_to_cpu(lcm->lcm_mirror_count) > 0)
+               lcm->lcm_flags = cpu_to_le16(LCM_FL_RDONLY);
+
+       for (i = 0; i < count; i++) {
+               tmp = &lcm->lcm_entries[i];
+               if (le64_to_cpu(tmp->lcme_extent.e_end) <= start)
+                       continue;
+
+               if (le64_to_cpu(tmp->lcme_extent.e_start) >= end)
+                       continue;
+
+               if (le32_to_cpu(tmp->lcme_flags) & LCME_FL_STALE)
+                       continue;
+
+               tmp_gen = le32_to_cpu(tmp->lcme_layout_gen);
+               /* "lcme_layout_gen == 0" but without LCME_FL_STALE flag,
+                * then it should be the latest version of all mirrors. */
+               if (tmp_gen == 0 || tmp_gen > gen) {
+                       lcme->lcme_flags = cpu_to_le32(
+                               le32_to_cpu(lcme->lcme_flags) | LCME_FL_STALE);
+                       break;
+               }
+
+               if (tmp_gen < gen)
+                       tmp->lcme_flags = cpu_to_le32(
+                               le32_to_cpu(tmp->lcme_flags) | LCME_FL_STALE);
+       }
+}
+
+static int lfsck_layout_add_comp(const struct lu_env *env,
+                                struct lfsck_instance *lfsck,
+                                struct thandle *handle,
+                                struct lu_orphan_rec_v3 *rec,
+                                struct dt_object *parent,
+                                const struct lu_fid *cfid,
+                                struct lu_buf *buf, __u32 ost_idx,
+                                __u32 ea_off, int pos, bool new_mirror)
 {
+       struct ost_layout *ol = &rec->lor_layout;
        struct lov_comp_md_v1 *lcm = buf->lb_buf;
        struct lov_comp_md_entry_v1 *lcme;
        struct lov_mds_md_v1 *lmm;
@@ -1858,8 +1935,9 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env,
         * have reallocated the buf. */
        lcm = buf->lb_buf;
        lcm->lcm_size = cpu_to_le32(size);
-       le32_add_cpu(&lcm->lcm_layout_gen, 1);
        lcm->lcm_entry_count = cpu_to_le16(count + 1);
+       if (new_mirror)
+               le16_add_cpu(&lcm->lcm_mirror_count, 1);
 
        /* 1. Move the component bodies from [pos, count-1] to [pos+1, count]
         *    with distance of 'added'. */
@@ -1924,6 +2002,10 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env,
                                           ol->ol_stripe_size, ea_off,
                                           pattern, ol->ol_stripe_count);
 
+       /* 6. Update mirror related flags and version. */
+       lfsck_layout_update_lcm(lcm, lcme, rec->lor_layout_version,
+                               rec->lor_range);
+
        rc = lfsck_layout_refill_lovea(env, lfsck, handle, parent, cfid, buf,
                                       lmm, objs, LU_XATTR_REPLACE, ost_idx,
                                       le32_to_cpu(lcm->lcm_size));
@@ -1931,10 +2013,12 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env,
        CDEBUG(D_LFSCK, "%s: layout LFSCK assistant add new COMP for "
               DFID": parent "DFID", OST-index %u, stripe-index %u, "
               "stripe_size %u, stripe_count %u, comp_id %u, comp_start %llu, "
-              "comp_end %llu, %s LOV EA hole: rc = %d\n",
+              "comp_end %llu, layout version %u, range %u, "
+              "%s LOV EA hole: rc = %d\n",
               lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)),
               ost_idx, ea_off, ol->ol_stripe_size, ol->ol_stripe_count,
               ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end,
+              rec->lor_layout_version, rec->lor_range,
               le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_HOLE ?
               "with" : "without", rc);
 
@@ -2014,24 +2098,25 @@ static int lfsck_layout_extend_v1v3_lovea(const struct lu_env *env,
 static int lfsck_layout_update_lovea(const struct lu_env *env,
                                     struct lfsck_instance *lfsck,
                                     struct thandle *handle,
-                                    struct ost_layout *ol,
+                                    struct lu_orphan_rec_v3 *rec,
                                     struct dt_object *parent,
                                     const struct lu_fid *cfid,
                                     struct lu_buf *buf, int fl,
                                     __u32 ost_idx, __u32 ea_off)
 {
+       struct ost_layout *ol = &rec->lor_layout;
        struct lov_mds_md_v1 *lmm = NULL;
        struct lov_ost_data_v1 *objs = NULL;
        int rc = 0;
        ENTRY;
 
        if (ol->ol_comp_id != 0)
-               rc = lfsck_layout_new_comp_lovea(env, ol, parent, buf, ea_off,
-                                               &lmm, &objs);
+               rc = lfsck_layout_new_comp_lovea(env, rec, parent, buf, ea_off,
+                                                &lmm, &objs);
        else
-               rc = lfsck_layout_new_v1_lovea(env, lfsck, ol, parent, buf,
-                                              ea_off, &lmm, &objs);
-
+               rc = lfsck_layout_new_v1_lovea(env, lfsck, &rec->lor_layout,
+                                              parent, buf, ea_off, &lmm,
+                                              &objs);
        if (rc > 0)
                rc = lfsck_layout_refill_lovea(env, lfsck, handle, parent, cfid,
                                               buf, lmm, objs, fl, ost_idx, rc);
@@ -2039,10 +2124,12 @@ static int lfsck_layout_update_lovea(const struct lu_env *env,
        CDEBUG(D_LFSCK, "%s: layout LFSCK assistant created layout EA for "
               DFID": parent "DFID", OST-index %u, stripe-index %u, "
               "stripe_size %u, stripe_count %u, comp_id %u, comp_start %llu, "
-              "comp_end %llu, fl %d, %s LOV EA hole: rc = %d\n",
+              "comp_end %llu, layout version %u, range %u, fl %d, "
+              "%s LOV EA hole: rc = %d\n",
               lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)),
               ost_idx, ea_off, ol->ol_stripe_size, ol->ol_stripe_count,
-              ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, fl,
+              ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end,
+              rec->lor_layout_version, rec->lor_range, fl,
               le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_HOLE ?
               "with" : "without", rc);
 
@@ -2052,7 +2139,8 @@ static int lfsck_layout_update_lovea(const struct lu_env *env,
 static int __lfsck_layout_update_pfid(const struct lu_env *env,
                                      struct dt_object *child,
                                      const struct lu_fid *pfid,
-                                     const struct ost_layout *ol, __u32 offset)
+                                     const struct ost_layout *ol, __u32 offset,
+                                     __u32 version, __u32 range)
 {
        struct dt_device        *dev    = lfsck_obj2dev(child);
        struct filter_fid       *ff     = &lfsck_env_info(env)->lti_ff;
@@ -2067,6 +2155,8 @@ static int __lfsck_layout_update_pfid(const struct lu_env *env,
         * parent MDT-object's layout EA. */
        ff->ff_parent.f_stripe_idx = cpu_to_le32(offset);
        ost_layout_cpu_to_le(&ff->ff_layout, ol);
+       ff->ff_layout_version = cpu_to_le32(version);
+       ff->ff_range = cpu_to_le32(range);
        lfsck_buf_init(&buf, ff, sizeof(*ff));
 
        handle = dt_trans_create(env, dev);
@@ -2101,7 +2191,7 @@ static int lfsck_layout_update_pfid(const struct lu_env *env,
                                    struct dt_object *parent,
                                    struct lu_fid *cfid,
                                    struct dt_device *cdev,
-                                   struct ost_layout *ol, __u32 ea_off)
+                                   struct lu_orphan_rec_v3 *rec, __u32 ea_off)
 {
        struct dt_object        *child;
        int                      rc     = 0;
@@ -2113,7 +2203,9 @@ static int lfsck_layout_update_pfid(const struct lu_env *env,
 
        rc = __lfsck_layout_update_pfid(env, child,
                                        lu_object_fid(&parent->do_lu),
-                                       ol, ea_off);
+                                       &rec->lor_layout, ea_off,
+                                       rec->lor_layout_version,
+                                       rec->lor_range);
        lfsck_object_put(env, child);
 
        RETURN(rc == 0 ? 1 : rc);
@@ -2190,7 +2282,7 @@ static int lfsck_lovea_size(struct ost_layout *ol, __u32 ea_off)
 static int lfsck_layout_recreate_parent(const struct lu_env *env,
                                        struct lfsck_component *com,
                                        struct lfsck_tgt_desc *ltd,
-                                       struct lu_orphan_rec_v2 *rec,
+                                       struct lu_orphan_rec_v3 *rec,
                                        struct lu_fid *cfid,
                                        const char *infix,
                                        const char *type,
@@ -2202,7 +2294,6 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env,
        struct lu_attr                  *la     = &info->lti_la2;
        struct dt_object_format         *dof    = &info->lti_dof;
        struct lfsck_instance           *lfsck  = com->lc_lfsck;
-       struct ost_layout               *ol     = &rec->lor_layout;
        struct lu_fid                   *pfid   = &rec->lor_rec.lor_fid;
        struct lu_fid                   *tfid   = &info->lti_fid3;
        struct dt_device                *dev    = lfsck->li_bottom;
@@ -2264,7 +2355,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env,
         * the stripe(s). The LFSCK will specify the LOV EA via
         * lfsck_layout_update_lovea(). */
 
-       size = lfsck_lovea_size(ol, ea_off);
+       size = lfsck_lovea_size(&rec->lor_layout, ea_off);
        if (ea_buf->lb_len < size) {
                lu_buf_realloc(ea_buf, size);
                if (ea_buf->lb_buf == NULL)
@@ -2341,7 +2432,7 @@ again:
        dt_write_lock(env, pobj, 0);
        rc = dt_create(env, pobj, la, NULL, dof, th);
        if (rc == 0)
-               rc = lfsck_layout_update_lovea(env, lfsck, th, ol, pobj, cfid,
+               rc = lfsck_layout_update_lovea(env, lfsck, th, rec, pobj, cfid,
                        &lov_buf, LU_XATTR_CREATE, ltd->ltd_index, ea_off);
        dt_write_unlock(env, pobj);
        if (rc < 0)
@@ -2358,7 +2449,10 @@ again:
                th = NULL;
 
                /* The 2nd transaction. */
-               rc = __lfsck_layout_update_pfid(env, cobj, pfid, ol, ea_off);
+               rc = __lfsck_layout_update_pfid(env, cobj, pfid,
+                                               &rec->lor_layout, ea_off,
+                                               rec->lor_layout_version,
+                                               rec->lor_range);
        }
 
        GOTO(stop, rc);
@@ -2561,7 +2655,7 @@ put:
 static int lfsck_layout_conflict_create(const struct lu_env *env,
                                        struct lfsck_component *com,
                                        struct lfsck_tgt_desc *ltd,
-                                       struct lu_orphan_rec_v2 *rec,
+                                       struct lu_orphan_rec_v3 *rec,
                                        struct dt_object *parent,
                                        struct lu_fid *cfid,
                                        struct lu_buf *ea_buf,
@@ -2665,7 +2759,7 @@ out:
 static int lfsck_layout_recreate_lovea(const struct lu_env *env,
                                       struct lfsck_component *com,
                                       struct lfsck_tgt_desc *ltd,
-                                      struct lu_orphan_rec_v2 *rec,
+                                      struct lu_orphan_rec_v3 *rec,
                                       struct dt_object *parent,
                                       struct lu_fid *cfid,
                                       __u32 ost_idx, __u32 ea_off)
@@ -2691,8 +2785,10 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env,
        int                       rc            = 0;
        int                       rc1;
        int                       i;
-       __u16                     count;
-       bool                      locked        = false;
+       int pos = 0;
+       __u16 count;
+       bool locked = false;
+       bool new_mirror = true;
        ENTRY;
 
        rc = lfsck_ibits_lock(env, lfsck, parent, &lh,
@@ -2702,11 +2798,12 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env,
                CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to recreate "
                       "LOV EA for "DFID": parent "DFID", OST-index %u, "
                       "stripe-index %u, comp_id %u, comp_start %llu, "
-                      "comp_end %llu: rc = %d\n",
+                      "comp_end %llu, layout version %u, range %u: rc = %d\n",
                       lfsck_lfsck2name(lfsck), PFID(cfid),
                       PFID(lfsck_dto2fid(parent)), ost_idx, ea_off,
                       ol->ol_comp_id, ol->ol_comp_start,
-                      ol->ol_comp_end, rc);
+                      ol->ol_comp_end, rec->lor_layout_version,
+                      rec->lor_range, rc);
 
                RETURN(rc);
        }
@@ -2777,7 +2874,7 @@ again:
 
                LASSERT(buf->lb_len >= lovea_size);
 
-               rc = lfsck_layout_update_lovea(env, lfsck, handle, ol, parent,
+               rc = lfsck_layout_update_lovea(env, lfsck, handle, rec, parent,
                                               cfid, buf, fl, ost_idx, ea_off);
 
                GOTO(unlock_parent, rc);
@@ -2793,28 +2890,41 @@ again:
 
                LASSERT(buf->lb_len >= lovea_size);
 
-               rc = lfsck_layout_update_lovea(env, lfsck, handle, ol, parent,
+               rc = lfsck_layout_update_lovea(env, lfsck, handle, rec, parent,
                                               cfid, buf, fl, ost_idx, ea_off);
 
                GOTO(unlock_parent, rc);
        }
 
        /* For other unknown magic/pattern, keep the current LOV EA. */
-       if (rc1 != 0)
+       if (rc1 == -EOPNOTSUPP)
+               GOTO(unlock_parent, rc1 = 0);
+
+       if (rc1)
                GOTO(unlock_parent, rc = rc1);
 
        magic = le32_to_cpu(lmm->lmm_magic);
        if (magic == LOV_MAGIC_COMP_V1) {
                __u64 start;
                __u64 end;
+               __u16 mirror_id0 = mirror_id_of(ol->ol_comp_id);
+               __u16 mirror_id1;
 
                lcm = buf->lb_buf;
                count = le16_to_cpu(lcm->lcm_entry_count);
-               for (i = 0; i < count; i++) {
+               for (i = 0; i < count; pos = ++i) {
                        lcme = &lcm->lcm_entries[i];
                        start = le64_to_cpu(lcme->lcme_extent.e_start);
                        end = le64_to_cpu(lcme->lcme_extent.e_end);
+                       mirror_id1 = mirror_id_of(le32_to_cpu(lcme->lcme_id));
+
+                       if (mirror_id0 > mirror_id1)
+                               continue;
 
+                       if (mirror_id0 < mirror_id1)
+                               break;
+
+                       new_mirror = false;
                        if (end <= ol->ol_comp_start)
                                continue;
 
@@ -2827,8 +2937,8 @@ again:
                        goto further;
                }
 
-               rc = lfsck_layout_add_comp_comp(env, lfsck, handle, ol, parent,
-                                              cfid, buf, ost_idx, ea_off, i);
+               rc = lfsck_layout_add_comp(env, lfsck, handle, rec, parent,
+                               cfid, buf, ost_idx, ea_off, pos, new_mirror);
 
                GOTO(unlock_parent, rc);
        }
@@ -2851,8 +2961,14 @@ further:
                        goto again;
                }
 
-               if (lcme && !(flags & LCME_FL_INIT))
+               if (lcm) {
+                       LASSERT(lcme);
+
                        lcme->lcme_flags = cpu_to_le32(flags | LCME_FL_INIT);
+                       lfsck_layout_update_lcm(lcm, lcme,
+                                               rec->lor_layout_version,
+                                               rec->lor_range);
+               }
 
                rc = lfsck_layout_extend_v1v3_lovea(env, lfsck, handle, ol,
                                        parent, cfid, buf, ost_idx, ea_off);
@@ -2915,11 +3031,12 @@ further:
                                        GOTO(unlock_parent, rc = -EINVAL);
                                }
 
-                               le32_add_cpu(&lcm->lcm_layout_gen, 1);
                                lovea_size = le32_to_cpu(lcm->lcm_size);
-                               if (!(flags & LCME_FL_INIT))
-                                       lcme->lcme_flags = cpu_to_le32(flags |
-                                                               LCME_FL_INIT);
+                               lcme->lcme_flags = cpu_to_le32(flags |
+                                                              LCME_FL_INIT);
+                               lfsck_layout_update_lcm(lcm, lcme,
+                                                       rec->lor_layout_version,
+                                                       rec->lor_range);
                        }
 
                        LASSERTF(buf->lb_len >= lovea_size,
@@ -2969,7 +3086,7 @@ further:
                                lfsck_ibits_unlock(&lh, LCK_EX);
                                rc = lfsck_layout_update_pfid(env, com, parent,
                                                        cfid, ltd->ltd_tgt,
-                                                       ol, i);
+                                                       rec, i);
 
                                CDEBUG(D_LFSCK, "%s layout LFSCK assistant "
                                       "updated OST-object's pfid for "DFID
@@ -3019,7 +3136,7 @@ unlock_layout:
 static int lfsck_layout_scan_orphan_one(const struct lu_env *env,
                                        struct lfsck_component *com,
                                        struct lfsck_tgt_desc *ltd,
-                                       struct lu_orphan_rec_v2 *rec,
+                                       struct lu_orphan_rec_v3 *rec,
                                        struct lu_fid *cfid)
 {
        struct lfsck_layout     *lo     = com->lc_file_ram;
@@ -3158,7 +3275,7 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env,
 
        do {
                struct dt_key           *key;
-               struct lu_orphan_rec_v2 *rec = &info->lti_rec;
+               struct lu_orphan_rec_v3 *rec = &info->lti_rec;
 
                if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val) &&
                    unlikely(!thread_is_running(&lfsck->li_thread)))
@@ -3197,9 +3314,10 @@ log:
        return rc > 0 ? 0 : rc;
 }
 
-static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol,
+static int lfsck_lov2layout(struct lov_mds_md_v1 *lmm, struct filter_fid *ff,
                            __u32 comp_id)
 {
+       struct ost_layout *ol = &ff->ff_layout;
        __u32 magic = le32_to_cpu(lmm->lmm_magic);
        int rc = 0;
        ENTRY;
@@ -3210,6 +3328,8 @@ static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol,
                ol->ol_comp_start = 0;
                ol->ol_comp_end = 0;
                ol->ol_comp_id = 0;
+               ff->ff_layout_version = 0;
+               ff->ff_range = 0;
        } else if (magic == LOV_MAGIC_COMP_V1) {
                struct lov_comp_md_v1 *lcm = (struct lov_comp_md_v1 *)lmm;
                struct lov_comp_md_entry_v1 *lcme = NULL;
@@ -3236,6 +3356,8 @@ static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol,
                ol->ol_comp_start = le64_to_cpu(lcme->lcme_extent.e_start);
                ol->ol_comp_end = le64_to_cpu(lcme->lcme_extent.e_end);
                ol->ol_comp_id = le32_to_cpu(lcme->lcme_id);
+               ff->ff_layout_version = le32_to_cpu(lcme->lcme_layout_gen);
+               ff->ff_range = 0;
        } else {
                GOTO(out, rc = -EINVAL);
        }
@@ -3279,7 +3401,6 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env,
 {
        struct lfsck_thread_info *info = lfsck_env_info(env);
        struct filter_fid *ff = &info->lti_ff;
-       struct ost_layout *ol = &ff->ff_layout;
        struct dt_object_format *dof = &info->lti_dof;
        struct lu_attr *la = &info->lti_la;
        struct lfsck_instance *lfsck = com->lc_lfsck;
@@ -3319,10 +3440,12 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env,
        ff->ff_parent.f_stripe_idx = cpu_to_le32(ea_off);
 
        rc = lfsck_layout_get_lovea(env, parent, tbuf);
-       if (rc < 0)
+       if (unlikely(rc == -ENODATA))
+               rc = 0;
+       if (rc <= 0)
                GOTO(unlock1, rc);
 
-       rc = lfsck_lmm2layout(tbuf->lb_buf, ol, comp_id);
+       rc = lfsck_lov2layout(tbuf->lb_buf, ff, comp_id);
        if (rc)
                GOTO(unlock1, rc);
 
@@ -3359,6 +3482,8 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env,
                int idx2;
 
                rc = lfsck_layout_get_lovea(env, parent, lovea);
+               if (unlikely(rc == -ENODATA))
+                       rc = 0;
                if (rc <= 0)
                        GOTO(unlock2, rc);
 
@@ -3531,7 +3656,6 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env,
 {
        struct lfsck_thread_info        *info   = lfsck_env_info(env);
        struct filter_fid               *ff     = &info->lti_ff;
-       struct ost_layout               *ol     = &ff->ff_layout;
        struct dt_object                *child  = llr->llr_child;
        struct dt_device                *dev    = lfsck_obj2dev(child);
        const struct lu_fid             *tfid   = lu_object_fid(&parent->do_lu);
@@ -3556,10 +3680,12 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env,
        ff->ff_parent.f_stripe_idx = cpu_to_le32(llr->llr_lov_idx);
 
        rc = lfsck_layout_get_lovea(env, parent, tbuf);
-       if (rc < 0)
+       if (unlikely(rc == -ENODATA))
+               rc = 0;
+       if (rc <= 0)
                GOTO(unlock1, rc);
 
-       rc = lfsck_lmm2layout(tbuf->lb_buf, ol, llr->llr_comp_id);
+       rc = lfsck_lov2layout(tbuf->lb_buf, ff, llr->llr_comp_id);
        if (rc)
                GOTO(unlock1, rc);
 
@@ -3752,8 +3878,10 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env,
                GOTO(unlock, rc = 0);
 
        rc = lfsck_layout_get_lovea(env, parent, buf);
-       if (unlikely(!rc || rc == -ENODATA))
-               GOTO(unlock, rc = 0);
+       if (unlikely(rc == -ENODATA))
+               rc = 0;
+       if (rc <= 0)
+               GOTO(unlock, rc);
 
        lmm = buf->lb_buf;
        magic = le32_to_cpu(lmm->lmm_magic);
@@ -3955,9 +4083,12 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
         * is in such layout. If yes, it is multiple referenced, otherwise it
         * is unmatched referenced case. */
        rc = lfsck_layout_get_lovea(env, tobj, buf);
-       if (rc == 0 || rc == -ENOENT)
+       if (rc == 0 || rc == -ENODATA || rc == -ENOENT)
                GOTO(out, rc = LLIT_UNMATCHED_PAIR);
 
+       if (unlikely(rc == -EOPNOTSUPP))
+               GOTO(out, rc = LLIT_NONE);
+
        if (rc < 0)
                GOTO(out, rc);
 
@@ -4745,9 +4876,6 @@ static int lfsck_layout_master_check_pairs(const struct lu_env *env,
        if (rc < 0)
                GOTO(unlock, rc);
 
-       if (rc == 0)
-               GOTO(unlock, rc = -ENODATA);
-
        lmm = buf->lb_buf;
        magic = le32_to_cpu(lmm->lmm_magic);
        if (magic == LOV_MAGIC_COMP_V1) {
@@ -4900,6 +5028,8 @@ static int lfsck_layout_slave_repair_pfid(const struct lu_env *env,
 
        rc = __lfsck_layout_update_pfid(env, obj, &lrl->lrl_ff_client.ff_parent,
                                        &lrl->lrl_ff_client.ff_layout,
+                                       lrl->lrl_ff_client.ff_layout_version,
+                                       lrl->lrl_ff_client.ff_range,
                                        lrl->lrl_ff_client.ff_parent.f_ver);
 
        GOTO(unlock, rc);
@@ -5434,10 +5564,12 @@ again:
                GOTO(out, rc = 0);
 
        rc = lfsck_layout_get_lovea(env, obj, buf);
-       if (rc <= 0)
+       if (rc == -EINVAL || rc == -ENODATA || rc == -EOPNOTSUPP)
                /* Skip bad lov EA during the 1st cycle scanning, and
                 * try to recover it via orphan in the 2nd scanning. */
-               GOTO(out, rc = (rc == -EINVAL ? 0 : rc));
+               rc = 0;
+       if (rc <= 0)
+               GOTO(out, rc);
 
        size = rc;
        lmm = buf->lb_buf;
@@ -6791,7 +6923,7 @@ struct lfsck_orphan_it {
        struct lfsck_rbtree_node         *loi_lrn;
        struct lfsck_layout_slave_target *loi_llst;
        struct lu_fid                     loi_key;
-       struct lu_orphan_rec_v2           loi_rec;
+       struct lu_orphan_rec_v3           loi_rec;
        __u64                             loi_hash;
        unsigned int                      loi_over:1;
 };
@@ -7047,7 +7179,7 @@ static int lfsck_orphan_it_next(const struct lu_env *env,
        struct lu_attr                  *la     = &info->lti_la;
        struct lfsck_orphan_it          *it     = (struct lfsck_orphan_it *)di;
        struct lu_fid                   *key    = &it->loi_key;
-       struct lu_orphan_rec_v2         *rec    = &it->loi_rec;
+       struct lu_orphan_rec_v3         *rec    = &it->loi_rec;
        struct ost_layout               *ol     = &rec->lor_layout;
        struct lfsck_component          *com    = it->loi_com;
        struct lfsck_instance           *lfsck  = com->lc_lfsck;
@@ -7188,6 +7320,8 @@ again1:
                        rec->lor_rec.lor_uid = la->la_uid;
                        rec->lor_rec.lor_gid = la->la_gid;
                        memset(ol, 0, sizeof(*ol));
+                       rec->lor_layout_version = 0;
+                       rec->lor_range = 0;
 
                        GOTO(out, rc = 0);
                }
@@ -7223,13 +7357,18 @@ again1:
        rec->lor_rec.lor_uid = la->la_uid;
        rec->lor_rec.lor_gid = la->la_gid;
        ost_layout_le_to_cpu(ol, &ff->ff_layout);
+       rec->lor_layout_version =
+               le32_to_cpu(ff->ff_layout_version) & ~LU_LAYOUT_RESYNC;
+       rec->lor_range = le32_to_cpu(ff->ff_range);
 
        CDEBUG(D_LFSCK, "%s: return orphan "DFID", PFID "DFID", owner %u:%u, "
               "stripe size %u, stripe count %u, COMP id %u, COMP start %llu, "
-              "COMP end %llu\n", lfsck_lfsck2name(com->lc_lfsck), PFID(key),
+              "COMP end %llu, layout version %u, range %u\n",
+              lfsck_lfsck2name(com->lc_lfsck), PFID(key),
               PFID(&rec->lor_rec.lor_fid), rec->lor_rec.lor_uid,
               rec->lor_rec.lor_gid, ol->ol_stripe_size, ol->ol_stripe_count,
-              ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end);
+              ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end,
+              rec->lor_layout_version, rec->lor_range);
 
        GOTO(out, rc = 0);
 
@@ -7292,7 +7431,7 @@ static int lfsck_orphan_it_rec(const struct lu_env *env,
 {
        struct lfsck_orphan_it *it = (struct lfsck_orphan_it *)di;
 
-       *(struct lu_orphan_rec_v2 *)rec = it->loi_rec;
+       *(struct lu_orphan_rec_v3 *)rec = it->loi_rec;
 
        return 0;
 }
index 5a8241a..d52bcc5 100644 (file)
@@ -587,8 +587,8 @@ const struct dt_index_features dt_lfsck_layout_orphan_features = {
        .dif_flags              = 0,
        .dif_keysize_min        = sizeof(struct lu_fid),
        .dif_keysize_max        = sizeof(struct lu_fid),
-       .dif_recsize_min        = sizeof(struct lu_orphan_rec_v2),
-       .dif_recsize_max        = sizeof(struct lu_orphan_rec_v2),
+       .dif_recsize_min        = sizeof(struct lu_orphan_rec_v3),
+       .dif_recsize_max        = sizeof(struct lu_orphan_rec_v3),
        .dif_ptrsize            = 4
 };
 EXPORT_SYMBOL(dt_lfsck_layout_orphan_features);
index 3e1edc2..f1a14fb 100644 (file)
@@ -1573,7 +1573,9 @@ int osp_declare_destroy(const struct lu_env *env, struct dt_object *dt,
        ENTRY;
 
        LASSERT(!osp->opd_connect_mdt);
-       rc = osp_sync_declare_add(env, o, MDS_UNLINK64_REC, th);
+
+       if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ))
+               rc = osp_sync_declare_add(env, o, MDS_UNLINK64_REC, th);
 
        RETURN(rc);
 }
@@ -1607,11 +1609,14 @@ static int osp_destroy(const struct lu_env *env, struct dt_object *dt,
        o->opo_non_exist = 1;
 
        LASSERT(!osp->opd_connect_mdt);
-       /* once transaction is committed put proper command on
-        * the queue going to our OST. */
-       rc = osp_sync_add(env, o, MDS_UNLINK64_REC, th, NULL);
-       if (rc < 0)
-               RETURN(rc);
+
+       if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ)) {
+               /* once transaction is committed put proper command on
+                * the queue going to our OST. */
+               rc = osp_sync_add(env, o, MDS_UNLINK64_REC, th, NULL);
+               if (rc < 0)
+                       RETURN(rc);
+       }
 
        /* not needed in cache any more */
        set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags);
@@ -1973,6 +1978,13 @@ again:
                it->ooi_pos_ent++;
                if (it->ooi_pos_ent < idxpage->lip_nr) {
                        if (it->ooi_rec_size ==
+                                       sizeof(struct lu_orphan_rec_v3)) {
+                               it->ooi_ent =
+                               (struct lu_orphan_ent_v3 *)idxpage->lip_entries+
+                                                       it->ooi_pos_ent;
+                               if (it->ooi_swab)
+                                       lustre_swab_orphan_ent_v3(it->ooi_ent);
+                       } else if (it->ooi_rec_size ==
                                        sizeof(struct lu_orphan_rec_v2)) {
                                it->ooi_ent =
                                (struct lu_orphan_ent_v2 *)idxpage->lip_entries+
@@ -2031,7 +2043,13 @@ static int osp_orphan_it_rec(const struct lu_env *env, const struct dt_it *di,
        struct osp_it *it = (struct osp_it *)di;
 
        if (likely(it->ooi_ent)) {
-               if (it->ooi_rec_size == sizeof(struct lu_orphan_rec_v2)) {
+               if (it->ooi_rec_size == sizeof(struct lu_orphan_rec_v3)) {
+                       struct lu_orphan_ent_v3 *ent =
+                               (struct lu_orphan_ent_v3 *)it->ooi_ent;
+
+                       *(struct lu_orphan_rec_v3 *)rec = ent->loe_rec;
+               } else if (it->ooi_rec_size ==
+                               sizeof(struct lu_orphan_rec_v2)) {
                        struct lu_orphan_ent_v2 *ent =
                                (struct lu_orphan_ent_v2 *)it->ooi_ent;
 
index d07c952..21163e6 100644 (file)
@@ -2312,7 +2312,9 @@ void lustre_swab_lov_comp_md_v1(struct lov_comp_md_v1 *lum)
                __swab64s(&ent->lcme_extent.e_end);
                __swab32s(&ent->lcme_offset);
                __swab32s(&ent->lcme_size);
-               CLASSERT(offsetof(typeof(*ent), lcme_padding) != 0);
+               __swab32s(&ent->lcme_layout_gen);
+               CLASSERT(offsetof(typeof(*ent), lcme_padding_1) != 0);
+               CLASSERT(offsetof(typeof(*ent), lcme_padding_2) != 0);
 
                v1 = (struct lov_user_md_v1 *)((char *)lum + off);
                stripe_count = v1->lmm_stripe_count;
@@ -2842,6 +2844,18 @@ void lustre_swab_orphan_ent_v2(struct lu_orphan_ent_v2 *ent)
 }
 EXPORT_SYMBOL(lustre_swab_orphan_ent_v2);
 
+void lustre_swab_orphan_ent_v3(struct lu_orphan_ent_v3 *ent)
+{
+       lustre_swab_lu_fid(&ent->loe_key);
+       lustre_swab_orphan_rec(&ent->loe_rec.lor_rec);
+       lustre_swab_ost_layout(&ent->loe_rec.lor_layout);
+       __swab32s(&ent->loe_rec.lor_layout_version);
+       __swab32s(&ent->loe_rec.lor_range);
+       CLASSERT(offsetof(typeof(ent->loe_rec), lor_padding_1) != 0);
+       CLASSERT(offsetof(typeof(ent->loe_rec), lor_padding_2) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_orphan_ent_v3);
+
 void lustre_swab_ladvise(struct lu_ladvise *ladvise)
 {
        __swab16s(&ladvise->lla_advice);
index 631cdef..3aa9b98 100644 (file)
@@ -1732,10 +1732,18 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_size));
        LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_size) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_size));
-       LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding) == 32, "found %lld\n",
-                (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding));
-       LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding) == 16, "found %lld\n",
-                (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding));
+       LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_layout_gen) == 32, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_layout_gen));
+       LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_layout_gen) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_layout_gen));
+       LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_1) == 36, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_1));
+       LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_1));
+       LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_2) == 40, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_2));
+       LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_2) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_2));
        LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n",
                (unsigned)LCME_FL_INIT);
        LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n",
index b39a59a..56fab70 100644 (file)
@@ -5204,6 +5204,323 @@ test_35()
 }
 run_test 35 "LFSCK can rebuild the lost agent entry"
 
+# It will be replaced by "lfs getstripe -N" via LU-11124.
+get_mirrors_count() {
+       local mirrors=$($LFS getstripe $1 |
+                       awk '/lcm_mirror_count/ { print $2 }')
+       echo $mirrors
+}
+
+test_36a() {
+       [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return
+
+       echo "#####"
+       echo "The target MDT-object's LOV EA corrupted as to lose one of the "
+       echo "mirrors information. The layout LFSCK should rebuild the LOV EA "
+       echo "with the PFID EA of related OST-object(s) belong to the mirror."
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
+               -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f0 ||
+               error "(0) Fail to create mirror file $DIR/$tdir/f0"
+       $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
+               -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f1 ||
+               error "(1) Fail to create mirror file $DIR/$tdir/f1"
+       $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
+               -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f2 ||
+               error "(2) Fail to create mirror file $DIR/$tdir/f2"
+
+       dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 ||
+               error "(3) Fail to write $DIR/$tdir/f0"
+       dd if=/dev/zero of=$DIR/$tdir/f1 bs=1M count=4 ||
+               error "(4) Fail to write $DIR/$tdir/f1"
+       dd if=/dev/zero of=$DIR/$tdir/f2 bs=1M count=4 ||
+               error "(5) Fail to write $DIR/$tdir/f2"
+
+       $LFS mirror resync $DIR/$tdir/f0 ||
+               error "(6) Fail to resync $DIR/$tdir/f0"
+       $LFS mirror resync $DIR/$tdir/f1 ||
+               error "(7) Fail to resync $DIR/$tdir/f1"
+       $LFS mirror resync $DIR/$tdir/f2 ||
+               error "(8) Fail to resync $DIR/$tdir/f2"
+
+       cancel_lru_locks mdc
+       cancel_lru_locks osc
+
+       $LFS getstripe $DIR/$tdir/f0 ||
+               error "(9) Fail to getstripe for $DIR/$tdir/f0"
+       $LFS getstripe $DIR/$tdir/f1 ||
+               error "(10) Fail to getstripe for $DIR/$tdir/f1"
+       $LFS getstripe $DIR/$tdir/f2 ||
+               error "(11) Fail to getstripe for $DIR/$tdir/f2"
+
+       echo "Inject failure, to simulate the case of missing one mirror in LOV"
+       #define OBD_FAIL_LFSCK_LOST_MDTOBJ      0x1616
+       do_facet mds1 $LCTL set_param fail_loc=0x1616
+
+       $LFS mirror split --mirror-id 1 -d $DIR/$tdir/f0 ||
+               error "(12) Fail to split 1st mirror from $DIR/$tdir/f0"
+       $LFS mirror split --mirror-id 2 -d $DIR/$tdir/f1 ||
+               error "(13) Fail to split 2nd mirror from $DIR/$tdir/f1"
+       $LFS mirror split --mirror-id 3 -d $DIR/$tdir/f2 ||
+               error "(14) Fail to split 3rd mirror from $DIR/$tdir/f2"
+
+       sync
+       sleep 2
+       do_facet mds1 $LCTL set_param fail_loc=0
+
+       $LFS getstripe $DIR/$tdir/f0 | grep "lcme_mirror_id:.*1" &&
+               error "(15) The 1st of mirror is not destroyed"
+       $LFS getstripe $DIR/$tdir/f1 | grep "lcme_mirror_id:.*2" &&
+               error "(16) The 2nd of mirror is not destroyed"
+       $LFS getstripe $DIR/$tdir/f2 | grep "lcme_mirror_id:.*3" &&
+               error "(17) The 3rd of mirror is not destroyed"
+
+       local mirrors
+
+       mirrors=$(get_mirrors_count $DIR/$tdir/f0)
+       [ $mirrors -eq 2 ] || error "(18) $DIR/$tdir/f0 has $mirrors mirrors"
+       mirrors=$(get_mirrors_count $DIR/$tdir/f1)
+       [ $mirrors -eq 2 ] || error "(19) $DIR/$tdir/f1 has $mirrors mirrors"
+       mirrors=$(get_mirrors_count $DIR/$tdir/f2)
+       [ $mirrors -eq 2 ] || error "(20) $DIR/$tdir/f2 has $mirrors mirrors"
+
+       echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
+       $START_LAYOUT -r -o || error "(21) Fail to start LFSCK for layout!"
+
+       for k in $(seq $MDSCOUNT); do
+               # The LFSCK status query interval is 30 seconds. For the case
+               # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
+               # time to guarantee the status sync up.
+               wait_update_facet mds${k} "$LCTL get_param -n \
+                       mdd.$(facet_svc mds${k}).lfsck_layout |
+                       awk '/^status/ { print \\\$2 }'" "completed" 32 ||
+                       error "(22) MDS${k} is not the expected 'completed'"
+       done
+
+       for k in $(seq $OSTCOUNT); do
+               local cur_status=$(do_facet ost${k} $LCTL get_param -n \
+                               obdfilter.$(facet_svc ost${k}).lfsck_layout |
+                               awk '/^status/ { print $2 }')
+               [ "$cur_status" == "completed" ] ||
+               error "(23) OST${k} Expect 'completed', but got '$cur_status'"
+       done
+
+       local repaired=$(do_facet mds1 $LCTL get_param -n \
+                        mdd.$(facet_svc mds1).lfsck_layout |
+                        awk '/^repaired_orphan/ { print $2 }')
+       [ $repaired -eq 9 ] ||
+               error "(24) Expect 9 fixed on mds1, but got: $repaired"
+
+       mirrors=$(get_mirrors_count $DIR/$tdir/f0)
+       [ $mirrors -eq 3 ] || error "(25) $DIR/$tdir/f0 has $mirrors mirrors"
+       mirrors=$(get_mirrors_count $DIR/$tdir/f1)
+       [ $mirrors -eq 3 ] || error "(26) $DIR/$tdir/f1 has $mirrors mirrors"
+       mirrors=$(get_mirrors_count $DIR/$tdir/f2)
+       [ $mirrors -eq 3 ] || error "(27) $DIR/$tdir/f2 has $mirrors mirrors"
+
+       $LFS getstripe $DIR/$tdir/f0 | grep "lcme_mirror_id:.*1" || {
+               $LFS getstripe $DIR/$tdir/f0
+               error "(28) The 1st of mirror is not recovered"
+       }
+
+       $LFS getstripe $DIR/$tdir/f1 | grep "lcme_mirror_id:.*2" || {
+               $LFS getstripe $DIR/$tdir/f1
+               error "(29) The 2nd of mirror is not recovered"
+       }
+
+       $LFS getstripe $DIR/$tdir/f2 | grep "lcme_mirror_id:.*3" || {
+               $LFS getstripe $DIR/$tdir/f2
+               error "(30) The 3rd of mirror is not recovered"
+       }
+}
+run_test 36a "rebuild LOV EA for mirrored file (1)"
+
+test_36b() {
+       [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return
+
+       echo "#####"
+       echo "The mirrored file lost its MDT-object, but relatd OST-objects "
+       echo "are still there. The layout LFSCK should rebuild the LOV EA "
+       echo "with the PFID EA of related OST-object(s) belong to the file. "
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
+               -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f0 ||
+               error "(0) Fail to create mirror file $DIR/$tdir/f0"
+
+       local fid=$($LFS path2fid $DIR/$tdir/f0)
+
+       dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 ||
+               error "(1) Fail to write $DIR/$tdir/f0"
+       $LFS mirror resync $DIR/$tdir/f0 ||
+               error "(2) Fail to resync $DIR/$tdir/f0"
+
+       cancel_lru_locks mdc
+       cancel_lru_locks osc
+
+       $LFS getstripe $DIR/$tdir/f0 ||
+               error "(3) Fail to getstripe for $DIR/$tdir/f0"
+
+       echo "Inject failure, to simulate the case of missing the MDT-object"
+       #define OBD_FAIL_LFSCK_LOST_MDTOBJ      0x1616
+       do_facet mds1 $LCTL set_param fail_loc=0x1616
+       rm -f $DIR/$tdir/f0 || error "(4) Fail to remove $DIR/$tdir/f0"
+
+       sync
+       sleep 2
+       do_facet mds1 $LCTL set_param fail_loc=0
+
+       echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
+       $START_LAYOUT -r -o || error "(5) Fail to start LFSCK for layout!"
+
+       for k in $(seq $MDSCOUNT); do
+               # The LFSCK status query interval is 30 seconds. For the case
+               # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
+               # time to guarantee the status sync up.
+               wait_update_facet mds${k} "$LCTL get_param -n \
+                       mdd.$(facet_svc mds${k}).lfsck_layout |
+                       awk '/^status/ { print \\\$2 }'" "completed" 32 ||
+                       error "(6) MDS${k} is not the expected 'completed'"
+       done
+
+       for k in $(seq $OSTCOUNT); do
+               local cur_status=$(do_facet ost${k} $LCTL get_param -n \
+                               obdfilter.$(facet_svc ost${k}).lfsck_layout |
+                               awk '/^status/ { print $2 }')
+               [ "$cur_status" == "completed" ] ||
+               error "(7) OST${k} Expect 'completed', but got '$cur_status'"
+       done
+
+       local count=$(do_facet mds1 $LCTL get_param -n \
+                     mdd.$(facet_svc mds1).lfsck_layout |
+                     awk '/^repaired_orphan/ { print $2 }')
+       [ $count -eq 9 ] || error "(8) Expect 9 fixed on mds1, but got: $count"
+
+       local name=$MOUNT/.lustre/lost+found/MDT0000/${fid}-R-0
+       count=$($LFS getstripe $name | awk '/lcm_mirror_count/ { print $2 }')
+       [ $count -eq 3 ] || error "(9) $DIR/$tdir/f0 has $count mirrors"
+
+       count=$($LFS getstripe $name | awk '/lcm_entry_count/ { print $2 }')
+       [ $count -eq 6 ] || error "(10) $DIR/$tdir/f0 has $count entries"
+
+       $LFS getstripe $name | grep "lcme_mirror_id:.*1" || {
+               $LFS getstripe $name
+               error "(11) The 1st of mirror is not recovered"
+       }
+
+       $LFS getstripe $name | grep "lcme_mirror_id:.*2" || {
+               $LFS getstripe $name
+               error "(12) The 2nd of mirror is not recovered"
+       }
+
+       $LFS getstripe $name | grep "lcme_mirror_id:.*3" || {
+               $LFS getstripe $name
+               error "(13) The 3rd of mirror is not recovered"
+       }
+}
+run_test 36b "rebuild LOV EA for mirrored file (2)"
+
+test_36c() {
+       [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return
+
+       echo "#####"
+       echo "The mirrored file has been modified, not resynced yet, then "
+       echo "lost its MDT-object, but relatd OST-objects are still there. "
+       echo "The layout LFSCK should rebuild the LOV EA and relatd status "
+       echo "with the PFID EA of related OST-object(s) belong to the file. "
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \
+               $DIR/$tdir/f0 ||
+               error "(0) Fail to create mirror file $DIR/$tdir/f0"
+
+       local fid=$($LFS path2fid $DIR/$tdir/f0)
+
+       # The 1st dd && resync makes all related OST-objects have been written
+       dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 ||
+               error "(1.1) Fail to write $DIR/$tdir/f0"
+       $LFS mirror resync $DIR/$tdir/f0 ||
+               error "(1.2) Fail to resync $DIR/$tdir/f0"
+       # The 2nd dd makes one mirror to be stale
+       dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 ||
+               error "(1.3) Fail to write $DIR/$tdir/f0"
+
+       cancel_lru_locks mdc
+       cancel_lru_locks osc
+
+       $LFS getstripe $DIR/$tdir/f0 ||
+               error "(2) Fail to getstripe for $DIR/$tdir/f0"
+
+       local saved_flags1=$($LFS getstripe $DIR/$tdir/f0 | head -n 10 |
+                            awk '/lcme_flags/ { print $2 }')
+       local saved_flags2=$($LFS getstripe $DIR/$tdir/f0 | tail -n 10 |
+                            awk '/lcme_flags/ { print $2 }')
+
+       echo "Inject failure, to simulate the case of missing the MDT-object"
+       #define OBD_FAIL_LFSCK_LOST_MDTOBJ      0x1616
+       do_facet mds1 $LCTL set_param fail_loc=0x1616
+       rm -f $DIR/$tdir/f0 || error "(3) Fail to remove $DIR/$tdir/f0"
+
+       sync
+       sleep 2
+       do_facet mds1 $LCTL set_param fail_loc=0
+
+       echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
+       $START_LAYOUT -r -o || error "(4) Fail to start LFSCK for layout!"
+
+       for k in $(seq $MDSCOUNT); do
+               # The LFSCK status query internal is 30 seconds. For the case
+               # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
+               # time to guarantee the status sync up.
+               wait_update_facet mds${k} "$LCTL get_param -n \
+                       mdd.$(facet_svc mds${k}).lfsck_layout |
+                       awk '/^status/ { print \\\$2 }'" "completed" 32 ||
+                       error "(5) MDS${k} is not the expected 'completed'"
+       done
+
+       for k in $(seq $OSTCOUNT); do
+               local cur_status=$(do_facet ost${k} $LCTL get_param -n \
+                               obdfilter.$(facet_svc ost${k}).lfsck_layout |
+                               awk '/^status/ { print $2 }')
+               [ "$cur_status" == "completed" ] ||
+               error "(6) OST${k} Expect 'completed', but got '$cur_status'"
+       done
+
+       local count=$(do_facet mds1 $LCTL get_param -n \
+                     mdd.$(facet_svc mds1).lfsck_layout |
+                     awk '/^repaired_orphan/ { print $2 }')
+       [ $count -eq 6 ] || error "(7) Expect 9 fixed on mds1, but got: $count"
+
+       local name=$MOUNT/.lustre/lost+found/MDT0000/${fid}-R-0
+       count=$($LFS getstripe $name | awk '/lcm_mirror_count/ { print $2 }')
+       [ $count -eq 2 ] || error "(8) $DIR/$tdir/f0 has $count mirrors"
+
+       count=$($LFS getstripe $name | awk '/lcm_entry_count/ { print $2 }')
+       [ $count -eq 4 ] || error "(9) $DIR/$tdir/f0 has $count entries"
+
+       local flags=$($LFS getstripe $name | head -n 10 |
+               awk '/lcme_flags/ { print $2 }')
+       [ "$flags" == "$saved_flags1" ] || {
+               $LFS getstripe $name
+               error "(10) expect flags $saved_flags1, got $flags"
+       }
+
+       flags=$($LFS getstripe $name | tail -n 10 |
+               awk '/lcme_flags/ { print $2 }')
+       [ "$flags" == "$saved_flags2" ] || {
+               $LFS getstripe $name
+               error "(11) expect flags $saved_flags2, got $flags"
+       }
+}
+run_test 36c "rebuild LOV EA for mirrored file (3)"
+
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}
index dae1a98..ab8be2a 100644 (file)
@@ -795,7 +795,9 @@ check_lov_comp_md_entry_v1(void)
        CHECK_MEMBER(lov_comp_md_entry_v1, lcme_extent);
        CHECK_MEMBER(lov_comp_md_entry_v1, lcme_offset);
        CHECK_MEMBER(lov_comp_md_entry_v1, lcme_size);
-       CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding);
+       CHECK_MEMBER(lov_comp_md_entry_v1, lcme_layout_gen);
+       CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding_1);
+       CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding_2);
 
        CHECK_VALUE_X(LCME_FL_INIT);
        CHECK_VALUE_X(LCME_FL_NEG);
index 116f70a..cb64cd4 100644 (file)
@@ -1753,10 +1753,18 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_size));
        LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_size) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_size));
-       LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding) == 32, "found %lld\n",
-                (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding));
-       LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding) == 16, "found %lld\n",
-                (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding));
+       LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_layout_gen) == 32, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_layout_gen));
+       LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_layout_gen) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_layout_gen));
+       LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_1) == 36, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_1));
+       LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_1));
+       LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_2) == 40, "found %lld\n",
+                (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_2));
+       LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_2) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_2));
        LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n",
                (unsigned)LCME_FL_INIT);
        LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n",