From 36ba989752c62cc76b06089373fcd6cec6da9008 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sun, 15 Jul 2018 05:15:21 +0800 Subject: [PATCH] LU-10288 lfsck: layout LFSCK for mirrored file This patch makes the layout LFSCK to support mirrored file as follows: 1. Verify mirrored file's LOV EA and PFID EA, including all kinds of inconsistencies as non-mirrored file may hit. 2. Rebuild mirrored file's LOV EA from orphan OST-objects, recover the component's status/flags before the crash: init, stale, and so on. 3. For the mirrored file with dangling reference (OST object), it does NOT rebuild the lost OST-object from other replica, instead, it either reports the corruption or re-creates an empty OST-object that follows the same rules as non-mirrored case. Some code cleanup and new test cases for LFSCK against mirrored file. Signed-off-by: Fan Yong Change-Id: I560746fc2aae40101dcb0e8513b6c7ed54902ec6 Reviewed-on: https://review.whamcloud.com/32705 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Bobi Jam Reviewed-by: Oleg Drokin --- lustre/include/lustre_swab.h | 1 + lustre/include/uapi/linux/lustre/lustre_idl.h | 17 ++ lustre/include/uapi/linux/lustre/lustre_user.h | 4 +- lustre/lfsck/lfsck_internal.h | 2 +- lustre/lfsck/lfsck_layout.c | 313 +++++++++++++++++------- lustre/obdclass/dt_object.c | 4 +- lustre/osp/osp_object.c | 32 ++- lustre/ptlrpc/pack_generic.c | 16 +- lustre/ptlrpc/wiretest.c | 16 +- lustre/tests/sanity-lfsck.sh | 317 +++++++++++++++++++++++++ lustre/utils/wirecheck.c | 4 +- lustre/utils/wiretest.c | 16 +- 12 files changed, 634 insertions(+), 108 deletions(-) diff --git a/lustre/include/lustre_swab.h b/lustre/include/lustre_swab.h index f553fa9..cece542 100644 --- a/lustre/include/lustre_swab.h +++ b/lustre/include/lustre_swab.h @@ -52,6 +52,7 @@ void lustre_swab_orphan_ent(struct lu_orphan_ent *ent); void lustre_swab_orphan_ent_v2(struct lu_orphan_ent_v2 *ent); +void lustre_swab_orphan_ent_v3(struct lu_orphan_ent_v3 *ent);
void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); void lustre_swab_connect(struct obd_connect_data *ocd); void lustre_swab_hsm_user_state(struct hsm_user_state *hus); diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index cf39abb..9e5d4f0 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -384,6 +384,23 @@ struct lu_orphan_ent_v2 { struct lu_orphan_rec_v2 loe_rec; }; +struct lu_orphan_rec_v3 { + struct lu_orphan_rec lor_rec; + struct ost_layout lor_layout; + /* The OST-object declared layout version in PFID EA.*/ + __u32 lor_layout_version; + /* The OST-object declared layout range (of version) in PFID EA.*/ + __u32 lor_range; + __u32 lor_padding_1; + __u64 lor_padding_2; +}; + +struct lu_orphan_ent_v3 { + /* The orphan OST-object's FID */ + struct lu_fid loe_key; + struct lu_orphan_rec_v3 loe_rec; +}; + /** @} lu_fid */ /** \defgroup lu_dir lu_dir diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 3316e4d..cd1b2af 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -624,7 +624,9 @@ struct lov_comp_md_entry_v1 { __u32 lcme_offset; /* offset of component blob, start from lov_comp_md_v1 */ __u32 lcme_size; /* size of component blob */ - __u64 lcme_padding[2]; + __u32 lcme_layout_gen; + __u32 lcme_padding_1; + __u64 lcme_padding_2; } __attribute__((packed)); #define SEQ_ID_MAX 0x0000FFFF diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index ac94f8f..a92181f 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -903,7 +903,7 @@ struct lfsck_thread_info { struct ldlm_res_id lti_resid; struct filter_fid lti_ff; struct dt_allocation_hint lti_hint; - struct lu_orphan_rec_v2 lti_rec; + struct lu_orphan_rec_v3 lti_rec; struct lov_user_md lti_lum; struct dt_insert_rec 
lti_dt_rec; struct lu_object_conf lti_conf; diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index d7c938d..cdb46d8c 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -328,7 +328,8 @@ out: } static int lfsck_layout_verify_header_v1v3(struct dt_object *obj, - struct lov_mds_md_v1 *lmm) + struct lov_mds_md_v1 *lmm, + __u64 start, __u32 comp_id) { __u32 magic; __u32 pattern; @@ -353,10 +354,24 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj, } pattern = le32_to_cpu(lmm->lmm_pattern); - /* XXX: currently, we only support LOV_PATTERN_RAID0. */ + +#if 0 + /* XXX: DoM file verification will be supportted via LU-11081. */ + if (lov_pattern(pattern) == LOV_PATTERN_MDT) { + if (start != 0) { + CDEBUG(D_LFSCK, "The DoM entry for "DFID" is not " + "the first component in the mirror %x/%llu\n", + PFID(lfsck_dto2fid(obj)), comp_id, start); + + return -EINVAL; + } + } +#endif + if (lov_pattern(pattern) != LOV_PATTERN_RAID0) { CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u for the file " - DFID"\n", pattern, PFID(lfsck_dto2fid(obj))); + DFID" in the component %x\n", + pattern, PFID(lfsck_dto2fid(obj)), comp_id); return -EOPNOTSUPP; } @@ -382,7 +397,7 @@ static int lfsck_layout_verify_header(struct dt_object *obj, return -EINVAL; } - for (i = 0; i < count; i++) { + for (i = 0; i < count && !rc; i++) { struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i]; __u64 start = le64_to_cpu(lcme->lcme_extent.e_start); @@ -411,13 +426,12 @@ static int lfsck_layout_verify_header(struct dt_object *obj, } rc = lfsck_layout_verify_header_v1v3(obj, - (struct lov_mds_md_v1 *)((char *)lmm + - le32_to_cpu(lcme->lcme_offset))); - if (rc) - return rc; + (struct lov_mds_md_v1 *)((char *)lmm + + le32_to_cpu(lcme->lcme_offset)), start, + comp_id); } } else { - rc = lfsck_layout_verify_header_v1v3(obj, lmm); + rc = lfsck_layout_verify_header_v1v3(obj, lmm, 1, 0); } return rc; @@ -434,7 +448,7 @@ again: if (rc == -ERANGE) { rc = 
dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV); if (rc <= 0) - return rc; + return !rc ? -ENODATA : rc; lu_buf_realloc(buf, rc); if (buf->lb_buf == NULL) @@ -443,11 +457,8 @@ again: goto again; } - if (rc == -ENODATA) - rc = 0; - if (rc <= 0) - return rc; + return !rc ? -ENODATA : rc; if (unlikely(buf->lb_buf == NULL)) { lu_buf_alloc(buf, rc); @@ -1788,12 +1799,13 @@ static int lfsck_layout_new_v1_lovea(const struct lu_env *env, } static int lfsck_layout_new_comp_lovea(const struct lu_env *env, - struct ost_layout *ol, - struct dt_object *parent, - struct lu_buf *buf, __u32 ea_off, - struct lov_mds_md_v1 **lmm, - struct lov_ost_data_v1 **objs) + struct lu_orphan_rec_v3 *rec, + struct dt_object *parent, + struct lu_buf *buf, __u32 ea_off, + struct lov_mds_md_v1 **lmm, + struct lov_ost_data_v1 **objs) { + struct ost_layout *ol = &rec->lor_layout; struct lov_comp_md_v1 *lcm; struct lov_comp_md_entry_v1 *lcme; __u32 pattern = LOV_PATTERN_RAID0; @@ -1808,9 +1820,22 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env, lcm = buf->lb_buf; lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1); lcm->lcm_size = cpu_to_le32(size); - lcm->lcm_layout_gen = cpu_to_le32(1); - lcm->lcm_flags = 0; + if (rec->lor_range) { + lcm->lcm_layout_gen = cpu_to_le32(rec->lor_layout_version + + rec->lor_range); + lcm->lcm_flags = cpu_to_le16(LCM_FL_WRITE_PENDING); + } else if (rec->lor_layout_version) { + lcm->lcm_layout_gen = cpu_to_le32(rec->lor_layout_version + + rec->lor_range); + lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE); + } else { + lcm->lcm_layout_gen = cpu_to_le32(1); + lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE); + } lcm->lcm_entry_count = cpu_to_le16(1); + /* Currently, we do not know how many mirrors will be, set it as zero + * at the beginning. It will be updated when more mirrors are found. 
*/ + lcm->lcm_mirror_count = 0; lcme = &lcm->lcm_entries[0]; lcme->lcme_id = cpu_to_le32(ol->ol_comp_id); @@ -1819,6 +1844,7 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env, lcme->lcme_extent.e_end = cpu_to_le64(ol->ol_comp_end); lcme->lcme_offset = cpu_to_le32(offset); lcme->lcme_size = cpu_to_le32(lcme_size); + lcme->lcme_layout_gen = lcm->lcm_layout_gen; if (ol->ol_stripe_count > 1) pattern |= LOV_PATTERN_F_HOLE; @@ -1830,15 +1856,66 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env, return size; } -static int lfsck_layout_add_comp_comp(const struct lu_env *env, - struct lfsck_instance *lfsck, - struct thandle *handle, - struct ost_layout *ol, - struct dt_object *parent, - const struct lu_fid *cfid, - struct lu_buf *buf, __u32 ost_idx, - __u32 ea_off, int pos) +static void lfsck_layout_update_lcm(struct lov_comp_md_v1 *lcm, + struct lov_comp_md_entry_v1 *lcme, + __u32 version, __u32 range) +{ + struct lov_comp_md_entry_v1 *tmp; + __u64 start = le64_to_cpu(lcme->lcme_extent.e_start); + __u64 end = le64_to_cpu(lcme->lcme_extent.e_end); + __u32 gen = version + range; + __u32 tmp_gen; + int i; + __u16 count = le16_to_cpu(lcm->lcm_entry_count); + __u16 flags = le16_to_cpu(lcm->lcm_flags); + + if (!gen) + gen = 1; + lcme->lcme_layout_gen = cpu_to_le32(gen); + if (le32_to_cpu(lcm->lcm_layout_gen) < gen) + lcm->lcm_layout_gen = cpu_to_le32(gen); + + if (range) + lcm->lcm_flags = cpu_to_le16(LCM_FL_WRITE_PENDING); + else if (flags == LCM_FL_NONE && le16_to_cpu(lcm->lcm_mirror_count) > 0) + lcm->lcm_flags = cpu_to_le16(LCM_FL_RDONLY); + + for (i = 0; i < count; i++) { + tmp = &lcm->lcm_entries[i]; + if (le64_to_cpu(tmp->lcme_extent.e_end) <= start) + continue; + + if (le64_to_cpu(tmp->lcme_extent.e_start) >= end) + continue; + + if (le32_to_cpu(tmp->lcme_flags) & LCME_FL_STALE) + continue; + + tmp_gen = le32_to_cpu(tmp->lcme_layout_gen); + /* "lcme_layout_gen == 0" but without LCME_FL_STALE flag, + * then it should be the latest version 
of all mirrors. */ + if (tmp_gen == 0 || tmp_gen > gen) { + lcme->lcme_flags = cpu_to_le32( + le32_to_cpu(lcme->lcme_flags) | LCME_FL_STALE); + break; + } + + if (tmp_gen < gen) + tmp->lcme_flags = cpu_to_le32( + le32_to_cpu(tmp->lcme_flags) | LCME_FL_STALE); + } +} + +static int lfsck_layout_add_comp(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct thandle *handle, + struct lu_orphan_rec_v3 *rec, + struct dt_object *parent, + const struct lu_fid *cfid, + struct lu_buf *buf, __u32 ost_idx, + __u32 ea_off, int pos, bool new_mirror) { + struct ost_layout *ol = &rec->lor_layout; struct lov_comp_md_v1 *lcm = buf->lb_buf; struct lov_comp_md_entry_v1 *lcme; struct lov_mds_md_v1 *lmm; @@ -1858,8 +1935,9 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env, * have reallocated the buf. */ lcm = buf->lb_buf; lcm->lcm_size = cpu_to_le32(size); - le32_add_cpu(&lcm->lcm_layout_gen, 1); lcm->lcm_entry_count = cpu_to_le16(count + 1); + if (new_mirror) + le16_add_cpu(&lcm->lcm_mirror_count, 1); /* 1. Move the component bodies from [pos, count-1] to [pos+1, count] * with distance of 'added'. */ @@ -1924,6 +2002,10 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env, ol->ol_stripe_size, ea_off, pattern, ol->ol_stripe_count); + /* 6. Update mirror related flags and version. 
*/ + lfsck_layout_update_lcm(lcm, lcme, rec->lor_layout_version, + rec->lor_range); + rc = lfsck_layout_refill_lovea(env, lfsck, handle, parent, cfid, buf, lmm, objs, LU_XATTR_REPLACE, ost_idx, le32_to_cpu(lcm->lcm_size)); @@ -1931,10 +2013,12 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: layout LFSCK assistant add new COMP for " DFID": parent "DFID", OST-index %u, stripe-index %u, " "stripe_size %u, stripe_count %u, comp_id %u, comp_start %llu, " - "comp_end %llu, %s LOV EA hole: rc = %d\n", + "comp_end %llu, layout version %u, range %u, " + "%s LOV EA hole: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)), ost_idx, ea_off, ol->ol_stripe_size, ol->ol_stripe_count, ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, + rec->lor_layout_version, rec->lor_range, le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_HOLE ? "with" : "without", rc); @@ -2014,24 +2098,25 @@ static int lfsck_layout_extend_v1v3_lovea(const struct lu_env *env, static int lfsck_layout_update_lovea(const struct lu_env *env, struct lfsck_instance *lfsck, struct thandle *handle, - struct ost_layout *ol, + struct lu_orphan_rec_v3 *rec, struct dt_object *parent, const struct lu_fid *cfid, struct lu_buf *buf, int fl, __u32 ost_idx, __u32 ea_off) { + struct ost_layout *ol = &rec->lor_layout; struct lov_mds_md_v1 *lmm = NULL; struct lov_ost_data_v1 *objs = NULL; int rc = 0; ENTRY; if (ol->ol_comp_id != 0) - rc = lfsck_layout_new_comp_lovea(env, ol, parent, buf, ea_off, - &lmm, &objs); + rc = lfsck_layout_new_comp_lovea(env, rec, parent, buf, ea_off, + &lmm, &objs); else - rc = lfsck_layout_new_v1_lovea(env, lfsck, ol, parent, buf, - ea_off, &lmm, &objs); - + rc = lfsck_layout_new_v1_lovea(env, lfsck, &rec->lor_layout, + parent, buf, ea_off, &lmm, + &objs); if (rc > 0) rc = lfsck_layout_refill_lovea(env, lfsck, handle, parent, cfid, buf, lmm, objs, fl, ost_idx, rc); @@ -2039,10 +2124,12 @@ static int lfsck_layout_update_lovea(const struct 
lu_env *env, CDEBUG(D_LFSCK, "%s: layout LFSCK assistant created layout EA for " DFID": parent "DFID", OST-index %u, stripe-index %u, " "stripe_size %u, stripe_count %u, comp_id %u, comp_start %llu, " - "comp_end %llu, fl %d, %s LOV EA hole: rc = %d\n", + "comp_end %llu, layout version %u, range %u, fl %d, " + "%s LOV EA hole: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)), ost_idx, ea_off, ol->ol_stripe_size, ol->ol_stripe_count, - ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, fl, + ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, + rec->lor_layout_version, rec->lor_range, fl, le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_HOLE ? "with" : "without", rc); @@ -2052,7 +2139,8 @@ static int lfsck_layout_update_lovea(const struct lu_env *env, static int __lfsck_layout_update_pfid(const struct lu_env *env, struct dt_object *child, const struct lu_fid *pfid, - const struct ost_layout *ol, __u32 offset) + const struct ost_layout *ol, __u32 offset, + __u32 version, __u32 range) { struct dt_device *dev = lfsck_obj2dev(child); struct filter_fid *ff = &lfsck_env_info(env)->lti_ff; @@ -2067,6 +2155,8 @@ static int __lfsck_layout_update_pfid(const struct lu_env *env, * parent MDT-object's layout EA. 
*/ ff->ff_parent.f_stripe_idx = cpu_to_le32(offset); ost_layout_cpu_to_le(&ff->ff_layout, ol); + ff->ff_layout_version = cpu_to_le32(version); + ff->ff_range = cpu_to_le32(range); lfsck_buf_init(&buf, ff, sizeof(*ff)); handle = dt_trans_create(env, dev); @@ -2101,7 +2191,7 @@ static int lfsck_layout_update_pfid(const struct lu_env *env, struct dt_object *parent, struct lu_fid *cfid, struct dt_device *cdev, - struct ost_layout *ol, __u32 ea_off) + struct lu_orphan_rec_v3 *rec, __u32 ea_off) { struct dt_object *child; int rc = 0; @@ -2113,7 +2203,9 @@ static int lfsck_layout_update_pfid(const struct lu_env *env, rc = __lfsck_layout_update_pfid(env, child, lu_object_fid(&parent->do_lu), - ol, ea_off); + &rec->lor_layout, ea_off, + rec->lor_layout_version, + rec->lor_range); lfsck_object_put(env, child); RETURN(rc == 0 ? 1 : rc); @@ -2190,7 +2282,7 @@ static int lfsck_lovea_size(struct ost_layout *ol, __u32 ea_off) static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, - struct lu_orphan_rec_v2 *rec, + struct lu_orphan_rec_v3 *rec, struct lu_fid *cfid, const char *infix, const char *type, @@ -2202,7 +2294,6 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lu_attr *la = &info->lti_la2; struct dt_object_format *dof = &info->lti_dof; struct lfsck_instance *lfsck = com->lc_lfsck; - struct ost_layout *ol = &rec->lor_layout; struct lu_fid *pfid = &rec->lor_rec.lor_fid; struct lu_fid *tfid = &info->lti_fid3; struct dt_device *dev = lfsck->li_bottom; @@ -2264,7 +2355,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, * the stripe(s). The LFSCK will specify the LOV EA via * lfsck_layout_update_lovea(). 
*/ - size = lfsck_lovea_size(ol, ea_off); + size = lfsck_lovea_size(&rec->lor_layout, ea_off); if (ea_buf->lb_len < size) { lu_buf_realloc(ea_buf, size); if (ea_buf->lb_buf == NULL) @@ -2341,7 +2432,7 @@ again: dt_write_lock(env, pobj, 0); rc = dt_create(env, pobj, la, NULL, dof, th); if (rc == 0) - rc = lfsck_layout_update_lovea(env, lfsck, th, ol, pobj, cfid, + rc = lfsck_layout_update_lovea(env, lfsck, th, rec, pobj, cfid, &lov_buf, LU_XATTR_CREATE, ltd->ltd_index, ea_off); dt_write_unlock(env, pobj); if (rc < 0) @@ -2358,7 +2449,10 @@ again: th = NULL; /* The 2nd transaction. */ - rc = __lfsck_layout_update_pfid(env, cobj, pfid, ol, ea_off); + rc = __lfsck_layout_update_pfid(env, cobj, pfid, + &rec->lor_layout, ea_off, + rec->lor_layout_version, + rec->lor_range); } GOTO(stop, rc); @@ -2561,7 +2655,7 @@ put: static int lfsck_layout_conflict_create(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, - struct lu_orphan_rec_v2 *rec, + struct lu_orphan_rec_v3 *rec, struct dt_object *parent, struct lu_fid *cfid, struct lu_buf *ea_buf, @@ -2665,7 +2759,7 @@ out: static int lfsck_layout_recreate_lovea(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, - struct lu_orphan_rec_v2 *rec, + struct lu_orphan_rec_v3 *rec, struct dt_object *parent, struct lu_fid *cfid, __u32 ost_idx, __u32 ea_off) @@ -2691,8 +2785,10 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env, int rc = 0; int rc1; int i; - __u16 count; - bool locked = false; + int pos = 0; + __u16 count; + bool locked = false; + bool new_mirror = true; ENTRY; rc = lfsck_ibits_lock(env, lfsck, parent, &lh, @@ -2702,11 +2798,12 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to recreate " "LOV EA for "DFID": parent "DFID", OST-index %u, " "stripe-index %u, comp_id %u, comp_start %llu, " - "comp_end %llu: rc = %d\n", + "comp_end %llu, layout version %u, range %u: rc = %d\n", 
lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)), ost_idx, ea_off, ol->ol_comp_id, ol->ol_comp_start, - ol->ol_comp_end, rc); + ol->ol_comp_end, rec->lor_layout_version, + rec->lor_range, rc); RETURN(rc); } @@ -2777,7 +2874,7 @@ again: LASSERT(buf->lb_len >= lovea_size); - rc = lfsck_layout_update_lovea(env, lfsck, handle, ol, parent, + rc = lfsck_layout_update_lovea(env, lfsck, handle, rec, parent, cfid, buf, fl, ost_idx, ea_off); GOTO(unlock_parent, rc); @@ -2793,28 +2890,41 @@ again: LASSERT(buf->lb_len >= lovea_size); - rc = lfsck_layout_update_lovea(env, lfsck, handle, ol, parent, + rc = lfsck_layout_update_lovea(env, lfsck, handle, rec, parent, cfid, buf, fl, ost_idx, ea_off); GOTO(unlock_parent, rc); } /* For other unknown magic/pattern, keep the current LOV EA. */ - if (rc1 != 0) + if (rc1 == -EOPNOTSUPP) + GOTO(unlock_parent, rc1 = 0); + + if (rc1) GOTO(unlock_parent, rc = rc1); magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_COMP_V1) { __u64 start; __u64 end; + __u16 mirror_id0 = mirror_id_of(ol->ol_comp_id); + __u16 mirror_id1; lcm = buf->lb_buf; count = le16_to_cpu(lcm->lcm_entry_count); - for (i = 0; i < count; i++) { + for (i = 0; i < count; pos = ++i) { lcme = &lcm->lcm_entries[i]; start = le64_to_cpu(lcme->lcme_extent.e_start); end = le64_to_cpu(lcme->lcme_extent.e_end); + mirror_id1 = mirror_id_of(le32_to_cpu(lcme->lcme_id)); + + if (mirror_id0 > mirror_id1) + continue; + if (mirror_id0 < mirror_id1) + break; + + new_mirror = false; if (end <= ol->ol_comp_start) continue; @@ -2827,8 +2937,8 @@ again: goto further; } - rc = lfsck_layout_add_comp_comp(env, lfsck, handle, ol, parent, - cfid, buf, ost_idx, ea_off, i); + rc = lfsck_layout_add_comp(env, lfsck, handle, rec, parent, + cfid, buf, ost_idx, ea_off, pos, new_mirror); GOTO(unlock_parent, rc); } @@ -2851,8 +2961,14 @@ further: goto again; } - if (lcme && !(flags & LCME_FL_INIT)) + if (lcm) { + LASSERT(lcme); + lcme->lcme_flags = cpu_to_le32(flags | LCME_FL_INIT); + 
lfsck_layout_update_lcm(lcm, lcme, + rec->lor_layout_version, + rec->lor_range); + } rc = lfsck_layout_extend_v1v3_lovea(env, lfsck, handle, ol, parent, cfid, buf, ost_idx, ea_off); @@ -2915,11 +3031,12 @@ further: GOTO(unlock_parent, rc = -EINVAL); } - le32_add_cpu(&lcm->lcm_layout_gen, 1); lovea_size = le32_to_cpu(lcm->lcm_size); - if (!(flags & LCME_FL_INIT)) - lcme->lcme_flags = cpu_to_le32(flags | - LCME_FL_INIT); + lcme->lcme_flags = cpu_to_le32(flags | + LCME_FL_INIT); + lfsck_layout_update_lcm(lcm, lcme, + rec->lor_layout_version, + rec->lor_range); } LASSERTF(buf->lb_len >= lovea_size, @@ -2969,7 +3086,7 @@ further: lfsck_ibits_unlock(&lh, LCK_EX); rc = lfsck_layout_update_pfid(env, com, parent, cfid, ltd->ltd_tgt, - ol, i); + rec, i); CDEBUG(D_LFSCK, "%s layout LFSCK assistant " "updated OST-object's pfid for "DFID @@ -3019,7 +3136,7 @@ unlock_layout: static int lfsck_layout_scan_orphan_one(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, - struct lu_orphan_rec_v2 *rec, + struct lu_orphan_rec_v3 *rec, struct lu_fid *cfid) { struct lfsck_layout *lo = com->lc_file_ram; @@ -3158,7 +3275,7 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, do { struct dt_key *key; - struct lu_orphan_rec_v2 *rec = &info->lti_rec; + struct lu_orphan_rec_v3 *rec = &info->lti_rec; if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val) && unlikely(!thread_is_running(&lfsck->li_thread))) @@ -3197,9 +3314,10 @@ log: return rc > 0 ? 
0 : rc; } -static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol, +static int lfsck_lov2layout(struct lov_mds_md_v1 *lmm, struct filter_fid *ff, __u32 comp_id) { + struct ost_layout *ol = &ff->ff_layout; __u32 magic = le32_to_cpu(lmm->lmm_magic); int rc = 0; ENTRY; @@ -3210,6 +3328,8 @@ static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol, ol->ol_comp_start = 0; ol->ol_comp_end = 0; ol->ol_comp_id = 0; + ff->ff_layout_version = 0; + ff->ff_range = 0; } else if (magic == LOV_MAGIC_COMP_V1) { struct lov_comp_md_v1 *lcm = (struct lov_comp_md_v1 *)lmm; struct lov_comp_md_entry_v1 *lcme = NULL; @@ -3236,6 +3356,8 @@ static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol, ol->ol_comp_start = le64_to_cpu(lcme->lcme_extent.e_start); ol->ol_comp_end = le64_to_cpu(lcme->lcme_extent.e_end); ol->ol_comp_id = le32_to_cpu(lcme->lcme_id); + ff->ff_layout_version = le32_to_cpu(lcme->lcme_layout_gen); + ff->ff_range = 0; } else { GOTO(out, rc = -EINVAL); } @@ -3279,7 +3401,6 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env, { struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid *ff = &info->lti_ff; - struct ost_layout *ol = &ff->ff_layout; struct dt_object_format *dof = &info->lti_dof; struct lu_attr *la = &info->lti_la; struct lfsck_instance *lfsck = com->lc_lfsck; @@ -3319,10 +3440,12 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env, ff->ff_parent.f_stripe_idx = cpu_to_le32(ea_off); rc = lfsck_layout_get_lovea(env, parent, tbuf); - if (rc < 0) + if (unlikely(rc == -ENODATA)) + rc = 0; + if (rc <= 0) GOTO(unlock1, rc); - rc = lfsck_lmm2layout(tbuf->lb_buf, ol, comp_id); + rc = lfsck_lov2layout(tbuf->lb_buf, ff, comp_id); if (rc) GOTO(unlock1, rc); @@ -3359,6 +3482,8 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env, int idx2; rc = lfsck_layout_get_lovea(env, parent, lovea); + if (unlikely(rc == -ENODATA)) + rc = 0; if (rc <= 0) 
GOTO(unlock2, rc); @@ -3531,7 +3656,6 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, { struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid *ff = &info->lti_ff; - struct ost_layout *ol = &ff->ff_layout; struct dt_object *child = llr->llr_child; struct dt_device *dev = lfsck_obj2dev(child); const struct lu_fid *tfid = lu_object_fid(&parent->do_lu); @@ -3556,10 +3680,12 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, ff->ff_parent.f_stripe_idx = cpu_to_le32(llr->llr_lov_idx); rc = lfsck_layout_get_lovea(env, parent, tbuf); - if (rc < 0) + if (unlikely(rc == -ENODATA)) + rc = 0; + if (rc <= 0) GOTO(unlock1, rc); - rc = lfsck_lmm2layout(tbuf->lb_buf, ol, llr->llr_comp_id); + rc = lfsck_lov2layout(tbuf->lb_buf, ff, llr->llr_comp_id); if (rc) GOTO(unlock1, rc); @@ -3752,8 +3878,10 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, GOTO(unlock, rc = 0); rc = lfsck_layout_get_lovea(env, parent, buf); - if (unlikely(!rc || rc == -ENODATA)) - GOTO(unlock, rc = 0); + if (unlikely(rc == -ENODATA)) + rc = 0; + if (rc <= 0) + GOTO(unlock, rc); lmm = buf->lb_buf; magic = le32_to_cpu(lmm->lmm_magic); @@ -3955,9 +4083,12 @@ static int lfsck_layout_check_parent(const struct lu_env *env, * is in such layout. If yes, it is multiple referenced, otherwise it * is unmatched referenced case. 
*/ rc = lfsck_layout_get_lovea(env, tobj, buf); - if (rc == 0 || rc == -ENOENT) + if (rc == 0 || rc == -ENODATA || rc == -ENOENT) GOTO(out, rc = LLIT_UNMATCHED_PAIR); + if (unlikely(rc == -EOPNOTSUPP)) + GOTO(out, rc = LLIT_NONE); + if (rc < 0) GOTO(out, rc); @@ -4745,9 +4876,6 @@ static int lfsck_layout_master_check_pairs(const struct lu_env *env, if (rc < 0) GOTO(unlock, rc); - if (rc == 0) - GOTO(unlock, rc = -ENODATA); - lmm = buf->lb_buf; magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_COMP_V1) { @@ -4900,6 +5028,8 @@ static int lfsck_layout_slave_repair_pfid(const struct lu_env *env, rc = __lfsck_layout_update_pfid(env, obj, &lrl->lrl_ff_client.ff_parent, &lrl->lrl_ff_client.ff_layout, + lrl->lrl_ff_client.ff_layout_version, + lrl->lrl_ff_client.ff_range, lrl->lrl_ff_client.ff_parent.f_ver); GOTO(unlock, rc); @@ -5434,10 +5564,12 @@ again: GOTO(out, rc = 0); rc = lfsck_layout_get_lovea(env, obj, buf); - if (rc <= 0) + if (rc == -EINVAL || rc == -ENODATA || rc == -EOPNOTSUPP) /* Skip bad lov EA during the 1st cycle scanning, and * try to recover it via orphan in the 2nd scanning. */ - GOTO(out, rc = (rc == -EINVAL ? 
0 : rc)); + rc = 0; + if (rc <= 0) + GOTO(out, rc); size = rc; lmm = buf->lb_buf; @@ -6791,7 +6923,7 @@ struct lfsck_orphan_it { struct lfsck_rbtree_node *loi_lrn; struct lfsck_layout_slave_target *loi_llst; struct lu_fid loi_key; - struct lu_orphan_rec_v2 loi_rec; + struct lu_orphan_rec_v3 loi_rec; __u64 loi_hash; unsigned int loi_over:1; }; @@ -7047,7 +7179,7 @@ static int lfsck_orphan_it_next(const struct lu_env *env, struct lu_attr *la = &info->lti_la; struct lfsck_orphan_it *it = (struct lfsck_orphan_it *)di; struct lu_fid *key = &it->loi_key; - struct lu_orphan_rec_v2 *rec = &it->loi_rec; + struct lu_orphan_rec_v3 *rec = &it->loi_rec; struct ost_layout *ol = &rec->lor_layout; struct lfsck_component *com = it->loi_com; struct lfsck_instance *lfsck = com->lc_lfsck; @@ -7188,6 +7320,8 @@ again1: rec->lor_rec.lor_uid = la->la_uid; rec->lor_rec.lor_gid = la->la_gid; memset(ol, 0, sizeof(*ol)); + rec->lor_layout_version = 0; + rec->lor_range = 0; GOTO(out, rc = 0); } @@ -7223,13 +7357,18 @@ again1: rec->lor_rec.lor_uid = la->la_uid; rec->lor_rec.lor_gid = la->la_gid; ost_layout_le_to_cpu(ol, &ff->ff_layout); + rec->lor_layout_version = + le32_to_cpu(ff->ff_layout_version & ~LU_LAYOUT_RESYNC); + rec->lor_range = le32_to_cpu(ff->ff_range); CDEBUG(D_LFSCK, "%s: return orphan "DFID", PFID "DFID", owner %u:%u, " "stripe size %u, stripe count %u, COMP id %u, COMP start %llu, " - "COMP end %llu\n", lfsck_lfsck2name(com->lc_lfsck), PFID(key), + "COMP end %llu, layout version %u, range %u\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(key), PFID(&rec->lor_rec.lor_fid), rec->lor_rec.lor_uid, rec->lor_rec.lor_gid, ol->ol_stripe_size, ol->ol_stripe_count, - ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end); + ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, + rec->lor_layout_version, rec->lor_range); GOTO(out, rc = 0); @@ -7292,7 +7431,7 @@ static int lfsck_orphan_it_rec(const struct lu_env *env, { struct lfsck_orphan_it *it = (struct lfsck_orphan_it *)di; - *(struct 
lu_orphan_rec_v2 *)rec = it->loi_rec; + *(struct lu_orphan_rec_v3 *)rec = it->loi_rec; return 0; } diff --git a/lustre/obdclass/dt_object.c b/lustre/obdclass/dt_object.c index 5a8241a..d52bcc5 100644 --- a/lustre/obdclass/dt_object.c +++ b/lustre/obdclass/dt_object.c @@ -587,8 +587,8 @@ const struct dt_index_features dt_lfsck_layout_orphan_features = { .dif_flags = 0, .dif_keysize_min = sizeof(struct lu_fid), .dif_keysize_max = sizeof(struct lu_fid), - .dif_recsize_min = sizeof(struct lu_orphan_rec_v2), - .dif_recsize_max = sizeof(struct lu_orphan_rec_v2), + .dif_recsize_min = sizeof(struct lu_orphan_rec_v3), + .dif_recsize_max = sizeof(struct lu_orphan_rec_v3), .dif_ptrsize = 4 }; EXPORT_SYMBOL(dt_lfsck_layout_orphan_features); diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c index 3e1edc2..f1a14fb 100644 --- a/lustre/osp/osp_object.c +++ b/lustre/osp/osp_object.c @@ -1573,7 +1573,9 @@ int osp_declare_destroy(const struct lu_env *env, struct dt_object *dt, ENTRY; LASSERT(!osp->opd_connect_mdt); - rc = osp_sync_declare_add(env, o, MDS_UNLINK64_REC, th); + + if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ)) + rc = osp_sync_declare_add(env, o, MDS_UNLINK64_REC, th); RETURN(rc); } @@ -1607,11 +1609,14 @@ static int osp_destroy(const struct lu_env *env, struct dt_object *dt, o->opo_non_exist = 1; LASSERT(!osp->opd_connect_mdt); - /* once transaction is committed put proper command on - * the queue going to our OST. */ - rc = osp_sync_add(env, o, MDS_UNLINK64_REC, th, NULL); - if (rc < 0) - RETURN(rc); + + if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ)) { + /* once transaction is committed put proper command on + * the queue going to our OST. 
*/ + rc = osp_sync_add(env, o, MDS_UNLINK64_REC, th, NULL); + if (rc < 0) + RETURN(rc); + } /* not needed in cache any more */ set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags); @@ -1973,6 +1978,13 @@ again: it->ooi_pos_ent++; if (it->ooi_pos_ent < idxpage->lip_nr) { if (it->ooi_rec_size == + sizeof(struct lu_orphan_rec_v3)) { + it->ooi_ent = + (struct lu_orphan_ent_v3 *)idxpage->lip_entries+ + it->ooi_pos_ent; + if (it->ooi_swab) + lustre_swab_orphan_ent_v3(it->ooi_ent); + } else if (it->ooi_rec_size == sizeof(struct lu_orphan_rec_v2)) { it->ooi_ent = (struct lu_orphan_ent_v2 *)idxpage->lip_entries+ @@ -2031,7 +2043,13 @@ static int osp_orphan_it_rec(const struct lu_env *env, const struct dt_it *di, struct osp_it *it = (struct osp_it *)di; if (likely(it->ooi_ent)) { - if (it->ooi_rec_size == sizeof(struct lu_orphan_rec_v2)) { + if (it->ooi_rec_size == sizeof(struct lu_orphan_rec_v3)) { + struct lu_orphan_ent_v3 *ent = + (struct lu_orphan_ent_v3 *)it->ooi_ent; + + *(struct lu_orphan_rec_v3 *)rec = ent->loe_rec; + } else if (it->ooi_rec_size == + sizeof(struct lu_orphan_rec_v2)) { struct lu_orphan_ent_v2 *ent = (struct lu_orphan_ent_v2 *)it->ooi_ent; diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index d07c952..21163e6 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2312,7 +2312,9 @@ void lustre_swab_lov_comp_md_v1(struct lov_comp_md_v1 *lum) __swab64s(&ent->lcme_extent.e_end); __swab32s(&ent->lcme_offset); __swab32s(&ent->lcme_size); - CLASSERT(offsetof(typeof(*ent), lcme_padding) != 0); + __swab32s(&ent->lcme_layout_gen); + CLASSERT(offsetof(typeof(*ent), lcme_padding_1) != 0); + CLASSERT(offsetof(typeof(*ent), lcme_padding_2) != 0); v1 = (struct lov_user_md_v1 *)((char *)lum + off); stripe_count = v1->lmm_stripe_count; @@ -2842,6 +2844,18 @@ void lustre_swab_orphan_ent_v2(struct lu_orphan_ent_v2 *ent) } EXPORT_SYMBOL(lustre_swab_orphan_ent_v2); +void lustre_swab_orphan_ent_v3(struct 
lu_orphan_ent_v3 *ent) +{ + lustre_swab_lu_fid(&ent->loe_key); + lustre_swab_orphan_rec(&ent->loe_rec.lor_rec); + lustre_swab_ost_layout(&ent->loe_rec.lor_layout); + __swab32s(&ent->loe_rec.lor_layout_version); + __swab32s(&ent->loe_rec.lor_range); + CLASSERT(offsetof(typeof(ent->loe_rec), lor_padding_1) != 0); + CLASSERT(offsetof(typeof(ent->loe_rec), lor_padding_2) != 0); +} +EXPORT_SYMBOL(lustre_swab_orphan_ent_v3); + void lustre_swab_ladvise(struct lu_ladvise *ladvise) { __swab16s(&ladvise->lla_advice); diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 631cdef..3aa9b98 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -1732,10 +1732,18 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_size)); LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_size) == 4, "found %lld\n", (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_size)); - LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding) == 32, "found %lld\n", - (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding)); - LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding)); + LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_layout_gen) == 32, "found %lld\n", + (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_layout_gen)); + LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_layout_gen) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_layout_gen)); + LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_1) == 36, "found %lld\n", + (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_1)); + LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_1)); + 
LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_2) == 40, "found %lld\n", + (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_2)); + LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_2)); LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n", (unsigned)LCME_FL_INIT); LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n", diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index b39a59a..56fab70 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -5204,6 +5204,323 @@ test_35() } run_test 35 "LFSCK can rebuild the lost agent entry" +# It will be replaced by "lfs getstripe -N" via LU-11124. +get_mirrors_count() { + local mirrors=$($LFS getstripe $1 | + awk '/lcm_mirror_count/ { print $2 }') + echo $mirrors +} + +test_36a() { + [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return + + echo "#####" + echo "The target MDT-object's LOV EA corrupted as to lose one of the " + echo "mirrors information. The layout LFSCK should rebuild the LOV EA " + echo "with the PFID EA of related OST-object(s) belong to the mirror." 
+ echo "#####" + + check_mount_and_prep + + $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \ + -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f0 || + error "(0) Fail to create mirror file $DIR/$tdir/f0" + $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \ + -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f1 || + error "(1) Fail to create mirror file $DIR/$tdir/f1" + $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \ + -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f2 || + error "(2) Fail to create mirror file $DIR/$tdir/f2" + + dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 || + error "(3) Fail to write $DIR/$tdir/f0" + dd if=/dev/zero of=$DIR/$tdir/f1 bs=1M count=4 || + error "(4) Fail to write $DIR/$tdir/f1" + dd if=/dev/zero of=$DIR/$tdir/f2 bs=1M count=4 || + error "(5) Fail to write $DIR/$tdir/f2" + + $LFS mirror resync $DIR/$tdir/f0 || + error "(6) Fail to resync $DIR/$tdir/f0" + $LFS mirror resync $DIR/$tdir/f1 || + error "(7) Fail to resync $DIR/$tdir/f1" + $LFS mirror resync $DIR/$tdir/f2 || + error "(8) Fail to resync $DIR/$tdir/f2" + + cancel_lru_locks mdc + cancel_lru_locks osc + + $LFS getstripe $DIR/$tdir/f0 || + error "(9) Fail to getstripe for $DIR/$tdir/f0" + $LFS getstripe $DIR/$tdir/f1 || + error "(10) Fail to getstripe for $DIR/$tdir/f1" + $LFS getstripe $DIR/$tdir/f2 || + error "(11) Fail to getstripe for $DIR/$tdir/f2" + + echo "Inject failure, to simulate the case of missing one mirror in LOV" + #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616 + do_facet mds1 $LCTL set_param fail_loc=0x1616 + + $LFS mirror split --mirror-id 1 -d $DIR/$tdir/f0 || + error "(12) Fail to split 1st mirror from $DIR/$tdir/f0" + $LFS mirror split --mirror-id 2 -d $DIR/$tdir/f1 || + error "(13) Fail to split 2nd mirror from $DIR/$tdir/f1" + $LFS mirror split --mirror-id 3 -d $DIR/$tdir/f2 || + error "(14) Fail to split 3rd mirror from $DIR/$tdir/f2" + + sync + sleep 2 + do_facet mds1 $LCTL set_param fail_loc=0 + + $LFS getstripe 
$DIR/$tdir/f0 | grep "lcme_mirror_id:.*1" && + error "(15) The 1st of mirror is not destroyed" + $LFS getstripe $DIR/$tdir/f1 | grep "lcme_mirror_id:.*2" && + error "(16) The 2nd of mirror is not destroyed" + $LFS getstripe $DIR/$tdir/f2 | grep "lcme_mirror_id:.*3" && + error "(17) The 3rd of mirror is not destroyed" + + local mirrors + + mirrors=$(get_mirrors_count $DIR/$tdir/f0) + [ $mirrors -eq 2 ] || error "(18) $DIR/$tdir/f0 has $mirrors mirrors" + mirrors=$(get_mirrors_count $DIR/$tdir/f1) + [ $mirrors -eq 2 ] || error "(19) $DIR/$tdir/f1 has $mirrors mirrors" + mirrors=$(get_mirrors_count $DIR/$tdir/f2) + [ $mirrors -eq 2 ] || error "(20) $DIR/$tdir/f2 has $mirrors mirrors" + + echo "Trigger layout LFSCK on all devices to find out orphan OST-object" + $START_LAYOUT -r -o || error "(21) Fail to start LFSCK for layout!" + + for k in $(seq $MDSCOUNT); do + # The LFSCK status query interval is 30 seconds. For the case + # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough + # time to guarantee the status sync up. 
+ wait_update_facet mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" 32 || + error "(22) MDS${k} is not the expected 'completed'" + done + + for k in $(seq $OSTCOUNT); do + local cur_status=$(do_facet ost${k} $LCTL get_param -n \ + obdfilter.$(facet_svc ost${k}).lfsck_layout | + awk '/^status/ { print $2 }') + [ "$cur_status" == "completed" ] || + error "(23) OST${k} Expect 'completed', but got '$cur_status'" + done + + local repaired=$(do_facet mds1 $LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_layout | + awk '/^repaired_orphan/ { print $2 }') + [ $repaired -eq 9 ] || + error "(24) Expect 9 fixed on mds1, but got: $repaired" + + mirrors=$(get_mirrors_count $DIR/$tdir/f0) + [ $mirrors -eq 3 ] || error "(25) $DIR/$tdir/f0 has $mirrors mirrors" + mirrors=$(get_mirrors_count $DIR/$tdir/f1) + [ $mirrors -eq 3 ] || error "(26) $DIR/$tdir/f1 has $mirrors mirrors" + mirrors=$(get_mirrors_count $DIR/$tdir/f2) + [ $mirrors -eq 3 ] || error "(27) $DIR/$tdir/f2 has $mirrors mirrors" + + $LFS getstripe $DIR/$tdir/f0 | grep "lcme_mirror_id:.*1" || { + $LFS getstripe $DIR/$tdir/f0 + error "(28) The 1st of mirror is not recovered" + } + + $LFS getstripe $DIR/$tdir/f1 | grep "lcme_mirror_id:.*2" || { + $LFS getstripe $DIR/$tdir/f1 + error "(29) The 2nd of mirror is not recovered" + } + + $LFS getstripe $DIR/$tdir/f2 | grep "lcme_mirror_id:.*3" || { + $LFS getstripe $DIR/$tdir/f2 + error "(30) The 3rd of mirror is not recovered" + } +} +run_test 36a "rebuild LOV EA for mirrored file (1)" + +test_36b() { + [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return + + echo "#####" + echo "The mirrored file lost its MDT-object, but relatd OST-objects " + echo "are still there. The layout LFSCK should rebuild the LOV EA " + echo "with the PFID EA of related OST-object(s) belong to the file. 
" + echo "#####" + + check_mount_and_prep + + $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \ + -N -E 3M -o 2,0 -E -1 -o 1 $DIR/$tdir/f0 || + error "(0) Fail to create mirror file $DIR/$tdir/f0" + + local fid=$($LFS path2fid $DIR/$tdir/f0) + + dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 || + error "(1) Fail to write $DIR/$tdir/f0" + $LFS mirror resync $DIR/$tdir/f0 || + error "(2) Fail to resync $DIR/$tdir/f0" + + cancel_lru_locks mdc + cancel_lru_locks osc + + $LFS getstripe $DIR/$tdir/f0 || + error "(3) Fail to getstripe for $DIR/$tdir/f0" + + echo "Inject failure, to simulate the case of missing the MDT-object" + #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616 + do_facet mds1 $LCTL set_param fail_loc=0x1616 + rm -f $DIR/$tdir/f0 || error "(4) Fail to remove $DIR/$tdir/f0" + + sync + sleep 2 + do_facet mds1 $LCTL set_param fail_loc=0 + + echo "Trigger layout LFSCK on all devices to find out orphan OST-object" + $START_LAYOUT -r -o || error "(5) Fail to start LFSCK for layout!" + + for k in $(seq $MDSCOUNT); do + # The LFSCK status query interval is 30 seconds. For the case + # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough + # time to guarantee the status sync up. 
+ wait_update_facet mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" 32 || + error "(6) MDS${k} is not the expected 'completed'" + done + + for k in $(seq $OSTCOUNT); do + local cur_status=$(do_facet ost${k} $LCTL get_param -n \ + obdfilter.$(facet_svc ost${k}).lfsck_layout | + awk '/^status/ { print $2 }') + [ "$cur_status" == "completed" ] || + error "(7) OST${k} Expect 'completed', but got '$cur_status'" + done + + local count=$(do_facet mds1 $LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_layout | + awk '/^repaired_orphan/ { print $2 }') + [ $count -eq 9 ] || error "(8) Expect 9 fixed on mds1, but got: $count" + + local name=$MOUNT/.lustre/lost+found/MDT0000/${fid}-R-0 + count=$($LFS getstripe $name | awk '/lcm_mirror_count/ { print $2 }') + [ $count -eq 3 ] || error "(9) $DIR/$tdir/f0 has $count mirrors" + + count=$($LFS getstripe $name | awk '/lcm_entry_count/ { print $2 }') + [ $count -eq 6 ] || error "(10) $DIR/$tdir/f0 has $count entries" + + $LFS getstripe $name | grep "lcme_mirror_id:.*1" || { + $LFS getstripe $name + error "(11) The 1st of mirror is not recovered" + } + + $LFS getstripe $name | grep "lcme_mirror_id:.*2" || { + $LFS getstripe $name + error "(12) The 2nd of mirror is not recovered" + } + + $LFS getstripe $name | grep "lcme_mirror_id:.*3" || { + $LFS getstripe $name + error "(13) The 3rd of mirror is not recovered" + } +} +run_test 36b "rebuild LOV EA for mirrored file (2)" + +test_36c() { + [ $OSTCOUNT -lt 3 ] && skip "needs >= 3 OSTs" && return + + echo "#####" + echo "The mirrored file has been modified, not resynced yet, then " + echo "lost its MDT-object, but relatd OST-objects are still there. " + echo "The layout LFSCK should rebuild the LOV EA and relatd status " + echo "with the PFID EA of related OST-object(s) belong to the file. 
" + echo "#####" + + check_mount_and_prep + + $LFS setstripe -N -E 1M -o 0,1 -E -1 -o 2 -N -E 2M -o 1,2 -E -1 -o 0 \ + $DIR/$tdir/f0 || + error "(0) Fail to create mirror file $DIR/$tdir/f0" + + local fid=$($LFS path2fid $DIR/$tdir/f0) + + # The 1st dd && resync makes all related OST-objects have been written + dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 || + error "(1.1) Fail to write $DIR/$tdir/f0" + $LFS mirror resync $DIR/$tdir/f0 || + error "(1.2) Fail to resync $DIR/$tdir/f0" + # The 2nd dd makes one mirror to be stale + dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=4 || + error "(1.3) Fail to write $DIR/$tdir/f0" + + cancel_lru_locks mdc + cancel_lru_locks osc + + $LFS getstripe $DIR/$tdir/f0 || + error "(2) Fail to getstripe for $DIR/$tdir/f0" + + local saved_flags1=$($LFS getstripe $DIR/$tdir/f0 | head -n 10 | + awk '/lcme_flags/ { print $2 }') + local saved_flags2=$($LFS getstripe $DIR/$tdir/f0 | tail -n 10 | + awk '/lcme_flags/ { print $2 }') + + echo "Inject failure, to simulate the case of missing the MDT-object" + #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616 + do_facet mds1 $LCTL set_param fail_loc=0x1616 + rm -f $DIR/$tdir/f0 || error "(3) Fail to remove $DIR/$tdir/f0" + + sync + sleep 2 + do_facet mds1 $LCTL set_param fail_loc=0 + + echo "Trigger layout LFSCK on all devices to find out orphan OST-object" + $START_LAYOUT -r -o || error "(4) Fail to start LFSCK for layout!" + + for k in $(seq $MDSCOUNT); do + # The LFSCK status query interval is 30 seconds. For the case + # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough + # time to guarantee the status sync up. 
+ wait_update_facet mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" 32 || + error "(5) MDS${k} is not the expected 'completed'" + done + + for k in $(seq $OSTCOUNT); do + local cur_status=$(do_facet ost${k} $LCTL get_param -n \ + obdfilter.$(facet_svc ost${k}).lfsck_layout | + awk '/^status/ { print $2 }') + [ "$cur_status" == "completed" ] || + error "(6) OST${k} Expect 'completed', but got '$cur_status'" + done + + local count=$(do_facet mds1 $LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_layout | + awk '/^repaired_orphan/ { print $2 }') + [ $count -eq 6 ] || error "(7) Expect 6 fixed on mds1, but got: $count" + + local name=$MOUNT/.lustre/lost+found/MDT0000/${fid}-R-0 + count=$($LFS getstripe $name | awk '/lcm_mirror_count/ { print $2 }') + [ $count -eq 2 ] || error "(8) $DIR/$tdir/f0 has $count mirrors" + + count=$($LFS getstripe $name | awk '/lcm_entry_count/ { print $2 }') + [ $count -eq 4 ] || error "(9) $DIR/$tdir/f0 has $count entries" + + local flags=$($LFS getstripe $name | head -n 10 | + awk '/lcme_flags/ { print $2 }') + [ "$flags" == "$saved_flags1" ] || { + $LFS getstripe $name + error "(10) expect flags $saved_flags1, got $flags" + } + + flags=$($LFS getstripe $name | tail -n 10 | + awk '/lcme_flags/ { print $2 }') + [ "$flags" == "$saved_flags2" ] || { + $LFS getstripe $name + error "(11) expect flags $saved_flags2, got $flags" + } +} +run_test 36c "rebuild LOV EA for mirrored file (3)" + # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index dae1a98..ab8be2a 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -795,7 +795,9 @@ check_lov_comp_md_entry_v1(void) CHECK_MEMBER(lov_comp_md_entry_v1, lcme_extent); CHECK_MEMBER(lov_comp_md_entry_v1, lcme_offset); CHECK_MEMBER(lov_comp_md_entry_v1, lcme_size); - CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding); + 
CHECK_MEMBER(lov_comp_md_entry_v1, lcme_layout_gen); + CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding_1); + CHECK_MEMBER(lov_comp_md_entry_v1, lcme_padding_2); CHECK_VALUE_X(LCME_FL_INIT); CHECK_VALUE_X(LCME_FL_NEG); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 116f70a..cb64cd4 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -1753,10 +1753,18 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_size)); LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_size) == 4, "found %lld\n", (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_size)); - LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding) == 32, "found %lld\n", - (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding)); - LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding)); + LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_layout_gen) == 32, "found %lld\n", + (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_layout_gen)); + LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_layout_gen) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_layout_gen)); + LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_1) == 36, "found %lld\n", + (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_1)); + LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_1)); + LASSERTF((int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_2) == 40, "found %lld\n", + (long long)(int)offsetof(struct lov_comp_md_entry_v1, lcme_padding_2)); + LASSERTF((int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct 
lov_comp_md_entry_v1 *)0)->lcme_padding_2)); LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n", (unsigned)LCME_FL_INIT); LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n", -- 1.8.3.1