From fc46da199378d12f1c3d8b4b9200b815b57e8af7 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Fri, 31 Jan 2014 14:55:02 +0800 Subject: [PATCH] LU-3593 lfsck: repair inconsistent layout EA The layout EA stored on the MDT-object records not only the file layout but also some information which indicates the layout owner, such as lov_mds_md.lmm_oi. This information is generated from the MDT-object FID; with it we can know which file the layout EA belongs to. In the LFSCK phase II, we need to verify whether such information in the layout EA is correct or not by recalculating it from the MDT-object FID. If an inconsistency is found, trust the MDT-object FID rather than the FID information in the layout EA, and repair the latter. Signed-off-by: Fan Yong Change-Id: I3c31e19e9fabe66fe7ffdba2fe8569795ae49b4a Reviewed-on: http://review.whamcloud.com/7456 Reviewed-by: Oleg Drokin Tested-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/lfsck/lfsck_internal.h | 17 ++ lustre/lfsck/lfsck_layout.c | 430 ++++++++++++++++++++++++++++++++++++++++-- lustre/lfsck/lfsck_lib.c | 1 + lustre/lod/lod_dev.c | 26 +-- lustre/lod/lod_internal.h | 3 +- lustre/lod/lod_lov.c | 5 +- lustre/lod/lod_object.c | 105 +++++------ lustre/tests/sanity-lfsck.sh | 47 ++++- 9 files changed, 537 insertions(+), 98 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 24671c3..369cde6 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -500,6 +500,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c #define OBD_FAIL_LFSCK_SKIP_LASTID 0x160d #define OBD_FAIL_LFSCK_DELAY4 0x160e +#define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f #define OBD_FAIL_LFSCK_NOTIFY_NET 0x16f0 #define OBD_FAIL_LFSCK_QUERY_NET 0x16f1 diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index 01d3c60..a97947a 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -521,6 +521,7 @@ 
struct lfsck_thread_info { struct lu_name lti_name; struct lu_buf lti_buf; struct lu_buf lti_linkea_buf; + struct lu_buf lti_big_buf; struct lu_fid lti_fid; struct lu_fid lti_fid2; struct lu_attr lti_la; @@ -538,6 +539,9 @@ struct lfsck_thread_info { struct lfsck_request lti_lr; struct lfsck_async_interpret_args lti_laia; struct lfsck_stop lti_stop; + ldlm_policy_data_t lti_policy; + struct ldlm_res_id lti_resid; + struct filter_fid_old lti_pfid; }; /* lfsck_lib.c */ @@ -743,6 +747,19 @@ static inline void lfsck_object_put(const struct lu_env *env, lu_object_put(env, &obj->do_lu); } +static inline struct dt_object * +lfsck_object_find_by_dev(const struct lu_env *env, struct dt_device *dev, + const struct lu_fid *fid) +{ + struct dt_object *obj; + + obj = lu2dt(lu_object_find_slice(env, dt2lu_dev(dev), fid, NULL)); + if (unlikely(obj == NULL)) + return ERR_PTR(-ENOENT); + + return obj; +} + static inline struct lfsck_tgt_desc *lfsck_tgt_get(struct lfsck_tgt_descs *ltds, __u32 index) { diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index efe305a..3ee9592 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -133,6 +133,34 @@ struct lfsck_layout_slave_async_args { struct lfsck_layout_slave_target *llsaa_llst; }; +static struct lfsck_layout_object * +lfsck_layout_object_init(const struct lu_env *env, struct dt_object *obj, + __u16 gen) +{ + struct lfsck_layout_object *llo; + int rc; + + OBD_ALLOC_PTR(llo); + if (llo == NULL) + return ERR_PTR(-ENOMEM); + + rc = dt_attr_get(env, obj, &llo->llo_attr, BYPASS_CAPA); + if (rc != 0) { + OBD_FREE_PTR(llo); + + return ERR_PTR(rc); + } + + lu_object_get(&obj->do_lu); + llo->llo_obj = obj; + /* The gen can be used to check whether some others have changed the + * file layout after LFSCK pre-fetching but before real verification. 
*/ + llo->llo_gen = gen; + atomic_set(&llo->llo_ref, 1); + + return llo; +} + static inline void lfsck_layout_llst_put(struct lfsck_layout_slave_target *llst) { @@ -222,6 +250,26 @@ static inline void lfsck_layout_object_put(const struct lu_env *env, } } +static struct lfsck_layout_req * +lfsck_layout_req_init(struct lfsck_layout_object *parent, + struct dt_object *child, __u32 ost_idx, __u32 lov_idx) +{ + struct lfsck_layout_req *llr; + + OBD_ALLOC_PTR(llr); + if (llr == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&llr->llr_list); + atomic_inc(&parent->llo_ref); + llr->llr_parent = parent; + llr->llr_child = child; + llr->llr_ost_idx = ost_idx; + llr->llr_lov_idx = lov_idx; + + return llr; +} + static inline void lfsck_layout_req_fini(const struct lu_env *env, struct lfsck_layout_req *llr) { @@ -242,6 +290,70 @@ static inline bool lfsck_layout_req_empty(struct lfsck_layout_master_data *llmd) return empty; } +static int lfsck_layout_get_lovea(const struct lu_env *env, + struct dt_object *obj, + struct lu_buf *buf, ssize_t *buflen) +{ + int rc; + +again: + rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV, BYPASS_CAPA); + if (rc == -ERANGE) { + rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV, + BYPASS_CAPA); + if (rc <= 0) + return rc; + + lu_buf_realloc(buf, rc); + if (buflen != NULL) + *buflen = buf->lb_len; + + if (buf->lb_buf == NULL) + return -ENOMEM; + + goto again; + } + + if (rc == -ENODATA) + rc = 0; + + if (rc <= 0) + return rc; + + if (unlikely(buf->lb_buf == NULL)) { + lu_buf_alloc(buf, rc); + if (buflen != NULL) + *buflen = buf->lb_len; + + if (buf->lb_buf == NULL) + return -ENOMEM; + + goto again; + } + + return rc; +} + +static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm) +{ + __u32 magic; + __u32 patten; + + magic = le32_to_cpu(lmm->lmm_magic); + /* If magic crashed, keep it there. Sometime later, during OST-object + * orphan handling, if some OST-object(s) back-point to it, it can be + * verified and repaired. 
*/ + if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3) + return -EINVAL; + + patten = le32_to_cpu(lmm->lmm_pattern); + /* XXX: currently, we only support LOV_PATTERN_RAID0. */ + if (patten != LOV_PATTERN_RAID0) + return -EOPNOTSUPP; + + return 0; +} + static void lfsck_layout_le_to_cpu(struct lfsck_layout *des, const struct lfsck_layout *src) { @@ -428,12 +540,6 @@ static int fid_is_for_ostobj(const struct lu_env *env, struct dt_device *dt, if (rc == sizeof(*lma)) { lustre_lma_swab(lma); - /* Generally, the low layer OSD create handler or OI scrub - * will set the LMAC_FID_ON_OST for all external visible - * OST-objects. But to make the otable-based iteration to - * be independent from OI scrub in spite of it got failure - * or not, we check the LMAC_FID_ON_OST here to guarantee - * that the LFSCK will not repair something by wrong. */ return lma->lma_compat & LMAC_FID_ON_OST ? 1 : 0; } @@ -1223,6 +1329,45 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env, return rc; } +static int lfsck_layout_lock(const struct lu_env *env, + struct lfsck_component *com, + struct dt_object *obj, + struct lustre_handle *lh, __u64 bits) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + ldlm_policy_data_t *policy = &info->lti_policy; + struct ldlm_res_id *resid = &info->lti_resid; + struct lfsck_instance *lfsck = com->lc_lfsck; + __u64 flags = LDLM_FL_ATOMIC_CB; + int rc; + + LASSERT(lfsck->li_namespace != NULL); + + memset(policy, 0, sizeof(*policy)); + policy->l_inodebits.bits = bits; + fid_build_reg_res_name(lfsck_dto2fid(obj), resid); + rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_IBITS, + policy, LCK_EX, &flags, ldlm_blocking_ast, + ldlm_completion_ast, NULL, NULL, 0, + LVB_T_NONE, NULL, lh); + if (rc == ELDLM_OK) { + rc = 0; + } else { + memset(lh, 0, sizeof(*lh)); + rc = -EIO; + } + + return rc; +} + +static void lfsck_layout_unlock(struct lustre_handle *lh) +{ + if (lustre_handle_is_used(lh)) { + ldlm_lock_decref(lh, LCK_EX); + 
memset(lh, 0, sizeof(*lh)); + } +} + static int lfsck_layout_scan_orphan(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd) @@ -1983,23 +2128,272 @@ static int lfsck_layout_master_prep(const struct lu_env *env, RETURN(rc); } +/* Pre-fetch the attribute for each stripe in the given layout EA. */ +static int lfsck_layout_scan_stripes(const struct lu_env *env, + struct lfsck_component *com, + struct dt_object *parent, + struct lov_mds_md_v1 *lmm) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct lfsck_layout *lo = com->lc_file_ram; + struct lfsck_layout_master_data *llmd = com->lc_data; + struct lfsck_layout_object *llo = NULL; + struct lov_ost_data_v1 *objs; + struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs; + struct ptlrpc_thread *mthread = &lfsck->li_thread; + struct ptlrpc_thread *athread = &llmd->llmd_thread; + struct l_wait_info lwi = { 0 }; + struct lu_buf *buf; + int rc = 0; + int i; + __u16 count; + __u16 gen; + ENTRY; + + buf = lfsck_buf_get(env, &info->lti_pfid, + sizeof(struct filter_fid_old)); + count = le16_to_cpu(lmm->lmm_stripe_count); + gen = le16_to_cpu(lmm->lmm_layout_gen); + if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1) + objs = &(lmm->lmm_objects[0]); + else + objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; + + for (i = 0; i < count; i++, objs++) { + struct lu_fid *fid = &info->lti_fid; + struct ost_id *oi = &info->lti_oi; + struct lfsck_layout_req *llr; + struct lfsck_tgt_desc *tgt = NULL; + struct dt_object *cobj = NULL; + __u32 index = + le32_to_cpu(objs->l_ost_idx); + bool wakeup = false; + + l_wait_event(mthread->t_ctl_waitq, + bk->lb_async_windows == 0 || + llmd->llmd_prefetched < bk->lb_async_windows || + !thread_is_running(mthread) || + thread_is_stopped(athread), + &lwi); + + if (unlikely(!thread_is_running(mthread)) || + thread_is_stopped(athread)) + GOTO(out, rc = 0); + + 
ostid_le_to_cpu(&objs->l_ost_oi, oi); + ostid_to_fid(fid, oi, index); + tgt = lfsck_tgt_get(ltds, index); + if (unlikely(tgt == NULL)) { + lo->ll_flags |= LF_INCOMPLETE; + goto next; + } + + cobj = lfsck_object_find_by_dev(env, tgt->ltd_tgt, fid); + if (IS_ERR(cobj)) { + rc = PTR_ERR(cobj); + goto next; + } + + rc = dt_declare_attr_get(env, cobj, BYPASS_CAPA); + if (rc != 0) + goto next; + + rc = dt_declare_xattr_get(env, cobj, buf, XATTR_NAME_FID, + BYPASS_CAPA); + if (rc != 0) + goto next; + + if (llo == NULL) { + llo = lfsck_layout_object_init(env, parent, gen); + if (IS_ERR(llo)) { + rc = PTR_ERR(llo); + goto next; + } + } + + llr = lfsck_layout_req_init(llo, cobj, index, i); + if (IS_ERR(llr)) { + rc = PTR_ERR(llr); + goto next; + } + + cobj = NULL; + spin_lock(&llmd->llmd_lock); + if (llmd->llmd_assistant_status < 0) { + spin_unlock(&llmd->llmd_lock); + lfsck_layout_req_fini(env, llr); + lfsck_tgt_put(tgt); + RETURN(llmd->llmd_assistant_status); + } + + list_add_tail(&llr->llr_list, &llmd->llmd_req_list); + if (llmd->llmd_prefetched == 0) + wakeup = true; + + llmd->llmd_prefetched++; + spin_unlock(&llmd->llmd_lock); + if (wakeup) + wake_up_all(&athread->t_ctl_waitq); + +next: + down_write(&com->lc_sem); + com->lc_new_checked++; + if (rc < 0) + lo->ll_objs_failed_phase1++; + up_write(&com->lc_sem); + + if (cobj != NULL && !IS_ERR(cobj)) + lu_object_put(env, &cobj->do_lu); + + if (likely(tgt != NULL)) + lfsck_tgt_put(tgt); + + if (rc < 0 && bk->lb_param & LPF_FAILOUT) + GOTO(out, rc); + } + + GOTO(out, rc = 0); + +out: + if (llo != NULL && !IS_ERR(llo)) + lfsck_layout_object_put(env, llo); + + return rc; +} + +/* For the given object, read its layout EA locally. For each stripe, pre-fetch + * the OST-object's attribute and generate an structure lfsck_layout_req on the + * list ::llmd_req_list. + * + * For each request on above list, the lfsck_layout_assistant thread compares + * the OST side attribute with local attribute, if inconsistent, then repair it. 
+ * + * All above processing is async mode with pipeline. */ static int lfsck_layout_master_exec_oit(const struct lu_env *env, struct lfsck_component *com, struct dt_object *obj) { - /* XXX: To be implemented in other patches. - * - * For the given object, read its layout EA locally. For each stripe, - * pre-fetch the OST-object's attribute and generate an structure - * lfsck_layout_req on the list ::llmd_req_list. - * - * For each request on the ::llmd_req_list, the lfsck_layout_assistant - * thread will compare the OST side attribute with local attribute, - * if inconsistent, then repair it. - * - * All above processing is async mode with pipeline. */ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct ost_id *oi = &info->lti_oi; + struct lfsck_layout *lo = com->lc_file_ram; + struct lfsck_layout_master_data *llmd = com->lc_data; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct thandle *handle = NULL; + struct lu_buf *buf = &info->lti_big_buf; + struct lov_mds_md_v1 *lmm = NULL; + struct dt_device *dev = lfsck->li_bottom; + struct lustre_handle lh = { 0 }; + ssize_t buflen = buf->lb_len; + int rc = 0; + bool locked = false; + bool stripe = false; + ENTRY; - return 0; + if (!S_ISREG(lfsck_object_type(obj))) + GOTO(out, rc = 0); + + if (llmd->llmd_assistant_status < 0) + GOTO(out, rc = -ESRCH); + + fid_to_lmm_oi(lfsck_dto2fid(obj), oi); + lmm_oi_cpu_to_le(oi, oi); + dt_read_lock(env, obj, 0); + locked = true; + +again: + rc = lfsck_layout_get_lovea(env, obj, buf, &buflen); + if (rc <= 0) + GOTO(out, rc); + + buf->lb_len = rc; + lmm = buf->lb_buf; + rc = lfsck_layout_verify_header(lmm); + if (rc != 0) + GOTO(out, rc); + + if (memcmp(oi, &lmm->lmm_oi, sizeof(*oi)) == 0) + GOTO(out, stripe = true); + + /* Inconsistent lmm_oi, should be repaired. 
*/ + CDEBUG(D_LFSCK, "Repair bad lmm_oi for "DFID"\n", + PFID(lfsck_dto2fid(obj))); + + if (bk->lb_param & LPF_DRYRUN) { + down_write(&com->lc_sem); + lo->ll_objs_repaired[LLIT_OTHERS - 1]++; + up_write(&com->lc_sem); + + GOTO(out, stripe = true); + } + + if (!lustre_handle_is_used(&lh)) { + dt_read_unlock(env, obj); + locked = false; + buf->lb_len = buflen; + rc = lfsck_layout_lock(env, com, obj, &lh, + MDS_INODELOCK_LAYOUT | + MDS_INODELOCK_XATTR); + if (rc != 0) + GOTO(out, rc); + + handle = dt_trans_create(env, dev); + if (IS_ERR(handle)) + GOTO(out, rc = PTR_ERR(handle)); + + rc = dt_declare_xattr_set(env, obj, buf, XATTR_NAME_LOV, + LU_XATTR_REPLACE, handle); + if (rc != 0) + GOTO(out, rc); + + rc = dt_trans_start_local(env, dev, handle); + if (rc != 0) + GOTO(out, rc); + + dt_write_lock(env, obj, 0); + locked = true; + + goto again; + } + + lmm->lmm_oi = *oi; + rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LOV, + LU_XATTR_REPLACE, handle, BYPASS_CAPA); + if (rc != 0) + GOTO(out, rc); + + down_write(&com->lc_sem); + lo->ll_objs_repaired[LLIT_OTHERS - 1]++; + up_write(&com->lc_sem); + + GOTO(out, stripe = true); + +out: + if (locked) { + if (lustre_handle_is_used(&lh)) + dt_write_unlock(env, obj); + else + dt_read_unlock(env, obj); + } + + if (handle != NULL && !IS_ERR(handle)) + dt_trans_stop(env, dev, handle); + + lfsck_layout_unlock(&lh); + if (stripe) { + rc = lfsck_layout_scan_stripes(env, com, obj, lmm); + } else { + down_write(&com->lc_sem); + com->lc_new_checked++; + if (rc < 0) + lo->ll_objs_failed_phase1++; + up_write(&com->lc_sem); + } + buf->lb_len = buflen; + + return rc; } static int lfsck_layout_slave_exec_oit(const struct lu_env *env, diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 3eb3a02..9dd35f9 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -51,6 +51,7 @@ static void lfsck_key_fini(const struct lu_context *ctx, struct lfsck_thread_info *info = data; lu_buf_free(&info->lti_linkea_buf); + 
lu_buf_free(&info->lti_big_buf); OBD_FREE_PTR(info); } diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index 0a77113..65a8244 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -57,7 +57,7 @@ * \param type indidcate the FID is on MDS or OST. **/ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, - const struct lu_fid *fid, __u32 *tgt, int type) + const struct lu_fid *fid, __u32 *tgt, int *type) { struct lu_seq_range range = { 0 }; struct lu_server_fld *server_fld; @@ -65,24 +65,29 @@ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, ENTRY; LASSERTF(fid_is_sane(fid), "Invalid FID "DFID"\n", PFID(fid)); + if (fid_is_idif(fid)) { *tgt = fid_idif_ost_idx(fid); + *type = LU_SEQ_RANGE_OST; RETURN(rc); } if (!lod->lod_initialized || (!fid_seq_in_fldb(fid_seq(fid)))) { LASSERT(lu_site2seq(lod2lu_dev(lod)->ld_site) != NULL); + *tgt = lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_node_id; + *type = LU_SEQ_RANGE_MDT; RETURN(rc); } server_fld = lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_server_fld; - fld_range_set_type(&range, type); + fld_range_set_type(&range, *type); rc = fld_server_lookup(env, server_fld, fid_seq(fid), &range); if (rc) RETURN(rc); *tgt = range.lsr_index; + *type = range.lsr_flags; CDEBUG(D_INFO, "LOD: got tgt %x for sequence: " LPX64"\n", *tgt, fid_seq(fid)); @@ -91,7 +96,6 @@ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, } extern struct lu_object_operations lod_lu_obj_ops; -extern struct lu_object_operations lod_lu_robj_ops; extern struct dt_object_operations lod_obj_ops; /* Slab for OSD object allocation */ @@ -117,29 +121,17 @@ struct lu_object *lod_object_alloc(const struct lu_env *env, { struct lod_object *lod_obj; struct lu_object *lu_obj; - const struct lu_fid *fid = &hdr->loh_fid; - mdsno_t mds; - int rc = 0; ENTRY; OBD_SLAB_ALLOC_PTR_GFP(lod_obj, lod_object_kmem, __GFP_IO); if (lod_obj == NULL) RETURN(ERR_PTR(-ENOMEM)); - rc = lod_fld_lookup(env, lu2lod_dev(dev), fid, 
&mds, LU_SEQ_RANGE_MDT); - if (rc) { - OBD_SLAB_FREE_PTR(lod_obj, lod_object_kmem); - RETURN(ERR_PTR(rc)); - } - - lod_obj->ldo_mds_num = mds; lu_obj = lod2lu_obj(lod_obj); dt_object_init(&lod_obj->ldo_obj, NULL, dev); lod_obj->ldo_obj.do_ops = &lod_obj_ops; - if (likely(mds == lu_site2seq(dev->ld_site)->ss_node_id)) - lu_obj->lo_ops = &lod_lu_obj_ops; - else - lu_obj->lo_ops = &lod_lu_robj_ops; + lu_obj->lo_ops = &lod_lu_obj_ops; + RETURN(lu_obj); } diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index b98caf2..2a00890 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -247,7 +247,6 @@ struct lod_object { __u16 ldo_def_stripenr; __u16 ldo_def_stripe_offset; struct lod_dir_stripe_info *ldo_dir_stripe; - mdsno_t ldo_mds_num; }; #define ldo_dir_stripe_offset ldo_dir_stripe->ldsi_stripe_offset @@ -356,7 +355,7 @@ static inline struct lod_thread_info *lod_env_info(const struct lu_env *env) /* lod_dev.c */ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, - const struct lu_fid *fid, mdsno_t *tgt, int flags); + const struct lu_fid *fid, __u32 *tgt, int *flags); /* lod_lov.c */ void lod_getref(struct lod_tgt_descs *ltd); void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd); diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index 5ae4a43..a8b738b 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -536,6 +536,8 @@ int lod_generate_and_set_lovea(const struct lu_env *env, lmm->lmm_magic = cpu_to_le32(magic); lmm->lmm_pattern = cpu_to_le32(lo->ldo_pattern); fid_to_lmm_oi(fid, &lmm->lmm_oi); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_LMMOI)) + lmm->lmm_oi.oi.oi_id++; lmm_oi_cpu_to_le(&lmm->lmm_oi, &lmm->lmm_oi); lmm->lmm_stripe_size = cpu_to_le32(lo->ldo_stripe_size); lmm->lmm_stripe_count = cpu_to_le16(lo->ldo_stripenr); @@ -557,6 +559,7 @@ int lod_generate_and_set_lovea(const struct lu_env *env, const struct lu_fid *fid; struct lod_device *lod; __u32 index; + int type = 
LU_SEQ_RANGE_OST; lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev); LASSERT(lo->ldo_stripe[i]); @@ -567,7 +570,7 @@ int lod_generate_and_set_lovea(const struct lu_env *env, ostid_cpu_to_le(&info->lti_ostid, &objs[i].l_ost_oi); objs[i].l_ost_gen = cpu_to_le32(0); - rc = lod_fld_lookup(env, lod, fid, &index, LU_SEQ_RANGE_OST); + rc = lod_fld_lookup(env, lod, fid, &index, &type); if (rc < 0) { CERROR("%s: Can not locate "DFID": rc = %d\n", lod2obd(lod)->obd_name, PFID(fid), rc); diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index ba83d07..2cbd028 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "lod_internal.h" @@ -2075,23 +2076,57 @@ static const struct dt_body_operations lod_body_lnk_ops = { .dbo_write = lod_write }; -static int lod_object_init(const struct lu_env *env, struct lu_object *o, +static int lod_object_init(const struct lu_env *env, struct lu_object *lo, const struct lu_object_conf *conf) { - struct lod_device *d = lu2lod_dev(o->lo_dev); - struct lu_object *below; - struct lu_device *under; + struct lod_device *lod = lu2lod_dev(lo->lo_dev); + struct lu_device *cdev = NULL; + struct lu_object *cobj; + struct lod_tgt_descs *ltd = NULL; + struct lod_tgt_desc *tgt; + mdsno_t idx = 0; + int type = LU_SEQ_RANGE_ANY; + int rc; ENTRY; - /* - * create local object - */ - under = &d->lod_child->dd_lu_dev; - below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under); - if (below == NULL) + rc = lod_fld_lookup(env, lod, lu_object_fid(lo), &idx, &type); + if (rc != 0) + RETURN(rc); + + if (type == LU_SEQ_RANGE_MDT && + idx == lu_site2seq(lo->lo_dev->ld_site)->ss_node_id) { + cdev = &lod->lod_child->dd_lu_dev; + } else if (type == LU_SEQ_RANGE_MDT) { + ltd = &lod->lod_mdt_descs; + lod_getref(ltd); + } else if (type == LU_SEQ_RANGE_OST) { + ltd = &lod->lod_ost_descs; + lod_getref(ltd); + } else { + LBUG(); + } + + if (ltd != NULL) { + if (ltd->ltd_tgts_size > idx && + 
cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx)) { + tgt = LTD_TGT(ltd, idx); + + LASSERT(tgt != NULL); + LASSERT(tgt->ltd_tgt != NULL); + + cdev = &(tgt->ltd_tgt->dd_lu_dev); + } + lod_putref(lod, ltd); + } + + if (unlikely(cdev == NULL)) + RETURN(-ENOENT); + + cobj = cdev->ld_ops->ldo_object_alloc(env, lo->lo_header, cdev); + if (unlikely(cobj == NULL)) RETURN(-ENOMEM); - lu_object_add(o, below); + lu_object_add(lo, cobj); RETURN(0); } @@ -2170,51 +2205,3 @@ struct lu_object_operations lod_lu_obj_ops = { .loo_object_release = lod_object_release, .loo_object_print = lod_object_print, }; - -/** - * Init remote lod object - */ -static int lod_robject_init(const struct lu_env *env, struct lu_object *lo, - const struct lu_object_conf *conf) -{ - struct lod_device *lod = lu2lod_dev(lo->lo_dev); - struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; - struct lu_device *c_dev = NULL; - struct lu_object *c_obj; - int i; - ENTRY; - - lod_getref(ltd); - if (ltd->ltd_tgts_size > 0) { - cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { - struct lod_tgt_desc *tgt; - tgt = LTD_TGT(ltd, i); - LASSERT(tgt && tgt->ltd_tgt); - if (tgt->ltd_index == - lu2lod_obj(lo)->ldo_mds_num) { - c_dev = &(tgt->ltd_tgt->dd_lu_dev); - break; - } - } - } - lod_putref(lod, ltd); - - if (unlikely(c_dev == NULL)) - RETURN(-ENOENT); - - c_obj = c_dev->ld_ops->ldo_object_alloc(env, lo->lo_header, c_dev); - if (unlikely(c_obj == NULL)) - RETURN(-ENOMEM); - - lu_object_add(lo, c_obj); - - RETURN(0); -} - -struct lu_object_operations lod_lu_robj_ops = { - .loo_object_init = lod_robject_init, - .loo_object_start = lod_object_start, - .loo_object_free = lod_object_free, - .loo_object_release = lod_object_release, - .loo_object_print = lod_object_print, -}; diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 898b373..9b48d7c 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -43,7 +43,7 @@ check_and_setup_lustre ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c" [[ $(lustre_version_code 
ost1) -lt $(version_code 2.5.50) ]] && - ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12" + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13" build_test_filter @@ -54,10 +54,14 @@ OST_DEV="${FSNAME}-OST0000" MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/}) START_NAMESPACE="do_facet $SINGLEMDS \ $LCTL lfsck_start -M ${MDT_DEV} -t namespace" +START_LAYOUT="do_facet $SINGLEMDS \ + $LCTL lfsck_start -M ${MDT_DEV} -t layout" START_LAYOUT_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t layout" STOP_LFSCK="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}" SHOW_NAMESPACE="do_facet $SINGLEMDS \ $LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace" +SHOW_LAYOUT="do_facet $SINGLEMDS \ + $LCTL get_param -n mdd.${MDT_DEV}.lfsck_layout" SHOW_LAYOUT_ON_OST="do_facet ost1 \ $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout" MOUNT_OPTS_SCRUB="-o user_xattr" @@ -1229,6 +1233,47 @@ test_12() { } run_test 12 "single command to trigger LFSCK on all devices" +test_13() { + echo "#####" + echo "The lmm_oi in layout EA should be consistent with the MDT-object" + echo "FID; otherwise, the LFSCK should re-generate the lmm_oi from the" + echo "MDT-object FID." + echo "#####" + + echo "stopall" + stopall > /dev/null + echo "formatall" + formatall > /dev/null + echo "setupall" + setupall > /dev/null + + mkdir -p $DIR/$tdir + + echo "Inject failure stub to simulate bad lmm_oi" + #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160f + createmany -o $DIR/$tdir/f 32 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + + echo "stopall to cleanup object cache" + stopall > /dev/null + echo "setupall" + setupall > /dev/null + + echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them" + $START_LAYOUT || error "(1) Fail to start LFSCK for layout!" 
+ + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2 + + local repaired=$($SHOW_LAYOUT | + awk '/^repaired_others/ { print $2 }') + [ $repaired -eq 32 ] || + error "(3) Fail to repair crashed lmm_oi: $repaired" +} +run_test 13 "LFSCK can repair crashed lmm_oi" + $LCTL set_param debug=-lfsck > /dev/null || true # restore MDS/OST size -- 1.8.3.1