From: Fan Yong Date: Fri, 8 Aug 2014 19:16:14 +0000 (+0800) Subject: LU-5506 lfsck: skip orphan MDT-object handling for failed MDTs X-Git-Tag: 2.6.90~83 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=846dd0cb79fc309703afdaae7622e4ddeb0e2e49;ds=sidebyside LU-5506 lfsck: skip orphan MDT-object handling for failed MDTs The namespace LFSCK records the failed MDTs in the LFSCK tracing file (lfsck_namespace) during the first-stage scanning. When it moves to the second-stage scanning, it therefore knows which (failed) MDT(s) contain MDT-objects that may have unverified name entries on other MDTs, and it skips the orphan MDT-object handling on those MDTs that have ever failed. The other MDTs are handled as normal, without being affected by the failed MDTs. Signed-off-by: Fan Yong Change-Id: Ic53d2fa24f81fb918d130dbc27694d3b68dc19d6 Reviewed-on: http://review.whamcloud.com/11444 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger --- diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index a51610d..d3dc0b0 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1711,6 +1711,7 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi, #define XATTR_NAME_SOM "trusted.som" #define XATTR_NAME_HSM "trusted.hsm" #define XATTR_NAME_LFSCK_NAMESPACE "trusted.lfsck_namespace" +#define XATTR_NAME_LFSCK_BITMAP "trusted.lfsck_bitmap" #define XATTR_NAME_MAX_LEN 32 /* increase this, if there is longer name.
*/ struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */ diff --git a/lustre/lfsck/lfsck_engine.c b/lustre/lfsck/lfsck_engine.c index 34277e2..4d232d9 100644 --- a/lustre/lfsck/lfsck_engine.c +++ b/lustre/lfsck/lfsck_engine.c @@ -1374,11 +1374,14 @@ again: list = &ltd->ltd_layout_list; gen = &ltd->ltd_layout_gen; } else { + struct lfsck_namespace *ns = com->lc_file_ram; + ltd = list_entry(lad->lad_mdt_list.next, struct lfsck_tgt_desc, ltd_namespace_list); list = &ltd->ltd_namespace_list; gen = &ltd->ltd_namespace_gen; + lr->lr_flags2 = ns->ln_flags & ~LF_INCOMPLETE; } if (*gen == lad->lad_touch_gen) @@ -1386,6 +1389,9 @@ again: *gen = lad->lad_touch_gen; list_move_tail(list, &lad->lad_mdt_list); + if (ltd->ltd_namespace_failed) + continue; + atomic_inc(&ltd->ltd_ref); laia->laia_ltd = ltd; spin_unlock(&ltds->ltd_lock); diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index a00f451..b32b147 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -219,8 +219,13 @@ struct lfsck_namespace { /* How many lost name entries have been re-inserted. */ __u64 ln_lost_dirent_repaired; + /* The size of MDT targets bitmap with nbits. Such bitmap records + * the MDTs that contain non-verified MDT-objects. */ + __u32 ln_bitmap_size; + + __u32 ln_reserved_1; /* For further using. 256-bytes aligned now.
*/ - __u64 ln_reserved[26]; + __u64 ln_reserved[25]; }; enum lfsck_layout_inconsistency_type { @@ -382,7 +387,8 @@ struct lfsck_tgt_desc { __u32 ltd_namespace_gen; unsigned int ltd_dead:1, ltd_layout_done:1, - ltd_namespace_done:1; + ltd_namespace_done:1, + ltd_namespace_failed:1; }; struct lfsck_tgt_desc_idx { diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index b5e5d80..f91bdcd 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -131,6 +131,7 @@ static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *dst, dst->ln_bad_type_repaired = le64_to_cpu(src->ln_bad_type_repaired); dst->ln_lost_dirent_repaired = le64_to_cpu(src->ln_lost_dirent_repaired); + dst->ln_bitmap_size = le32_to_cpu(src->ln_bitmap_size); } static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst, @@ -176,6 +177,7 @@ static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst, dst->ln_bad_type_repaired = cpu_to_le64(src->ln_bad_type_repaired); dst->ln_lost_dirent_repaired = cpu_to_le64(src->ln_lost_dirent_repaired); + dst->ln_bitmap_size = cpu_to_le32(src->ln_bitmap_size); } static void lfsck_namespace_record_failure(const struct lu_env *env, @@ -200,6 +202,81 @@ static void lfsck_namespace_record_failure(const struct lu_env *env, } /** + * Load the MDT bitmap from the lfsck_namespace tracing file. 
+ * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * + * \retval positive number for data corruption + * \retval 0 for success + * \retval negative error number on failure + */ +static int lfsck_namespace_load_bitmap(const struct lu_env *env, + struct lfsck_component *com) +{ + struct dt_object *obj = com->lc_obj; + struct lfsck_assistant_data *lad = com->lc_data; + struct lfsck_namespace *ns = com->lc_file_ram; + cfs_bitmap_t *bitmap = lad->lad_bitmap; + ssize_t size; + __u32 nbits; + int rc; + ENTRY; + + if (com->lc_lfsck->li_mdt_descs.ltd_tgts_bitmap->size > + ns->ln_bitmap_size) + nbits = com->lc_lfsck->li_mdt_descs.ltd_tgts_bitmap->size; + else + nbits = ns->ln_bitmap_size; + + if (unlikely(nbits < BITS_PER_LONG)) + nbits = BITS_PER_LONG; + + if (nbits > bitmap->size) { + __u32 new_bits = bitmap->size; + cfs_bitmap_t *new_bitmap; + + while (new_bits < nbits) + new_bits <<= 1; + + new_bitmap = CFS_ALLOCATE_BITMAP(new_bits); + if (new_bitmap == NULL) + RETURN(-ENOMEM); + + lad->lad_bitmap = new_bitmap; + CFS_FREE_BITMAP(bitmap); + bitmap = new_bitmap; + } + + if (ns->ln_bitmap_size == 0) { + lad->lad_incomplete = 0; + CFS_RESET_BITMAP(bitmap); + + RETURN(0); + } + + size = (ns->ln_bitmap_size + 7) >> 3; + rc = dt_xattr_get(env, obj, + lfsck_buf_get(env, bitmap->data, size), + XATTR_NAME_LFSCK_BITMAP, BYPASS_CAPA); + if (rc == -ERANGE || rc == -ENODATA || rc == 0) + RETURN(1); + + if (rc < 0) + RETURN(rc); + + if (rc != size) + RETURN(rc); + + if (cfs_bitmap_check_empty(bitmap)) + lad->lad_incomplete = 0; + else + lad->lad_incomplete = 1; + + RETURN(0); +} + +/** * \retval +ve: the lfsck_namespace is broken, the caller should reset it. * \retval 0: succeed. * \retval -ve: failed cases. 
@@ -237,17 +314,30 @@ static int lfsck_namespace_load(const struct lu_env *env, } static int lfsck_namespace_store(const struct lu_env *env, - struct lfsck_component *com, bool init) + struct lfsck_component *com) { - struct dt_object *obj = com->lc_obj; - struct lfsck_instance *lfsck = com->lc_lfsck; - struct thandle *handle; - int len = com->lc_file_size; - int rc; + struct dt_object *obj = com->lc_obj; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_assistant_data *lad = com->lc_data; + cfs_bitmap_t *bitmap = NULL; + struct thandle *handle; + __u32 nbits = 0; + int len = com->lc_file_size; + int rc; ENTRY; + if (lad != NULL) { + bitmap = lad->lad_bitmap; + nbits = bitmap->size; + + LASSERT(nbits > 0); + LASSERTF((nbits & 7) == 0, "Invalid nbits %u\n", nbits); + } + + ns->ln_bitmap_size = nbits; lfsck_namespace_cpu_to_le((struct lfsck_namespace *)com->lc_file_disk, - (struct lfsck_namespace *)com->lc_file_ram); + ns); handle = dt_trans_create(env, lfsck->li_bottom); if (IS_ERR(handle)) GOTO(log, rc = PTR_ERR(handle)); @@ -258,15 +348,26 @@ static int lfsck_namespace_store(const struct lu_env *env, if (rc != 0) GOTO(out, rc); + if (bitmap != NULL) { + rc = dt_declare_xattr_set(env, obj, + lfsck_buf_get(env, bitmap->data, nbits >> 3), + XATTR_NAME_LFSCK_BITMAP, 0, handle); + if (rc != 0) + GOTO(out, rc); + } + rc = dt_trans_start_local(env, lfsck->li_bottom, handle); if (rc != 0) GOTO(out, rc); rc = dt_xattr_set(env, obj, lfsck_buf_get(env, com->lc_file_disk, len), - XATTR_NAME_LFSCK_NAMESPACE, - init ? 
LU_XATTR_CREATE : LU_XATTR_REPLACE, - handle, BYPASS_CAPA); + XATTR_NAME_LFSCK_NAMESPACE, 0, handle, BYPASS_CAPA); + if (rc == 0 && bitmap != NULL) + rc = dt_xattr_set(env, obj, + lfsck_buf_get(env, bitmap->data, nbits >> 3), + XATTR_NAME_LFSCK_BITMAP, 0, handle, + BYPASS_CAPA); GOTO(out, rc); @@ -290,7 +391,7 @@ static int lfsck_namespace_init(const struct lu_env *env, ns->ln_magic = LFSCK_NAMESPACE_MAGIC; ns->ln_status = LS_INIT; down_write(&com->lc_sem); - rc = lfsck_namespace_store(env, com, true); + rc = lfsck_namespace_store(env, com); up_write(&com->lc_sem); return rc; } @@ -2068,6 +2169,7 @@ lfsck_namespace_dsd_orphan(const struct lu_env *env, enum lfsck_namespace_inconsistency_type *type) { struct lfsck_thread_info *info = lfsck_env_info(env); + struct lfsck_namespace *ns = com->lc_file_ram; int rc; ENTRY; @@ -2078,6 +2180,17 @@ lfsck_namespace_dsd_orphan(const struct lu_env *env, RETURN(rc); *type = LNIT_MUL_REF; + + /* If the LFSCK is marked as LF_INCOMPLETE, then means some MDT has + * ever tried to verify some remote MDT-object that resides on this + * MDT, but this MDT failed to respond such request. So means there + * may be some remote name entry on other MDT that references this + * object with another name, so we cannot know whether this linkEA + * is valid or not. So keep it there and maybe resolved when next + * LFSCK run. */ + if (ns->ln_flags & LF_INCOMPLETE) + RETURN(0); + /* The unique linkEA is invalid, even if the ".." name entry may be * valid, we still cannot know via which name entry this directory * will be referenced. Then handle it as pure orphan. 
*/ @@ -2126,6 +2239,7 @@ lfsck_namespace_dsd_single(const struct lu_env *env, struct lu_name *cname = &info->lti_name; const struct lu_fid *cfid = lfsck_dto2fid(child); struct lu_fid *tfid = &info->lti_fid3; + struct lfsck_namespace *ns = com->lc_file_ram; struct lfsck_instance *lfsck = com->lc_lfsck; struct dt_object *parent = NULL; int rc = 0; @@ -2153,6 +2267,16 @@ lfsck_namespace_dsd_single(const struct lu_env *env, * name entry the child will be referenced, since all known entries * have been verified during the first-stage scanning. */ if (!dt_object_exists(parent)) { + /* If the LFSCK is marked as LF_INCOMPLETE, then means some MDT + * has ever tried to verify some remote MDT-object that resides + * on this MDT, but this MDT failed to respond such request. So + * means there may be some remote name entry on other MDT that + * references this object with another name, so we cannot know + * whether this linkEA is valid or not. So keep it there and + * maybe resolved when next LFSCK run. */ + if (ns->ln_flags & LF_INCOMPLETE) + GOTO(out, rc = 0); + if (!lustre_handle_is_used(lh) && retry != NULL) { *retry = true; @@ -2208,6 +2332,16 @@ lost_parent: rc = dt_lookup(env, parent, (struct dt_rec *)tfid, (const struct dt_key *)cname->ln_name, BYPASS_CAPA); if (rc == -ENOENT) { + /* If the LFSCK is marked as LF_INCOMPLETE, then means some MDT + * has ever tried to verify some remote MDT-object that resides + * on this MDT, but this MDT failed to respond such request. So + * means there may be some remote name entry on other MDT that + * references this object with another name, so we cannot know + * whether this linkEA is valid or not. So keep it there and + * maybe resolved when next LFSCK run. 
*/ + if (ns->ln_flags & LF_INCOMPLETE) + GOTO(out, rc = 0); + if (!lustre_handle_is_used(lh) && retry != NULL) { *retry = true; @@ -2596,7 +2730,8 @@ static int lfsck_namespace_double_scan_dir(const struct lu_env *env, LASSERT(!dt_object_remote(child)); - if (!(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) { + if (flags & (LNTF_CHECK_LINKEA | LNTF_CHECK_PARENT) && + !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) { CDEBUG(D_LFSCK, "%s: some MDT(s) maybe NOT take part in the" "the namespace LFSCK, then the LFSCK cannot guarantee" "all the name entries have been verified in first-stage" @@ -2681,6 +2816,20 @@ lock: * directory, then handle it as orphan. */ lfsck_ibits_unlock(&lh, LCK_EX); type = LNIT_MUL_REF; + + /* If the LFSCK is marked as LF_INCOMPLETE, + * then means some MDT has ever tried to + * verify some remote MDT-object that resides + * on this MDT, but this MDT failed to respond + * such request. So means there may be some + * remote name entry on other MDT that + * references this object with another name, + * so we cannot know whether this linkEA is + * valid or not. So keep it there and maybe + * resolved when next LFSCK run. */ + if (ns->ln_flags & LF_INCOMPLETE) + GOTO(out, rc = 0); + snprintf(info->lti_tmpbuf, sizeof(info->lti_tmpbuf), "-"DFID, PFID(pfid)); rc = lfsck_namespace_insert_orphan(env, com, child, @@ -2859,6 +3008,22 @@ lost_parent: rc = lfsck_namespace_shrink_linkea(env, com, child, &ldata, cname, pfid, true); } else { + /* If the LFSCK is marked as LF_INCOMPLETE, + * then means some MDT has ever tried to + * verify some remote MDT-object that resides + * on this MDT, but this MDT failed to respond + * such request. So means there may be some + * remote name entry on other MDT that + * references this object with another name, + * so we cannot know whether this linkEA is + * valid or not. So keep it there and maybe + * resolved when next LFSCK run. 
*/ + if (ns->ln_flags & LF_INCOMPLETE) { + lfsck_object_put(env, parent); + + GOTO(out, rc = 0); + } + /* Create the lost parent as an orphan. */ rc = lfsck_namespace_create_orphan(env, com, parent); @@ -2979,6 +3144,19 @@ lost_parent: continue; } + /* If the LFSCK is marked as LF_INCOMPLETE, then means some + * MDT has ever tried to verify some remote MDT-object that + * resides on this MDT, but this MDT failed to respond such + * request. So means there may be some remote name entry on + * other MDT that references this object with another name, + * so we cannot know whether this linkEA is valid or not. + * So keep it there and maybe resolved when next LFSCK run. */ + if (ns->ln_flags & LF_INCOMPLETE) { + lfsck_object_put(env, parent); + + GOTO(out, rc = 0); + } + /* Add the missing name entry back to the namespace. */ rc = lfsck_namespace_insert_normal(env, com, parent, child, cname->ln_name); @@ -3023,7 +3201,14 @@ out: count = ldata.ld_leh->leh_reccount; } - if (count == 0) { + /* If the LFSCK is marked as LF_INCOMPLETE, then means some + * MDT has ever tried to verify some remote MDT-object that + * resides on this MDT, but this MDT failed to respond such + * request. So means there may be some remote name entry on + * other MDT that references this object with another name, + * so we cannot know whether this linkEA is valid or not. + * So keep it there and maybe resolved when next LFSCK run. */ + if (count == 0 && !(ns->ln_flags & LF_INCOMPLETE)) { /* If the child becomes orphan, then insert it into * the global .lustre/lost+found/MDTxxxx directory. 
*/ rc = lfsck_namespace_insert_orphan(env, com, child, "", "O", @@ -3113,11 +3298,12 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m, static int lfsck_namespace_reset(const struct lu_env *env, struct lfsck_component *com, bool init) { - struct lfsck_instance *lfsck = com->lc_lfsck; - struct lfsck_namespace *ns = com->lc_file_ram; - struct dt_object *root; - struct dt_object *dto; - int rc; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_assistant_data *lad = com->lc_data; + struct dt_object *root; + struct dt_object *dto; + int rc; ENTRY; root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid); @@ -3160,7 +3346,10 @@ static int lfsck_namespace_reset(const struct lu_env *env, if (rc != 0) GOTO(out, rc); - rc = lfsck_namespace_store(env, com, true); + lad->lad_incomplete = 0; + CFS_RESET_BITMAP(lad->lad_bitmap); + + rc = lfsck_namespace_store(env, com); GOTO(out, rc); @@ -3213,7 +3402,7 @@ static int lfsck_namespace_checkpoint(const struct lu_env *env, com->lc_new_checked = 0; } - rc = lfsck_namespace_store(env, com, false); + rc = lfsck_namespace_store(env, com); up_write(&com->lc_sem); log: @@ -3235,17 +3424,18 @@ static int lfsck_namespace_prep(const struct lu_env *env, struct lfsck_position *pos = &com->lc_pos_start; int rc; - if (ns->ln_status == LS_COMPLETED) { + rc = lfsck_namespace_load_bitmap(env, com); + if (rc > 0 || (rc == 0 && ns->ln_status == LS_COMPLETED)) { rc = lfsck_namespace_reset(env, com, false); if (rc == 0) rc = lfsck_set_param(env, lfsck, lsp->lsp_start, true); + } - if (rc != 0) { - CDEBUG(D_LFSCK, "%s: namespace LFSCK prep failed: " - "rc = %d\n", lfsck_lfsck2name(lfsck), rc); + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: namespace LFSCK prep failed: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); - return rc; - } + return rc; } down_write(&com->lc_sem); @@ -3513,7 +3703,7 @@ static int lfsck_namespace_post(const struct lu_env *env, com->lc_new_checked = 0; } 
- rc = lfsck_namespace_store(env, com, false); + rc = lfsck_namespace_store(env, com); up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: namespace LFSCK post done: rc = %d\n", @@ -3785,8 +3975,8 @@ out_create: } CDEBUG(D_LFSCK, "%s: namespace LFSCK handles notify %u from MDT %x, " - "status %d\n", lfsck_lfsck2name(lfsck), lr->lr_event, - lr->lr_index, lr->lr_status); + "status %d, flags %x\n", lfsck_lfsck2name(lfsck), lr->lr_event, + lr->lr_index, lr->lr_status, lr->lr_flags2); spin_lock(&ltds->ltd_lock); ltd = LTD_TGT(ltds, lr->lr_index); @@ -3811,6 +4001,9 @@ out_create: break; } + if (lr->lr_flags2 & LF_INCOMPLETE) + ns->ln_flags |= LF_INCOMPLETE; + if (list_empty(&ltd->ltd_namespace_list)) list_add_tail(&ltd->ltd_namespace_list, &lad->lad_mdt_list); @@ -4095,14 +4288,14 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env, struct dt_object *dir = lnr->lnr_obj; struct dt_object *obj = NULL; const struct lu_fid *pfid = lfsck_dto2fid(dir); - struct dt_device *dev; + struct dt_device *dev = NULL; struct lustre_handle lh = { 0 }; bool repaired = false; bool dtlocked = false; bool remove; bool newdata; bool log = false; - int idx; + int idx = 0; int count = 0; int rc; enum lfsck_namespace_inconsistency_type type = LNIT_NONE; @@ -4159,7 +4352,7 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: cannot talk with MDT %x which " "did not join the namespace LFSCK\n", lfsck_lfsck2name(lfsck), idx); - ns->ln_flags |= LF_INCOMPLETE; + lfsck_lad_set_bitmap(env, com, idx); GOTO(out, rc = -ENODEV); } @@ -4410,6 +4603,12 @@ out: lnr->lnr_namelen, lnr->lnr_name, rc); lfsck_namespace_record_failure(env, lfsck, ns); + if ((rc == -ENOTCONN || rc == -ESHUTDOWN || rc == -EREMCHG || + rc == -ETIMEDOUT || rc == -EHOSTDOWN || + rc == -EHOSTUNREACH || rc == -EINPROGRESS) && + dev != NULL && dev != lfsck->li_next) + lfsck_lad_set_bitmap(env, com, idx); + if (!(bk->lb_param & LPF_FAILOUT)) rc = 0; } else { @@ -4451,6 +4650,7 @@ out:
if (obj != NULL && !IS_ERR(obj)) lfsck_object_put(env, obj); + return rc; } @@ -4558,7 +4758,7 @@ checkpoint: ns->ln_time_last_checkpoint = cfs_time_current_sec(); ns->ln_objs_checked_phase2 += com->lc_new_checked; com->lc_new_checked = 0; - rc = lfsck_namespace_store(env, com, false); + rc = lfsck_namespace_store(env, com); up_write(&com->lc_sem); if (rc != 0) GOTO(put, rc); @@ -4639,17 +4839,108 @@ static int lfsck_namespace_double_scan_result(const struct lu_env *env, ns->ln_status = LS_FAILED; } - rc = lfsck_namespace_store(env, com, false); + rc = lfsck_namespace_store(env, com); up_write(&com->lc_sem); return rc; } +static int +lfsck_namespace_assistant_sync_failures_interpret(const struct lu_env *env, + struct ptlrpc_request *req, + void *args, int rc) +{ + return 0; +} + +/** + * Notify remote LFSCK instances about former failures. + * + * The local LFSCK instance has recorded which MDTs have ever failed to respond + * to some LFSCK verification requests (maybe because of network issues or + * trouble on the MDT itself). During the respond gap the MDT may have missed + * some name entries verification, then the MDT cannot know whether related + * MDT-objects have been referenced by related name entries or not, then in the + * second-stage scanning, these MDT-objects will be regarded as orphan, if the + * MDT-object contains bad linkEA for back reference, then it will misguide the + * LFSCK to generate wrong name entry for repairing the orphan. + * + * To avoid the above trouble, when namespace LFSCK finishes the first-stage + * scanning, it will scan the bitmap for the ever failed MDTs, and notify them + * that they have ever missed some name entries verification and should skip + * the handling for orphan MDT-objects.
+ * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * \param[in] lr pointer to the lfsck request + */ static void lfsck_namespace_assistant_sync_failures(const struct lu_env *env, struct lfsck_component *com, struct lfsck_request *lr) { - /* XXX: TBD */ + struct lfsck_async_interpret_args *laia = + &lfsck_env_info(env)->lti_laia2; + struct lfsck_assistant_data *lad = com->lc_data; + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_tgt_descs *ltds = &lfsck->li_mdt_descs; + struct lfsck_tgt_desc *ltd; + struct ptlrpc_request_set *set; + int rc = 0; + ENTRY; + + set = ptlrpc_prep_set(); + if (set == NULL) + GOTO(out, rc = -ENOMEM); + + lr->lr_flags2 = ns->ln_flags | LF_INCOMPLETE; + memset(laia, 0, sizeof(*laia)); + lad->lad_touch_gen++; + + spin_lock(&ltds->ltd_lock); + while (!list_empty(&lad->lad_mdt_list)) { + ltd = list_entry(lad->lad_mdt_list.next, + struct lfsck_tgt_desc, + ltd_namespace_list); + if (ltd->ltd_namespace_gen == lad->lad_touch_gen) + break; + + ltd->ltd_namespace_gen = lad->lad_touch_gen; + list_move_tail(&ltd->ltd_namespace_list, + &lad->lad_mdt_list); + if (!lad->lad_incomplete || + !cfs_bitmap_check(lad->lad_bitmap, ltd->ltd_index)) { + ltd->ltd_namespace_failed = 0; + continue; + } + + ltd->ltd_namespace_failed = 1; + spin_unlock(&ltds->ltd_lock); + rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + lfsck_namespace_assistant_sync_failures_interpret, + laia, LFSCK_NOTIFY); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant fail " + "to sync failure with MDT %x: rc = %d\n", + lfsck_lfsck2name(lfsck), ltd->ltd_index, rc); + + spin_lock(&ltds->ltd_lock); + } + spin_unlock(&ltds->ltd_lock); + + rc = ptlrpc_set_wait(set); + ptlrpc_set_destroy(set); + + GOTO(out, rc); + +out: + if (rc != 0) + CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant fail " + "to sync failure with MDTs, and related MDTs " + "may handle orphan un-properly: rc = 
%d\n", + lfsck_lfsck2name(lfsck), rc); + + EXIT; } struct lfsck_assistant_operations lfsck_namespace_assistant_ops = { diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c index 4a76f10..2835bbb 100644 --- a/lustre/osp/osp_object.c +++ b/lustre/osp/osp_object.c @@ -959,9 +959,15 @@ int osp_xattr_get(const struct lu_env *env, struct dt_object *dt, LASSERT(name != NULL); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_NETWORK) && - osp->opd_index == cfs_fail_val && - osp_dev2node(osp) == cfs_fail_val) - RETURN(-ENOTCONN); + osp->opd_index == cfs_fail_val) { + if (is_ost_obj(&dt->do_lu)) { + if (osp_dev2node(osp) == cfs_fail_val) + RETURN(-ENOTCONN); + } else { + if (strcmp(name, XATTR_NAME_LINK) == 0) + RETURN(-ENOTCONN); + } + } if (unlikely(obj->opo_non_exist)) RETURN(-ENOENT); diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 602b13c..c2bb00d 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -46,7 +46,7 @@ setupall ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21" [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] && - ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27" + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27 28" build_test_filter @@ -3365,6 +3365,109 @@ test_27b() { } run_test 27b "LFSCK can recreate the lost remote parent directory as orphan" +test_28() { + [ $MDSCOUNT -lt 2 ] && + skip "The test needs at least 2 MDTs" && return + + echo "#####" + echo "The target name entry is lost. The LFSCK should insert the" + echo "orphan MDT-object under .lustre/lost+found/MDTxxxx. But if" + echo "the MDT (on which the orphan MDT-object resides) has ever" + echo "failed to respond some name entry verification durin the" + echo "first stage-scanning, then the LFSCK should skip to handle" + echo "orphan MDT-object on this MDT. But other MDTs should not" + echo "be affected." 
+ echo "#####" + + check_mount_and_prep + $LFS mkdir -i 0 $DIR/$tdir/d1 + $LFS mkdir -i 1 $DIR/$tdir/d1/a1 + $LFS mkdir -i 1 $DIR/$tdir/d1/a2 + + $LFS mkdir -i 1 $DIR/$tdir/d2 + $LFS mkdir -i 0 $DIR/$tdir/d2/a1 + $LFS mkdir -i 0 $DIR/$tdir/d2/a2 + + echo "Inject failure stub on MDT0 to simulate the case that" + echo "d1/a1's name entry will be removed, but the d1/a1's object" + echo "and its linkEA are kept in the system. And the case that" + echo "d2/a2's name entry will be removed, but the d2/a2's object" + echo "and its linkEA are kept in the system." + + #define OBD_FAIL_LFSCK_NO_NAMEENTRY 0x1624 + do_facet mds1 $LCTL set_param fail_loc=0x1624 + do_facet mds2 $LCTL set_param fail_loc=0x1624 + rmdir $DIR/$tdir/d1/a1 || error "(1) Fail to rmdir $DIR/$tdir/d1/a1" + rmdir $DIR/$tdir/d2/a2 || error "(2) Fail to rmdir $DIR/$tdir/d2/a2" + do_facet mds1 $LCTL set_param fail_loc=0 + do_facet mds2 $LCTL set_param fail_loc=0 + + cancel_lru_locks mdc + cancel_lru_locks osc + + echo "Inject failure, to simulate the MDT0 fail to handle" + echo "MDT1 LFSCK request during the first-stage scanning." 
+ #define OBD_FAIL_LFSCK_BAD_NETWORK 0x161c + do_facet mds2 $LCTL set_param fail_loc=0x161c fail_val=0 + + echo "Trigger namespace LFSCK on all devices to find out orphan object" + $START_NAMESPACE -r -A || + error "(3) Fail to start LFSCK for namespace" + + wait_update_facet mds1 "$LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "partial" 32 || { + error "(4) mds1 is not the expected 'partial'" + } + + wait_update_facet mds2 "$LCTL get_param -n \ + mdd.$(facet_svc mds2).lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 32 || { + error "(5) mds2 is not the expected 'completed'" + } + + do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0 + + local repaired=$(do_facet mds1 $LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_namespace | + awk '/^lost_dirent_repaired/ { print $2 }') + [ $repaired -eq 0 ] || + error "(6) Expect 0 fixed on mds1, but got: $repaired" + + repaired=$(do_facet mds2 $LCTL get_param -n \ + mdd.$(facet_svc mds2).lfsck_namespace | + awk '/^lost_dirent_repaired/ { print $2 }') + [ $repaired -eq 1 ] || + error "(7) Expect 1 fixed on mds2, but got: $repaired" + + echo "Trigger namespace LFSCK on all devices again to cleanup" + $START_NAMESPACE -r -A || + error "(8) Fail to start LFSCK for namespace" + + for k in $(seq $MDSCOUNT); do + # The LFSCK status query interval is 30 seconds. For the case + # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough + # time to guarantee the status sync up.
+ wait_update_facet mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 32 || + error "(9) MDS${k} is not the expected 'completed'" + done + + local repaired=$(do_facet mds1 $LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_namespace | + awk '/^lost_dirent_repaired/ { print $2 }') + [ $repaired -eq 1 ] || + error "(10) Expect 1 fixed on mds1, but got: $repaired" + + repaired=$(do_facet mds2 $LCTL get_param -n \ + mdd.$(facet_svc mds2).lfsck_namespace | + awk '/^lost_dirent_repaired/ { print $2 }') + [ $repaired -eq 0 ] || + error "(11) Expect 0 fixed on mds2, but got: $repaired" +} +run_test 28 "Skip the failed MDT(s) when handle orphan MDT-objects" + $LCTL set_param debug=-lfsck > /dev/null || true # restore MDS/OST size