X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_layout.c;h=cceebbd6eb652f8c7dd61555837d2415c22574e0;hb=b69b7de30c3977cb69a741099218bc4a81752717;hp=f4153e8705deb7577e9a0b193cab539db6843792;hpb=cb22837bcded8f95461c0d4760b2b9add0956e71;p=fs%2Flustre-release.git diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index f4153e8..cceebbd 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -492,7 +491,7 @@ static struct lfsck_rbtree_node *lfsck_rbtree_new(const struct lu_env *env, return ERR_PTR(-ENOMEM); } - rb_init_node(&lrn->lrn_node); + RB_CLEAR_NODE(&lrn->lrn_node); lrn->lrn_seq = fid_seq(fid); lrn->lrn_first_oid = fid_oid(fid) & ~LFSCK_RBTREE_BITMAP_MASK; atomic_set(&lrn->lrn_known_count, 0); @@ -566,6 +565,9 @@ static int lfsck_rbtree_setup(const struct lu_env *env, llsd->llsd_rbtree_valid = 1; dev->dd_record_fid_accessed = 1; + CDEBUG(D_LFSCK, "%s: layout LFSCK init OST-objects accessing bitmap\n", + lfsck_lfsck2name(lfsck)); + return 0; } @@ -596,6 +598,9 @@ static void lfsck_rbtree_cleanup(const struct lu_env *env, lu_object_put(env, &llsd->llsd_rb_obj->do_lu); llsd->llsd_rb_obj = NULL; } + + CDEBUG(D_LFSCK, "%s: layout LFSCK fini OST-objects accessing bitmap\n", + lfsck_lfsck2name(lfsck)); } static void lfsck_rbtree_update_bitmap(const struct lu_env *env, @@ -610,9 +615,6 @@ static void lfsck_rbtree_update_bitmap(const struct lu_env *env, int rc = 0; ENTRY; - CDEBUG(D_LFSCK, "%s: update bitmap for "DFID"\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(fid)); - if (unlikely(!fid_is_sane(fid) || fid_is_last_id(fid))) RETURN_EXIT; @@ -664,24 +666,17 @@ out: if (rc != 0 && accessed) { struct lfsck_layout *lo = com->lc_file_ram; - CERROR("%s: Fail to update object accessed bitmap, will cause " - "incorrect LFSCK OST-object handling, so disable it to " - "cancel orphan handling for related device. rc = %d.\n", + CDEBUG(D_LFSCK, "%s: fail to update OST-objects accessing " + "bitmap, and will cause incorrect LFSCK OST-object " + "handling, so disable it to cancel orphan handling " + "for related device. rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), rc); + lo->ll_flags |= LF_INCOMPLETE; lfsck_rbtree_cleanup(env, com); } } -static inline bool is_dummy_lov_ost_data(struct lov_ost_data_v1 *obj) -{ - if (fid_is_zero(&obj->l_ost_oi.oi_fid) && - obj->l_ost_gen == 0 && obj->l_ost_idx == 0) - return true; - - return false; -} - static void lfsck_layout_le_to_cpu(struct lfsck_layout *des, const struct lfsck_layout *src) { @@ -760,20 +755,20 @@ static int lfsck_layout_load(const struct lu_env *env, if (rc == 0) { return -ENOENT; } else if (rc < 0) { - CWARN("%s: failed to load lfsck_layout: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), rc); + CDEBUG(D_LFSCK, "%s: failed to load lfsck_layout: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), rc); return rc; } else if (rc != size) { - CWARN("%s: crashed lfsck_layout, to be reset: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), rc); + CDEBUG(D_LFSCK, "%s: lfsck_layout size %u != %u; reset it\n", + lfsck_lfsck2name(com->lc_lfsck), rc, (unsigned int)size); return 1; } lfsck_layout_le_to_cpu(lo, com->lc_file_disk); if (lo->ll_magic != LFSCK_LAYOUT_MAGIC) { - CWARN("%s: invalid lfsck_layout magic %#x != %#x, " - "to be reset\n", lfsck_lfsck2name(com->lc_lfsck), - lo->ll_magic, LFSCK_LAYOUT_MAGIC); + CDEBUG(D_LFSCK, "%s: invalid lfsck_layout magic %#x != %#x, " + "to be reset\n", lfsck_lfsck2name(com->lc_lfsck), + lo->ll_magic, LFSCK_LAYOUT_MAGIC); return 1; } @@ -794,39 +789,30 @@ static int lfsck_layout_store(const struct lu_env *env, lfsck_layout_cpu_to_le(lo, com->lc_file_ram); handle = dt_trans_create(env, lfsck->li_bottom); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - CERROR("%s: fail to create trans for storing lfsck_layout: " - "rc = %d\n", lfsck_lfsck2name(lfsck), rc); - RETURN(rc); - } + if (IS_ERR(handle)) + GOTO(log, rc = PTR_ERR(handle)); rc = dt_declare_record_write(env, obj, lfsck_buf_get(env, lo, size), pos, handle); - if (rc != 0) { - CERROR("%s: fail to declare trans for storing lfsck_layout(1): " - "rc = %d\n", lfsck_lfsck2name(lfsck), rc); + if (rc != 0) GOTO(out, rc); - } rc = dt_trans_start_local(env, lfsck->li_bottom, handle); - if (rc != 0) { - CERROR("%s: fail to start trans for storing lfsck_layout: " - "rc = %d\n", lfsck_lfsck2name(lfsck), rc); + if (rc != 0) GOTO(out, rc); - } rc = dt_record_write(env, obj, lfsck_buf_get(env, lo, size), &pos, handle); - if (rc != 0) - CERROR("%s: fail to store lfsck_layout(1): size = %d, " - "rc = %d\n", lfsck_lfsck2name(lfsck), (int)size, rc); GOTO(out, rc); out: dt_trans_stop(env, lfsck->li_bottom, handle); +log: + if (rc != 0) + CDEBUG(D_LFSCK, "%s: fail to store lfsck_layout: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); return rc; } @@ -925,9 +911,6 @@ lfsck_layout_lastid_create(const struct lu_env *env, int rc; ENTRY; - CDEBUG(D_LFSCK, "To create LAST_ID for "LPX64"\n", - fid_seq(lfsck_dto2fid(obj))); - if (bk->lb_param & LPF_DRYRUN) return 0; @@ -938,7 +921,7 @@ lfsck_layout_lastid_create(const struct lu_env *env, th = dt_trans_create(env, dt); if (IS_ERR(th)) - RETURN(rc = PTR_ERR(th)); + GOTO(log, rc = PTR_ERR(th)); rc = dt_declare_create(env, obj, la, NULL, dof, th); if (rc != 0) @@ -970,6 +953,11 @@ lfsck_layout_lastid_create(const struct lu_env *env, stop: dt_trans_stop(env, dt, th); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK will create LAST_ID for " + LPX64": rc = %d\n", + lfsck_lfsck2name(lfsck), fid_seq(lfsck_dto2fid(obj)), rc); + return rc; } @@ -1050,8 +1038,9 @@ lfsck_layout_lastid_store(const struct lu_env *env, } #endif - CDEBUG(D_LFSCK, "To sync the LAST_ID for "LPX64 - " as "LPU64"\n", lls->lls_seq, lls->lls_lastid); + CDEBUG(D_LFSCK, "%s: layout LFSCK will sync the LAST_ID for " + " "LPX64" as "LPU64"\n", + lfsck_lfsck2name(lfsck), lls->lls_seq, lls->lls_lastid); if (bk->lb_param & LPF_DRYRUN) { lls->lls_dirty = 0; @@ -1061,7 +1050,8 @@ lfsck_layout_lastid_store(const struct lu_env *env, th = dt_trans_create(env, dt); if (IS_ERR(th)) { rc1 = PTR_ERR(th); - CERROR("%s: (1) failed to store "LPX64": rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK failed to store " + "the LAST_ID for "LPX64"(1): rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc1); continue; @@ -1091,7 +1081,8 @@ stop: dt_trans_stop(env, dt, th); if (rc != 0) { rc1 = rc; - CERROR("%s: (2) failed to store "LPX64": rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK failed to store " + "the LAST_ID for "LPX64"(2): rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc1); } @@ -1179,10 +1170,16 @@ static void lfsck_layout_record_failure(const struct lu_env *env, struct lfsck_layout *lo) { lo->ll_objs_failed_phase1++; - if (unlikely(lo->ll_pos_first_inconsistent == 0)) + if (unlikely(lo->ll_pos_first_inconsistent == 0)) { lo->ll_pos_first_inconsistent = lfsck->li_obj_oit->do_index_ops->dio_it.store(env, lfsck->li_di_oit); + + CDEBUG(D_LFSCK, "%s: layout LFSCK hit first non-repaired " + "inconsistency at the pos ["LPU64"]\n", + lfsck_lfsck2name(lfsck), + lo->ll_pos_first_inconsistent); + } } static int lfsck_layout_master_async_interpret(const struct lu_env *env, @@ -1201,8 +1198,9 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env, if (rc != 0) { struct lfsck_layout *lo = com->lc_file_ram; - CERROR("%s: fail to notify %s %x for layout start: " - "rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), + CDEBUG(D_LFSCK, "%s: fail to notify %s %x for layout " + "start: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", ltd->ltd_index, rc); lo->ll_flags |= LF_INCOMPLETE; @@ -1237,11 +1235,11 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env, case LE_PHASE2_DONE: case LE_PEER_EXIT: if (rc != 0 && rc != -EALREADY) - CWARN("%s: fail to notify %s %x for layout: " - "event = %d, rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), - (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", - ltd->ltd_index, lr->lr_event, rc); + CDEBUG(D_LFSCK, "%s: fail to notify %s %x for layout: " + "event = %d, rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), + (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", + ltd->ltd_index, lr->lr_event, rc); break; case LE_QUERY: { struct lfsck_reply *reply; @@ -1258,7 +1256,7 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env, &RMF_LFSCK_REPLY); if (reply == NULL) { rc = -EPROTO; - CERROR("%s: invalid return value: rc = %d\n", + CDEBUG(D_LFSCK, "%s: invalid query reply: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), rc); spin_lock(<ds->ltd_lock); list_del_init(<d->ltd_layout_phase_list); @@ -1296,7 +1294,7 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env, break; } default: - CERROR("%s: unexpected event: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK unexpected event: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lr->lr_event); break; } @@ -1333,7 +1331,7 @@ static int lfsck_layout_master_query_others(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_QUERY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; laia->laia_com = com; laia->laia_lr = lr; laia->laia_shared = 0; @@ -1369,8 +1367,8 @@ again: lfsck_layout_master_async_interpret, laia, LFSCK_QUERY); if (rc != 0) { - CERROR("%s: fail to query %s %x for layout: rc = %d\n", - lfsck_lfsck2name(lfsck), + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to query %s %x: " + "rc = %d\n", lfsck_lfsck2name(lfsck), (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", ltd->ltd_index, rc); lfsck_tgt_put(ltd); @@ -1427,7 +1425,7 @@ static int lfsck_layout_master_notify_others(const struct lu_env *env, RETURN(-ENOMEM); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; laia->laia_com = com; laia->laia_lr = lr; laia->laia_shared = 0; @@ -1447,8 +1445,8 @@ static int lfsck_layout_master_notify_others(const struct lu_env *env, lfsck_layout_master_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { - CERROR("%s: fail to notify %s %x for layout " - "start: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to " + "notify %s %x for start: rc = %d\n", lfsck_lfsck2name(lfsck), (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", idx, rc); @@ -1469,6 +1467,7 @@ static int lfsck_layout_master_notify_others(const struct lu_env *env, break; /* link other MDT targets locallly. */ + ltds = &lfsck->li_mdt_descs; spin_lock(<ds->ltd_lock); cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { ltd = LTD_TGT(ltds, idx); @@ -1529,8 +1528,9 @@ again: lfsck_layout_master_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { - CERROR("%s: fail to notify %s %x for layout " - "stop/phase2: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to " + "notify %s %x for stop/phase2_done/" + "peer_exit: rc = %d\n", lfsck_lfsck2name(lfsck), (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", ltd->ltd_index, rc); @@ -1577,9 +1577,9 @@ again: lfsck_layout_master_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { - CERROR("%s: fail to notify MDT %x for layout " - "phase1 done: rc = %d\n", - lfsck_lfsck2name(lfsck), + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to " + "notify MDT %x for phase1_done: " + "rc = %d\n", lfsck_lfsck2name(lfsck), ltd->ltd_index, rc); lfsck_tgt_put(ltd); } @@ -1588,7 +1588,7 @@ again: spin_unlock(<ds->ltd_lock); break; default: - CERROR("%s: unexpected LFSCK event: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK unexpected event: rc = %d\n", lfsck_lfsck2name(lfsck), lr->lr_event); rc = -EINVAL; break; @@ -1694,6 +1694,45 @@ static int lfsck_layout_trans_stop(const struct lu_env *env, } /** + * Get the system default stripe size. + * + * \param[in] env pointer to the thread context + * \param[in] lfsck pointer to the lfsck instance + * \param[out] size pointer to the default stripe size + * + * \retval 0 for success + * \retval negative error number on failure + */ +static int lfsck_layout_get_def_stripesize(const struct lu_env *env, + struct lfsck_instance *lfsck, + __u32 *size) +{ + struct lov_user_md *lum = &lfsck_env_info(env)->lti_lum; + struct dt_object *root; + int rc; + + root = dt_locate(env, lfsck->li_next, &lfsck->li_local_root_fid); + if (IS_ERR(root)) + return PTR_ERR(root); + + /* Get the default stripe size via xattr_get on the backend root. */ + rc = dt_xattr_get(env, root, lfsck_buf_get(env, lum, sizeof(*lum)), + XATTR_NAME_LOV, BYPASS_CAPA); + if (rc > 0) { + /* The lum->lmm_stripe_size is LE mode. The *size also + * should be LE mode. So it is unnecessary to convert. */ + *size = lum->lmm_stripe_size; + rc = 0; + } else if (unlikely(rc == 0)) { + rc = -EINVAL; + } + + lfsck_object_put(env, root); + + return rc; +} + +/** * \retval +1: repaired * \retval 0: did nothing * \retval -ve: on error @@ -1706,13 +1745,36 @@ static int lfsck_layout_refill_lovea(const struct lu_env *env, struct lov_ost_data_v1 *slot, int fl, __u32 ost_idx) { - struct ost_id *oi = &lfsck_env_info(env)->lti_oi; - int rc; + struct ost_id *oi = &lfsck_env_info(env)->lti_oi; + struct lov_mds_md_v1 *lmm = buf->lb_buf; + int rc; fid_to_ostid(cfid, oi); ostid_cpu_to_le(oi, &slot->l_ost_oi); slot->l_ost_gen = cpu_to_le32(0); slot->l_ost_idx = cpu_to_le32(ost_idx); + + if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_HOLE) { + struct lov_ost_data_v1 *objs; + int i; + __u16 count; + + count = le16_to_cpu(lmm->lmm_stripe_count); + if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1) + objs = &lmm->lmm_objects[0]; + else + objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; + for (i = 0; i < count; i++, objs++) { + if (objs != slot && lovea_slot_is_dummy(objs)) + break; + } + + /* If the @slot is the last dummy slot to be refilled, + * then drop LOV_PATTERN_F_HOLE from lmm::lmm_pattern. */ + if (i == count) + lmm->lmm_pattern &= ~cpu_to_le32(LOV_PATTERN_F_HOLE); + } + rc = dt_xattr_set(env, parent, buf, XATTR_NAME_LOV, fl, handle, BYPASS_CAPA); if (rc == 0) @@ -1727,6 +1789,7 @@ static int lfsck_layout_refill_lovea(const struct lu_env *env, * \retval -ve: on error */ static int lfsck_layout_extend_lovea(const struct lu_env *env, + struct lfsck_instance *lfsck, struct thandle *handle, struct dt_object *parent, struct lu_fid *cfid, @@ -1736,53 +1799,71 @@ static int lfsck_layout_extend_lovea(const struct lu_env *env, struct lov_mds_md_v1 *lmm = buf->lb_buf; struct lov_ost_data_v1 *objs; int rc; + __u16 count; + bool hole = false; ENTRY; if (fl == LU_XATTR_CREATE || reset) { - LASSERT(buf->lb_len == lov_mds_md_size(ea_off + 1, - LOV_MAGIC_V1)); + __u32 pattern = LOV_PATTERN_RAID0; + + count = ea_off + 1; + LASSERT(buf->lb_len == lov_mds_md_size(count, LOV_MAGIC_V1)); + + if (ea_off != 0 || reset) { + pattern |= LOV_PATTERN_F_HOLE; + hole = true; + } memset(lmm, 0, buf->lb_len); lmm->lmm_magic = cpu_to_le32(LOV_MAGIC_V1); - /* XXX: currently, we only support LOV_PATTERN_RAID0. */ - lmm->lmm_pattern = cpu_to_le32(LOV_PATTERN_RAID0); + lmm->lmm_pattern = cpu_to_le32(pattern); fid_to_lmm_oi(lfsck_dto2fid(parent), &lmm->lmm_oi); lmm_oi_cpu_to_le(&lmm->lmm_oi, &lmm->lmm_oi); - /* XXX: We cannot know the stripe size, - * then use the default value (1 MB). */ - lmm->lmm_stripe_size = - cpu_to_le32(LOV_DESC_STRIPE_SIZE_DEFAULT); - objs = &(lmm->lmm_objects[ea_off]); + + rc = lfsck_layout_get_def_stripesize(env, lfsck, + &lmm->lmm_stripe_size); + if (rc != 0) + RETURN(rc); + + objs = &lmm->lmm_objects[ea_off]; } else { - __u16 count = le16_to_cpu(lmm->lmm_stripe_count); - int gap = ea_off - count; __u32 magic = le32_to_cpu(lmm->lmm_magic); + int gap; - /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 - * which has been verified in lfsck_layout_verify_header() - * already. If some new magic introduced in the future, - * then layout LFSCK needs to be updated also. */ - if (magic == LOV_MAGIC_V1) { - objs = &(lmm->lmm_objects[count]); - } else { - LASSERT(magic == LOV_MAGIC_V3); + count = le16_to_cpu(lmm->lmm_stripe_count); + if (magic == LOV_MAGIC_V1) + objs = &lmm->lmm_objects[count]; + else objs = &((struct lov_mds_md_v3 *)lmm)-> lmm_objects[count]; - } - if (gap > 0) + gap = ea_off - count; + if (gap >= 0) + count = ea_off + 1; + LASSERT(buf->lb_len == lov_mds_md_size(count, magic)); + + if (gap > 0) { memset(objs, 0, gap * sizeof(*objs)); + lmm->lmm_pattern |= cpu_to_le32(LOV_PATTERN_F_HOLE); + hole = true; + } + lmm->lmm_layout_gen = cpu_to_le16(le16_to_cpu(lmm->lmm_layout_gen) + 1); objs += gap; - - LASSERT(buf->lb_len == lov_mds_md_size(ea_off + 1, magic)); } - lmm->lmm_stripe_count = cpu_to_le16(ea_off + 1); + lmm->lmm_stripe_count = cpu_to_le16(count); rc = lfsck_layout_refill_lovea(env, handle, parent, cfid, buf, objs, fl, ost_idx); + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant extend layout EA for " + DFID": parent "DFID", OST-index %u, stripe-index %u, fl %d, " + "reset %s, %s LOV EA hole: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)), + ost_idx, ea_off, fl, reset ? "yes" : "no", + hole ? "with" : "without", rc); + RETURN(rc); } @@ -1844,20 +1925,61 @@ out: } /** - * \retval +1: repaired - * \retval 0: did nothing - * \retval -ve: on error + * This function will create the MDT-object with the given (partial) LOV EA. + * + * Under some data corruption cases, the MDT-object of the file may be lost, + * but its OST-objects, or some of them are there. The layout LFSCK needs to + * re-create the MDT-object with the orphan OST-object(s) information. + * + * On the other hand, the LFSCK may has created some OST-object for repairing + * dangling LOV EA reference, but as the LFSCK processing, it may find that + * the old OST-object is there and should replace the former new created OST + * object. Unfortunately, some others have modified such newly created object. + * To keep the data (both new and old), the LFSCK will create MDT-object with + * new FID to reference the original OST-object. + * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * \param[in] ltd pointer to target device descriptor + * \param[in] rec pointer to the record for the orphan OST-object + * \param[in] cfid pointer to FID for the orphan OST-object + * \param[in] infix additional information, such as the FID for original + * MDT-object and the stripe offset in the LOV EA + * \param[in] type the type for describing why the orphan MDT-object is + * created. The rules are as following: + * + * type "C": Multiple OST-objects claim the same MDT-object and the + * same slot in the layout EA. Then the LFSCK will create + * new MDT-object(s) to hold the conflict OST-object(s). + * + * type "N": The orphan OST-object does not know which one was the + * real parent MDT-object, so the LFSCK uses new FID for + * its parent MDT-object. + * + * type "R": The orphan OST-object knows its parent MDT-object FID, + * but does not know the position (the file name) in the + * namespace. + * + * The orphan name will be like: + * ${FID}-${infix}-${type}-${conflict_version} + * + * \param[in] ea_off the stripe offset in the LOV EA + * + * \retval positive on repaired something + * \retval 0 if needs to repair nothing + * \retval negative error number on failure */ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, struct lu_orphan_rec *rec, struct lu_fid *cfid, - const char *prefix, - const char *postfix, + const char *infix, + const char *type, __u32 ea_off) { struct lfsck_thread_info *info = lfsck_env_info(env); + struct dt_insert_rec *dtrec = &info->lti_dt_rec; char *name = info->lti_key; struct lu_attr *la = &info->lti_la; struct dt_object_format *dof = &info->lti_dof; @@ -1871,6 +1993,9 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lu_buf *pbuf = NULL; struct lu_buf *ea_buf = &info->lti_big_buf; struct lustre_handle lh = { 0 }; + struct linkea_data ldata = { 0 }; + struct lu_buf linkea_buf; + const struct lu_name *pname; int buflen = ea_buf->lb_len; int idx = 0; int rc = 0; @@ -1880,7 +2005,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, if (unlikely(lfsck->li_lpf_obj == NULL)) { rc = lfsck_create_lpf(env, lfsck); if (rc != 0) - RETURN(rc); + GOTO(log, rc); } if (fid_is_zero(pfid)) { @@ -1899,57 +2024,39 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, pbuf = lfsck_buf_get(env, ff, sizeof(struct filter_fid)); cobj = lfsck_object_find_by_dev(env, ltd->ltd_tgt, cfid); if (IS_ERR(cobj)) - RETURN(PTR_ERR(cobj)); + GOTO(log, rc = PTR_ERR(cobj)); } - CDEBUG(D_LFSCK, "Re-create the lost MDT-object: parent " - DFID", child "DFID", OST-index %u, stripe-index %u, " - "prefix %s, postfix %s\n", - PFID(pfid), PFID(cfid), ltd->ltd_index, ea_off, prefix, postfix); - pobj = lfsck_object_find_by_dev(env, lfsck->li_bottom, pfid); if (IS_ERR(pobj)) GOTO(put, rc = PTR_ERR(pobj)); - LASSERT(prefix != NULL); - LASSERT(postfix != NULL); + LASSERT(infix != NULL); + LASSERT(type != NULL); - /** name rules: - * - * 1. Use the MDT-object's FID as the name with prefix and postfix. - * - * 1.1 prefix "C-": More than one OST-objects claim the same - * MDT-object and the same slot in the layout EA. - * It may be created for dangling referenced MDT - * object or may be not. - * 1.2 prefix "N-": The orphan OST-object does not know which one - * is the real parent, so the LFSCK assign a new - * FID as its parent. - * 1.3 prefix "R-": The orphan OST-object know its parent FID but - * does not know the position in the namespace. - * - * 2. If there is name conflict, append more index for new name. */ - sprintf(name, "%s"DFID"%s", prefix, PFID(pfid), postfix); do { + snprintf(name, NAME_MAX, DFID"%s-%s-%d", PFID(pfid), infix, + type, idx++); rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, (const struct dt_key *)name, BYPASS_CAPA); if (rc != 0 && rc != -ENOENT) GOTO(put, rc); - - if (unlikely(rc == 0)) { - CWARN("%s: The name %s under lost+found has been used " - "by the "DFID". Try to increase the FID version " - "for the new file name.\n", - lfsck_lfsck2name(lfsck), name, PFID(tfid)); - sprintf(name, "%s"DFID"%s-%d", prefix, PFID(pfid), - postfix, ++idx); - } } while (rc == 0); + rc = linkea_data_new(&ldata, + &lfsck_env_info(env)->lti_linkea_buf); + if (rc != 0) + GOTO(put, rc); + + pname = lfsck_name_get_const(env, name, strlen(name)); + rc = linkea_add_buf(&ldata, pname, lfsck_dto2fid(lfsck->li_lpf_obj)); + if (rc != 0) + GOTO(put, rc); + memset(la, 0, sizeof(*la)); la->la_uid = rec->lor_uid; la->la_gid = rec->lor_gid; - la->la_mode = S_IFREG | S_IRUSR | S_IWUSR; + la->la_mode = S_IFREG | S_IRUSR; la->la_valid = LA_MODE | LA_UID | LA_GID; memset(dof, 0, sizeof(*dof)); @@ -2002,12 +2109,22 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, GOTO(stop, rc); /* 4a. Insert the MDT-object to .lustre/lost+found/MDTxxxx/ */ + dtrec->rec_fid = pfid; + dtrec->rec_type = S_IFREG; rc = dt_declare_insert(env, lfsck->li_lpf_obj, - (const struct dt_rec *)pfid, + (const struct dt_rec *)dtrec, (const struct dt_key *)name, th); if (rc != 0) GOTO(stop, rc); + /* 5a. insert linkEA for parent. */ + linkea_buf.lb_buf = ldata.ld_buf->lb_buf; + linkea_buf.lb_len = ldata.ld_leh->leh_len; + rc = dt_declare_xattr_set(env, pobj, &linkea_buf, + XATTR_NAME_LINK, 0, th); + if (rc != 0) + GOTO(stop, rc); + rc = dt_trans_start(env, next, th); if (rc != 0) GOTO(stop, rc); @@ -2025,17 +2142,22 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, rc = dt_create(env, pobj, la, NULL, dof, th); if (rc == 0) /* 3b. Add layout EA for the MDT-object. */ - rc = lfsck_layout_extend_lovea(env, th, pobj, cfid, ea_buf, - LU_XATTR_CREATE, ltd->ltd_index, - ea_off, false); + rc = lfsck_layout_extend_lovea(env, lfsck, th, pobj, cfid, + ea_buf, LU_XATTR_CREATE, + ltd->ltd_index, ea_off, false); dt_write_unlock(env, pobj); if (rc < 0) GOTO(stop, rc); /* 4b. Insert the MDT-object to .lustre/lost+found/MDTxxxx/ */ - rc = dt_insert(env, lfsck->li_lpf_obj, - (const struct dt_rec *)pfid, + rc = dt_insert(env, lfsck->li_lpf_obj, (const struct dt_rec *)dtrec, (const struct dt_key *)name, th, BYPASS_CAPA, 1); + if (rc != 0) + GOTO(stop, rc); + + /* 5b. insert linkEA for parent. */ + rc = dt_xattr_set(env, pobj, &linkea_buf, + XATTR_NAME_LINK, 0, th, BYPASS_CAPA); GOTO(stop, rc); @@ -2052,6 +2174,15 @@ put: lu_object_put(env, &pobj->do_lu); ea_buf->lb_len = buflen; +log: + if (rc < 0) + CDEBUG(D_LFSCK, "%s layout LFSCK assistant failed to " + "recreate the lost MDT-object: parent "DFID + ", child "DFID", OST-index %u, stripe-index %u, " + "infix %s, type %s: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(pfid), PFID(cfid), + ltd->ltd_index, ea_off, infix, type, rc); + return rc >= 0 ? 1 : rc; } @@ -2091,7 +2222,7 @@ static int lfsck_layout_master_conditional_destroy(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_CONDITIONAL_DESTROY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_fid = *fid; tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); @@ -2191,10 +2322,11 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, rc = dt_destroy(env, obj, th); if (rc == 0) - CDEBUG(D_LFSCK, "Destroy the empty OST-object "DFID" which " - "was created for reparing dangling referenced case. " - "But the original missed OST-object is found now.\n", - PFID(fid)); + CDEBUG(D_LFSCK, "%s: layout LFSCK destroyed the empty " + "OST-object "DFID" that was created for reparing " + "dangling referenced case. But the original missed " + "OST-object is found now.\n", + lfsck_lfsck2name(lfsck), PFID(fid)); GOTO(stop, rc); @@ -2237,7 +2369,7 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_fid *cfid2 = &info->lti_fid2; struct ost_id *oi = &info->lti_oi; - char *postfix = info->lti_tmpbuf; + char *infix = info->lti_tmpbuf; struct lov_mds_md_v1 *lmm = ea_buf->lb_buf; struct dt_device *dev = com->lc_lfsck->li_bottom; struct thandle *th = NULL; @@ -2249,12 +2381,6 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, ostid_le_to_cpu(&slot->l_ost_oi, oi); ostid_to_fid(cfid2, oi, ost_idx2); - CDEBUG(D_LFSCK, "Handle layout EA conflict: parent "DFID - ", cur-child "DFID" on the OST %u, orphan-child " - DFID" on the OST %u, stripe-index %u\n", - PFID(lfsck_dto2fid(parent)), PFID(cfid2), ost_idx2, - PFID(cfid), ltd->ltd_index, ea_off); - /* Hold layout lock on the parent to prevent others to access. */ rc = lfsck_layout_lock(env, com, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); @@ -2273,10 +2399,10 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, ea_buf->lb_len = ori_len; fid_zero(&rec->lor_fid); - snprintf(postfix, LFSCK_TMPBUF_LEN, "-"DFID"-%x", + snprintf(infix, LFSCK_TMPBUF_LEN, "-"DFID"-%x", PFID(lu_object_fid(&parent->do_lu)), ea_off); rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "C-", postfix, ea_off); + infix, "C", ea_off); RETURN(rc); } @@ -2314,6 +2440,13 @@ unlock: out: ea_buf->lb_len = ori_len; + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant replaced the conflict " + "OST-object "DFID" on the OST %x with the orphan "DFID" on " + "the OST %x: parent "DFID", stripe-index %u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(cfid2), ost_idx2, + PFID(cfid), ltd->ltd_index, PFID(lfsck_dto2fid(parent)), + ea_off, rc); + return rc >= 0 ? 1 : rc; } @@ -2351,14 +2484,17 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env, bool locked = false; ENTRY; - CDEBUG(D_LFSCK, "Re-create the crashed layout EA: parent " - DFID", child "DFID", OST-index %u, stripe-index %u\n", - PFID(lfsck_dto2fid(parent)), PFID(cfid), ost_idx, ea_off); - rc = lfsck_layout_lock(env, com, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); - if (rc != 0) + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to recreate " + "LOV EA for "DFID": parent "DFID", OST-index %u, " + "stripe-index %u: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(cfid), + PFID(lfsck_dto2fid(parent)), ost_idx, ea_off, rc); + RETURN(rc); + } again: if (locked) { @@ -2426,8 +2562,8 @@ again: LASSERT(buf->lb_len >= rc); buf->lb_len = rc; - rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf, - fl, ost_idx, ea_off, false); + rc = lfsck_layout_extend_lovea(env, lfsck, handle, parent, cfid, + buf, fl, ost_idx, ea_off, false); GOTO(unlock_parent, rc); } @@ -2444,8 +2580,8 @@ again: buf->lb_len = rc; memset(lmm, 0, buf->lb_len); - rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf, - fl, ost_idx, ea_off, true); + rc = lfsck_layout_extend_lovea(env, lfsck, handle, parent, cfid, + buf, fl, ost_idx, ea_off, true); GOTO(unlock_parent, rc); } @@ -2460,7 +2596,7 @@ again: * be updated also. */ magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_V1) { - objs = &(lmm->lmm_objects[0]); + objs = &lmm->lmm_objects[0]; } else { LASSERT(magic == LOV_MAGIC_V3); objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; @@ -2482,8 +2618,9 @@ again: goto again; buf->lb_len = rc; - rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf, - fl, ost_idx, ea_off, false); + rc = lfsck_layout_extend_lovea(env, lfsck, handle, parent, cfid, + buf, fl, ost_idx, ea_off, false); + GOTO(unlock_parent, rc); } @@ -2493,7 +2630,7 @@ again: for (i = 0; i < count; i++, objs++) { /* The MDT-object was created via lfsck_layout_recover_create() * by others before, and we fill the dummy layout EA. */ - if (is_dummy_lov_ost_data(objs)) { + if (lovea_slot_is_dummy(objs)) { if (i != ea_off) continue; @@ -2505,6 +2642,13 @@ again: rc = lfsck_layout_refill_lovea(env, handle, parent, cfid, buf, objs, fl, ost_idx); + + CDEBUG(D_LFSCK, "%s layout LFSCK assistant fill " + "dummy layout slot for "DFID": parent "DFID + ", OST-index %u, stripe-index %u: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(cfid), + PFID(lfsck_dto2fid(parent)), ost_idx, i, rc); + GOTO(unlock_parent, rc); } @@ -2530,6 +2674,14 @@ again: rc = lfsck_layout_update_pfid(env, com, parent, cfid, ltd->ltd_tgt, i); + CDEBUG(D_LFSCK, "%s layout LFSCK assistant " + "updated OST-object's pfid for "DFID + ": parent "DFID", OST-index %u, " + "stripe-index %u: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(cfid), + PFID(lfsck_dto2fid(parent)), + ltd->ltd_index, i, rc); + RETURN(rc); } } @@ -2545,7 +2697,7 @@ again: dt_trans_stop(env, dt, handle); lfsck_layout_unlock(&lh); if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1) - objs = &(lmm->lmm_objects[ea_off]); + objs = &lmm->lmm_objects[ea_off]; else objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[ea_off]; rc = lfsck_layout_conflict_create(env, com, ltd, rec, parent, cfid, @@ -2586,7 +2738,7 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, if (fid_is_zero(pfid)) { rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "N-", "", ea_off); + "", "N", ea_off); GOTO(out, rc); } @@ -2604,7 +2756,7 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, if (dt_object_exists(parent) == 0) { lu_object_put(env, &parent->do_lu); rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "R-", "", ea_off); + "", "R", ea_off); GOTO(out, rc); } @@ -2654,7 +2806,8 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, int rc = 0; ENTRY; - CDEBUG(D_LFSCK, "%s: start the orphan scanning for OST%04x\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant starts the orphan " + "scanning for OST%04x\n", lfsck_lfsck2name(lfsck), ltd->ltd_index); ostid_set_seq(oi, FID_SEQ_IDIF); @@ -2662,7 +2815,7 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, ostid_to_fid(fid, oi, ltd->ltd_index); obj = lfsck_object_find_by_dev(env, ltd->ltd_tgt, fid); if (unlikely(IS_ERR(obj))) - RETURN(PTR_ERR(obj)); + GOTO(log, rc = PTR_ERR(obj)); rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_orphan_features); if (rc != 0) @@ -2731,7 +2884,9 @@ fini: put: lu_object_put(env, &obj->do_lu); - CDEBUG(D_LFSCK, "%s: finish the orphan scanning for OST%04x, rc = %d\n", +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant finished the orphan " + "scanning for OST%04x: rc = %d\n", lfsck_lfsck2name(lfsck), ltd->ltd_index, rc); return rc > 0 ? 0 : rc; @@ -2771,15 +2926,8 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, else create = false; - CDEBUG(D_LFSCK, "Found dangling reference for: parent "DFID - ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u. %s", - PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), - llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid, - create ? "Create the lost OST-object as required.\n" : - "Keep the MDT-object there by default.\n"); - if (!create) - RETURN(1); + GOTO(log, rc = 1); memset(cla, 0, sizeof(*cla)); cla->la_uid = pla->la_uid; @@ -2791,7 +2939,7 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, rc = lfsck_layout_lock(env, com, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); if (rc != 0) - RETURN(rc); + GOTO(log, rc); handle = dt_trans_create(env, dev); if (IS_ERR(handle)) @@ -2842,6 +2990,16 @@ stop: unlock1: lfsck_layout_unlock(&lh); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant found dangling " + "reference for: parent "DFID", child "DFID", OST-index %u, " + "stripe-index %u, owner %u/%u. %s: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), + PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, + llr->llr_lov_idx, pla->la_uid, pla->la_gid, + create ? "Create the lost OST-object as required" : + "Keep the MDT-object there by default", rc); + return rc; } @@ -2866,15 +3024,10 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, int rc; ENTRY; - CDEBUG(D_LFSCK, "Repair unmatched MDT-OST pair for: parent "DFID - ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n", - PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), - llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid); - rc = lfsck_layout_lock(env, com, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); if (rc != 0) - RETURN(rc); + GOTO(log, rc); handle = dt_trans_create(env, dev); if (IS_ERR(handle)) @@ -2931,6 +3084,14 @@ stop: unlock1: lfsck_layout_unlock(&lh); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired unmatched " + "MDT-OST pair for: parent "DFID", child "DFID", OST-index %u, " + "stripe-index %u, owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), + PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, llr->llr_lov_idx, + pla->la_uid, pla->la_gid, rc); + return rc; } @@ -2961,15 +3122,10 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, int rc; ENTRY; - CDEBUG(D_LFSCK, "Repair multiple references for: parent "DFID - ", OST-index %u, stripe-index %u, owner %u:%u\n", - PFID(lfsck_dto2fid(parent)), llr->llr_ost_idx, - llr->llr_lov_idx, la->la_uid, la->la_gid); - rc = lfsck_layout_lock(env, com, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); if (rc != 0) - RETURN(rc); + GOTO(log, rc); handle = dt_trans_create(env, pdev); if (IS_ERR(handle)) @@ -3025,7 +3181,7 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, * be updated also. */ magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_V1) { - objs = &(lmm->lmm_objects[0]); + objs = &lmm->lmm_objects[0]; } else { LASSERT(magic == LOV_MAGIC_V3); objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; @@ -3053,6 +3209,13 @@ stop: unlock1: lfsck_layout_unlock(&lh); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired multiple " + "references for: parent "DFID", OST-index %u, stripe-index %u, " + "owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), + llr->llr_ost_idx, llr->llr_lov_idx, la->la_uid, la->la_gid, rc); + return rc; } @@ -3074,14 +3237,9 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, int rc; ENTRY; - CDEBUG(D_LFSCK, "Repair inconsistent file owner for: parent "DFID - ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n", - PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), - llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid); - handle = dt_trans_create(env, dev); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(log, rc = PTR_ERR(handle)); tla->la_uid = pla->la_uid; tla->la_gid = pla->la_gid; @@ -3101,14 +3259,8 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, /* Get the latest parent's owner. */ rc = dt_attr_get(env, parent, tla, BYPASS_CAPA); - if (rc != 0) { - CWARN("%s: fail to get the latest parent's ("DFID") owner, " - "not sure whether some others chown/chgrp during the " - "LFSCK: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), - PFID(lfsck_dto2fid(parent)), rc); - + if (rc != 0) GOTO(unlock, rc); - } /* Some others chown/chgrp during the LFSCK, needs to do nothing. */ if (unlikely(tla->la_uid != pla->la_uid || @@ -3126,6 +3278,14 @@ unlock: stop: rc = lfsck_layout_trans_stop(env, dev, handle, rc); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired inconsistent " + "file owner for: parent "DFID", child "DFID", OST-index %u, " + "stripe-index %u, owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), + PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, llr->llr_lov_idx, + pla->la_uid, pla->la_gid, rc); + return rc; } @@ -3199,7 +3359,7 @@ static int lfsck_layout_check_parent(const struct lu_env *env, lmm = buf->lb_buf; magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_V1) { - objs = &(lmm->lmm_objects[0]); + objs = &lmm->lmm_objects[0]; } else { LASSERT(magic == LOV_MAGIC_V3); objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; @@ -3210,7 +3370,7 @@ static int lfsck_layout_check_parent(const struct lu_env *env, struct lu_fid *tfid = &info->lti_fid2; struct ost_id *oi = &info->lti_oi; - if (is_dummy_lov_ost_data(objs)) + if (lovea_slot_is_dummy(objs)) continue; ostid_le_to_cpu(&objs->l_ost_oi, oi); @@ -3350,7 +3510,8 @@ out: rc == -EHOSTUNREACH) { /* If cannot touch the target server, * mark the LFSCK as INCOMPLETE. */ - CERROR("%s: Fail to talk with OST %x: rc = %d.\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant fail to " + "talk with OST %x: rc = %d\n", lfsck_lfsck2name(lfsck), llr->llr_ost_idx, rc); lo->ll_flags |= LF_INCOMPLETE; lo->ll_objs_skipped++; @@ -3407,7 +3568,8 @@ static int lfsck_layout_assistant(void *args) rc = lfsck_layout_master_notify_others(env, com, lr); if (rc != 0) { - CERROR("%s: fail to notify others for layout start: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to notify " + "others for LFSCK start: rc = %d\n", lfsck_lfsck2name(lfsck), rc); GOTO(fini, rc); } @@ -3476,8 +3638,9 @@ static int lfsck_layout_assistant(void *args) lr->lr_status = llmd->llmd_post_result; rc = lfsck_layout_master_notify_others(env, com, lr); if (rc != 0) - CERROR("%s: failed to notify others " - "for layout post: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant " + "failed to notify others for LFSCK " + "post: rc = %d\n", lfsck_lfsck2name(lfsck), rc); /* Wakeup the master engine to go ahead. */ @@ -3490,6 +3653,9 @@ static int lfsck_layout_assistant(void *args) llmd->llmd_in_double_scan = 1; wake_up_all(&mthread->t_ctl_waitq); + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant phase2 " + "scan start\n", lfsck_lfsck2name(lfsck)); + com->lc_new_checked = 0; com->lc_new_scanned = 0; com->lc_time_last_checkpoint = cfs_time_current(); @@ -3497,6 +3663,9 @@ static int lfsck_layout_assistant(void *args) com->lc_time_last_checkpoint + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + /* flush all async updating before handling orphan. */ + dt_sync(env, lfsck->li_next); + while (llmd->llmd_in_double_scan) { struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs; @@ -3608,7 +3777,7 @@ cleanup2: lr->lr_status = LS_CO_STOPPED; break; default: - CERROR("%s: unknown status: rc = %d\n", + CDEBUG(D_LFSCK, "%s: unknown status: rc = %d\n", lfsck_lfsck2name(lfsck), lfsck->li_status); lr->lr_status = LS_CO_FAILED; @@ -3627,7 +3796,8 @@ cleanup2: rc1 = lfsck_layout_master_notify_others(env, com, lr); if (rc1 != 0) { - CERROR("%s: failed to notify others for layout quit: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to " + "notify others for LFSCK quit: rc = %d\n", lfsck_lfsck2name(lfsck), rc1); rc = rc1; } @@ -3635,8 +3805,16 @@ cleanup2: /* Under force exit case, some requests may be just freed without * verification, those objects should be re-handled when next run. * So not update the on-disk tracing file under such case. */ - if (llmd->llmd_in_double_scan && !llmd->llmd_exit) - rc1 = lfsck_layout_double_scan_result(env, com, rc); + if (llmd->llmd_in_double_scan) { + struct lfsck_layout *lo = com->lc_file_ram; + + if (!llmd->llmd_exit) + rc1 = lfsck_layout_double_scan_result(env, com, rc); + + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant phase2 scan " + "finished, status %d: rc = %d\n", + lfsck_lfsck2name(lfsck), lo->ll_status, rc1); + } fini: if (llmd->llmd_in_double_scan) @@ -3662,6 +3840,7 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, struct lfsck_component *com = llsaa->llsaa_com; struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst; struct lfsck_layout_slave_data *llsd = com->lc_data; + struct lfsck_reply *lr = NULL; bool done = false; if (rc != 0) { @@ -3670,15 +3849,20 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, * the target finished the LFSCK prcoessing. */ done = true; } else { - struct lfsck_reply *lr; - lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY); if (lr->lr_status != LS_SCANNING_PHASE1 && lr->lr_status != LS_SCANNING_PHASE2) done = true; } - if (done) + + if (done) { + CDEBUG(D_LFSCK, "%s: layout LFSCK slave gets the MDT %x " + "status %d\n", lfsck_lfsck2name(com->lc_lfsck), + llst->llst_index, lr != NULL ? lr->lr_status : rc); + lfsck_layout_llst_del(llsd, llst); + } + lfsck_layout_llst_put(llst); lfsck_component_put(env, com); class_export_put(exp); @@ -3767,12 +3951,12 @@ lfsck_layout_slave_query_master(const struct lu_env *env, set = ptlrpc_prep_set(); if (set == NULL) - RETURN(-ENOMEM); + GOTO(log, rc = -ENOMEM); memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_QUERY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; llsd->llsd_touch_gen++; spin_lock(&llsd->llsd_lock); @@ -3801,9 +3985,11 @@ lfsck_layout_slave_query_master(const struct lu_env *env, rc = lfsck_layout_async_query(env, com, exp, llst, lr, set); if (rc != 0) { - CERROR("%s: slave fail to query %s for layout: " - "rc = %d\n", lfsck_lfsck2name(lfsck), + CDEBUG(D_LFSCK, "%s: layout LFSCK slave fail to " + "query %s for layout: rc = %d\n", + lfsck_lfsck2name(lfsck), exp->exp_obd->obd_name, rc); + rc1 = rc; lfsck_layout_llst_put(llst); class_export_put(exp); @@ -3815,7 +4001,13 @@ lfsck_layout_slave_query_master(const struct lu_env *env, rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); - RETURN(rc1 != 0 ? rc1 : rc); + GOTO(log, rc = (rc1 != 0 ? rc1 : rc)); + +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK slave queries master: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), rc); + + return rc; } static void @@ -3832,6 +4024,9 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, int rc; ENTRY; + CDEBUG(D_LFSCK, "%s: layout LFSCK slave notifies master\n", + lfsck_lfsck2name(com->lc_lfsck)); + set = ptlrpc_prep_set(); if (set == NULL) RETURN_EXIT; @@ -3841,7 +4036,7 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, lr->lr_flags = LEF_FROM_OST; lr->lr_status = result; lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; llsd->llsd_touch_gen++; spin_lock(&llsd->llsd_lock); while (!list_empty(&llsd->llsd_master_list)) { @@ -3869,9 +4064,11 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, rc = lfsck_layout_async_notify(env, exp, lr, set); if (rc != 0) - CERROR("%s: slave fail to notify %s for layout: " - "rc = %d\n", lfsck_lfsck2name(lfsck), + CDEBUG(D_LFSCK, "%s: layout LFSCK slave fail to " + "notify %s for layout: rc = %d\n", + lfsck_lfsck2name(lfsck), exp->exp_obd->obd_name, rc); + lfsck_layout_llst_put(llst); class_export_put(exp); spin_lock(&llsd->llsd_lock); @@ -3934,7 +4131,7 @@ static int lfsck_layout_master_check_pairs(const struct lu_env *env, * be updated also. */ magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_V1) { - objs = &(lmm->lmm_objects[0]); + objs = &lmm->lmm_objects[0]; } else { LASSERT(magic == LOV_MAGIC_V3); objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; @@ -4018,7 +4215,7 @@ static int lfsck_layout_slave_check_pairs(const struct lu_env *env, lr = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_PAIRS_VERIFY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_fid = *cfid; /* OST-object itself FID. */ lr->lr_fid2 = *pfid; /* The claimed parent FID. */ @@ -4053,7 +4250,7 @@ static int lfsck_layout_slave_repair_pfid(const struct lu_env *env, obj = lfsck_object_find_by_dev(env, dev, &lr->lr_fid); if (IS_ERR(obj)) - RETURN(PTR_ERR(obj)); + GOTO(log, rc = PTR_ERR(obj)); fid_cpu_to_le(&ff->ff_parent, &lr->lr_fid2); buf = lfsck_buf_get(env, ff, sizeof(*ff)); @@ -4084,6 +4281,11 @@ unlock: dt_write_unlock(env, obj); lu_object_put(env, &obj->do_lu); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK slave repaired pfid for "DFID + ", parent "DFID": rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), + PFID(&lr->lr_fid), PFID(&lr->lr_fid2), rc); + return rc; } @@ -4113,6 +4315,9 @@ static int lfsck_layout_reset(const struct lu_env *env, rc = lfsck_layout_store(env, com); up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK reset: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), rc); + return rc; } @@ -4167,6 +4372,10 @@ static int lfsck_layout_master_checkpoint(const struct lu_env *env, rc = lfsck_layout_store(env, com); up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK master checkpoint at the pos [" + LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, rc); + return rc; } @@ -4181,7 +4390,6 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env, return 0; down_write(&com->lc_sem); - if (init) { lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie; } else { @@ -4195,9 +4403,12 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env, } rc = lfsck_layout_store(env, com); - up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK slave checkpoint at the pos [" + LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, rc); + return rc; } @@ -4221,8 +4432,12 @@ static int lfsck_layout_prep(const struct lu_env *env, if (rc == 0) rc = lfsck_set_param(env, lfsck, start, true); - if (rc != 0) + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: layout LFSCK prep failed: " + "rc = %d\n", lfsck_lfsck2name(lfsck), rc); + return rc; + } } down_write(&com->lc_sem); @@ -4271,13 +4486,26 @@ static int lfsck_layout_slave_prep(const struct lu_env *env, struct lfsck_start_param *lsp) { struct lfsck_layout_slave_data *llsd = com->lc_data; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_layout *lo = com->lc_file_ram; struct lfsck_start *start = lsp->lsp_start; int rc; rc = lfsck_layout_prep(env, com, start); - if (rc != 0 || !lsp->lsp_index_valid) + if (rc != 0) return rc; + if (lo->ll_flags & LF_CRASHED_LASTID && + list_empty(&llsd->llsd_master_list)) { + LASSERT(lfsck->li_out_notify != NULL); + + lfsck->li_out_notify(env, lfsck->li_out_notify_data, + LE_LASTID_REBUILDING); + } + + if (!lsp->lsp_index_valid) + return 0; + rc = lfsck_layout_llst_add(llsd, lsp->lsp_index); if (rc == 0 && start != NULL && start->ls_flags & LPF_ORPHAN) { LASSERT(!llsd->llsd_rbtree_valid); @@ -4287,6 +4515,10 @@ static int lfsck_layout_slave_prep(const struct lu_env *env, write_unlock(&llsd->llsd_rb_lock); } + CDEBUG(D_LFSCK, "%s: layout LFSCK slave prep done, start pos [" + LPU64"]\n", lfsck_lfsck2name(lfsck), + com->lc_pos_start.lp_oit_cookie); + return rc; } @@ -4299,7 +4531,8 @@ static int lfsck_layout_master_prep(const struct lu_env *env, struct ptlrpc_thread *mthread = &lfsck->li_thread; struct ptlrpc_thread *athread = &llmd->llmd_thread; struct lfsck_thread_args *lta; - long rc; + struct task_struct *task; + int rc; ENTRY; rc = lfsck_layout_prep(env, com, lsp->lsp_start); @@ -4318,10 +4551,11 @@ static int lfsck_layout_master_prep(const struct lu_env *env, if (IS_ERR(lta)) RETURN(PTR_ERR(lta)); - rc = PTR_ERR(kthread_run(lfsck_layout_assistant, lta, "lfsck_layout")); - if (IS_ERR_VALUE(rc)) { - CERROR("%s: Cannot start LFSCK layout assistant thread: " - "rc = %ld\n", lfsck_lfsck2name(lfsck), rc); + task = kthread_run(lfsck_layout_assistant, lta, "lfsck_layout"); + if (IS_ERR(task)) { + rc = PTR_ERR(task); + CERROR("%s: cannot start LFSCK layout assistant thread: " + "rc = %d\n", lfsck_lfsck2name(lfsck), rc); lfsck_thread_args_fini(lta); } else { struct l_wait_info lwi = { 0 }; @@ -4336,6 +4570,10 @@ static int lfsck_layout_master_prep(const struct lu_env *env, rc = 0; } + CDEBUG(D_LFSCK, "%s: layout LFSCK master prep done, start pos [" + LPU64"\n", lfsck_lfsck2name(lfsck), + com->lc_pos_start.lp_oit_cookie); + RETURN(rc); } @@ -4374,7 +4612,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, * be updated also. */ magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_V1) { - objs = &(lmm->lmm_objects[0]); + objs = &lmm->lmm_objects[0]; } else { LASSERT(magic == LOV_MAGIC_V3); objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; @@ -4390,7 +4628,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, le32_to_cpu(objs->l_ost_idx); bool wakeup = false; - if (is_dummy_lov_ost_data(objs)) + if (unlikely(lovea_slot_is_dummy(objs))) continue; l_wait_event(mthread->t_ctl_waitq, @@ -4408,8 +4646,8 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, ostid_to_fid(fid, oi, index); tgt = lfsck_tgt_get(ltds, index); if (unlikely(tgt == NULL)) { - CERROR("%s: Cannot talk with OST %x which did not join " - "the layout LFSCK.\n", + CDEBUG(D_LFSCK, "%s: cannot talk with OST %x which " + "did not join the layout LFSCK\n", lfsck_lfsck2name(lfsck), index); lo->ll_flags |= LF_INCOMPLETE; goto next; @@ -4515,6 +4753,7 @@ static int lfsck_layout_master_exec_oit(const struct lu_env *env, int rc = 0; bool locked = false; bool stripe = false; + bool bad_oi = false; ENTRY; if (!S_ISREG(lfsck_object_type(obj))) @@ -4545,8 +4784,7 @@ again: GOTO(out, stripe = true); /* Inconsistent lmm_oi, should be repaired. */ - CDEBUG(D_LFSCK, "Repair bad lmm_oi for "DFID"\n", - PFID(lfsck_dto2fid(obj))); + bad_oi = true; if (bk->lb_param & LPF_DRYRUN) { down_write(&com->lc_sem); @@ -4609,6 +4847,13 @@ out: dt_trans_stop(env, dev, handle); lfsck_layout_unlock(&lh); + + if (bad_oi) + CDEBUG(D_LFSCK, "%s: layout LFSCK master %s bad lmm_oi for " + DFID": rc = %d\n", lfsck_lfsck2name(lfsck), + bk->lb_param & LPF_DRYRUN ? "found" : "repaired", + PFID(lfsck_dto2fid(obj)), rc); + if (stripe) { rc = lfsck_layout_scan_stripes(env, com, obj, lmm); } else { @@ -4639,6 +4884,17 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, LASSERT(llsd != NULL); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY5) && + cfs_fail_val == lfsck_dev_idx(lfsck->li_bottom)) { + struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(1), + NULL, NULL); + struct ptlrpc_thread *thread = &lfsck->li_thread; + + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread), + &lwi); + } + lfsck_rbtree_update_bitmap(env, com, fid, false); down_write(&com->lc_sem); @@ -4661,6 +4917,9 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, lls->lls_seq = seq; rc = lfsck_layout_lastid_load(env, com, lls); if (rc != 0) { + CDEBUG(D_LFSCK, "%s: layout LFSCK failed to " + "load LAST_ID for "LPX64": rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), seq, rc); lo->ll_objs_failed_phase1++; OBD_FREE_PTR(lls); GOTO(unlock, rc); @@ -4681,8 +4940,8 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, /* OFD may create new objects during LFSCK scanning. */ rc = lfsck_layout_lastid_reload(env, com, lls); if (unlikely(rc != 0)) - CWARN("%s: failed to reload LAST_ID for "LPX64 - ": rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK failed to " + "reload LAST_ID for "LPX64": rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc); if (oid <= lls->lls_lastid) @@ -4784,6 +5043,9 @@ static int lfsck_layout_master_post(const struct lu_env *env, rc = lfsck_layout_store(env, com); up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK master post done: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + RETURN(rc); } @@ -4803,7 +5065,6 @@ static int lfsck_layout_slave_post(const struct lu_env *env, LASSERT(lfsck->li_out_notify != NULL); down_write(&com->lc_sem); - spin_lock(&lfsck->li_lock); if (!init) lo->ll_pos_last_checkpoint = @@ -4846,7 +5107,6 @@ static int lfsck_layout_slave_post(const struct lu_env *env, } rc = lfsck_layout_store(env, com); - up_write(&com->lc_sem); lfsck_layout_slave_notify_master(env, com, LE_PHASE1_DONE, result); @@ -4854,73 +5114,60 @@ static int lfsck_layout_slave_post(const struct lu_env *env, if (result <= 0) lfsck_rbtree_cleanup(env, com); + CDEBUG(D_LFSCK, "%s: layout LFSCK slave post done: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + return rc; } static int lfsck_layout_dump(const struct lu_env *env, - struct lfsck_component *com, char *buf, int len) + struct lfsck_component *com, struct seq_file *m) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_layout *lo = com->lc_file_ram; - int save = len; - int ret = -ENOSPC; int rc; down_read(&com->lc_sem); - rc = snprintf(buf, len, - "name: lfsck_layout\n" + seq_printf(m, "name: lfsck_layout\n" "magic: %#x\n" "version: %d\n" "status: %s\n", lo->ll_magic, bk->lb_version, lfsck_status2names(lo->ll_status)); - if (rc <= 0) - goto out; - buf += rc; - len -= rc; - rc = lfsck_bits_dump(&buf, &len, lo->ll_flags, lfsck_flags_names, - "flags"); + rc = lfsck_bits_dump(m, lo->ll_flags, lfsck_flags_names, "flags"); if (rc < 0) goto out; - rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names, - "param"); + rc = lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param"); if (rc < 0) goto out; - rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_complete, + rc = lfsck_time_dump(m, lo->ll_time_last_complete, "time_since_last_completed"); if (rc < 0) goto out; - rc = lfsck_time_dump(&buf, &len, lo->ll_time_latest_start, + rc = lfsck_time_dump(m, lo->ll_time_latest_start, "time_since_latest_start"); if (rc < 0) goto out; - rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_checkpoint, + rc = lfsck_time_dump(m, lo->ll_time_last_checkpoint, "time_since_last_checkpoint"); if (rc < 0) goto out; - rc = snprintf(buf, len, - "latest_start_position: "LPU64"\n" + seq_printf(m, "latest_start_position: "LPU64"\n" "last_checkpoint_position: "LPU64"\n" "first_failure_position: "LPU64"\n", lo->ll_pos_latest_start, lo->ll_pos_last_checkpoint, lo->ll_pos_first_inconsistent); - if (rc <= 0) - goto out; - buf += rc; - len -= rc; - - rc = snprintf(buf, len, - "success_count: %u\n" + seq_printf(m, "success_count: %u\n" "repaired_dangling: "LPU64"\n" "repaired_unmatched_pair: "LPU64"\n" "repaired_multiple_referenced: "LPU64"\n" @@ -4940,11 +5187,6 @@ static int lfsck_layout_dump(const struct lu_env *env, lo->ll_objs_skipped, lo->ll_objs_failed_phase1, lo->ll_objs_failed_phase2); - if (rc <= 0) - goto out; - - buf += rc; - len -= rc; if (lo->ll_status == LS_SCANNING_PHASE1) { __u64 pos; @@ -4962,8 +5204,7 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(new_checked, duration); if (rtime != 0) do_div(speed, rtime); - rc = snprintf(buf, len, - "checked_phase1: "LPU64"\n" + seq_printf(m, "checked_phase1: "LPU64"\n" "checked_phase2: "LPU64"\n" "run_time_phase1: %u seconds\n" "run_time_phase2: %u seconds\n" @@ -4977,11 +5218,6 @@ static int lfsck_layout_dump(const struct lu_env *env, lo->ll_run_time_phase2, speed, new_checked); - if (rc <= 0) - goto out; - - buf += rc; - len -= rc; LASSERT(lfsck->li_di_oit != NULL); @@ -4994,12 +5230,8 @@ static int lfsck_layout_dump(const struct lu_env *env, pos = iops->store(env, lfsck->li_di_oit); if (!lfsck->li_current_oit_processed) pos--; - rc = snprintf(buf, len, "current_position: "LPU64"\n", pos); - if (rc <= 0) - goto out; + seq_printf(m, "current_position: "LPU64"\n", pos); - buf += rc; - len -= rc; } else if (lo->ll_status == LS_SCANNING_PHASE2) { cfs_duration_t duration = cfs_time_current() - lfsck->li_time_last_checkpoint; @@ -5017,29 +5249,26 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(speed1, lo->ll_run_time_phase1); if (rtime != 0) do_div(speed2, rtime); - rc = snprintf(buf, len, - "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" items/sec\n" - "real-time_speed_phase1: N/A\n" - "real-time_speed_phase2: "LPU64" items/sec\n" - "current_position: "DFID"\n", - lo->ll_objs_checked_phase1, - checked, - lo->ll_run_time_phase1, - rtime, - speed1, - speed2, - new_checked, - PFID(&com->lc_fid_latest_scanned_phase2)); + rc = seq_printf(m, "checked_phase1: "LPU64"\n" + "checked_phase2: "LPU64"\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: "LPU64" items/sec\n" + "average_speed_phase2: "LPU64" items/sec\n" + "real-time_speed_phase1: N/A\n" + "real-time_speed_phase2: "LPU64" items/sec\n" + "current_position: "DFID"\n", + lo->ll_objs_checked_phase1, + checked, + lo->ll_run_time_phase1, + rtime, + speed1, + speed2, + new_checked, + PFID(&com->lc_fid_latest_scanned_phase2)); if (rc <= 0) goto out; - buf += rc; - len -= rc; } else { __u64 speed1 = lo->ll_objs_checked_phase1; __u64 speed2 = lo->ll_objs_checked_phase2; @@ -5048,34 +5277,26 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(speed1, lo->ll_run_time_phase1); if (lo->ll_run_time_phase2 != 0) do_div(speed2, lo->ll_run_time_phase2); - rc = snprintf(buf, len, - "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" objs/sec\n" - "real-time_speed_phase1: N/A\n" - "real-time_speed_phase2: N/A\n" - "current_position: N/A\n", - lo->ll_objs_checked_phase1, - lo->ll_objs_checked_phase2, - lo->ll_run_time_phase1, - lo->ll_run_time_phase2, - speed1, - speed2); - if (rc <= 0) - goto out; - - buf += rc; - len -= rc; + seq_printf(m, "checked_phase1: "LPU64"\n" + "checked_phase2: "LPU64"\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: "LPU64" items/sec\n" + "average_speed_phase2: "LPU64" objs/sec\n" + "real-time_speed_phase1: N/A\n" + "real-time_speed_phase2: N/A\n" + "current_position: N/A\n", + lo->ll_objs_checked_phase1, + lo->ll_objs_checked_phase2, + lo->ll_run_time_phase1, + lo->ll_run_time_phase2, + speed1, + speed2); } - ret = save - len; - out: up_read(&com->lc_sem); - return ret; + return rc; } static int lfsck_layout_master_double_scan(const struct lu_env *env, @@ -5118,6 +5339,9 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env, RETURN(0); } + CDEBUG(D_LFSCK, "%s: layout LFSCK slave phase2 scan start\n", + lfsck_lfsck2name(lfsck)); + atomic_inc(&lfsck->li_double_scan_count); com->lc_new_checked = 0; @@ -5164,6 +5388,10 @@ done: if (atomic_dec_and_test(&lfsck->li_double_scan_count)) wake_up_all(&lfsck->li_thread.t_ctl_waitq); + CDEBUG(D_LFSCK, "%s: layout LFSCK slave phase2 scan finished, " + "status %d: rc = %d\n", + lfsck_lfsck2name(lfsck), lo->ll_status, rc); + return rc; } @@ -5197,6 +5425,10 @@ static void lfsck_layout_master_data_release(const struct lu_env *env, ltd_layout_list) { list_del_init(<d->ltd_layout_list); } + spin_unlock(<ds->ltd_lock); + + ltds = &lfsck->li_mdt_descs; + spin_lock(<ds->ltd_lock); list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase1_list, ltd_layout_phase_list) { list_del_init(<d->ltd_layout_phase_list); @@ -5286,6 +5518,11 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, RETURN(rc); } + CDEBUG(D_LFSCK, "%s: layout LFSCK master handle notify %u " + "from %s %x, status %d\n", lfsck_lfsck2name(lfsck), + lr->lr_event, (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", + lr->lr_index, lr->lr_status); + if (lr->lr_event != LE_PHASE1_DONE && lr->lr_event != LE_PHASE2_DONE && lr->lr_event != LE_PEER_EXIT) @@ -5309,10 +5546,6 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, if (lr->lr_status <= 0) { ltd->ltd_layout_done = 1; list_del_init(<d->ltd_layout_list); - CWARN("%s: %s %x failed/stopped at phase1: rc = %d.\n", - lfsck_lfsck2name(lfsck), - (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", - ltd->ltd_index, lr->lr_status); lo->ll_flags |= LF_INCOMPLETE; fail = true; break; @@ -5340,13 +5573,8 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, fail = true; ltd->ltd_layout_done = 1; list_del_init(<d->ltd_layout_list); - if (!(lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT)) { - CWARN("%s: the peer %s %x exit layout LFSCK.\n", - lfsck_lfsck2name(lfsck), - (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", - ltd->ltd_index); + if (!(lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT)) lo->ll_flags |= LF_INCOMPLETE; - } break; default: break; @@ -5419,6 +5647,9 @@ static int lfsck_layout_slave_in_notify(const struct lu_env *env, } case LE_PHASE2_DONE: case LE_PEER_EXIT: + CDEBUG(D_LFSCK, "%s: layout LFSCK slave handle notify %u " + "from MDT %x, status %d\n", lfsck_lfsck2name(lfsck), + lr->lr_event, lr->lr_index, lr->lr_status); break; default: RETURN(-EINVAL); @@ -5480,7 +5711,7 @@ static int lfsck_layout_master_stop_notify(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_PEER_EXIT; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_status = LS_CO_PAUSED; if (ltds == &lfsck->li_ost_descs) lr->lr_flags = LEF_TO_OST; @@ -5496,7 +5727,8 @@ static int lfsck_layout_master_stop_notify(const struct lu_env *env, lfsck_layout_master_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { - CERROR("%s: Fail to notify %s %x for co-stop: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to notify %s %x " + "for co-stop: rc = %d\n", lfsck_lfsck2name(lfsck), (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", ltd->ltd_index, rc); @@ -5518,10 +5750,17 @@ static int lfsck_layout_slave_join(const struct lu_env *env, int rc = 0; ENTRY; - if (!lsp->lsp_index_valid || start == NULL || - !(start->ls_flags & LPF_ALL_TGT) || - !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) - RETURN(-EALREADY); + if (start == NULL || !(start->ls_flags & LPF_ORPHAN)) + RETURN(0); + + if (!lsp->lsp_index_valid) + RETURN(-EINVAL); + + /* If someone is running the LFSCK without orphan handling, + * it will not maintain the object accessing rbtree. So we + * cannot join it for orphan handling. */ + if (!llsd->llsd_rbtree_valid) + RETURN(-EBUSY); spin_unlock(&lfsck->li_lock); rc = lfsck_layout_llst_add(llsd, lsp->lsp_index); @@ -5592,7 +5831,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) init_rwsem(&com->lc_sem); atomic_set(&com->lc_ref, 1); com->lc_lfsck = lfsck; - com->lc_type = LT_LAYOUT; + com->lc_type = LFSCK_TYPE_LAYOUT; if (lfsck->li_master) { struct lfsck_layout_master_data *llmd; @@ -5670,7 +5909,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) spin_unlock(&lfsck->li_lock); break; default: - CERROR("%s: unknown lfsck_layout status: rc = %u\n", + CERROR("%s: unknown lfsck_layout status %d\n", lfsck_lfsck2name(lfsck), lo->ll_status); /* fall through */ case LS_SCANNING_PHASE1: @@ -5705,8 +5944,11 @@ out: if (root != NULL && !IS_ERR(root)) lu_object_put(env, &root->do_lu); - if (rc != 0) + if (rc != 0) { lfsck_component_cleanup(env, com); + CERROR("%s: fail to init layout LFSCK component: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + } return rc; } @@ -5794,6 +6036,9 @@ static void lfsck_layout_destroy_orphan(const struct lu_env *env, stop: dt_trans_stop(env, dev, handle); + CDEBUG(D_LFSCK, "destroy orphan OST-object "DFID": rc = %d\n", + PFID(lfsck_dto2fid(obj)), rc); + RETURN_EXIT; } @@ -5860,7 +6105,7 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, if (unlikely(lfsck == NULL)) RETURN(ERR_PTR(-ENXIO)); - com = lfsck_component_find(lfsck, LT_LAYOUT); + com = lfsck_component_find(lfsck, LFSCK_TYPE_LAYOUT); if (unlikely(com == NULL)) GOTO(out, rc = -ENOENT); @@ -5914,6 +6159,10 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, out: if (com != NULL) lfsck_component_put(env, com); + + CDEBUG(D_LFSCK, "%s: init the orphan iteration: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + lfsck_instance_put(env, lfsck); if (rc != 0) { if (it != NULL) @@ -5934,6 +6183,9 @@ static void lfsck_orphan_it_fini(const struct lu_env *env, struct lfsck_layout_slave_target *llst; if (com != NULL) { + CDEBUG(D_LFSCK, "%s: fini the orphan iteration\n", + lfsck_lfsck2name(com->lc_lfsck)); + llsd = com->lc_data; read_unlock(&llsd->llsd_rb_lock); llst = it->loi_llst; @@ -6234,10 +6486,11 @@ static int lfsck_orphan_it_load(const struct lu_env *env, LASSERT(llst != NULL); if (hash != llst->llst_hash) { - CWARN("%s: the given hash "LPU64" for orphan iteration does " - "not match the one when fini "LPU64", to be reset.\n", - lfsck_lfsck2name(it->loi_com->lc_lfsck), hash, - llst->llst_hash); + CDEBUG(D_LFSCK, "%s: the given hash "LPU64" for orphan " + "iteration does not match the one when fini " + LPU64", to be reset.\n", + lfsck_lfsck2name(it->loi_com->lc_lfsck), hash, + llst->llst_hash); fid_zero(&llst->llst_fid); llst->llst_hash = 0; }