X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_layout.c;h=2ea8a49ca81eb555b7c4042743ea0f259890bcb0;hb=ab2dd91a18eb31d4fac35683eb4ebad7bdfc2499;hp=3ee62b21e3e1e4401a0fd85e1fd105025dc158a4;hpb=754bf71c650c427acfb0fe35017e8f9c1eb9fa7d;p=fs%2Flustre-release.git diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 3ee62b2..2ea8a49 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -303,8 +302,7 @@ static inline bool lfsck_layout_req_empty(struct lfsck_layout_master_data *llmd) } static int lfsck_layout_get_lovea(const struct lu_env *env, - struct dt_object *obj, - struct lu_buf *buf, ssize_t *buflen) + struct dt_object *obj, struct lu_buf *buf) { int rc; @@ -317,9 +315,6 @@ again: return rc; lu_buf_realloc(buf, rc); - if (buflen != NULL) - *buflen = buf->lb_len; - if (buf->lb_buf == NULL) return -ENOMEM; @@ -334,9 +329,6 @@ again: if (unlikely(buf->lb_buf == NULL)) { lu_buf_alloc(buf, rc); - if (buflen != NULL) - *buflen = buf->lb_len; - if (buf->lb_buf == NULL) return -ENOMEM; @@ -359,7 +351,7 @@ static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm) struct ost_id oi; int rc; - lmm_oi_cpu_to_le(&oi, &lmm->lmm_oi); + lmm_oi_le_to_cpu(&oi, &lmm->lmm_oi); if ((magic & LOV_MAGIC_MASK) == LOV_MAGIC_MAGIC) rc = -EOPNOTSUPP; else @@ -377,7 +369,7 @@ static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm) if (lov_pattern(pattern) != LOV_PATTERN_RAID0) { struct ost_id oi; - lmm_oi_cpu_to_le(&oi, &lmm->lmm_oi); + lmm_oi_le_to_cpu(&oi, &lmm->lmm_oi); CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u on "DOSTID"\n", pattern, POSTID(&oi)); @@ -492,7 +484,7 @@ static struct lfsck_rbtree_node *lfsck_rbtree_new(const struct lu_env *env, return ERR_PTR(-ENOMEM); } - rb_init_node(&lrn->lrn_node); + RB_CLEAR_NODE(&lrn->lrn_node); lrn->lrn_seq = fid_seq(fid); lrn->lrn_first_oid = fid_oid(fid) & 
~LFSCK_RBTREE_BITMAP_MASK; atomic_set(&lrn->lrn_known_count, 0); @@ -566,6 +558,9 @@ static int lfsck_rbtree_setup(const struct lu_env *env, llsd->llsd_rbtree_valid = 1; dev->dd_record_fid_accessed = 1; + CDEBUG(D_LFSCK, "%s: layout LFSCK init OST-objects accessing bitmap\n", + lfsck_lfsck2name(lfsck)); + return 0; } @@ -596,6 +591,9 @@ static void lfsck_rbtree_cleanup(const struct lu_env *env, lu_object_put(env, &llsd->llsd_rb_obj->do_lu); llsd->llsd_rb_obj = NULL; } + + CDEBUG(D_LFSCK, "%s: layout LFSCK fini OST-objects accessing bitmap\n", + lfsck_lfsck2name(lfsck)); } static void lfsck_rbtree_update_bitmap(const struct lu_env *env, @@ -610,9 +608,6 @@ static void lfsck_rbtree_update_bitmap(const struct lu_env *env, int rc = 0; ENTRY; - CDEBUG(D_LFSCK, "%s: update bitmap for "DFID"\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(fid)); - if (unlikely(!fid_is_sane(fid) || fid_is_last_id(fid))) RETURN_EXIT; @@ -664,10 +659,12 @@ out: if (rc != 0 && accessed) { struct lfsck_layout *lo = com->lc_file_ram; - CERROR("%s: Fail to update object accessed bitmap, will cause " - "incorrect LFSCK OST-object handling, so disable it to " - "cancel orphan handling for related device. rc = %d.\n", + CDEBUG(D_LFSCK, "%s: fail to update OST-objects accessing " + "bitmap, and will cause incorrect LFSCK OST-object " + "handling, so disable it to cancel orphan handling " + "for related device. 
rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), rc); + lo->ll_flags |= LF_INCOMPLETE; lfsck_rbtree_cleanup(env, com); } @@ -751,20 +748,20 @@ static int lfsck_layout_load(const struct lu_env *env, if (rc == 0) { return -ENOENT; } else if (rc < 0) { - CWARN("%s: failed to load lfsck_layout: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), rc); + CDEBUG(D_LFSCK, "%s: failed to load lfsck_layout: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), rc); return rc; } else if (rc != size) { - CWARN("%s: crashed lfsck_layout, to be reset: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), rc); + CDEBUG(D_LFSCK, "%s: lfsck_layout size %u != %u; reset it\n", + lfsck_lfsck2name(com->lc_lfsck), rc, (unsigned int)size); return 1; } lfsck_layout_le_to_cpu(lo, com->lc_file_disk); if (lo->ll_magic != LFSCK_LAYOUT_MAGIC) { - CWARN("%s: invalid lfsck_layout magic %#x != %#x, " - "to be reset\n", lfsck_lfsck2name(com->lc_lfsck), - lo->ll_magic, LFSCK_LAYOUT_MAGIC); + CDEBUG(D_LFSCK, "%s: invalid lfsck_layout magic %#x != %#x, " + "to be reset\n", lfsck_lfsck2name(com->lc_lfsck), + lo->ll_magic, LFSCK_LAYOUT_MAGIC); return 1; } @@ -785,39 +782,30 @@ static int lfsck_layout_store(const struct lu_env *env, lfsck_layout_cpu_to_le(lo, com->lc_file_ram); handle = dt_trans_create(env, lfsck->li_bottom); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - CERROR("%s: fail to create trans for storing lfsck_layout: " - "rc = %d\n", lfsck_lfsck2name(lfsck), rc); - RETURN(rc); - } + if (IS_ERR(handle)) + GOTO(log, rc = PTR_ERR(handle)); rc = dt_declare_record_write(env, obj, lfsck_buf_get(env, lo, size), pos, handle); - if (rc != 0) { - CERROR("%s: fail to declare trans for storing lfsck_layout(1): " - "rc = %d\n", lfsck_lfsck2name(lfsck), rc); + if (rc != 0) GOTO(out, rc); - } rc = dt_trans_start_local(env, lfsck->li_bottom, handle); - if (rc != 0) { - CERROR("%s: fail to start trans for storing lfsck_layout: " - "rc = %d\n", lfsck_lfsck2name(lfsck), rc); + if (rc != 0) GOTO(out, rc); - } rc = 
dt_record_write(env, obj, lfsck_buf_get(env, lo, size), &pos, handle); - if (rc != 0) - CERROR("%s: fail to store lfsck_layout(1): size = %d, " - "rc = %d\n", lfsck_lfsck2name(lfsck), (int)size, rc); GOTO(out, rc); out: dt_trans_stop(env, lfsck->li_bottom, handle); +log: + if (rc != 0) + CDEBUG(D_LFSCK, "%s: fail to store lfsck_layout: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); return rc; } @@ -916,9 +904,6 @@ lfsck_layout_lastid_create(const struct lu_env *env, int rc; ENTRY; - CDEBUG(D_LFSCK, "To create LAST_ID for "LPX64"\n", - fid_seq(lfsck_dto2fid(obj))); - if (bk->lb_param & LPF_DRYRUN) return 0; @@ -929,7 +914,7 @@ lfsck_layout_lastid_create(const struct lu_env *env, th = dt_trans_create(env, dt); if (IS_ERR(th)) - RETURN(rc = PTR_ERR(th)); + GOTO(log, rc = PTR_ERR(th)); rc = dt_declare_create(env, obj, la, NULL, dof, th); if (rc != 0) @@ -947,7 +932,7 @@ lfsck_layout_lastid_create(const struct lu_env *env, GOTO(stop, rc); dt_write_lock(env, obj, 0); - if (likely(!dt_object_exists(obj))) { + if (likely(dt_object_exists(obj) == 0)) { rc = dt_create(env, obj, la, NULL, dof, th); if (rc == 0) rc = dt_record_write(env, obj, @@ -961,6 +946,11 @@ lfsck_layout_lastid_create(const struct lu_env *env, stop: dt_trans_stop(env, dt, th); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK will create LAST_ID for " + LPX64": rc = %d\n", + lfsck_lfsck2name(lfsck), fid_seq(lfsck_dto2fid(obj)), rc); + return rc; } @@ -993,6 +983,12 @@ lfsck_layout_lastid_reload(const struct lu_env *env, lfsck->li_out_notify(env, lfsck->li_out_notify_data, LE_LASTID_REBUILDING); lo->ll_flags |= LF_CRASHED_LASTID; + + CDEBUG(D_LFSCK, "%s: layout LFSCK finds crashed " + "LAST_ID file (1) for the sequence "LPX64 + ", old value "LPU64", known value "LPU64"\n", + lfsck_lfsck2name(lfsck), lls->lls_seq, + lastid, lls->lls_lastid); } } else if (lastid >= lls->lls_lastid) { lls->lls_lastid = lastid; @@ -1019,30 +1015,12 @@ lfsck_layout_lastid_store(const struct lu_env *env, list_for_each_entry(lls, 
&llsd->llsd_seq_list, lls_list) { loff_t pos = 0; - /* XXX: Add the code back if we really found related - * inconsistent cases in the future. */ -#if 0 - if (!lls->lls_dirty) { - /* In OFD, before the pre-creation, the LAST_ID - * file will be updated firstly, which may hide - * some potential crashed cases. For example: - * - * The old obj1's ID is higher than old LAST_ID - * but lower than the new LAST_ID, but the LFSCK - * have not touch the obj1 until the OFD updated - * the LAST_ID. So the LFSCK does not regard it - * as crashed case. But when OFD does not create - * successfully, it will set the LAST_ID as the - * real created objects' ID, then LFSCK needs to - * found related inconsistency. */ - rc = lfsck_layout_lastid_reload(env, com, lls); - if (likely(!lls->lls_dirty)) - continue; - } -#endif + if (!lls->lls_dirty) + continue; - CDEBUG(D_LFSCK, "To sync the LAST_ID for "LPX64 - " as "LPU64"\n", lls->lls_seq, lls->lls_lastid); + CDEBUG(D_LFSCK, "%s: layout LFSCK will sync the LAST_ID for " + " "LPX64" as "LPU64"\n", + lfsck_lfsck2name(lfsck), lls->lls_seq, lls->lls_lastid); if (bk->lb_param & LPF_DRYRUN) { lls->lls_dirty = 0; @@ -1052,7 +1030,8 @@ lfsck_layout_lastid_store(const struct lu_env *env, th = dt_trans_create(env, dt); if (IS_ERR(th)) { rc1 = PTR_ERR(th); - CERROR("%s: (1) failed to store "LPX64": rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK failed to store " + "the LAST_ID for "LPX64"(1): rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc1); continue; @@ -1082,7 +1061,8 @@ stop: dt_trans_stop(env, dt, th); if (rc != 0) { rc1 = rc; - CERROR("%s: (2) failed to store "LPX64": rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK failed to store " + "the LAST_ID for "LPX64"(2): rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc1); } @@ -1110,7 +1090,7 @@ lfsck_layout_lastid_load(const struct lu_env *env, RETURN(PTR_ERR(obj)); /* LAST_ID crashed, to be rebuilt */ - if (!dt_object_exists(obj)) { + if (dt_object_exists(obj) == 0) { 
if (!(lo->ll_flags & LF_CRASHED_LASTID)) { LASSERT(lfsck->li_out_notify != NULL); @@ -1118,6 +1098,10 @@ lfsck_layout_lastid_load(const struct lu_env *env, LE_LASTID_REBUILDING); lo->ll_flags |= LF_CRASHED_LASTID; + CDEBUG(D_LFSCK, "%s: layout LFSCK cannot find the " + "LAST_ID file for sequence "LPX64"\n", + lfsck_lfsck2name(lfsck), lls->lls_seq); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY4) && cfs_fail_val > 0) { struct l_wait_info lwi = LWI_TIMEOUT( @@ -1148,6 +1132,11 @@ lfsck_layout_lastid_load(const struct lu_env *env, lfsck->li_out_notify(env, lfsck->li_out_notify_data, LE_LASTID_REBUILDING); lo->ll_flags |= LF_CRASHED_LASTID; + + CDEBUG(D_LFSCK, "%s: layout LFSCK finds invalid " + "LAST_ID file for the sequence "LPX64 + ": rc = %d\n", + lfsck_lfsck2name(lfsck), lls->lls_seq, rc); } lls->lls_lastid = le64_to_cpu(lls->lls_lastid); @@ -1170,10 +1159,16 @@ static void lfsck_layout_record_failure(const struct lu_env *env, struct lfsck_layout *lo) { lo->ll_objs_failed_phase1++; - if (unlikely(lo->ll_pos_first_inconsistent == 0)) + if (unlikely(lo->ll_pos_first_inconsistent == 0)) { lo->ll_pos_first_inconsistent = lfsck->li_obj_oit->do_index_ops->dio_it.store(env, lfsck->li_di_oit); + + CDEBUG(D_LFSCK, "%s: layout LFSCK hit first non-repaired " + "inconsistency at the pos ["LPU64"]\n", + lfsck_lfsck2name(lfsck), + lo->ll_pos_first_inconsistent); + } } static int lfsck_layout_master_async_interpret(const struct lu_env *env, @@ -1192,8 +1187,9 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env, if (rc != 0) { struct lfsck_layout *lo = com->lc_file_ram; - CERROR("%s: fail to notify %s %x for layout start: " - "rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), + CDEBUG(D_LFSCK, "%s: fail to notify %s %x for layout " + "start: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), (lr->lr_flags & LEF_TO_OST) ? 
"OST" : "MDT", ltd->ltd_index, rc); lo->ll_flags |= LF_INCOMPLETE; @@ -1228,11 +1224,11 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env, case LE_PHASE2_DONE: case LE_PEER_EXIT: if (rc != 0 && rc != -EALREADY) - CWARN("%s: fail to notify %s %x for layout: " - "event = %d, rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), - (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", - ltd->ltd_index, lr->lr_event, rc); + CDEBUG(D_LFSCK, "%s: fail to notify %s %x for layout: " + "event = %d, rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), + (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", + ltd->ltd_index, lr->lr_event, rc); break; case LE_QUERY: { struct lfsck_reply *reply; @@ -1249,7 +1245,7 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env, &RMF_LFSCK_REPLY); if (reply == NULL) { rc = -EPROTO; - CERROR("%s: invalid return value: rc = %d\n", + CDEBUG(D_LFSCK, "%s: invalid query reply: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), rc); spin_lock(<ds->ltd_lock); list_del_init(<d->ltd_layout_phase_list); @@ -1287,7 +1283,7 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env, break; } default: - CERROR("%s: unexpected event: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK unexpected event: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lr->lr_event); break; } @@ -1324,7 +1320,7 @@ static int lfsck_layout_master_query_others(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_QUERY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; laia->laia_com = com; laia->laia_lr = lr; laia->laia_shared = 0; @@ -1351,8 +1347,7 @@ again: break; ltd->ltd_layout_gen = llmd->llmd_touch_gen; - list_del(<d->ltd_layout_phase_list); - list_add_tail(<d->ltd_layout_phase_list, head); + list_move_tail(<d->ltd_layout_phase_list, head); atomic_inc(<d->ltd_ref); laia->laia_ltd = ltd; spin_unlock(<ds->ltd_lock); @@ -1360,8 +1355,8 @@ again: 
lfsck_layout_master_async_interpret, laia, LFSCK_QUERY); if (rc != 0) { - CERROR("%s: fail to query %s %x for layout: rc = %d\n", - lfsck_lfsck2name(lfsck), + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to query %s %x: " + "rc = %d\n", lfsck_lfsck2name(lfsck), (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", ltd->ltd_index, rc); lfsck_tgt_put(ltd); @@ -1418,7 +1413,7 @@ static int lfsck_layout_master_notify_others(const struct lu_env *env, RETURN(-ENOMEM); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; laia->laia_com = com; laia->laia_lr = lr; laia->laia_shared = 0; @@ -1438,8 +1433,8 @@ static int lfsck_layout_master_notify_others(const struct lu_env *env, lfsck_layout_master_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { - CERROR("%s: fail to notify %s %x for layout " - "start: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to " + "notify %s %x for start: rc = %d\n", lfsck_lfsck2name(lfsck), (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", idx, rc); @@ -1460,6 +1455,7 @@ static int lfsck_layout_master_notify_others(const struct lu_env *env, break; /* link other MDT targets locallly. */ + ltds = &lfsck->li_mdt_descs; spin_lock(<ds->ltd_lock); cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { ltd = LTD_TGT(ltds, idx); @@ -1520,8 +1516,9 @@ again: lfsck_layout_master_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { - CERROR("%s: fail to notify %s %x for layout " - "stop/phase2: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to " + "notify %s %x for stop/phase2_done/" + "peer_exit: rc = %d\n", lfsck_lfsck2name(lfsck), (lr->lr_flags & LEF_TO_OST) ? 
"OST" : "MDT", ltd->ltd_index, rc); @@ -1558,9 +1555,8 @@ again: break; ltd->ltd_layout_gen = llmd->llmd_touch_gen; - list_del_init(<d->ltd_layout_phase_list); - list_add_tail(<d->ltd_layout_phase_list, - &llmd->llmd_mdt_phase1_list); + list_move_tail(<d->ltd_layout_phase_list, + &llmd->llmd_mdt_phase1_list); atomic_inc(<d->ltd_ref); laia->laia_ltd = ltd; spin_unlock(<ds->ltd_lock); @@ -1568,9 +1564,9 @@ again: lfsck_layout_master_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { - CERROR("%s: fail to notify MDT %x for layout " - "phase1 done: rc = %d\n", - lfsck_lfsck2name(lfsck), + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to " + "notify MDT %x for phase1_done: " + "rc = %d\n", lfsck_lfsck2name(lfsck), ltd->ltd_index, rc); lfsck_tgt_put(ltd); } @@ -1579,7 +1575,7 @@ again: spin_unlock(<ds->ltd_lock); break; default: - CERROR("%s: unexpected LFSCK event: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK unexpected event: rc = %d\n", lfsck_lfsck2name(lfsck), lr->lr_event); rc = -EINVAL; break; @@ -1629,45 +1625,6 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env, return rc; } -static int lfsck_layout_lock(const struct lu_env *env, - struct lfsck_component *com, - struct dt_object *obj, - struct lustre_handle *lh, __u64 bits) -{ - struct lfsck_thread_info *info = lfsck_env_info(env); - ldlm_policy_data_t *policy = &info->lti_policy; - struct ldlm_res_id *resid = &info->lti_resid; - struct lfsck_instance *lfsck = com->lc_lfsck; - __u64 flags = LDLM_FL_ATOMIC_CB; - int rc; - - LASSERT(lfsck->li_namespace != NULL); - - memset(policy, 0, sizeof(*policy)); - policy->l_inodebits.bits = bits; - fid_build_reg_res_name(lfsck_dto2fid(obj), resid); - rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_IBITS, - policy, LCK_EX, &flags, ldlm_blocking_ast, - ldlm_completion_ast, NULL, NULL, 0, - LVB_T_NONE, NULL, lh); - if (rc == ELDLM_OK) { - rc = 0; - } else { - memset(lh, 0, sizeof(*lh)); - rc = -EIO; - } - - return rc; -} - -static void 
lfsck_layout_unlock(struct lustre_handle *lh) -{ - if (lustre_handle_is_used(lh)) { - ldlm_lock_decref(lh, LCK_EX); - memset(lh, 0, sizeof(*lh)); - } -} - static int lfsck_layout_trans_stop(const struct lu_env *env, struct dt_device *dev, struct thandle *handle, int result) @@ -1738,7 +1695,13 @@ static int lfsck_layout_refill_lovea(const struct lu_env *env, { struct ost_id *oi = &lfsck_env_info(env)->lti_oi; struct lov_mds_md_v1 *lmm = buf->lb_buf; + struct lu_buf ea_buf; int rc; + __u32 magic; + __u16 count; + + magic = le32_to_cpu(lmm->lmm_magic); + count = le16_to_cpu(lmm->lmm_stripe_count); fid_to_ostid(cfid, oi); ostid_cpu_to_le(oi, &slot->l_ost_oi); @@ -1748,10 +1711,8 @@ static int lfsck_layout_refill_lovea(const struct lu_env *env, if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_HOLE) { struct lov_ost_data_v1 *objs; int i; - __u16 count; - count = le16_to_cpu(lmm->lmm_stripe_count); - if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1) + if (magic == LOV_MAGIC_V1) objs = &lmm->lmm_objects[0]; else objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; @@ -1766,7 +1727,8 @@ static int lfsck_layout_refill_lovea(const struct lu_env *env, lmm->lmm_pattern &= ~cpu_to_le32(LOV_PATTERN_F_HOLE); } - rc = dt_xattr_set(env, parent, buf, XATTR_NAME_LOV, fl, handle, + lfsck_buf_init(&ea_buf, lmm, lov_mds_md_size(count, magic)); + rc = dt_xattr_set(env, parent, &ea_buf, XATTR_NAME_LOV, fl, handle, BYPASS_CAPA); if (rc == 0) rc = 1; @@ -1791,16 +1753,19 @@ static int lfsck_layout_extend_lovea(const struct lu_env *env, struct lov_ost_data_v1 *objs; int rc; __u16 count; + bool hole = false; ENTRY; if (fl == LU_XATTR_CREATE || reset) { __u32 pattern = LOV_PATTERN_RAID0; count = ea_off + 1; - LASSERT(buf->lb_len == lov_mds_md_size(count, LOV_MAGIC_V1)); + LASSERT(buf->lb_len >= lov_mds_md_size(count, LOV_MAGIC_V1)); - if (ea_off != 0 || reset) + if (ea_off != 0 || reset) { pattern |= LOV_PATTERN_F_HOLE; + hole = true; + } memset(lmm, 0, buf->lb_len); lmm->lmm_magic = 
cpu_to_le32(LOV_MAGIC_V1); @@ -1828,11 +1793,12 @@ static int lfsck_layout_extend_lovea(const struct lu_env *env, gap = ea_off - count; if (gap >= 0) count = ea_off + 1; - LASSERT(buf->lb_len == lov_mds_md_size(count, magic)); + LASSERT(buf->lb_len >= lov_mds_md_size(count, magic)); if (gap > 0) { memset(objs, 0, gap * sizeof(*objs)); lmm->lmm_pattern |= cpu_to_le32(LOV_PATTERN_F_HOLE); + hole = true; } lmm->lmm_layout_gen = @@ -1844,6 +1810,13 @@ static int lfsck_layout_extend_lovea(const struct lu_env *env, rc = lfsck_layout_refill_lovea(env, handle, parent, cfid, buf, objs, fl, ost_idx); + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant extend layout EA for " + DFID": parent "DFID", OST-index %u, stripe-index %u, fl %d, " + "reset %s, %s LOV EA hole: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)), + ost_idx, ea_off, fl, reset ? "yes" : "no", + hole ? "with" : "without", rc); + RETURN(rc); } @@ -1905,20 +1878,61 @@ out: } /** - * \retval +1: repaired - * \retval 0: did nothing - * \retval -ve: on error + * This function will create the MDT-object with the given (partial) LOV EA. + * + * Under some data corruption cases, the MDT-object of the file may be lost, + * but its OST-objects, or some of them are there. The layout LFSCK needs to + * re-create the MDT-object with the orphan OST-object(s) information. + * + * On the other hand, the LFSCK may has created some OST-object for repairing + * dangling LOV EA reference, but as the LFSCK processing, it may find that + * the old OST-object is there and should replace the former new created OST + * object. Unfortunately, some others have modified such newly created object. + * To keep the data (both new and old), the LFSCK will create MDT-object with + * new FID to reference the original OST-object. 
+ * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * \param[in] ltd pointer to target device descriptor + * \param[in] rec pointer to the record for the orphan OST-object + * \param[in] cfid pointer to FID for the orphan OST-object + * \param[in] infix additional information, such as the FID for original + * MDT-object and the stripe offset in the LOV EA + * \param[in] type the type for describing why the orphan MDT-object is + * created. The rules are as following: + * + * type "C": Multiple OST-objects claim the same MDT-object and the + * same slot in the layout EA. Then the LFSCK will create + * new MDT-object(s) to hold the conflict OST-object(s). + * + * type "N": The orphan OST-object does not know which one was the + * real parent MDT-object, so the LFSCK uses new FID for + * its parent MDT-object. + * + * type "R": The orphan OST-object knows its parent MDT-object FID, + * but does not know the position (the file name) in the + * namespace. 
+ * + * The orphan name will be like: + * ${FID}-${infix}-${type}-${conflict_version} + * + * \param[in] ea_off the stripe offset in the LOV EA + * + * \retval positive on repaired something + * \retval 0 if needs to repair nothing + * \retval negative error number on failure */ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, struct lu_orphan_rec *rec, struct lu_fid *cfid, - const char *prefix, - const char *postfix, + const char *infix, + const char *type, __u32 ea_off) { struct lfsck_thread_info *info = lfsck_env_info(env); + struct dt_insert_rec *dtrec = &info->lti_dt_rec; char *name = info->lti_key; struct lu_attr *la = &info->lti_la; struct dt_object_format *dof = &info->lti_dof; @@ -1929,10 +1943,14 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct dt_object *pobj = NULL; struct dt_object *cobj = NULL; struct thandle *th = NULL; - struct lu_buf *pbuf = NULL; + struct lu_buf pbuf = { 0 }; struct lu_buf *ea_buf = &info->lti_big_buf; + struct lu_buf lov_buf; struct lustre_handle lh = { 0 }; - int buflen = ea_buf->lb_len; + struct linkea_data ldata = { 0 }; + struct lu_buf linkea_buf; + const struct lu_name *pname; + int size = 0; int idx = 0; int rc = 0; ENTRY; @@ -1941,7 +1959,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, if (unlikely(lfsck->li_lpf_obj == NULL)) { rc = lfsck_create_lpf(env, lfsck); if (rc != 0) - RETURN(rc); + GOTO(log, rc); } if (fid_is_zero(pfid)) { @@ -1957,56 +1975,38 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, * real parent MDT-object's FID::f_ver, instead it is the * OST-object index in its parent MDT-object's layout EA. 
*/ ff->ff_parent.f_stripe_idx = cpu_to_le32(ea_off); - pbuf = lfsck_buf_get(env, ff, sizeof(struct filter_fid)); + lfsck_buf_init(&pbuf, ff, sizeof(struct filter_fid)); cobj = lfsck_object_find_by_dev(env, ltd->ltd_tgt, cfid); if (IS_ERR(cobj)) - RETURN(PTR_ERR(cobj)); + GOTO(log, rc = PTR_ERR(cobj)); } - CDEBUG(D_LFSCK, "Re-create the lost MDT-object: parent " - DFID", child "DFID", OST-index %u, stripe-index %u, " - "prefix %s, postfix %s\n", - PFID(pfid), PFID(cfid), ltd->ltd_index, ea_off, prefix, postfix); - pobj = lfsck_object_find_by_dev(env, lfsck->li_bottom, pfid); if (IS_ERR(pobj)) GOTO(put, rc = PTR_ERR(pobj)); - LASSERT(prefix != NULL); - LASSERT(postfix != NULL); + LASSERT(infix != NULL); + LASSERT(type != NULL); - /** name rules: - * - * 1. Use the MDT-object's FID as the name with prefix and postfix. - * - * 1.1 prefix "C-": More than one OST-objects claim the same - * MDT-object and the same slot in the layout EA. - * It may be created for dangling referenced MDT - * object or may be not. - * 1.2 prefix "N-": The orphan OST-object does not know which one - * is the real parent, so the LFSCK assign a new - * FID as its parent. - * 1.3 prefix "R-": The orphan OST-object know its parent FID but - * does not know the position in the namespace. - * - * 2. If there is name conflict, append more index for new name. */ - sprintf(name, "%s"DFID"%s", prefix, PFID(pfid), postfix); do { + snprintf(name, NAME_MAX, DFID"%s-%s-%d", PFID(pfid), infix, + type, idx++); rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, (const struct dt_key *)name, BYPASS_CAPA); if (rc != 0 && rc != -ENOENT) GOTO(put, rc); - - if (unlikely(rc == 0)) { - CWARN("%s: The name %s under lost+found has been used " - "by the "DFID". 
Try to increase the FID version " - "for the new file name.\n", - lfsck_lfsck2name(lfsck), name, PFID(tfid)); - sprintf(name, "%s"DFID"%s-%d", prefix, PFID(pfid), - postfix, ++idx); - } } while (rc == 0); + rc = linkea_data_new(&ldata, + &lfsck_env_info(env)->lti_linkea_buf); + if (rc != 0) + GOTO(put, rc); + + pname = lfsck_name_get_const(env, name, strlen(name)); + rc = linkea_add_buf(&ldata, pname, lfsck_dto2fid(lfsck->li_lpf_obj)); + if (rc != 0) + GOTO(put, rc); + memset(la, 0, sizeof(*la)); la->la_uid = rec->lor_uid; la->la_gid = rec->lor_gid; @@ -2016,14 +2016,11 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, memset(dof, 0, sizeof(*dof)); dof->dof_type = dt_mode_to_dft(S_IFREG); - rc = lov_mds_md_size(ea_off + 1, LOV_MAGIC_V1); - if (buflen < rc) { - lu_buf_realloc(ea_buf, rc); - buflen = ea_buf->lb_len; + size = lov_mds_md_size(ea_off + 1, LOV_MAGIC_V1); + if (ea_buf->lb_len < size) { + lu_buf_realloc(ea_buf, size); if (ea_buf->lb_buf == NULL) GOTO(put, rc = -ENOMEM); - } else { - ea_buf->lb_len = rc; } /* Hold update lock on the .lustre/lost+found/MDTxxxx/. @@ -2032,8 +2029,8 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, * because creating MDT-object for orphan OST-object is rare, we * do not much care about the performance. It can be improved in * the future when needed. */ - rc = lfsck_layout_lock(env, com, lfsck->li_lpf_obj, &lh, - MDS_INODELOCK_UPDATE); + rc = lfsck_ibits_lock(env, lfsck, lfsck->li_lpf_obj, &lh, + MDS_INODELOCK_UPDATE, LCK_EX); if (rc != 0) GOTO(put, rc); @@ -2046,7 +2043,8 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, * If other subsequent modifications failed, then next LFSCK scanning * will process the OST-object as orphan again with known parent FID. 
*/ if (cobj != NULL) { - rc = dt_declare_xattr_set(env, cobj, pbuf, XATTR_NAME_FID, 0, th); + rc = dt_declare_xattr_set(env, cobj, &pbuf, XATTR_NAME_FID, + 0, th); if (rc != 0) GOTO(stop, rc); } @@ -2057,25 +2055,36 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, GOTO(stop, rc); /* 3a. Add layout EA for the MDT-object. */ - rc = dt_declare_xattr_set(env, pobj, ea_buf, XATTR_NAME_LOV, + lfsck_buf_init(&lov_buf, ea_buf->lb_buf, size); + rc = dt_declare_xattr_set(env, pobj, &lov_buf, XATTR_NAME_LOV, LU_XATTR_CREATE, th); if (rc != 0) GOTO(stop, rc); /* 4a. Insert the MDT-object to .lustre/lost+found/MDTxxxx/ */ + dtrec->rec_fid = pfid; + dtrec->rec_type = S_IFREG; rc = dt_declare_insert(env, lfsck->li_lpf_obj, - (const struct dt_rec *)pfid, + (const struct dt_rec *)dtrec, (const struct dt_key *)name, th); if (rc != 0) GOTO(stop, rc); + /* 5a. insert linkEA for parent. */ + lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf, + ldata.ld_leh->leh_len); + rc = dt_declare_xattr_set(env, pobj, &linkea_buf, + XATTR_NAME_LINK, 0, th); + if (rc != 0) + GOTO(stop, rc); + rc = dt_trans_start(env, next, th); if (rc != 0) GOTO(stop, rc); /* 1b. Update OST-object's parent information remotely. */ if (cobj != NULL) { - rc = dt_xattr_set(env, cobj, pbuf, XATTR_NAME_FID, 0, th, + rc = dt_xattr_set(env, cobj, &pbuf, XATTR_NAME_FID, 0, th, BYPASS_CAPA); if (rc != 0) GOTO(stop, rc); @@ -2087,16 +2096,21 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, if (rc == 0) /* 3b. Add layout EA for the MDT-object. */ rc = lfsck_layout_extend_lovea(env, lfsck, th, pobj, cfid, - ea_buf, LU_XATTR_CREATE, + &lov_buf, LU_XATTR_CREATE, ltd->ltd_index, ea_off, false); dt_write_unlock(env, pobj); if (rc < 0) GOTO(stop, rc); /* 4b. 
Insert the MDT-object to .lustre/lost+found/MDTxxxx/ */ - rc = dt_insert(env, lfsck->li_lpf_obj, - (const struct dt_rec *)pfid, + rc = dt_insert(env, lfsck->li_lpf_obj, (const struct dt_rec *)dtrec, (const struct dt_key *)name, th, BYPASS_CAPA, 1); + if (rc != 0) + GOTO(stop, rc); + + /* 5b. insert linkEA for parent. */ + rc = dt_xattr_set(env, pobj, &linkea_buf, + XATTR_NAME_LINK, 0, th, BYPASS_CAPA); GOTO(stop, rc); @@ -2104,14 +2118,22 @@ stop: dt_trans_stop(env, next, th); unlock: - lfsck_layout_unlock(&lh); + lfsck_ibits_unlock(&lh, LCK_EX); put: if (cobj != NULL && !IS_ERR(cobj)) lu_object_put(env, &cobj->do_lu); if (pobj != NULL && !IS_ERR(pobj)) lu_object_put(env, &pobj->do_lu); - ea_buf->lb_len = buflen; + +log: + if (rc < 0) + CDEBUG(D_LFSCK, "%s layout LFSCK assistant failed to " + "recreate the lost MDT-object: parent "DFID + ", child "DFID", OST-index %u, stripe-index %u, " + "infix %s, type %s: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(pfid), PFID(cfid), + ltd->ltd_index, ea_off, infix, type, rc); return rc >= 0 ? 1 : rc; } @@ -2152,7 +2174,7 @@ static int lfsck_layout_master_conditional_destroy(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_CONDITIONAL_DESTROY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_fid = *fid; tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); @@ -2193,7 +2215,8 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, RETURN(PTR_ERR(obj)); dt_read_lock(env, obj, 0); - if (dt_object_exists(obj) == 0) { + if (dt_object_exists(obj) == 0 || + lfsck_is_dead_obj(obj)) { dt_read_unlock(env, obj); GOTO(put, rc = -ENOENT); @@ -2252,10 +2275,11 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, rc = dt_destroy(env, obj, th); if (rc == 0) - CDEBUG(D_LFSCK, "Destroy the empty OST-object "DFID" which " - "was created for reparing dangling referenced case. 
" - "But the original missed OST-object is found now.\n", - PFID(fid)); + CDEBUG(D_LFSCK, "%s: layout LFSCK destroyed the empty " + "OST-object "DFID" that was created for reparing " + "dangling referenced case. But the original missed " + "OST-object is found now.\n", + lfsck_lfsck2name(lfsck), PFID(fid)); GOTO(stop, rc); @@ -2293,12 +2317,12 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, struct lu_fid *cfid, struct lu_buf *ea_buf, struct lov_ost_data_v1 *slot, - __u32 ea_off, __u32 ori_len) + __u32 ea_off) { struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_fid *cfid2 = &info->lti_fid2; struct ost_id *oi = &info->lti_oi; - char *postfix = info->lti_tmpbuf; + char *infix = info->lti_tmpbuf; struct lov_mds_md_v1 *lmm = ea_buf->lb_buf; struct dt_device *dev = com->lc_lfsck->li_bottom; struct thandle *th = NULL; @@ -2308,17 +2332,14 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, ENTRY; ostid_le_to_cpu(&slot->l_ost_oi, oi); - ostid_to_fid(cfid2, oi, ost_idx2); - - CDEBUG(D_LFSCK, "Handle layout EA conflict: parent "DFID - ", cur-child "DFID" on the OST %u, orphan-child " - DFID" on the OST %u, stripe-index %u\n", - PFID(lfsck_dto2fid(parent)), PFID(cfid2), ost_idx2, - PFID(cfid), ltd->ltd_index, ea_off); + rc = ostid_to_fid(cfid2, oi, ost_idx2); + if (rc != 0) + GOTO(out, rc); /* Hold layout lock on the parent to prevent others to access. */ - rc = lfsck_layout_lock(env, com, parent, &lh, - MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); + rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, + MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, + LCK_EX); if (rc != 0) GOTO(out, rc); @@ -2330,14 +2351,13 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, * a new MDT-object for the orphan OST-object. */ if (rc == -ETXTBSY) { /* No need the layout lock on the original parent. 
*/ - lfsck_layout_unlock(&lh); - ea_buf->lb_len = ori_len; + lfsck_ibits_unlock(&lh, LCK_EX); fid_zero(&rec->lor_fid); - snprintf(postfix, LFSCK_TMPBUF_LEN, "-"DFID"-%x", + snprintf(infix, LFSCK_TMPBUF_LEN, "-"DFID"-%x", PFID(lu_object_fid(&parent->do_lu)), ea_off); rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "C-", postfix, ea_off); + infix, "C", ea_off); RETURN(rc); } @@ -2370,10 +2390,15 @@ stop: dt_trans_stop(env, dev, th); unlock: - lfsck_layout_unlock(&lh); + lfsck_ibits_unlock(&lh, LCK_EX); out: - ea_buf->lb_len = ori_len; + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant replaced the conflict " + "OST-object "DFID" on the OST %x with the orphan "DFID" on " + "the OST %x: parent "DFID", stripe-index %u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(cfid2), ost_idx2, + PFID(cfid), ltd->ltd_index, PFID(lfsck_dto2fid(parent)), + ea_off, rc); return rc >= 0 ? 1 : rc; } @@ -2399,7 +2424,7 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env, struct dt_device *dt = lfsck->li_bottom; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct thandle *handle = NULL; - size_t buflen = buf->lb_len; + size_t lovea_size; struct lov_mds_md_v1 *lmm; struct lov_ost_data_v1 *objs; struct lustre_handle lh = { 0 }; @@ -2412,14 +2437,18 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env, bool locked = false; ENTRY; - CDEBUG(D_LFSCK, "Re-create the crashed layout EA: parent " - DFID", child "DFID", OST-index %u, stripe-index %u\n", - PFID(lfsck_dto2fid(parent)), PFID(cfid), ost_idx, ea_off); + rc = lfsck_ibits_lock(env, lfsck, parent, &lh, + MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, + LCK_EX); + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to recreate " + "LOV EA for "DFID": parent "DFID", OST-index %u, " + "stripe-index %u: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(cfid), + PFID(lfsck_dto2fid(parent)), ost_idx, ea_off, rc); - rc = lfsck_layout_lock(env, com, parent, &lh, - MDS_INODELOCK_LAYOUT | 
MDS_INODELOCK_XATTR); - if (rc != 0) RETURN(rc); + } again: if (locked) { @@ -2435,9 +2464,9 @@ again: if (rc < 0) GOTO(unlock_layout, rc); - if (buf->lb_len < rc) { - lu_buf_realloc(buf, rc); - buflen = buf->lb_len; + lovea_size = rc; + if (buf->lb_len < lovea_size) { + lu_buf_realloc(buf, lovea_size); if (buf->lb_buf == NULL) GOTO(unlock_layout, rc = -ENOMEM); } @@ -2466,11 +2495,12 @@ again: LASSERT(rc != 0); goto again; } else if (rc == -ENODATA || rc == 0) { - rc = lov_mds_md_size(ea_off + 1, LOV_MAGIC_V1); + lovea_size = lov_mds_md_size(ea_off + 1, LOV_MAGIC_V1); /* If the declared is not big enough, re-try. */ - if (buf->lb_len < rc) + if (buf->lb_len < lovea_size) { + rc = lovea_size; goto again; - + } fl = LU_XATTR_CREATE; } else if (rc < 0) { GOTO(unlock_parent, rc); @@ -2478,15 +2508,15 @@ again: goto again; } else { fl = LU_XATTR_REPLACE; + lovea_size = rc; } if (fl == LU_XATTR_CREATE) { if (bk->lb_param & LPF_DRYRUN) GOTO(unlock_parent, rc = 1); - LASSERT(buf->lb_len >= rc); + LASSERT(buf->lb_len >= lovea_size); - buf->lb_len = rc; rc = lfsck_layout_extend_lovea(env, lfsck, handle, parent, cfid, buf, fl, ost_idx, ea_off, false); @@ -2501,10 +2531,8 @@ again: if (bk->lb_param & LPF_DRYRUN) GOTO(unlock_parent, rc = 1); - LASSERT(buf->lb_len >= rc); + LASSERT(buf->lb_len >= lovea_size); - buf->lb_len = rc; - memset(lmm, 0, buf->lb_len); rc = lfsck_layout_extend_lovea(env, lfsck, handle, parent, cfid, buf, fl, ost_idx, ea_off, true); @@ -2537,12 +2565,13 @@ again: if (bk->lb_param & LPF_DRYRUN) GOTO(unlock_parent, rc = 1); - rc = lov_mds_md_size(ea_off + 1, magic); + lovea_size = lov_mds_md_size(ea_off + 1, magic); /* If the declared is not big enough, re-try. 
*/ - if (buf->lb_len < rc) + if (buf->lb_len < lovea_size) { + rc = lovea_size; goto again; + } - buf->lb_len = rc; rc = lfsck_layout_extend_lovea(env, lfsck, handle, parent, cfid, buf, fl, ost_idx, ea_off, false); @@ -2551,7 +2580,6 @@ again: LASSERTF(rc > 0, "invalid rc = %d\n", rc); - buf->lb_len = rc; for (i = 0; i < count; i++, objs++) { /* The MDT-object was created via lfsck_layout_recover_create() * by others before, and we fill the dummy layout EA. */ @@ -2567,11 +2595,28 @@ again: rc = lfsck_layout_refill_lovea(env, handle, parent, cfid, buf, objs, fl, ost_idx); + + CDEBUG(D_LFSCK, "%s layout LFSCK assistant fill " + "dummy layout slot for "DFID": parent "DFID + ", OST-index %u, stripe-index %u: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(cfid), + PFID(lfsck_dto2fid(parent)), ost_idx, i, rc); + GOTO(unlock_parent, rc); } ostid_le_to_cpu(&objs->l_ost_oi, oi); - ostid_to_fid(fid, oi, le32_to_cpu(objs->l_ost_idx)); + rc = ostid_to_fid(fid, oi, le32_to_cpu(objs->l_ost_idx)); + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: the parent "DFID" contains " + "invalid layout EA at the slot %d, index %u\n", + lfsck_lfsck2name(lfsck), + PFID(lfsck_dto2fid(parent)), i, + le32_to_cpu(objs->l_ost_idx)); + + GOTO(unlock_parent, rc); + } + /* It should be rare case, the slot is there, but the LFSCK * does not handle it during the first-phase cycle scanning. 
*/ if (unlikely(lu_fid_eq(fid, cfid))) { @@ -2587,11 +2632,18 @@ again: dt_write_unlock(env, parent); if (handle != NULL) dt_trans_stop(env, dt, handle); - lfsck_layout_unlock(&lh); - buf->lb_len = buflen; + lfsck_ibits_unlock(&lh, LCK_EX); rc = lfsck_layout_update_pfid(env, com, parent, cfid, ltd->ltd_tgt, i); + CDEBUG(D_LFSCK, "%s layout LFSCK assistant " + "updated OST-object's pfid for "DFID + ": parent "DFID", OST-index %u, " + "stripe-index %u: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(cfid), + PFID(lfsck_dto2fid(parent)), + ltd->ltd_index, i, rc); + RETURN(rc); } } @@ -2605,13 +2657,13 @@ again: dt_write_unlock(env, parent); if (handle != NULL) dt_trans_stop(env, dt, handle); - lfsck_layout_unlock(&lh); + lfsck_ibits_unlock(&lh, LCK_EX); if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1) objs = &lmm->lmm_objects[ea_off]; else objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[ea_off]; rc = lfsck_layout_conflict_create(env, com, ltd, rec, parent, cfid, - buf, objs, ea_off, buflen); + buf, objs, ea_off); RETURN(rc); @@ -2624,8 +2676,7 @@ stop: dt_trans_stop(env, dt, handle); unlock_layout: - lfsck_layout_unlock(&lh); - buf->lb_len = buflen; + lfsck_ibits_unlock(&lh, LCK_EX); return rc; } @@ -2648,7 +2699,7 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, if (fid_is_zero(pfid)) { rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "N-", "", ea_off); + "", "N", ea_off); GOTO(out, rc); } @@ -2666,7 +2717,7 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, if (dt_object_exists(parent) == 0) { lu_object_put(env, &parent->do_lu); rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, - "R-", "", ea_off); + "", "R", ea_off); GOTO(out, rc); } @@ -2716,15 +2767,19 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, int rc = 0; ENTRY; - CDEBUG(D_LFSCK, "%s: start the orphan scanning for OST%04x\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant starts the orphan " + "scanning for OST%04x\n", 
lfsck_lfsck2name(lfsck), ltd->ltd_index); ostid_set_seq(oi, FID_SEQ_IDIF); ostid_set_id(oi, 0); - ostid_to_fid(fid, oi, ltd->ltd_index); + rc = ostid_to_fid(fid, oi, ltd->ltd_index); + if (rc != 0) + GOTO(log, rc); + obj = lfsck_object_find_by_dev(env, ltd->ltd_tgt, fid); if (unlikely(IS_ERR(obj))) - RETURN(PTR_ERR(obj)); + GOTO(log, rc = PTR_ERR(obj)); rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_orphan_features); if (rc != 0) @@ -2793,7 +2848,9 @@ fini: put: lu_object_put(env, &obj->do_lu); - CDEBUG(D_LFSCK, "%s: finish the orphan scanning for OST%04x, rc = %d\n", +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant finished the orphan " + "scanning for OST%04x: rc = %d\n", lfsck_lfsck2name(lfsck), ltd->ltd_index, rc); return rc > 0 ? 0 : rc; @@ -2833,15 +2890,8 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, else create = false; - CDEBUG(D_LFSCK, "Found dangling reference for: parent "DFID - ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u. %s", - PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), - llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid, - create ? 
"Create the lost OST-object as required.\n" : - "Keep the MDT-object there by default.\n"); - if (!create) - RETURN(1); + GOTO(log, rc = 1); memset(cla, 0, sizeof(*cla)); cla->la_uid = pla->la_uid; @@ -2850,10 +2900,11 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, cla->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID | LA_ATIME | LA_MTIME | LA_CTIME; - rc = lfsck_layout_lock(env, com, parent, &lh, - MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); + rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, + MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, + LCK_EX); if (rc != 0) - RETURN(rc); + GOTO(log, rc); handle = dt_trans_create(env, dev); if (IS_ERR(handle)) @@ -2883,7 +2934,7 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, GOTO(stop, rc); dt_read_lock(env, parent, 0); - if (unlikely(lu_object_is_dying(parent->do_lu.lo_header))) + if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock2, rc = 1); rc = dt_create(env, child, cla, hint, NULL, handle); @@ -2902,7 +2953,17 @@ stop: rc = lfsck_layout_trans_stop(env, dev, handle, rc); unlock1: - lfsck_layout_unlock(&lh); + lfsck_ibits_unlock(&lh, LCK_EX); + +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant found dangling " + "reference for: parent "DFID", child "DFID", OST-index %u, " + "stripe-index %u, owner %u/%u. %s: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), + PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, + llr->llr_lov_idx, pla->la_uid, pla->la_gid, + create ? 
"Create the lost OST-object as required" : + "Keep the MDT-object there by default", rc); return rc; } @@ -2928,15 +2989,11 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, int rc; ENTRY; - CDEBUG(D_LFSCK, "Repair unmatched MDT-OST pair for: parent "DFID - ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n", - PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), - llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid); - - rc = lfsck_layout_lock(env, com, parent, &lh, - MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); + rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, + MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, + LCK_EX); if (rc != 0) - RETURN(rc); + GOTO(log, rc); handle = dt_trans_create(env, dev); if (IS_ERR(handle)) @@ -2966,7 +3023,7 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, GOTO(stop, rc); dt_write_lock(env, parent, 0); - if (unlikely(lu_object_is_dying(parent->do_lu.lo_header))) + if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock2, rc = 1); rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle, @@ -2991,7 +3048,15 @@ stop: rc = lfsck_layout_trans_stop(env, dev, handle, rc); unlock1: - lfsck_layout_unlock(&lh); + lfsck_ibits_unlock(&lh, LCK_EX); + +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired unmatched " + "MDT-OST pair for: parent "DFID", child "DFID", OST-index %u, " + "stripe-index %u, owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), + PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, llr->llr_lov_idx, + pla->la_uid, pla->la_gid, rc); return rc; } @@ -3019,19 +3084,16 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, struct lov_mds_md_v1 *lmm; struct lov_ost_data_v1 *objs; struct lustre_handle lh = { 0 }; + struct lu_buf ea_buf; __u32 magic; int rc; ENTRY; - CDEBUG(D_LFSCK, "Repair multiple references for: parent "DFID - ", OST-index %u, stripe-index %u, owner %u:%u\n", - 
PFID(lfsck_dto2fid(parent)), llr->llr_ost_idx, - llr->llr_lov_idx, la->la_uid, la->la_gid); - - rc = lfsck_layout_lock(env, com, parent, &lh, - MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR); + rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, + MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, + LCK_EX); if (rc != 0) - RETURN(rc); + GOTO(log, rc); handle = dt_trans_create(env, pdev); if (IS_ERR(handle)) @@ -3065,7 +3127,7 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, GOTO(stop, rc); dt_write_lock(env, parent, 0); - if (unlikely(lu_object_is_dying(parent->do_lu.lo_header))) + if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock2, rc = 0); rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV, BYPASS_CAPA); @@ -3098,7 +3160,10 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, ostid_cpu_to_le(oi, &objs[llr->llr_lov_idx].l_ost_oi); objs[llr->llr_lov_idx].l_ost_gen = cpu_to_le32(0); objs[llr->llr_lov_idx].l_ost_idx = cpu_to_le32(llr->llr_ost_idx); - rc = dt_xattr_set(env, parent, buf, XATTR_NAME_LOV, + lfsck_buf_init(&ea_buf, lmm, + lov_mds_md_size(le16_to_cpu(lmm->lmm_stripe_count), + magic)); + rc = dt_xattr_set(env, parent, &ea_buf, XATTR_NAME_LOV, LU_XATTR_REPLACE, handle, BYPASS_CAPA); GOTO(unlock2, rc = (rc == 0 ? 
1 : rc)); @@ -3113,7 +3178,14 @@ stop: dt_trans_stop(env, pdev, handle); unlock1: - lfsck_layout_unlock(&lh); + lfsck_ibits_unlock(&lh, LCK_EX); + +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired multiple " + "references for: parent "DFID", OST-index %u, stripe-index %u, " + "owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), + llr->llr_ost_idx, llr->llr_lov_idx, la->la_uid, la->la_gid, rc); return rc; } @@ -3136,14 +3208,9 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, int rc; ENTRY; - CDEBUG(D_LFSCK, "Repair inconsistent file owner for: parent "DFID - ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n", - PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), - llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid); - handle = dt_trans_create(env, dev); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(log, rc = PTR_ERR(handle)); tla->la_uid = pla->la_uid; tla->la_gid = pla->la_gid; @@ -3158,19 +3225,13 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, /* Use the dt_object lock to serialize with destroy and attr_set. */ dt_read_lock(env, parent, 0); - if (unlikely(lu_object_is_dying(parent->do_lu.lo_header))) + if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock, rc = 1); /* Get the latest parent's owner. */ rc = dt_attr_get(env, parent, tla, BYPASS_CAPA); - if (rc != 0) { - CWARN("%s: fail to get the latest parent's ("DFID") owner, " - "not sure whether some others chown/chgrp during the " - "LFSCK: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), - PFID(lfsck_dto2fid(parent)), rc); - + if (rc != 0) GOTO(unlock, rc); - } /* Some others chown/chgrp during the LFSCK, needs to do nothing. 
*/ if (unlikely(tla->la_uid != pla->la_uid || @@ -3188,6 +3249,14 @@ unlock: stop: rc = lfsck_layout_trans_stop(env, dev, handle, rc); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired inconsistent " + "file owner for: parent "DFID", child "DFID", OST-index %u, " + "stripe-index %u, owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), + PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, llr->llr_lov_idx, + pla->la_uid, pla->la_gid, rc); + return rc; } @@ -3238,21 +3307,23 @@ static int lfsck_layout_check_parent(const struct lu_env *env, } tobj = lfsck_object_find(env, com->lc_lfsck, pfid); - if (tobj == NULL) - RETURN(LLIT_UNMATCHED_PAIR); - if (IS_ERR(tobj)) RETURN(PTR_ERR(tobj)); - if (!dt_object_exists(tobj)) + dt_read_lock(env, tobj, 0); + if (dt_object_exists(tobj) == 0 || + lfsck_is_dead_obj(tobj)) + GOTO(out, rc = LLIT_UNMATCHED_PAIR); + + if (!S_ISREG(lfsck_object_type(tobj))) GOTO(out, rc = LLIT_UNMATCHED_PAIR); /* Load the tobj's layout EA, in spite of it is a local MDT-object or * remote one on another MDT. Then check whether the given OST-object * is in such layout. If yes, it is multiple referenced, otherwise it * is unmatched referenced case. 
*/ - rc = lfsck_layout_get_lovea(env, tobj, buf, NULL); - if (rc == 0) + rc = lfsck_layout_get_lovea(env, tobj, buf); + if (rc == 0 || rc == -ENOENT) GOTO(out, rc = LLIT_UNMATCHED_PAIR); if (rc < 0) @@ -3271,12 +3342,23 @@ static int lfsck_layout_check_parent(const struct lu_env *env, for (i = 0; i < count; i++, objs++) { struct lu_fid *tfid = &info->lti_fid2; struct ost_id *oi = &info->lti_oi; + __u32 idx2; if (lovea_slot_is_dummy(objs)) continue; ostid_le_to_cpu(&objs->l_ost_oi, oi); - ostid_to_fid(tfid, oi, le32_to_cpu(objs->l_ost_idx)); + idx2 = le32_to_cpu(objs->l_ost_idx); + rc = ostid_to_fid(tfid, oi, idx2); + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: the parent "DFID" contains " + "invalid layout EA at the slot %d, index %u\n", + lfsck_lfsck2name(com->lc_lfsck), + PFID(pfid), i, idx2); + + GOTO(out, rc = LLIT_UNMATCHED_PAIR); + } + if (lu_fid_eq(cfid, tfid)) { *lov_ea = *buf; @@ -3287,6 +3369,7 @@ static int lfsck_layout_check_parent(const struct lu_env *env, GOTO(out, rc = LLIT_UNMATCHED_PAIR); out: + dt_read_unlock(env, tobj); lfsck_object_put(env, tobj); return rc; @@ -3300,7 +3383,7 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid_old *pea = &info->lti_old_pfid; struct lu_fid *pfid = &info->lti_fid; - struct lu_buf *buf = NULL; + struct lu_buf buf = { 0 }; struct dt_object *parent = llr->llr_parent->llo_obj; struct dt_object *child = llr->llr_child; struct lu_attr *pla = &info->lti_la; @@ -3312,17 +3395,16 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env, int rc; ENTRY; - rc = dt_attr_get(env, parent, pla, BYPASS_CAPA); - if (rc != 0) { - if (lu_object_is_dying(parent->do_lu.lo_header)) - RETURN(0); + if (unlikely(lfsck_is_dead_obj(parent))) + RETURN(0); + rc = dt_attr_get(env, parent, pla, BYPASS_CAPA); + if (rc != 0) GOTO(out, rc); - } rc = dt_attr_get(env, child, cla, BYPASS_CAPA); if (rc == -ENOENT) { - if 
(lu_object_is_dying(parent->do_lu.lo_header)) + if (unlikely(lfsck_is_dead_obj(parent))) RETURN(0); type = LLIT_DANGLING; @@ -3332,8 +3414,8 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env, if (rc != 0) GOTO(out, rc); - buf = lfsck_buf_get(env, pea, sizeof(struct filter_fid_old)); - rc= dt_xattr_get(env, child, buf, XATTR_NAME_FID, BYPASS_CAPA); + lfsck_buf_init(&buf, pea, sizeof(struct filter_fid_old)); + rc = dt_xattr_get(env, child, &buf, XATTR_NAME_FID, BYPASS_CAPA); if (unlikely(rc >= 0 && rc != sizeof(struct filter_fid_old) && rc != sizeof(struct filter_fid))) { type = LLIT_UNMATCHED_PAIR; @@ -3356,7 +3438,7 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env, rc = lfsck_layout_check_parent(env, com, parent, pfid, lu_object_fid(&child->do_lu), - pla, cla, llr, buf, idx); + pla, cla, llr, &buf, idx); if (rc > 0) { type = rc; goto repair; @@ -3388,7 +3470,7 @@ repair: break; case LLIT_MULTIPLE_REFERENCED: rc = lfsck_layout_repair_multiple_references(env, com, llr, - pla, buf); + pla, &buf); break; case LLIT_INCONSISTENT_OWNER: rc = lfsck_layout_repair_owner(env, com, llr, pla); @@ -3412,7 +3494,8 @@ out: rc == -EHOSTUNREACH) { /* If cannot touch the target server, * mark the LFSCK as INCOMPLETE. 
*/ - CERROR("%s: Fail to talk with OST %x: rc = %d.\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant fail to " + "talk with OST %x: rc = %d\n", lfsck_lfsck2name(lfsck), llr->llr_ost_idx, rc); lo->ll_flags |= LF_INCOMPLETE; lo->ll_objs_skipped++; @@ -3469,7 +3552,8 @@ static int lfsck_layout_assistant(void *args) rc = lfsck_layout_master_notify_others(env, com, lr); if (rc != 0) { - CERROR("%s: fail to notify others for layout start: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to notify " + "others for LFSCK start: rc = %d\n", lfsck_lfsck2name(lfsck), rc); GOTO(fini, rc); } @@ -3538,8 +3622,9 @@ static int lfsck_layout_assistant(void *args) lr->lr_status = llmd->llmd_post_result; rc = lfsck_layout_master_notify_others(env, com, lr); if (rc != 0) - CERROR("%s: failed to notify others " - "for layout post: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant " + "failed to notify others for LFSCK " + "post: rc = %d\n", lfsck_lfsck2name(lfsck), rc); /* Wakeup the master engine to go ahead. */ @@ -3552,6 +3637,9 @@ static int lfsck_layout_assistant(void *args) llmd->llmd_in_double_scan = 1; wake_up_all(&mthread->t_ctl_waitq); + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant phase2 " + "scan start\n", lfsck_lfsck2name(lfsck)); + com->lc_new_checked = 0; com->lc_new_scanned = 0; com->lc_time_last_checkpoint = cfs_time_current(); @@ -3559,6 +3647,9 @@ static int lfsck_layout_assistant(void *args) com->lc_time_last_checkpoint + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + /* flush all async updating before handling orphan. 
*/ + dt_sync(env, lfsck->li_next); + while (llmd->llmd_in_double_scan) { struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs; @@ -3670,7 +3761,7 @@ cleanup2: lr->lr_status = LS_CO_STOPPED; break; default: - CERROR("%s: unknown status: rc = %d\n", + CDEBUG(D_LFSCK, "%s: unknown status: rc = %d\n", lfsck_lfsck2name(lfsck), lfsck->li_status); lr->lr_status = LS_CO_FAILED; @@ -3689,16 +3780,28 @@ cleanup2: rc1 = lfsck_layout_master_notify_others(env, com, lr); if (rc1 != 0) { - CERROR("%s: failed to notify others for layout quit: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to " + "notify others for LFSCK quit: rc = %d\n", lfsck_lfsck2name(lfsck), rc1); rc = rc1; } + /* flush all async updating before exit. */ + dt_sync(env, lfsck->li_next); + /* Under force exit case, some requests may be just freed without * verification, those objects should be re-handled when next run. * So not update the on-disk tracing file under such case. */ - if (llmd->llmd_in_double_scan && !llmd->llmd_exit) - rc1 = lfsck_layout_double_scan_result(env, com, rc); + if (llmd->llmd_in_double_scan) { + struct lfsck_layout *lo = com->lc_file_ram; + + if (!llmd->llmd_exit) + rc1 = lfsck_layout_double_scan_result(env, com, rc); + + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant phase2 scan " + "finished, status %d: rc = %d\n", + lfsck_lfsck2name(lfsck), lo->ll_status, rc1); + } fini: if (llmd->llmd_in_double_scan) @@ -3724,6 +3827,7 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, struct lfsck_component *com = llsaa->llsaa_com; struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst; struct lfsck_layout_slave_data *llsd = com->lc_data; + struct lfsck_reply *lr = NULL; bool done = false; if (rc != 0) { @@ -3732,15 +3836,20 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, * the target finished the LFSCK prcoessing. 
*/ done = true; } else { - struct lfsck_reply *lr; - lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY); if (lr->lr_status != LS_SCANNING_PHASE1 && lr->lr_status != LS_SCANNING_PHASE2) done = true; } - if (done) + + if (done) { + CDEBUG(D_LFSCK, "%s: layout LFSCK slave gets the MDT %x " + "status %d\n", lfsck_lfsck2name(com->lc_lfsck), + llst->llst_index, lr != NULL ? lr->lr_status : rc); + lfsck_layout_llst_del(llsd, llst); + } + lfsck_layout_llst_put(llst); lfsck_component_put(env, com); class_export_put(exp); @@ -3829,12 +3938,12 @@ lfsck_layout_slave_query_master(const struct lu_env *env, set = ptlrpc_prep_set(); if (set == NULL) - RETURN(-ENOMEM); + GOTO(log, rc = -ENOMEM); memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_QUERY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; llsd->llsd_touch_gen++; spin_lock(&llsd->llsd_lock); @@ -3846,9 +3955,8 @@ lfsck_layout_slave_query_master(const struct lu_env *env, break; llst->llst_gen = llsd->llsd_touch_gen; - list_del(&llst->llst_list); - list_add_tail(&llst->llst_list, - &llsd->llsd_master_list); + list_move_tail(&llst->llst_list, + &llsd->llsd_master_list); atomic_inc(&llst->llst_ref); spin_unlock(&llsd->llsd_lock); @@ -3863,9 +3971,11 @@ lfsck_layout_slave_query_master(const struct lu_env *env, rc = lfsck_layout_async_query(env, com, exp, llst, lr, set); if (rc != 0) { - CERROR("%s: slave fail to query %s for layout: " - "rc = %d\n", lfsck_lfsck2name(lfsck), + CDEBUG(D_LFSCK, "%s: layout LFSCK slave fail to " + "query %s for layout: rc = %d\n", + lfsck_lfsck2name(lfsck), exp->exp_obd->obd_name, rc); + rc1 = rc; lfsck_layout_llst_put(llst); class_export_put(exp); @@ -3877,7 +3987,13 @@ lfsck_layout_slave_query_master(const struct lu_env *env, rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); - RETURN(rc1 != 0 ? rc1 : rc); + GOTO(log, rc = (rc1 != 0 ? 
rc1 : rc)); + +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK slave queries master: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), rc); + + return rc; } static void @@ -3894,6 +4010,9 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, int rc; ENTRY; + CDEBUG(D_LFSCK, "%s: layout LFSCK slave notifies master\n", + lfsck_lfsck2name(com->lc_lfsck)); + set = ptlrpc_prep_set(); if (set == NULL) RETURN_EXIT; @@ -3903,7 +4022,7 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, lr->lr_flags = LEF_FROM_OST; lr->lr_status = result; lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; llsd->llsd_touch_gen++; spin_lock(&llsd->llsd_lock); while (!list_empty(&llsd->llsd_master_list)) { @@ -3914,9 +4033,8 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, break; llst->llst_gen = llsd->llsd_touch_gen; - list_del(&llst->llst_list); - list_add_tail(&llst->llst_list, - &llsd->llsd_master_list); + list_move_tail(&llst->llst_list, + &llsd->llsd_master_list); atomic_inc(&llst->llst_ref); spin_unlock(&llsd->llsd_lock); @@ -3931,9 +4049,11 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, rc = lfsck_layout_async_notify(env, exp, lr, set); if (rc != 0) - CERROR("%s: slave fail to notify %s for layout: " - "rc = %d\n", lfsck_lfsck2name(lfsck), + CDEBUG(D_LFSCK, "%s: layout LFSCK slave fail to " + "notify %s for layout: rc = %d\n", + lfsck_lfsck2name(lfsck), exp->exp_obd->obd_name, rc); + lfsck_layout_llst_put(llst); class_export_put(exp); spin_lock(&llsd->llsd_lock); @@ -3975,10 +4095,14 @@ static int lfsck_layout_master_check_pairs(const struct lu_env *env, RETURN(PTR_ERR(obj)); dt_read_lock(env, obj, 0); - if (unlikely(!dt_object_exists(obj))) + if (unlikely(dt_object_exists(obj) == 0 || + lfsck_is_dead_obj(obj))) GOTO(unlock, rc = -ENOENT); - rc = lfsck_layout_get_lovea(env, obj, buf, NULL); + if (!S_ISREG(lfsck_object_type(obj))) + GOTO(unlock, rc = -ENODATA); + + rc = 
lfsck_layout_get_lovea(env, obj, buf); if (rc < 0) GOTO(unlock, rc); @@ -4080,7 +4204,7 @@ static int lfsck_layout_slave_check_pairs(const struct lu_env *env, lr = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_PAIRS_VERIFY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_fid = *cfid; /* OST-object itself FID. */ lr->lr_fid2 = *pfid; /* The claimed parent FID. */ @@ -4115,12 +4239,13 @@ static int lfsck_layout_slave_repair_pfid(const struct lu_env *env, obj = lfsck_object_find_by_dev(env, dev, &lr->lr_fid); if (IS_ERR(obj)) - RETURN(PTR_ERR(obj)); + GOTO(log, rc = PTR_ERR(obj)); fid_cpu_to_le(&ff->ff_parent, &lr->lr_fid2); buf = lfsck_buf_get(env, ff, sizeof(*ff)); dt_write_lock(env, obj, 0); - if (unlikely(!dt_object_exists(obj))) + if (unlikely(dt_object_exists(obj) == 0 || + lfsck_is_dead_obj(obj))) GOTO(unlock, rc = 0); th = dt_trans_create(env, dev); @@ -4146,6 +4271,11 @@ unlock: dt_write_unlock(env, obj); lu_object_put(env, &obj->do_lu); +log: + CDEBUG(D_LFSCK, "%s: layout LFSCK slave repaired pfid for "DFID + ", parent "DFID": rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), + PFID(&lr->lr_fid), PFID(&lr->lr_fid2), rc); + return rc; } @@ -4175,6 +4305,9 @@ static int lfsck_layout_reset(const struct lu_env *env, rc = lfsck_layout_store(env, com); up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK reset: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), rc); + return rc; } @@ -4229,6 +4362,10 @@ static int lfsck_layout_master_checkpoint(const struct lu_env *env, rc = lfsck_layout_store(env, com); up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK master checkpoint at the pos [" + LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, rc); + return rc; } @@ -4243,7 +4380,6 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env, return 0; down_write(&com->lc_sem); - if (init) { lo->ll_pos_latest_start = 
lfsck->li_pos_current.lp_oit_cookie; } else { @@ -4257,9 +4393,12 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env, } rc = lfsck_layout_store(env, com); - up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK slave checkpoint at the pos [" + LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, rc); + return rc; } @@ -4283,8 +4422,12 @@ static int lfsck_layout_prep(const struct lu_env *env, if (rc == 0) rc = lfsck_set_param(env, lfsck, start, true); - if (rc != 0) + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: layout LFSCK prep failed: " + "rc = %d\n", lfsck_lfsck2name(lfsck), rc); + return rc; + } } down_write(&com->lc_sem); @@ -4294,9 +4437,8 @@ static int lfsck_layout_prep(const struct lu_env *env, if (!lfsck->li_drop_dryrun || lo->ll_pos_first_inconsistent == 0) { lo->ll_status = LS_SCANNING_PHASE2; - list_del_init(&com->lc_link); - list_add_tail(&com->lc_link, - &lfsck->li_list_double_scan); + list_move_tail(&com->lc_link, + &lfsck->li_list_double_scan); pos->lp_oit_cookie = 0; } else { int i; @@ -4362,6 +4504,10 @@ static int lfsck_layout_slave_prep(const struct lu_env *env, write_unlock(&llsd->llsd_rb_lock); } + CDEBUG(D_LFSCK, "%s: layout LFSCK slave prep done, start pos [" + LPU64"]\n", lfsck_lfsck2name(lfsck), + com->lc_pos_start.lp_oit_cookie); + return rc; } @@ -4397,7 +4543,7 @@ static int lfsck_layout_master_prep(const struct lu_env *env, task = kthread_run(lfsck_layout_assistant, lta, "lfsck_layout"); if (IS_ERR(task)) { rc = PTR_ERR(task); - CERROR("%s: Cannot start LFSCK layout assistant thread: " + CERROR("%s: cannot start LFSCK layout assistant thread: " "rc = %d\n", lfsck_lfsck2name(lfsck), rc); lfsck_thread_args_fini(lta); } else { @@ -4413,6 +4559,10 @@ static int lfsck_layout_master_prep(const struct lu_env *env, rc = 0; } + CDEBUG(D_LFSCK, "%s: layout LFSCK master prep done, start pos [" + LPU64"\n", lfsck_lfsck2name(lfsck), + com->lc_pos_start.lp_oit_cookie); + RETURN(rc); } @@ -4433,7 
+4583,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, struct ptlrpc_thread *mthread = &lfsck->li_thread; struct ptlrpc_thread *athread = &llmd->llmd_thread; struct l_wait_info lwi = { 0 }; - struct lu_buf *buf; + struct lu_buf buf; int rc = 0; int i; __u32 magic; @@ -4441,8 +4591,8 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, __u16 gen; ENTRY; - buf = lfsck_buf_get(env, &info->lti_old_pfid, - sizeof(struct filter_fid_old)); + lfsck_buf_init(&buf, &info->lti_old_pfid, + sizeof(struct filter_fid_old)); count = le16_to_cpu(lmm->lmm_stripe_count); gen = le16_to_cpu(lmm->lmm_layout_gen); /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has @@ -4463,8 +4613,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, struct lfsck_layout_req *llr; struct lfsck_tgt_desc *tgt = NULL; struct dt_object *cobj = NULL; - __u32 index = - le32_to_cpu(objs->l_ost_idx); + __u32 index; bool wakeup = false; if (unlikely(lovea_slot_is_dummy(objs))) @@ -4481,19 +4630,66 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, thread_is_stopped(athread)) GOTO(out, rc = 0); + if (unlikely(lfsck_is_dead_obj(parent))) + GOTO(out, rc = 0); + ostid_le_to_cpu(&objs->l_ost_oi, oi); - ostid_to_fid(fid, oi, index); + index = le32_to_cpu(objs->l_ost_idx); + rc = ostid_to_fid(fid, oi, index); + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: get invalid layout EA for "DFID + ": "DOSTID", idx:%u\n", lfsck_lfsck2name(lfsck), + PFID(lfsck_dto2fid(parent)), POSTID(oi), index); + goto next; + } + tgt = lfsck_tgt_get(ltds, index); if (unlikely(tgt == NULL)) { - CERROR("%s: Cannot talk with OST %x which did not join " - "the layout LFSCK.\n", + CDEBUG(D_LFSCK, "%s: cannot talk with OST %x which " + "did not join the layout LFSCK\n", lfsck_lfsck2name(lfsck), index); lo->ll_flags |= LF_INCOMPLETE; goto next; } - cobj = lfsck_object_find_by_dev(env, tgt->ltd_tgt, fid); + /* There is potential deadlock race condition between object + * destroy and 
layout LFSCK. Consider the following scenario: + * + * 1) The LFSCK thread obtained the parent object firstly, at + * that time, the parent object has not been destroyed yet. + * + * 2) One RPC service thread destroyed the parent and all its + * children objects. Because the LFSCK is referencing the + * parent object, then the parent object will be marked as + * dying in RAM. On the other hand, the parent object is + * referencing all its children objects, then all children + * objects will be marked as dying in RAM also. + * + * 3) The LFSCK thread tries to find some child object with + * the parent object referenced. Then it will find that the + * child object is dying. According to the object visibility + * rules: the object with dying flag cannot be returned to + * others. So the LFSCK thread has to wait until the dying + * object has been purged from RAM, then it can allocate a + * new object (with the same FID) in RAM. Unfortunately, the + * LFSCK thread itself is referencing the parent object, and + * cause the parent object cannot be purged, then cause the + * child object cannot be purged also. So the LFSCK thread + * will fall into deadlock. + * + * We introduce non-blocked version lu_object_find() to allow + * the LFSCK thread to return failure immediately (instead of + * wait) when it finds dying (child) object, then the LFSCK + * thread can check whether the parent object is dying or not. + * So avoid above deadlock. 
LU-5395 */ + cobj = lfsck_object_find_by_dev_nowait(env, tgt->ltd_tgt, fid); if (IS_ERR(cobj)) { + if (lfsck_is_dead_obj(parent)) { + lfsck_tgt_put(tgt); + + GOTO(out, rc = 0); + } + rc = PTR_ERR(cobj); goto next; } @@ -4502,7 +4698,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, if (rc != 0) goto next; - rc = dt_declare_xattr_get(env, cobj, buf, XATTR_NAME_FID, + rc = dt_declare_xattr_get(env, cobj, &buf, XATTR_NAME_FID, BYPASS_CAPA); if (rc != 0) goto next; @@ -4588,10 +4784,12 @@ static int lfsck_layout_master_exec_oit(const struct lu_env *env, struct lov_mds_md_v1 *lmm = NULL; struct dt_device *dev = lfsck->li_bottom; struct lustre_handle lh = { 0 }; - ssize_t buflen = buf->lb_len; + struct lu_buf ea_buf = { 0 }; int rc = 0; + int size = 0; bool locked = false; bool stripe = false; + bool bad_oi = false; ENTRY; if (!S_ISREG(lfsck_object_type(obj))) @@ -4606,11 +4804,15 @@ static int lfsck_layout_master_exec_oit(const struct lu_env *env, locked = true; again: - rc = lfsck_layout_get_lovea(env, obj, buf, &buflen); + if (dt_object_exists(obj) == 0 || + lfsck_is_dead_obj(obj)) + GOTO(out, rc = 0); + + rc = lfsck_layout_get_lovea(env, obj, buf); if (rc <= 0) GOTO(out, rc); - buf->lb_len = rc; + size = rc; lmm = buf->lb_buf; rc = lfsck_layout_verify_header(lmm); /* If the LOV EA crashed, then it is possible to be rebuilt later @@ -4622,8 +4824,8 @@ again: GOTO(out, stripe = true); /* Inconsistent lmm_oi, should be repaired. 
*/ - CDEBUG(D_LFSCK, "Repair bad lmm_oi for "DFID"\n", - PFID(lfsck_dto2fid(obj))); + bad_oi = true; + lmm->lmm_oi = *oi; if (bk->lb_param & LPF_DRYRUN) { down_write(&com->lc_sem); @@ -4636,10 +4838,9 @@ again: if (!lustre_handle_is_used(&lh)) { dt_read_unlock(env, obj); locked = false; - buf->lb_len = buflen; - rc = lfsck_layout_lock(env, com, obj, &lh, - MDS_INODELOCK_LAYOUT | - MDS_INODELOCK_XATTR); + rc = lfsck_ibits_lock(env, lfsck, obj, &lh, + MDS_INODELOCK_LAYOUT | + MDS_INODELOCK_XATTR, LCK_EX); if (rc != 0) GOTO(out, rc); @@ -4647,7 +4848,8 @@ again: if (IS_ERR(handle)) GOTO(out, rc = PTR_ERR(handle)); - rc = dt_declare_xattr_set(env, obj, buf, XATTR_NAME_LOV, + lfsck_buf_init(&ea_buf, lmm, size); + rc = dt_declare_xattr_set(env, obj, &ea_buf, XATTR_NAME_LOV, LU_XATTR_REPLACE, handle); if (rc != 0) GOTO(out, rc); @@ -4662,8 +4864,7 @@ again: goto again; } - lmm->lmm_oi = *oi; - rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LOV, + rc = dt_xattr_set(env, obj, &ea_buf, XATTR_NAME_LOV, LU_XATTR_REPLACE, handle, BYPASS_CAPA); if (rc != 0) GOTO(out, rc); @@ -4685,7 +4886,14 @@ out: if (handle != NULL && !IS_ERR(handle)) dt_trans_stop(env, dev, handle); - lfsck_layout_unlock(&lh); + lfsck_ibits_unlock(&lh, LCK_EX); + + if (bad_oi) + CDEBUG(D_LFSCK, "%s: layout LFSCK master %s bad lmm_oi for " + DFID": rc = %d\n", lfsck_lfsck2name(lfsck), + bk->lb_param & LPF_DRYRUN ? 
"found" : "repaired", + PFID(lfsck_dto2fid(obj)), rc); + if (stripe) { rc = lfsck_layout_scan_stripes(env, com, obj, lmm); } else { @@ -4695,7 +4903,6 @@ out: lfsck_layout_record_failure(env, lfsck, lo); up_write(&com->lc_sem); } - buf->lb_len = buflen; return rc; } @@ -4749,6 +4956,9 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, lls->lls_seq = seq; rc = lfsck_layout_lastid_load(env, com, lls); if (rc != 0) { + CDEBUG(D_LFSCK, "%s: layout LFSCK failed to " + "load LAST_ID for "LPX64": rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), seq, rc); lo->ll_objs_failed_phase1++; OBD_FREE_PTR(lls); GOTO(unlock, rc); @@ -4760,7 +4970,11 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, if (unlikely(fid_is_last_id(fid))) GOTO(unlock, rc = 0); - oid = fid_oid(fid); + if (fid_is_idif(fid)) + oid = fid_idif_id(fid_seq(fid), fid_oid(fid), fid_ver(fid)); + else + oid = fid_oid(fid); + if (oid > lls->lls_lastid_known) lls->lls_lastid_known = oid; @@ -4768,12 +4982,17 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, if (!(lo->ll_flags & LF_CRASHED_LASTID)) { /* OFD may create new objects during LFSCK scanning. 
*/ rc = lfsck_layout_lastid_reload(env, com, lls); - if (unlikely(rc != 0)) - CWARN("%s: failed to reload LAST_ID for "LPX64 - ": rc = %d\n", + if (unlikely(rc != 0)) { + CDEBUG(D_LFSCK, "%s: layout LFSCK failed to " + "reload LAST_ID for "LPX64": rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc); - if (oid <= lls->lls_lastid) + + GOTO(unlock, rc); + } + + if (oid <= lls->lls_lastid || + lo->ll_flags & LF_CRASHED_LASTID) GOTO(unlock, rc = 0); LASSERT(lfsck->li_out_notify != NULL); @@ -4781,6 +5000,12 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, lfsck->li_out_notify(env, lfsck->li_out_notify_data, LE_LASTID_REBUILDING); lo->ll_flags |= LF_CRASHED_LASTID; + + CDEBUG(D_LFSCK, "%s: layout LFSCK finds crashed " + "LAST_ID file (2) for the sequence "LPX64 + ", old value "LPU64", known value "LPU64"\n", + lfsck_lfsck2name(lfsck), lls->lls_seq, + lls->lls_lastid, oid); } lls->lls_lastid = oid; @@ -4844,20 +5069,17 @@ static int lfsck_layout_master_post(const struct lu_env *env, lo->ll_status = LS_SCANNING_PHASE2; lo->ll_flags |= LF_SCANNED_ONCE; lo->ll_flags &= ~LF_UPGRADE; - list_del_init(&com->lc_link); - list_add_tail(&com->lc_link, &lfsck->li_list_double_scan); + list_move_tail(&com->lc_link, &lfsck->li_list_double_scan); } else if (result == 0) { lo->ll_status = lfsck->li_status; if (lo->ll_status == 0) lo->ll_status = LS_STOPPED; if (lo->ll_status != LS_PAUSED) { - list_del_init(&com->lc_link); - list_add_tail(&com->lc_link, &lfsck->li_list_idle); + list_move_tail(&com->lc_link, &lfsck->li_list_idle); } } else { lo->ll_status = LS_FAILED; - list_del_init(&com->lc_link); - list_add_tail(&com->lc_link, &lfsck->li_list_idle); + list_move_tail(&com->lc_link, &lfsck->li_list_idle); } spin_unlock(&lfsck->li_lock); @@ -4872,6 +5094,9 @@ static int lfsck_layout_master_post(const struct lu_env *env, rc = lfsck_layout_store(env, com); up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK master post done: rc = %d\n", + 
lfsck_lfsck2name(lfsck), rc); + RETURN(rc); } @@ -4891,7 +5116,6 @@ static int lfsck_layout_slave_post(const struct lu_env *env, LASSERT(lfsck->li_out_notify != NULL); down_write(&com->lc_sem); - spin_lock(&lfsck->li_lock); if (!init) lo->ll_pos_last_checkpoint = @@ -4902,22 +5126,22 @@ static int lfsck_layout_slave_post(const struct lu_env *env, if (lo->ll_flags & LF_CRASHED_LASTID) { done = true; lo->ll_flags &= ~LF_CRASHED_LASTID; + + CDEBUG(D_LFSCK, "%s: layout LFSCK has rebuilt " + "crashed LAST_ID files successfully\n", + lfsck_lfsck2name(lfsck)); } lo->ll_flags &= ~LF_UPGRADE; - list_del_init(&com->lc_link); - list_add_tail(&com->lc_link, &lfsck->li_list_double_scan); + list_move_tail(&com->lc_link, &lfsck->li_list_double_scan); } else if (result == 0) { lo->ll_status = lfsck->li_status; if (lo->ll_status == 0) lo->ll_status = LS_STOPPED; - if (lo->ll_status != LS_PAUSED) { - list_del_init(&com->lc_link); - list_add_tail(&com->lc_link, &lfsck->li_list_idle); - } + if (lo->ll_status != LS_PAUSED) + list_move_tail(&com->lc_link, &lfsck->li_list_idle); } else { lo->ll_status = LS_FAILED; - list_del_init(&com->lc_link); - list_add_tail(&com->lc_link, &lfsck->li_list_idle); + list_move_tail(&com->lc_link, &lfsck->li_list_idle); } spin_unlock(&lfsck->li_lock); @@ -4934,7 +5158,6 @@ static int lfsck_layout_slave_post(const struct lu_env *env, } rc = lfsck_layout_store(env, com); - up_write(&com->lc_sem); lfsck_layout_slave_notify_master(env, com, LE_PHASE1_DONE, result); @@ -4942,73 +5165,60 @@ static int lfsck_layout_slave_post(const struct lu_env *env, if (result <= 0) lfsck_rbtree_cleanup(env, com); + CDEBUG(D_LFSCK, "%s: layout LFSCK slave post done: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + return rc; } static int lfsck_layout_dump(const struct lu_env *env, - struct lfsck_component *com, char *buf, int len) + struct lfsck_component *com, struct seq_file *m) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; 
struct lfsck_layout *lo = com->lc_file_ram; - int save = len; - int ret = -ENOSPC; int rc; down_read(&com->lc_sem); - rc = snprintf(buf, len, - "name: lfsck_layout\n" + seq_printf(m, "name: lfsck_layout\n" "magic: %#x\n" "version: %d\n" "status: %s\n", lo->ll_magic, bk->lb_version, lfsck_status2names(lo->ll_status)); - if (rc <= 0) - goto out; - buf += rc; - len -= rc; - rc = lfsck_bits_dump(&buf, &len, lo->ll_flags, lfsck_flags_names, - "flags"); + rc = lfsck_bits_dump(m, lo->ll_flags, lfsck_flags_names, "flags"); if (rc < 0) goto out; - rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names, - "param"); + rc = lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param"); if (rc < 0) goto out; - rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_complete, + rc = lfsck_time_dump(m, lo->ll_time_last_complete, "time_since_last_completed"); if (rc < 0) goto out; - rc = lfsck_time_dump(&buf, &len, lo->ll_time_latest_start, + rc = lfsck_time_dump(m, lo->ll_time_latest_start, "time_since_latest_start"); if (rc < 0) goto out; - rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_checkpoint, + rc = lfsck_time_dump(m, lo->ll_time_last_checkpoint, "time_since_last_checkpoint"); if (rc < 0) goto out; - rc = snprintf(buf, len, - "latest_start_position: "LPU64"\n" + seq_printf(m, "latest_start_position: "LPU64"\n" "last_checkpoint_position: "LPU64"\n" "first_failure_position: "LPU64"\n", lo->ll_pos_latest_start, lo->ll_pos_last_checkpoint, lo->ll_pos_first_inconsistent); - if (rc <= 0) - goto out; - - buf += rc; - len -= rc; - rc = snprintf(buf, len, - "success_count: %u\n" + seq_printf(m, "success_count: %u\n" "repaired_dangling: "LPU64"\n" "repaired_unmatched_pair: "LPU64"\n" "repaired_multiple_referenced: "LPU64"\n" @@ -5028,11 +5238,6 @@ static int lfsck_layout_dump(const struct lu_env *env, lo->ll_objs_skipped, lo->ll_objs_failed_phase1, lo->ll_objs_failed_phase2); - if (rc <= 0) - goto out; - - buf += rc; - len -= rc; if (lo->ll_status == LS_SCANNING_PHASE1) { 
__u64 pos; @@ -5050,8 +5255,7 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(new_checked, duration); if (rtime != 0) do_div(speed, rtime); - rc = snprintf(buf, len, - "checked_phase1: "LPU64"\n" + seq_printf(m, "checked_phase1: "LPU64"\n" "checked_phase2: "LPU64"\n" "run_time_phase1: %u seconds\n" "run_time_phase2: %u seconds\n" @@ -5065,11 +5269,6 @@ static int lfsck_layout_dump(const struct lu_env *env, lo->ll_run_time_phase2, speed, new_checked); - if (rc <= 0) - goto out; - - buf += rc; - len -= rc; LASSERT(lfsck->li_di_oit != NULL); @@ -5082,12 +5281,8 @@ static int lfsck_layout_dump(const struct lu_env *env, pos = iops->store(env, lfsck->li_di_oit); if (!lfsck->li_current_oit_processed) pos--; - rc = snprintf(buf, len, "current_position: "LPU64"\n", pos); - if (rc <= 0) - goto out; + seq_printf(m, "current_position: "LPU64"\n", pos); - buf += rc; - len -= rc; } else if (lo->ll_status == LS_SCANNING_PHASE2) { cfs_duration_t duration = cfs_time_current() - lfsck->li_time_last_checkpoint; @@ -5105,29 +5300,26 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(speed1, lo->ll_run_time_phase1); if (rtime != 0) do_div(speed2, rtime); - rc = snprintf(buf, len, - "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" items/sec\n" - "real-time_speed_phase1: N/A\n" - "real-time_speed_phase2: "LPU64" items/sec\n" - "current_position: "DFID"\n", - lo->ll_objs_checked_phase1, - checked, - lo->ll_run_time_phase1, - rtime, - speed1, - speed2, - new_checked, - PFID(&com->lc_fid_latest_scanned_phase2)); + rc = seq_printf(m, "checked_phase1: "LPU64"\n" + "checked_phase2: "LPU64"\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: "LPU64" items/sec\n" + "average_speed_phase2: "LPU64" items/sec\n" + "real-time_speed_phase1: N/A\n" + "real-time_speed_phase2: 
"LPU64" items/sec\n" + "current_position: "DFID"\n", + lo->ll_objs_checked_phase1, + checked, + lo->ll_run_time_phase1, + rtime, + speed1, + speed2, + new_checked, + PFID(&com->lc_fid_latest_scanned_phase2)); if (rc <= 0) goto out; - buf += rc; - len -= rc; } else { __u64 speed1 = lo->ll_objs_checked_phase1; __u64 speed2 = lo->ll_objs_checked_phase2; @@ -5136,34 +5328,26 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(speed1, lo->ll_run_time_phase1); if (lo->ll_run_time_phase2 != 0) do_div(speed2, lo->ll_run_time_phase2); - rc = snprintf(buf, len, - "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" objs/sec\n" - "real-time_speed_phase1: N/A\n" - "real-time_speed_phase2: N/A\n" - "current_position: N/A\n", - lo->ll_objs_checked_phase1, - lo->ll_objs_checked_phase2, - lo->ll_run_time_phase1, - lo->ll_run_time_phase2, - speed1, - speed2); - if (rc <= 0) - goto out; - - buf += rc; - len -= rc; + seq_printf(m, "checked_phase1: "LPU64"\n" + "checked_phase2: "LPU64"\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: "LPU64" items/sec\n" + "average_speed_phase2: "LPU64" objs/sec\n" + "real-time_speed_phase1: N/A\n" + "real-time_speed_phase2: N/A\n" + "current_position: N/A\n", + lo->ll_objs_checked_phase1, + lo->ll_objs_checked_phase2, + lo->ll_run_time_phase1, + lo->ll_run_time_phase2, + speed1, + speed2); } - ret = save - len; - out: up_read(&com->lc_sem); - return ret; + return rc; } static int lfsck_layout_master_double_scan(const struct lu_env *env, @@ -5206,6 +5390,9 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env, RETURN(0); } + CDEBUG(D_LFSCK, "%s: layout LFSCK slave phase2 scan start\n", + lfsck_lfsck2name(lfsck)); + atomic_inc(&lfsck->li_double_scan_count); com->lc_new_checked = 0; @@ -5252,6 +5439,10 @@ done: if 
(atomic_dec_and_test(&lfsck->li_double_scan_count)) wake_up_all(&lfsck->li_thread.t_ctl_waitq); + CDEBUG(D_LFSCK, "%s: layout LFSCK slave phase2 scan finished, " + "status %d: rc = %d\n", + lfsck_lfsck2name(lfsck), lo->ll_status, rc); + return rc; } @@ -5285,6 +5476,10 @@ static void lfsck_layout_master_data_release(const struct lu_env *env, ltd_layout_list) { list_del_init(&ltd->ltd_layout_list); } + spin_unlock(&ltds->ltd_lock); + + ltds = &lfsck->li_mdt_descs; + spin_lock(&ltds->ltd_lock); list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase1_list, ltd_layout_phase_list) { list_del_init(&ltd->ltd_layout_phase_list); @@ -5374,6 +5569,11 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, RETURN(rc); } + CDEBUG(D_LFSCK, "%s: layout LFSCK master handle notify %u " + "from %s %x, status %d\n", lfsck_lfsck2name(lfsck), + lr->lr_event, (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", + lr->lr_index, lr->lr_status); + if (lr->lr_event != LE_PHASE1_DONE && lr->lr_event != LE_PHASE2_DONE && lr->lr_event != LE_PEER_EXIT) @@ -5397,10 +5597,6 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, if (lr->lr_status <= 0) { ltd->ltd_layout_done = 1; list_del_init(&ltd->ltd_layout_list); - CWARN("%s: %s %x failed/stopped at phase1: rc = %d.\n", - lfsck_lfsck2name(lfsck), - (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", - ltd->ltd_index, lr->lr_status); lo->ll_flags |= LF_INCOMPLETE; fail = true; break; @@ -5428,13 +5624,8 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, fail = true; ltd->ltd_layout_done = 1; list_del_init(&ltd->ltd_layout_list); - if (!(lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT)) { - CWARN("%s: the peer %s %x exit layout LFSCK.\n", - lfsck_lfsck2name(lfsck), - (lr->lr_flags & LEF_TO_OST) ? 
"OST" : "MDT", - ltd->ltd_index); + if (!(lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT)) lo->ll_flags |= LF_INCOMPLETE; - } break; default: break; @@ -5507,6 +5698,9 @@ static int lfsck_layout_slave_in_notify(const struct lu_env *env, } case LE_PHASE2_DONE: case LE_PEER_EXIT: + CDEBUG(D_LFSCK, "%s: layout LFSCK slave handle notify %u " + "from MDT %x, status %d\n", lfsck_lfsck2name(lfsck), + lr->lr_event, lr->lr_index, lr->lr_status); break; default: RETURN(-EINVAL); @@ -5568,7 +5762,7 @@ static int lfsck_layout_master_stop_notify(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_PEER_EXIT; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_status = LS_CO_PAUSED; if (ltds == &lfsck->li_ost_descs) lr->lr_flags = LEF_TO_OST; @@ -5584,7 +5778,8 @@ static int lfsck_layout_master_stop_notify(const struct lu_env *env, lfsck_layout_master_async_interpret, laia, LFSCK_NOTIFY); if (rc != 0) { - CERROR("%s: Fail to notify %s %x for co-stop: rc = %d\n", + CDEBUG(D_LFSCK, "%s: layout LFSCK fail to notify %s %x " + "for co-stop: rc = %d\n", lfsck_lfsck2name(lfsck), (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", ltd->ltd_index, rc); @@ -5606,10 +5801,17 @@ static int lfsck_layout_slave_join(const struct lu_env *env, int rc = 0; ENTRY; - if (!lsp->lsp_index_valid || start == NULL || - !(start->ls_flags & LPF_ALL_TGT) || - !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) - RETURN(-EALREADY); + if (start == NULL || !(start->ls_flags & LPF_ORPHAN)) + RETURN(0); + + if (!lsp->lsp_index_valid) + RETURN(-EINVAL); + + /* If someone is running the LFSCK without orphan handling, + * it will not maintain the object accessing rbtree. So we + * cannot join it for orphan handling. 
*/ + if (!llsd->llsd_rbtree_valid) + RETURN(-EBUSY); spin_unlock(&lfsck->li_lock); rc = lfsck_layout_llst_add(llsd, lsp->lsp_index); @@ -5680,7 +5882,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) init_rwsem(&com->lc_sem); atomic_set(&com->lc_ref, 1); com->lc_lfsck = lfsck; - com->lc_type = LT_LAYOUT; + com->lc_type = LFSCK_TYPE_LAYOUT; if (lfsck->li_master) { struct lfsck_layout_master_data *llmd; @@ -5758,7 +5960,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) spin_unlock(&lfsck->li_lock); break; default: - CERROR("%s: unknown lfsck_layout status: rc = %u\n", + CERROR("%s: unknown lfsck_layout status %d\n", lfsck_lfsck2name(lfsck), lo->ll_status); /* fall through */ case LS_SCANNING_PHASE1: @@ -5793,8 +5995,11 @@ out: if (root != NULL && !IS_ERR(root)) lu_object_put(env, &root->do_lu); - if (rc != 0) + if (rc != 0) { lfsck_component_cleanup(env, com); + CERROR("%s: fail to init layout LFSCK component: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + } return rc; } @@ -5882,6 +6087,9 @@ static void lfsck_layout_destroy_orphan(const struct lu_env *env, stop: dt_trans_stop(env, dev, handle); + CDEBUG(D_LFSCK, "destroy orphan OST-object "DFID": rc = %d\n", + PFID(lfsck_dto2fid(obj)), rc); + RETURN_EXIT; } @@ -5948,7 +6156,7 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, if (unlikely(lfsck == NULL)) RETURN(ERR_PTR(-ENXIO)); - com = lfsck_component_find(lfsck, LT_LAYOUT); + com = lfsck_component_find(lfsck, LFSCK_TYPE_LAYOUT); if (unlikely(com == NULL)) GOTO(out, rc = -ENOENT); @@ -6002,6 +6210,10 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, out: if (com != NULL) lfsck_component_put(env, com); + + CDEBUG(D_LFSCK, "%s: init the orphan iteration: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + lfsck_instance_put(env, lfsck); if (rc != 0) { if (it != NULL) @@ -6022,6 +6234,9 @@ static void lfsck_orphan_it_fini(const struct lu_env *env, struct 
lfsck_layout_slave_target *llst; if (com != NULL) { + CDEBUG(D_LFSCK, "%s: fini the orphan iteration\n", + lfsck_lfsck2name(com->lc_lfsck)); + llsd = com->lc_data; read_unlock(&llsd->llsd_rb_lock); llst = it->loi_llst; @@ -6151,7 +6366,8 @@ again1: } dt_read_lock(env, obj, 0); - if (!dt_object_exists(obj)) { + if (dt_object_exists(obj) == 0 || + lfsck_is_dead_obj(obj)) { dt_read_unlock(env, obj); lfsck_object_put(env, obj); pos++; @@ -6322,10 +6538,11 @@ static int lfsck_orphan_it_load(const struct lu_env *env, LASSERT(llst != NULL); if (hash != llst->llst_hash) { - CWARN("%s: the given hash "LPU64" for orphan iteration does " - "not match the one when fini "LPU64", to be reset.\n", - lfsck_lfsck2name(it->loi_com->lc_lfsck), hash, - llst->llst_hash); + CDEBUG(D_LFSCK, "%s: the given hash "LPU64" for orphan " + "iteration does not match the one when fini " + LPU64", to be reset.\n", + lfsck_lfsck2name(it->loi_com->lc_lfsck), hash, + llst->llst_hash); fid_zero(&llst->llst_fid); llst->llst_hash = 0; }