X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_engine.c;h=f5b5e7005c8005eed919df3ab804a5d5e2e467b1;hb=445da16c2ac0475b1c1077c822800b68cdbb7ce3;hp=375e64c99e8a57707d8ea957958095ea3c6100cd;hpb=0f4875343e22bcdfe18708806e172aa234da23a6;p=fs%2Flustre-release.git diff --git a/lustre/lfsck/lfsck_engine.c b/lustre/lfsck/lfsck_engine.c index 375e64c..f5b5e70 100644 --- a/lustre/lfsck/lfsck_engine.c +++ b/lustre/lfsck/lfsck_engine.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2013, 2014, Intel Corporation. + * Copyright (c) 2013, 2015, Intel Corporation. */ /* * lustre/lfsck/lfsck_engine.c @@ -93,64 +93,6 @@ static void lfsck_di_dir_put(const struct lu_env *env, struct lfsck_instance *lf iops->put(env, di); } -static int lfsck_update_lma(const struct lu_env *env, - struct lfsck_instance *lfsck, struct dt_object *obj) -{ - struct lfsck_thread_info *info = lfsck_env_info(env); - struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; - struct dt_device *dev = lfsck_obj2dev(obj); - struct lustre_mdt_attrs *lma = &info->lti_lma; - struct lu_buf *buf; - struct thandle *th; - int fl; - int rc; - ENTRY; - - if (bk->lb_param & LPF_DRYRUN) - RETURN(0); - - buf = lfsck_buf_get(env, info->lti_lma_old, LMA_OLD_SIZE); - rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LMA); - if (rc < 0) { - if (rc != -ENODATA) - RETURN(rc); - - fl = LU_XATTR_CREATE; - lustre_lma_init(lma, lfsck_dto2fid(obj), LMAC_FID_ON_OST, 0); - } else { - if (rc != LMA_OLD_SIZE && rc != sizeof(struct lustre_mdt_attrs)) - RETURN(-EINVAL); - - fl = LU_XATTR_REPLACE; - lustre_lma_swab(lma); - lustre_lma_init(lma, lfsck_dto2fid(obj), - lma->lma_compat | LMAC_FID_ON_OST, - lma->lma_incompat); - } - lustre_lma_swab(lma); - - th = dt_trans_create(env, dev); - if (IS_ERR(th)) - RETURN(PTR_ERR(th)); - - buf = lfsck_buf_get(env, lma, sizeof(*lma)); - rc = dt_declare_xattr_set(env, obj, buf, XATTR_NAME_LMA, fl, th); - if (rc != 0) - GOTO(stop, rc); - - rc = dt_trans_start_local(env, dev, 
th); - if (rc != 0) - GOTO(stop, rc); - - rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LMA, fl, th); - - GOTO(stop, rc); - -stop: - dt_trans_stop(env, dev, th); - return rc; -} - static int lfsck_parent_fid(const struct lu_env *env, struct dt_object *obj, struct lu_fid *fid) { @@ -753,7 +695,7 @@ static int lfsck_master_dir_engine(const struct lu_env *env, if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY2, cfs_fail_val) && unlikely(!thread_is_running(thread))) { CDEBUG(D_LFSCK, "%s: scan dir exit for engine stop, " - "parent "DFID", cookie "LPX64"\n", + "parent "DFID", cookie %#llx\n", lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(dir)), lfsck->li_cookie_dir); @@ -769,7 +711,7 @@ static int lfsck_master_dir_engine(const struct lu_env *env, if (rc != 0) { CDEBUG(D_LFSCK, "%s: scan dir failed at rec(), " - "parent "DFID", cookie "LPX64": rc = %d\n", + "parent "DFID", cookie %#llx: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(dir)), lfsck->li_cookie_dir, rc); @@ -780,8 +722,7 @@ static int lfsck_master_dir_engine(const struct lu_env *env, goto checkpoint; } - if (ent->lde_attrs & LUDA_IGNORE && - strcmp(ent->lde_name, dotdot) != 0) + if (ent->lde_attrs & LUDA_IGNORE) goto checkpoint; /* skip dot entry. 
*/ @@ -816,7 +757,7 @@ checkpoint: lfsck_control_speed(lfsck); if (unlikely(!thread_is_running(thread))) { CDEBUG(D_LFSCK, "%s: scan dir exit for engine stop, " - "parent "DFID", cookie "LPX64"\n", + "parent "DFID", cookie %#llx\n", lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(dir)), lfsck->li_cookie_dir); @@ -831,6 +772,11 @@ checkpoint: } rc = iops->next(env, di); + if (rc < 0) + CDEBUG(D_LFSCK, "%s dir engine fail to locate next " + "for the directory "DFID": rc = %d\n", + lfsck_lfsck2name(lfsck), + PFID(&lfsck->li_pos_current.lp_dir_parent), rc); } while (rc == 0); if (rc > 0 && !lfsck->li_oit_over) @@ -887,7 +833,6 @@ static int lfsck_master_oit_engine(const struct lu_env *env, do { struct dt_object *target; - bool update_lma = false; if (lfsck->li_di_dir != NULL) { rc = lfsck_master_dir_engine(env, lfsck); @@ -901,7 +846,7 @@ static int lfsck_master_oit_engine(const struct lu_env *env, if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY1, cfs_fail_val) && unlikely(!thread_is_running(thread))) { CDEBUG(D_LFSCK, "%s: OIT scan exit for engine stop, " - "cookie "LPU64"\n", + "cookie %llu\n", lfsck_lfsck2name(lfsck), iops->store(env, di)); RETURN(0); @@ -956,13 +901,23 @@ static int lfsck_master_oit_engine(const struct lu_env *env, LASSERT(!lfsck->li_master); - /* It is an old format device, update the LMA. */ if (idx != idx1) { struct ost_id *oi = &info->lti_oi; + if (unlikely(idx1 != 0)) { + CDEBUG(D_LFSCK, "%s: invalid IDIF "DFID + ", not match device index %u\n", + lfsck_lfsck2name(lfsck), + PFID(fid), idx); + + goto checkpoint; + } + + /* rebuild the IDIF with index to + * avoid double instances for the + * same object. 
*/ fid_to_ostid(fid, oi); ostid_to_fid(fid, oi, idx); - update_lma = true; } } else if (!fid_is_norm(fid) && !fid_is_igif(fid) && !fid_is_last_id(fid) && @@ -994,7 +949,7 @@ static int lfsck_master_oit_engine(const struct lu_env *env, target = lfsck_object_find_bottom(env, lfsck, fid); if (IS_ERR(target)) { CDEBUG(D_LFSCK, "%s: OIT scan failed at find target " - DFID", cookie "LPU64": rc = %d\n", + DFID", cookie %llu: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(fid), iops->store(env, di), rc); lfsck_fail(env, lfsck, true); @@ -1004,18 +959,9 @@ static int lfsck_master_oit_engine(const struct lu_env *env, goto checkpoint; } - if (dt_object_exists(target)) { - if (update_lma) { - rc = lfsck_update_lma(env, lfsck, target); - if (rc != 0) - CDEBUG(D_LFSCK, "%s: fail to update " - "LMA for "DFID": rc = %d\n", - lfsck_lfsck2name(lfsck), - PFID(lfsck_dto2fid(target)), rc); - } - if (rc == 0) - rc = lfsck_exec_oit(env, lfsck, target); - } + if (dt_object_exists(target)) + rc = lfsck_exec_oit(env, lfsck, target); + lfsck_object_put(env, target); if (rc != 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); @@ -1040,10 +986,14 @@ checkpoint: lfsck->li_oit_over = 1; else if (likely(rc == 0)) lfsck->li_current_oit_processed = 0; + else + CDEBUG(D_LFSCK, "%s oit engine fail to locate next at " + "%llu: rc = %d\n", lfsck_lfsck2name(lfsck), + iops->store(env, di), rc); if (unlikely(!thread_is_running(thread))) { CDEBUG(D_LFSCK, "%s: OIT scan exit for engine stop, " - "cookie "LPU64"\n", lfsck_lfsck2name(lfsck), + "cookie %llu\n", lfsck_lfsck2name(lfsck), iops->store(env, di)); RETURN(0); } @@ -1065,6 +1015,18 @@ int lfsck_master_engine(void *args) int rc; ENTRY; + /* There will be some objects verification during the LFSCK start, + * such as the subsequent lfsck_verify_lpf(). Trigger low layer OI + * scrub before that to handle the potential inconsistency. 
*/ + oit_di = oit_iops->init(env, oit_obj, lfsck->li_args_oit); + if (IS_ERR(oit_di)) { + rc = PTR_ERR(oit_di); + CDEBUG(D_LFSCK, "%s: master engine fail to init iteration: " + "rc = %d\n", lfsck_lfsck2name(lfsck), rc); + + GOTO(fini_args, rc); + } + if (lfsck->li_master && (!list_empty(&lfsck->li_list_scan) || !list_empty(&lfsck->li_list_double_scan))) { @@ -1080,15 +1042,6 @@ int lfsck_master_engine(void *args) lfsck_lfsck2name(lfsck), rc); } - oit_di = oit_iops->init(env, oit_obj, lfsck->li_args_oit); - if (IS_ERR(oit_di)) { - rc = PTR_ERR(oit_di); - CDEBUG(D_LFSCK, "%s: master engine fail to init iteration: " - "rc = %d\n", lfsck_lfsck2name(lfsck), rc); - - GOTO(fini_args, rc); - } - spin_lock(&lfsck->li_lock); lfsck->li_di_oit = oit_di; spin_unlock(&lfsck->li_lock); @@ -1097,7 +1050,7 @@ int lfsck_master_engine(void *args) GOTO(fini_oit, rc); CDEBUG(D_LFSCK, "LFSCK entry: oit_flags = %#x, dir_flags = %#x, " - "oit_cookie = "LPU64", dir_cookie = "LPX64", parent = "DFID + "oit_cookie = %llu, dir_cookie = %#llx, parent = "DFID ", pid = %d\n", lfsck->li_args_oit, lfsck->li_args_dir, lfsck->li_pos_checkpoint.lp_oit_cookie, lfsck->li_pos_checkpoint.lp_dir_cookie, @@ -1124,7 +1077,7 @@ int lfsck_master_engine(void *args) lfsck_pos_fill(env, lfsck, &lfsck->li_pos_checkpoint, false); CDEBUG(D_LFSCK, "LFSCK exit: oit_flags = %#x, dir_flags = %#x, " - "oit_cookie = "LPU64", dir_cookie = "LPX64", parent = "DFID + "oit_cookie = %llu, dir_cookie = %#llx, parent = "DFID ", pid = %d, rc = %d\n", lfsck->li_args_oit, lfsck->li_args_dir, lfsck->li_pos_checkpoint.lp_oit_cookie, lfsck->li_pos_checkpoint.lp_dir_cookie, @@ -1208,9 +1161,10 @@ static int lfsck_assistant_query_others(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_QUERY; lr->lr_active = com->lc_type; + + memset(laia, 0, sizeof(*laia)); laia->laia_com = com; laia->laia_lr = lr; - laia->laia_shared = 0; if (!list_empty(&lad->lad_mdt_phase1_list)) { ltds = &lfsck->li_mdt_descs; @@ -1324,9 +1278,10 
@@ static int lfsck_assistant_notify_others(const struct lu_env *env, lr->lr_index = lfsck_dev_idx(lfsck); lr->lr_active = com->lc_type; + + memset(laia, 0, sizeof(*laia)); laia->laia_com = com; laia->laia_lr = lr; - laia->laia_shared = 0; switch (lr->lr_event) { case LE_START: @@ -1349,8 +1304,6 @@ static int lfsck_assistant_notify_others(const struct lu_env *env, LASSERT(ltd != NULL); laia->laia_ltd = ltd; - ltd->ltd_layout_done = 0; - ltd->ltd_synced_failures = 0; rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, lfsck_async_interpret_common, laia, LFSCK_NOTIFY); @@ -1381,7 +1334,7 @@ next: spin_lock(&ltds->ltd_lock); if (com->lc_type == LFSCK_TYPE_LAYOUT) { cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { - ltd = LTD_TGT(ltds, idx); + ltd = lfsck_ltd2tgt(ltds, idx); LASSERT(ltd != NULL); if (!list_empty(&ltd->ltd_layout_list)) @@ -1394,7 +1347,7 @@ next: } } else { cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { - ltd = LTD_TGT(ltds, idx); + ltd = lfsck_ltd2tgt(ltds, idx); LASSERT(ltd != NULL); if (!list_empty(&ltd->ltd_namespace_list)) @@ -1633,6 +1586,7 @@ int lfsck_assistant_engine(void *args) } spin_lock(&lad->lad_lock); + lad->lad_task = current; thread_set_flags(athread, SVC_RUNNING); spin_unlock(&lad->lad_lock); wake_up_all(&mthread->t_ctl_waitq); @@ -1643,7 +1597,7 @@ int lfsck_assistant_engine(void *args) if (unlikely(lad->lad_exit || !thread_is_running(mthread))) - GOTO(cleanup1, rc = lad->lad_post_result); + GOTO(cleanup, rc = lad->lad_post_result); lar = list_entry(lad->lad_req_list.next, struct lfsck_assistant_req, @@ -1668,7 +1622,7 @@ int lfsck_assistant_engine(void *args) lao->la_req_fini(env, lar); if (rc < 0 && bk->lb_param & LPF_FAILOUT) - GOTO(cleanup1, rc); + GOTO(cleanup, rc); } l_wait_event(athread->t_ctl_waitq, @@ -1679,7 +1633,7 @@ int lfsck_assistant_engine(void *args) &lwi); if (unlikely(lad->lad_exit)) - GOTO(cleanup1, rc = lad->lad_post_result); + GOTO(cleanup, rc = lad->lad_post_result); if (!list_empty(&lad->lad_req_list)) continue; @@ 
-1689,7 +1643,7 @@ int lfsck_assistant_engine(void *args) lfsck_lfsck2name(lfsck), lad->lad_name); if (unlikely(lad->lad_exit)) - GOTO(cleanup1, rc = lad->lad_post_result); + GOTO(cleanup, rc = lad->lad_post_result); lad->lad_to_post = 0; LASSERT(lad->lad_post_result > 0); @@ -1733,7 +1687,7 @@ int lfsck_assistant_engine(void *args) lfsck_lfsck2name(lfsck), rc2); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN)) - GOTO(cleanup2, rc = 0); + GOTO(cleanup, rc = 0); while (lad->lad_in_double_scan) { rc = lfsck_assistant_query_others(env, com); @@ -1741,7 +1695,7 @@ int lfsck_assistant_engine(void *args) goto p2_next; if (rc < 0) - GOTO(cleanup2, rc); + GOTO(cleanup, rc); /* Pull LFSCK status on related targets once * per 30 seconds if we are not notified. */ @@ -1756,27 +1710,27 @@ int lfsck_assistant_engine(void *args) if (unlikely(lad->lad_exit || !thread_is_running(mthread))) - GOTO(cleanup2, rc = 0); + GOTO(cleanup, rc = 0); if (rc == -ETIMEDOUT) continue; if (rc < 0) - GOTO(cleanup2, rc); + GOTO(cleanup, rc); p2_next: rc = lao->la_handler_p2(env, com); if (rc != 0) - GOTO(cleanup2, rc); + GOTO(cleanup, rc); if (unlikely(lad->lad_exit || !thread_is_running(mthread))) - GOTO(cleanup2, rc = 0); + GOTO(cleanup, rc = 0); } } } -cleanup1: +cleanup: /* Cleanup the unfinished requests. */ spin_lock(&lad->lad_lock); if (rc < 0) @@ -1785,6 +1739,7 @@ cleanup1: if (lad->lad_exit && lad->lad_post_result <= 0) lao->la_fill_pos(env, com, &lfsck->li_pos_checkpoint); + thread_set_flags(athread, SVC_STOPPING); while (!list_empty(&lad->lad_req_list)) { lar = list_entry(lad->lad_req_list.next, struct lfsck_assistant_req, @@ -1800,7 +1755,6 @@ cleanup1: LASSERTF(lad->lad_prefetched == 0, "unmatched prefeteched objs %d\n", lad->lad_prefetched); -cleanup2: memset(lr, 0, sizeof(*lr)); if (rc > 0) { lr->lr_event = LE_PHASE2_DONE; @@ -1876,6 +1830,7 @@ fini: lad->lad_assistant_status = (rc1 != 0 ? 
rc1 : rc); thread_set_flags(athread, SVC_STOPPED); wake_up_all(&mthread->t_ctl_waitq); + lad->lad_task = NULL; spin_unlock(&lad->lad_lock); CDEBUG(D_LFSCK, "%s: %s LFSCK assistant thread exit: rc = %d\n",