X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdd%2Fmdd_lfsck.c;h=7ab15f97ccc72c57dcd3cbc7056b1fe720eb39dc;hb=ab28a55546afdd8871f5cd3dbcef86b97b68fd87;hp=04f3a524efa600326f1e4389cbbd901dfead950f;hpb=3d6a1b79c92933f8d822919229016fbb0122b769;p=fs%2Flustre-release.git diff --git a/lustre/mdd/mdd_lfsck.c b/lustre/mdd/mdd_lfsck.c index 04f3a52..7ab15f9 100644 --- a/lustre/mdd/mdd_lfsck.c +++ b/lustre/mdd/mdd_lfsck.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012, 2013, Intel Corporation. */ /* * lustre/mdd/mdd_lfsck.c @@ -47,11 +47,39 @@ #define HALF_SEC (CFS_HZ >> 1) #define LFSCK_CHECKPOINT_INTERVAL 60 -#define MDS_DIR_DUMMY_START 0xffffffffffffffffULL + +#define LFSCK_NAMEENTRY_DEAD 1 /* The object has been unlinked. */ +#define LFSCK_NAMEENTRY_REMOVED 2 /* The entry has been removed. */ +#define LFSCK_NAMEENTRY_RECREATED 3 /* The entry has been recreated. */ const char lfsck_bookmark_name[] = "lfsck_bookmark"; const char lfsck_namespace_name[] = "lfsck_namespace"; +static const char *lfsck_status_names[] = { + "init", + "scanning-phase1", + "scanning-phase2", + "completed", + "failed", + "stopped", + "paused", + "crashed", + NULL +}; + +static const char *lfsck_flags_names[] = { + "scanned-once", + "inconsistent", + "upgrade", + NULL +}; + +static const char *lfsck_param_names[] = { + "failout", + "dryrun", + NULL +}; + /* misc functions */ static inline struct mdd_device *mdd_lfsck2mdd(struct md_lfsck *lfsck) @@ -97,6 +125,30 @@ __mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type, cfs_list_t *list) return NULL; } +static struct lfsck_component * +mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type) +{ + struct lfsck_component *com; + + spin_lock(&lfsck->ml_lock); + com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_scan); + if (com != NULL) + goto unlock; + + com = __mdd_lfsck_component_find(lfsck, type, + &lfsck->ml_list_double_scan); + if (com != NULL) + goto unlock; + + com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_idle); + +unlock: + if (com != NULL) + mdd_lfsck_component_get(com); + spin_unlock(&lfsck->ml_lock); + return com; +} + static void mdd_lfsck_component_cleanup(const struct lu_env *env, struct lfsck_component *com) { @@ -108,9 +160,79 @@ static void mdd_lfsck_component_cleanup(const struct lu_env *env, mdd_lfsck_component_put(env, com); } +static int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[], + const char *prefix) +{ + int save = *len; + int flag; + int rc; + int i; + + rc = snprintf(*buf, *len, "%s:%c", prefix, bits != 0 ? ' ' : '\n'); + if (rc <= 0) + return -ENOSPC; + + *buf += rc; + *len -= rc; + for (i = 0, flag = 1; bits != 0; i++, flag = 1 << i) { + if (flag & bits) { + bits &= ~flag; + rc = snprintf(*buf, *len, "%s%c", names[i], + bits != 0 ? ',' : '\n'); + if (rc <= 0) + return -ENOSPC; + + *buf += rc; + *len -= rc; + } + } + return save - *len; +} + +static int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix) +{ + int rc; + + if (time != 0) + rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix, + cfs_time_current_sec() - time); + else + rc = snprintf(*buf, *len, "%s: N/A\n", prefix); + if (rc <= 0) + return -ENOSPC; + + *buf += rc; + *len -= rc; + return rc; +} + +static int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos, + const char *prefix) +{ + int rc; + + if (fid_is_zero(&pos->lp_dir_parent)) { + if (pos->lp_oit_cookie == 0) + rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n", + prefix); + else + rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n", + prefix, pos->lp_oit_cookie); + } else { + rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n", + prefix, pos->lp_oit_cookie, + PFID(&pos->lp_dir_parent), pos->lp_dir_cookie); + } + if (rc <= 0) + return -ENOSPC; + + *buf += rc; + *len -= rc; + return rc; +} + static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck, - struct lfsck_position *pos, bool oit_processed, - bool dir_processed) + struct lfsck_position *pos, bool init) { const struct dt_it_ops *iops = &lfsck->ml_obj_oit->do_index_ops->dio_it; @@ -122,29 +244,23 @@ static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck, } pos->lp_oit_cookie = iops->store(env, lfsck->ml_di_oit); + if (!lfsck->ml_current_oit_processed && !init) + pos->lp_oit_cookie--; LASSERT(pos->lp_oit_cookie > 0); - if (!oit_processed) - pos->lp_oit_cookie--; - if (lfsck->ml_di_dir != NULL) { struct dt_object *dto = lfsck->ml_obj_dir; - pos->lp_dir_parent = *lu_object_fid(&dto->do_lu); pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env, lfsck->ml_di_dir); - LASSERT(pos->lp_dir_cookie != MDS_DIR_DUMMY_START); - - if (pos->lp_dir_cookie == MDS_DIR_END_OFF) - LASSERT(dir_processed); - - /* For the dir which just to be processed, - * lp_dir_cookie will become MDS_DIR_DUMMY_START, - * which can be correctly handled by mdd_lfsck_prep. */ - if (!dir_processed) - pos->lp_dir_cookie--; + if (pos->lp_dir_cookie >= MDS_DIR_END_OFF) { + fid_zero(&pos->lp_dir_parent); + pos->lp_dir_cookie = 0; + } else { + pos->lp_dir_parent = *lu_object_fid(&dto->do_lu); + } } else { fid_zero(&pos->lp_dir_parent); pos->lp_dir_cookie = 0; @@ -360,9 +476,9 @@ static int mdd_lfsck_bookmark_init(const struct lu_env *env, struct lfsck_bookmark *mb = &lfsck->ml_bookmark_ram; int rc; - memset(mb, 0, sizeof(mb)); + memset(mb, 0, sizeof(*mb)); mb->lb_magic = LFSCK_BOOKMARK_MAGIC; - mb->lb_version = LFSCK_VERSION_V1; + mb->lb_version = LFSCK_VERSION_V2; mutex_lock(&lfsck->ml_mutex); rc = mdd_lfsck_bookmark_store(env, lfsck); mutex_unlock(&lfsck->ml_mutex); @@ -560,89 +676,289 @@ static int mdd_lfsck_namespace_init(const struct lu_env *env, return rc; } -static int mdd_declare_lfsck_namespace_unlink(const struct lu_env *env, - struct mdd_device *mdd, - struct dt_object *p, - struct dt_object *c, - const char *name, - struct thandle *handle) +static int mdd_lfsck_namespace_lookup(const struct lu_env *env, + struct lfsck_component *com, + const struct lu_fid *fid, + __u8 *flags) { - int rc; + struct lu_fid *key = &mdd_env_info(env)->mti_fid; + int rc; - rc = dt_declare_delete(env, p, (const struct dt_key *)name, handle); + fid_cpu_to_be(key, fid); + rc = dt_lookup(env, com->lc_obj, (struct dt_rec *)flags, + (const struct dt_key *)key, BYPASS_CAPA); + return rc; +} + +static int mdd_lfsck_namespace_delete(const struct lu_env *env, + struct lfsck_component *com, + const struct lu_fid *fid) +{ + struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck); + struct lu_fid *key = &mdd_env_info(env)->mti_fid; + struct thandle *handle; + struct dt_object *obj = com->lc_obj; + int rc; + ENTRY; + + handle = dt_trans_create(env, mdd->mdd_bottom); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle); if (rc != 0) - return rc; + GOTO(out, rc); - rc = dt_declare_ref_del(env, c, handle); + rc = dt_trans_start_local(env, mdd->mdd_bottom, handle); if (rc != 0) - return rc; + GOTO(out, rc); - rc = dt_declare_destroy(env, c, handle); + fid_cpu_to_be(key, fid); + rc = dt_delete(env, obj, (const struct dt_key *)key, handle, + BYPASS_CAPA); + + GOTO(out, rc); + +out: + dt_trans_stop(env, mdd->mdd_bottom, handle); return rc; } -static int mdd_lfsck_namespace_unlink(const struct lu_env *env, - struct mdd_device *mdd, - struct lfsck_component *com) +static int mdd_lfsck_namespace_update(const struct lu_env *env, + struct lfsck_component *com, + const struct lu_fid *fid, + __u8 flags, bool force) { - struct mdd_thread_info *info = mdd_env_info(env); - struct lu_fid *fid = &info->mti_fid; - struct dt_object *child = com->lc_obj; - struct dt_object *parent; - struct thandle *handle; - int rc; + struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck); + struct lu_fid *key = &mdd_env_info(env)->mti_fid; + struct thandle *handle; + struct dt_object *obj = com->lc_obj; + int rc; + bool exist = false; + __u8 tf; ENTRY; - parent = dt_store_resolve(env, mdd->mdd_bottom, "", fid); - if (IS_ERR(parent)) - RETURN(rc = PTR_ERR(parent)); + rc = mdd_lfsck_namespace_lookup(env, com, fid, &tf); + if (rc != 0 && rc != -ENOENT) + RETURN(rc); - if (dt_try_as_dir(env, parent)) - GOTO(out, rc = -ENOTDIR); + if (rc == 0) { + if (!force || flags == tf) + RETURN(0); - handle = dt_trans_create(env, mdd->mdd_bottom); - if (IS_ERR(handle)) - GOTO(out, rc = PTR_ERR(handle)); + exist = true; + handle = dt_trans_create(env, mdd->mdd_bottom); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, + handle); + if (rc != 0) + GOTO(out, rc); + } else { + handle = dt_trans_create(env, mdd->mdd_bottom); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + } - rc = mdd_declare_lfsck_namespace_unlink(env, mdd, parent, child, - lfsck_namespace_name, handle); + rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags, + (const struct dt_key *)fid, handle); if (rc != 0) - GOTO(stop, rc); + GOTO(out, rc); rc = dt_trans_start_local(env, mdd->mdd_bottom, handle); if (rc != 0) + GOTO(out, rc); + + fid_cpu_to_be(key, fid); + if (exist) { + rc = dt_delete(env, obj, (const struct dt_key *)key, handle, + BYPASS_CAPA); + if (rc != 0) { + CERROR("%s: fail to insert "DFID", rc = %d\n", + mdd_lfsck2name(com->lc_lfsck), PFID(fid), rc); + GOTO(out, rc); + } + } + + rc = dt_insert(env, obj, (const struct dt_rec *)&flags, + (const struct dt_key *)key, handle, BYPASS_CAPA, 1); + + GOTO(out, rc); + +out: + dt_trans_stop(env, mdd->mdd_bottom, handle); + return rc; +} + +/** + * \retval +ve repaired + * \retval 0 no need to repair + * \retval -ve error cases + */ +static int mdd_lfsck_namespace_double_scan_one(const struct lu_env *env, + struct lfsck_component *com, + struct mdd_object *child, + __u8 flags) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la; + struct lu_name *cname = &info->mti_name; + struct lu_fid *pfid = &info->mti_fid; + struct lu_fid *cfid = &info->mti_fid2; + struct md_lfsck *lfsck = com->lc_lfsck; + struct mdd_device *mdd = mdd_lfsck2mdd(lfsck); + struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram; + struct lfsck_namespace *ns = + (struct lfsck_namespace *)com->lc_file_ram; + struct linkea_data ldata = { 0 }; + struct thandle *handle = NULL; + bool locked = false; + bool update = false; + int count; + int rc; + ENTRY; + + if (com->lc_journal) { + +again: + LASSERT(!locked); + + com->lc_journal = 1; + handle = mdd_trans_create(env, mdd); + if (IS_ERR(handle)) + RETURN(rc = PTR_ERR(handle)); + + rc = mdd_declare_links_add(env, child, handle, NULL); + if (rc != 0) + GOTO(stop, rc); + + rc = mdd_trans_start(env, mdd, handle); + if (rc != 0) + GOTO(stop, rc); + + mdd_write_lock(env, child, MOR_TGT_CHILD); + locked = true; + } + + if (unlikely(mdd_is_dead_obj(child))) + GOTO(stop, rc = 0); + + rc = mdd_links_read(env, child, &ldata); + if (rc != 0) { + if ((bk->lb_param & LPF_DRYRUN) && + (rc == -EINVAL || rc == -ENODATA)) + rc = 1; + GOTO(stop, rc); + } - rc = dt_delete(env, parent, (struct dt_key *)lfsck_namespace_name, - handle, BYPASS_CAPA); + rc = mdd_la_get(env, child, la, BYPASS_CAPA); if (rc != 0) GOTO(stop, rc); - rc = child->do_ops->do_ref_del(env, child, handle); - if (rc != 0) { - lu_local_obj_fid(fid, LFSCK_NAMESPACE_OID); - rc = dt_insert(env, parent, - (const struct dt_rec*)fid, - (const struct dt_key *)lfsck_namespace_name, - handle, BYPASS_CAPA, 1); + ldata.ld_lee = LINKEA_FIRST_ENTRY(ldata); + count = ldata.ld_leh->leh_reccount; + while (count-- > 0) { + struct mdd_object *parent = NULL; + struct dt_object *dir; - GOTO(stop, rc); + linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname, + pfid); + if (!fid_is_sane(pfid)) + goto shrink; + + parent = mdd_object_find(env, mdd, pfid); + if (parent == NULL) + goto shrink; + else if (IS_ERR(parent)) + GOTO(stop, rc = PTR_ERR(parent)); + + if (!mdd_object_exists(parent)) + goto shrink; + + /* XXX: need more processing for remote object in the future. */ + if (mdd_object_remote(parent)) { + mdd_object_put(env, parent); + ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata); + continue; + } + + dir = mdd_object_child(parent); + if (unlikely(!dt_try_as_dir(env, dir))) + goto shrink; + + /* To guarantee the 'name' is terminated with '0'. */ + memcpy(info->mti_key, cname->ln_name, cname->ln_namelen); + info->mti_key[cname->ln_namelen] = 0; + cname->ln_name = info->mti_key; + rc = dt_lookup(env, dir, (struct dt_rec *)cfid, + (const struct dt_key *)cname->ln_name, + BYPASS_CAPA); + if (rc != 0 && rc != -ENOENT) { + mdd_object_put(env, parent); + GOTO(stop, rc); + } + + if (rc == 0) { + if (lu_fid_eq(cfid, mdo2fid(child))) { + mdd_object_put(env, parent); + ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata); + continue; + } + + goto shrink; + } + + if (ldata.ld_leh->leh_reccount > la->la_nlink) + goto shrink; + + /* XXX: For the case of there is linkea entry, but without name + * entry pointing to the object, and the object link count + * isn't less than the count of name entries, then add the + * name entry back to namespace. + * + * It is out of LFSCK 1.5 scope, will implement it in the + * future. Keep the linkEA entry. */ + mdd_object_put(env, parent); + ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata); + continue; + +shrink: + if (parent != NULL) + mdd_object_put(env, parent); + if (bk->lb_param & LPF_DRYRUN) + RETURN(1); + + CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n", + PFID(mdo2fid(child)), cname->ln_namelen, cname->ln_name, + PFID(pfid)); + linkea_del_buf(&ldata, cname); + update = true; } + if (update) { + if (!com->lc_journal) { + com->lc_journal = 1; + goto again; + } - rc = dt_destroy(env, child, handle); - if (rc == 0) { - lu_object_put(env, &child->do_lu); - com->lc_obj = NULL; + rc = mdd_links_write(env, child, &ldata, handle); } GOTO(stop, rc); stop: - dt_trans_stop(env, mdd->mdd_bottom, handle); + if (locked) + mdd_write_unlock(env, child); -out: - lu_object_put(env, &parent->do_lu); + if (handle != NULL) + mdd_trans_stop(env, mdd, rc, handle); + + if (rc == 0 && update) { + ns->ln_objs_nlink_repaired++; + rc = 1; + } return rc; } @@ -651,12 +967,9 @@ out: static int mdd_lfsck_namespace_reset(const struct lu_env *env, struct lfsck_component *com, bool init) { - struct mdd_thread_info *info = mdd_env_info(env); - struct lu_fid *fid = &info->mti_fid; struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram; struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck); - struct md_object *mdo; - struct dt_object *dto; + struct dt_object *dto, *root; int rc; ENTRY; @@ -674,39 +987,39 @@ static int mdd_lfsck_namespace_reset(const struct lu_env *env, ns->ln_magic = LFSCK_NAMESPACE_MAGIC; ns->ln_status = LS_INIT; - rc = mdd_lfsck_namespace_unlink(env, mdd, com); + root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid); + if (unlikely(IS_ERR(root))) + GOTO(out, rc = PTR_ERR(root)); + + rc = local_object_unlink(env, mdd->mdd_bottom, root, + lfsck_namespace_name); if (rc != 0) GOTO(out, rc); - lu_local_obj_fid(fid, LFSCK_NAMESPACE_OID); - mdo = llo_store_create_index(env, &mdd->mdd_md_dev, mdd->mdd_bottom, "", - lfsck_namespace_name, fid, - &dt_lfsck_features); - if (IS_ERR(mdo)) - GOTO(out, rc = PTR_ERR(mdo)); - - lu_object_put(env, &mdo->mo_lu); - dto = dt_store_open(env, mdd->mdd_bottom, "", lfsck_namespace_name, fid); + dto = local_index_find_or_create(env, mdd->mdd_los, root, + lfsck_namespace_name, + S_IFREG | S_IRUGO | S_IWUSR, + &dt_lfsck_features); if (IS_ERR(dto)) GOTO(out, rc = PTR_ERR(dto)); - com->lc_obj = dto; rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features); if (rc != 0) GOTO(out, rc); + com->lc_obj = dto; rc = mdd_lfsck_namespace_store(env, com, true); GOTO(out, rc); - out: + lu_object_put(env, &root->do_lu); up_write(&com->lc_sem); return rc; } static void mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com, - bool oit, bool new_checked) + bool new_checked) { struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram; @@ -716,7 +1029,7 @@ mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com, ns->ln_items_failed++; if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) mdd_lfsck_pos_fill(env, com->lc_lfsck, - &ns->ln_pos_first_inconsistent, oit, !oit); + &ns->ln_pos_first_inconsistent, false); up_write(&com->lc_sem); } @@ -734,11 +1047,10 @@ static int mdd_lfsck_namespace_checkpoint(const struct lu_env *env, down_write(&com->lc_sem); - ns->ln_pos_last_checkpoint = lfsck->ml_pos_current; if (init) { - ns->ln_time_last_checkpoint = ns->ln_time_latest_start; ns->ln_pos_latest_start = lfsck->ml_pos_current; } else { + ns->ln_pos_last_checkpoint = lfsck->ml_pos_current; ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() + HALF_SEC - lfsck->ml_time_last_checkpoint); ns->ln_time_last_checkpoint = cfs_time_current_sec(); @@ -812,13 +1124,6 @@ static int mdd_lfsck_namespace_prep(const struct lu_env *env, mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) { *pos = ns->ln_pos_last_checkpoint; pos->lp_oit_cookie++; - if (!fid_is_zero(&pos->lp_dir_parent)) { - if (pos->lp_dir_cookie == MDS_DIR_END_OFF) { - fid_zero(&pos->lp_dir_parent); - } else { - pos->lp_dir_cookie++; - } - } } else { *pos = ns->ln_pos_first_inconsistent; } @@ -841,16 +1146,72 @@ static int mdd_lfsck_namespace_exec_oit(const struct lu_env *env, return 0; } -/* XXX: to be implemented in other patch. */ +static int mdd_declare_lfsck_namespace_exec_dir(const struct lu_env *env, + struct mdd_object *obj, + struct thandle *handle) +{ + int rc; + + /* For destroying all invalid linkEA entries. */ + rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle); + if (rc != 0) + return rc; + + /* For insert new linkEA entry. */ + rc = mdd_declare_links_add(env, obj, handle, NULL); + return rc; +} + +static int mdd_lfsck_namespace_check_exist(const struct lu_env *env, + struct md_lfsck *lfsck, + struct mdd_object *obj, + const char *name) +{ + struct dt_object *dir = lfsck->ml_obj_dir; + struct lu_fid *fid = &mdd_env_info(env)->mti_fid; + int rc; + ENTRY; + + if (unlikely(mdd_is_dead_obj(obj))) + RETURN(LFSCK_NAMEENTRY_DEAD); + + rc = dt_lookup(env, dir, (struct dt_rec *)fid, + (const struct dt_key *)name, BYPASS_CAPA); + if (rc == -ENOENT) + RETURN(LFSCK_NAMEENTRY_REMOVED); + + if (rc < 0) + RETURN(rc); + + if (!lu_fid_eq(fid, mdo2fid(obj))) + RETURN(LFSCK_NAMEENTRY_RECREATED); + + RETURN(0); +} + static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env, struct lfsck_component *com, struct mdd_object *obj, struct lu_dirent *ent) { + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la; + struct md_lfsck *lfsck = com->lc_lfsck; + struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram; struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram; + struct mdd_device *mdd = mdd_lfsck2mdd(lfsck); + struct linkea_data ldata = { 0 }; + const struct lu_fid *pfid = + lu_object_fid(&lfsck->ml_obj_dir->do_lu); + const struct lu_fid *cfid = mdo2fid(obj); const struct lu_name *cname; - int repaired; + struct thandle *handle = NULL; + bool repaired = false; + bool locked = false; + int count = 0; + int rc; + ENTRY; cname = mdd_name_get_const(env, ent->lde_name, ent->lde_namelen); down_write(&com->lc_sem); @@ -858,22 +1219,190 @@ static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env, if (ent->lde_attrs & LUDA_UPGRADE) { ns->ln_flags |= LF_UPGRADE; - repaired = 1; + repaired = true; } else if (ent->lde_attrs & LUDA_REPAIR) { ns->ln_flags |= LF_INCONSISTENT; - repaired = 1; + repaired = true; + } + + if (ent->lde_name[0] == '.' && + (ent->lde_namelen == 1 || + (ent->lde_namelen == 2 && ent->lde_name[1] == '.') || + fid_is_dot_lustre(&ent->lde_fid))) + GOTO(out, rc = 0); + + if (!(bk->lb_param & LPF_DRYRUN) && + (com->lc_journal || repaired)) { + +again: + LASSERT(!locked); + + com->lc_journal = 1; + handle = mdd_trans_create(env, mdd); + if (IS_ERR(handle)) + GOTO(out, rc = PTR_ERR(handle)); + + rc = mdd_declare_lfsck_namespace_exec_dir(env, obj, handle); + if (rc != 0) + GOTO(stop, rc); + + rc = mdd_trans_start(env, mdd, handle); + if (rc != 0) + GOTO(stop, rc); + + mdd_write_lock(env, obj, MOR_TGT_CHILD); + locked = true; + } + + rc = mdd_lfsck_namespace_check_exist(env, lfsck, obj, ent->lde_name); + if (rc != 0) + GOTO(stop, rc); + + rc = mdd_links_read(env, obj, &ldata); + if (rc == 0) { + count = ldata.ld_leh->leh_reccount; + rc = linkea_links_find(&ldata, cname, pfid); + if (rc == 0) { + /* For dir, if there are more than one linkea entries, + * then remove all the other redundant linkea entries.*/ + if (unlikely(count > 1 && + S_ISDIR(mdd_object_type(obj)))) + goto unmatch; + + goto record; + } else { + +unmatch: + ns->ln_flags |= LF_INCONSISTENT; + if (bk->lb_param & LPF_DRYRUN) { + repaired = true; + goto record; + } + + /*For dir, remove the unmatched linkea entry directly.*/ + if (S_ISDIR(mdd_object_type(obj))) { + if (!com->lc_journal) + goto again; + + rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK, + handle, BYPASS_CAPA); + if (rc != 0) + GOTO(stop, rc); + + goto nodata; + } else { + goto add; + } + } + } else if (unlikely(rc == -EINVAL)) { + ns->ln_flags |= LF_INCONSISTENT; + if (bk->lb_param & LPF_DRYRUN) { + count = 1; + repaired = true; + goto record; + } + + if (!com->lc_journal) + goto again; + + /* The magic crashed, we are not sure whether there are more + * corrupt data in the linkea, so remove all linkea entries. */ + rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK, handle, + BYPASS_CAPA); + if (rc != 0) + GOTO(stop, rc); + + goto nodata; + } else if (rc == -ENODATA) { + ns->ln_flags |= LF_UPGRADE; + if (bk->lb_param & LPF_DRYRUN) { + count = 1; + repaired = true; + goto record; + } + +nodata: + rc = linkea_data_new(&ldata, &mdd_env_info(env)->mti_link_buf); + if (rc != 0) + GOTO(stop, rc); + +add: + if (!com->lc_journal) + goto again; + + rc = linkea_add_buf(&ldata, cname, pfid); + if (rc != 0) + GOTO(stop, rc); + + rc = mdd_links_write(env, obj, &ldata, handle); + if (rc != 0) + GOTO(stop, rc); + + count = ldata.ld_leh->leh_reccount; + repaired = true; } else { - repaired = 0; + GOTO(stop, rc); + } + +record: + LASSERT(count > 0); + + rc = mdd_la_get(env, obj, la, BYPASS_CAPA); + if (rc != 0) + GOTO(stop, rc); + + if ((count == 1) && + (la->la_nlink == 1 || S_ISDIR(mdd_object_type(obj)))) + /* Usually, it is for single linked object or dir, do nothing.*/ + GOTO(stop, rc); + + /* Following modification will be in another transaction. */ + if (handle != NULL) { + LASSERT(mdd_write_locked(env, obj)); + + mdd_write_unlock(env, obj); + locked = false; + + mdd_trans_stop(env, mdd, 0, handle); + handle = NULL; } - ns->ln_items_repaired += repaired; + ns->ln_mlinked_checked++; + rc = mdd_lfsck_namespace_update(env, com, cfid, + count != la->la_nlink ? LLF_UNMATCH_NLINKS : 0, false); + + GOTO(out, rc); + +stop: + if (locked) + mdd_write_unlock(env, obj); + + if (handle != NULL) + mdd_trans_stop(env, mdd, rc, handle); + +out: + if (rc < 0) { + ns->ln_items_failed++; + if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) + mdd_lfsck_pos_fill(env, lfsck, + &ns->ln_pos_first_inconsistent, + false); + if (!(bk->lb_param & LPF_FAILOUT)) + rc = 0; + } else { + if (repaired) + ns->ln_items_repaired++; + else + com->lc_journal = 0; + rc = 0; + } up_write(&com->lc_sem); - return 0; + return rc; } static int mdd_lfsck_namespace_post(const struct lu_env *env, struct lfsck_component *com, - int result) + int result, bool init) { struct md_lfsck *lfsck = com->lc_lfsck; struct lfsck_namespace *ns = @@ -883,6 +1412,8 @@ static int mdd_lfsck_namespace_post(const struct lu_env *env, down_write(&com->lc_sem); spin_lock(&lfsck->ml_lock); + if (!init) + ns->ln_pos_last_checkpoint = lfsck->ml_pos_current; if (result > 0) { ns->ln_status = LS_SCANNING_PHASE2; ns->ln_flags |= LF_SCANNED_ONCE; @@ -907,11 +1438,13 @@ static int mdd_lfsck_namespace_post(const struct lu_env *env, } spin_unlock(&lfsck->ml_lock); - ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() + + if (!init) { + ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() + HALF_SEC - lfsck->ml_time_last_checkpoint); - ns->ln_time_last_checkpoint = cfs_time_current_sec(); - ns->ln_items_checked += com->lc_new_checked; - com->lc_new_checked = 0; + ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_items_checked += com->lc_new_checked; + com->lc_new_checked = 0; + } rc = mdd_lfsck_namespace_store(env, com, false); @@ -919,41 +1452,404 @@ static int mdd_lfsck_namespace_post(const struct lu_env *env, return rc; } -/* XXX: to be implemented in other patch. */ static int mdd_lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, char *buf, int len) { - return 0; + struct md_lfsck *lfsck = com->lc_lfsck; + struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram; + struct lfsck_namespace *ns = + (struct lfsck_namespace *)com->lc_file_ram; + int save = len; + int ret = -ENOSPC; + int rc; + + down_read(&com->lc_sem); + rc = snprintf(buf, len, + "name: lfsck_namespace\n" + "magic: 0x%x\n" + "version: %d\n" + "status: %s\n", + ns->ln_magic, + bk->lb_version, + lfsck_status_names[ns->ln_status]); + if (rc <= 0) + goto out; + + buf += rc; + len -= rc; + rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names, + "flags"); + if (rc < 0) + goto out; + + rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names, + "param"); + if (rc < 0) + goto out; + + rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete, + "time_since_last_completed"); + if (rc < 0) + goto out; + + rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start, + "time_since_latest_start"); + if (rc < 0) + goto out; + + rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint, + "time_since_last_checkpoint"); + if (rc < 0) + goto out; + + rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start, + "latest_start_position"); + if (rc < 0) + goto out; + + rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint, + "last_checkpoint_position"); + if (rc < 0) + goto out; + + rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent, + "first_failure_position"); + if (rc < 0) + goto out; + + if (ns->ln_status == LS_SCANNING_PHASE1) { + struct lfsck_position pos; + cfs_duration_t duration = cfs_time_current() - + lfsck->ml_time_last_checkpoint; + __u64 checked = ns->ln_items_checked + com->lc_new_checked; + __u64 speed = checked; + __u64 new_checked = com->lc_new_checked * CFS_HZ; + __u32 rtime = ns->ln_run_time_phase1 + + cfs_duration_sec(duration + HALF_SEC); + + if (duration != 0) + do_div(new_checked, duration); + if (rtime != 0) + do_div(speed, rtime); + rc = snprintf(buf, len, + "checked_phase1: "LPU64"\n" + "checked_phase2: "LPU64"\n" + "updated_phase1: "LPU64"\n" + "updated_phase2: "LPU64"\n" + "failed_phase1: "LPU64"\n" + "failed_phase2: "LPU64"\n" + "dirs: "LPU64"\n" + "M-linked: "LPU64"\n" + "nlinks_repaired: "LPU64"\n" + "lost_found: "LPU64"\n" + "success_count: %u\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: "LPU64" items/sec\n" + "average_speed_phase2: N/A\n" + "real-time_speed_phase1: "LPU64" items/sec\n" + "real-time_speed_phase2: N/A\n", + checked, + ns->ln_objs_checked_phase2, + ns->ln_items_repaired, + ns->ln_objs_repaired_phase2, + ns->ln_items_failed, + ns->ln_objs_failed_phase2, + ns->ln_dirs_checked, + ns->ln_mlinked_checked, + ns->ln_objs_nlink_repaired, + ns->ln_objs_lost_found, + ns->ln_success_count, + rtime, + ns->ln_run_time_phase2, + speed, + new_checked); + if (rc <= 0) + goto out; + + buf += rc; + len -= rc; + mdd_lfsck_pos_fill(env, lfsck, &pos, false); + rc = lfsck_pos_dump(&buf, &len, &pos, "current_position"); + if (rc <= 0) + goto out; + } else if (ns->ln_status == LS_SCANNING_PHASE2) { + cfs_duration_t duration = cfs_time_current() - + lfsck->ml_time_last_checkpoint; + __u64 checked = ns->ln_objs_checked_phase2 + + com->lc_new_checked; + __u64 speed1 = ns->ln_items_checked; + __u64 speed2 = checked; + __u64 new_checked = com->lc_new_checked * CFS_HZ; + __u32 rtime = ns->ln_run_time_phase2 + + cfs_duration_sec(duration + HALF_SEC); + + if (duration != 0) + do_div(new_checked, duration); + if (ns->ln_run_time_phase1 != 0) + do_div(speed1, ns->ln_run_time_phase1); + if (rtime != 0) + do_div(speed2, rtime); + rc = snprintf(buf, len, + "checked_phase1: "LPU64"\n" + "checked_phase2: "LPU64"\n" + "updated_phase1: "LPU64"\n" + "updated_phase2: "LPU64"\n" + "failed_phase1: "LPU64"\n" + "failed_phase2: "LPU64"\n" + "dirs: "LPU64"\n" + "M-linked: "LPU64"\n" + "nlinks_repaired: "LPU64"\n" + "lost_found: "LPU64"\n" + "success_count: %u\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: "LPU64" items/sec\n" + "average_speed_phase2: "LPU64" objs/sec\n" + "real-time_speed_phase1: N/A\n" + "real-time_speed_phase2: "LPU64" objs/sec\n" + "current_position: "DFID"\n", + ns->ln_items_checked, + checked, + ns->ln_items_repaired, + ns->ln_objs_repaired_phase2, + ns->ln_items_failed, + ns->ln_objs_failed_phase2, + ns->ln_dirs_checked, + ns->ln_mlinked_checked, + ns->ln_objs_nlink_repaired, + ns->ln_objs_lost_found, + ns->ln_success_count, + ns->ln_run_time_phase1, + rtime, + speed1, + speed2, + new_checked, + PFID(&ns->ln_fid_latest_scanned_phase2)); + if (rc <= 0) + goto out; + + buf += rc; + len -= rc; + } else { + __u64 speed1 = ns->ln_items_checked; + __u64 speed2 = ns->ln_objs_checked_phase2; + + if (ns->ln_run_time_phase1 != 0) + do_div(speed1, ns->ln_run_time_phase1); + if (ns->ln_run_time_phase2 != 0) + do_div(speed2, ns->ln_run_time_phase2); + rc = snprintf(buf, len, + "checked_phase1: "LPU64"\n" + "checked_phase2: "LPU64"\n" + "updated_phase1: "LPU64"\n" + "updated_phase2: "LPU64"\n" + "failed_phase1: "LPU64"\n" + "failed_phase2: "LPU64"\n" + "dirs: "LPU64"\n" + "M-linked: "LPU64"\n" + "nlinks_repaired: "LPU64"\n" + "lost_found: "LPU64"\n" + "success_count: %u\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: "LPU64" items/sec\n" + "average_speed_phase2: "LPU64" objs/sec\n" + "real-time_speed_phase1: N/A\n" + "real-time_speed_phase2: N/A\n" + "current_position: N/A\n", + ns->ln_items_checked, + ns->ln_objs_checked_phase2, + ns->ln_items_repaired, + ns->ln_objs_repaired_phase2, + ns->ln_items_failed, + ns->ln_objs_failed_phase2, + ns->ln_dirs_checked, + ns->ln_mlinked_checked, + ns->ln_objs_nlink_repaired, + ns->ln_objs_lost_found, + ns->ln_success_count, + ns->ln_run_time_phase1, + ns->ln_run_time_phase2, + speed1, + speed2); + if (rc <= 0) + goto out; + + buf += rc; + len -= rc; + } + ret = save - len; + +out: + up_read(&com->lc_sem); + return ret; } -/* XXX: to be implemented in other patch. */ static int mdd_lfsck_namespace_double_scan(const struct lu_env *env, struct lfsck_component *com) { struct md_lfsck *lfsck = com->lc_lfsck; + struct ptlrpc_thread *thread = &lfsck->ml_thread; + struct mdd_device *mdd = mdd_lfsck2mdd(lfsck); struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram; struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram; + struct dt_object *obj = com->lc_obj; + const struct dt_it_ops *iops = &obj->do_index_ops->dio_it; + struct mdd_object *target; + struct dt_it *di; + struct dt_key *key; + struct lu_fid fid; int rc; + __u8 flags; + ENTRY; + + lfsck->ml_new_scanned = 0; + lfsck->ml_time_last_checkpoint = cfs_time_current(); + lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint + + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + + di = iops->init(env, obj, 0, BYPASS_CAPA); + if (IS_ERR(di)) + RETURN(PTR_ERR(di)); + + fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2); + rc = iops->get(env, di, (const struct dt_key *)&fid); + if (rc < 0) + GOTO(fini, rc); + + /* Skip the start one, which either has been processed or non-exist. */ + rc = iops->next(env, di); + if (rc != 0) + GOTO(put, rc); + + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN)) + GOTO(put, rc = 0); + + do { + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) && + cfs_fail_val > 0) { + struct l_wait_info lwi; + + lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), + NULL, NULL); + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread), + &lwi); + } + + key = iops->key(env, di); + fid_be_to_cpu(&fid, (const struct lu_fid *)key); + target = mdd_object_find(env, mdd, &fid); + down_write(&com->lc_sem); + if (target == NULL) { + rc = 0; + goto checkpoint; + } else if (IS_ERR(target)) { + rc = PTR_ERR(target); + goto checkpoint; + } + + /* XXX: need more processing for remote object in the future. */ + if (!mdd_object_exists(target) || mdd_object_remote(target)) + goto obj_put; + + rc = iops->rec(env, di, (struct dt_rec *)&flags, 0); + if (rc == 0) + rc = mdd_lfsck_namespace_double_scan_one(env, com, + target, flags); + +obj_put: + mdd_object_put(env, target); +checkpoint: + lfsck->ml_new_scanned++; + com->lc_new_checked++; + ns->ln_fid_latest_scanned_phase2 = fid; + if (rc > 0) + ns->ln_objs_repaired_phase2++; + else if (rc < 0) + ns->ln_objs_failed_phase2++; + up_write(&com->lc_sem); + + if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) { + mdd_lfsck_namespace_delete(env, com, &fid); + } else if (rc < 0) { + flags |= LLF_REPAIR_FAILED; + mdd_lfsck_namespace_update(env, com, &fid, flags, true); + } + + if (rc < 0 && bk->lb_param & LPF_FAILOUT) + GOTO(put, rc); + + if (likely(cfs_time_beforeq(cfs_time_current(), + lfsck->ml_time_next_checkpoint)) || + com->lc_new_checked == 0) + goto speed; + + down_write(&com->lc_sem); + ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() + + HALF_SEC - lfsck->ml_time_last_checkpoint); + ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_objs_checked_phase2 += com->lc_new_checked; + com->lc_new_checked = 0; + rc = mdd_lfsck_namespace_store(env, com, false); + up_write(&com->lc_sem); + if (rc != 0) + GOTO(put, rc); + + lfsck->ml_time_last_checkpoint = cfs_time_current(); + lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint + + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + +speed: + mdd_lfsck_control_speed(lfsck); + if (unlikely(!thread_is_running(thread))) + GOTO(put, rc = 0); + + rc = iops->next(env, di); + } while (rc == 0); + + GOTO(put, rc); + +put: + iops->put(env, di); + +fini: + iops->fini(env, di); down_write(&com->lc_sem); + ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() + + HALF_SEC - lfsck->ml_time_last_checkpoint); ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_objs_checked_phase2 += com->lc_new_checked; com->lc_new_checked = 0; - com->lc_journal = 0; - ns->ln_status = LS_COMPLETED; - if (!(bk->lb_param & LPF_DRYRUN)) - ns->ln_flags &= - ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE); - ns->ln_time_last_complete = ns->ln_time_last_checkpoint; - ns->ln_success_count++; + if (rc > 0) { + com->lc_journal = 0; + ns->ln_status = LS_COMPLETED; + if (!(bk->lb_param & LPF_DRYRUN)) + ns->ln_flags &= + ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE); + ns->ln_time_last_complete = ns->ln_time_last_checkpoint; + ns->ln_success_count++; + } else if (rc == 0) { + if (lfsck->ml_paused) + ns->ln_status = LS_PAUSED; + else + ns->ln_status = LS_STOPPED; + } else { + ns->ln_status = LS_FAILED; + } - spin_lock(&lfsck->ml_lock); - cfs_list_del_init(&com->lc_link); - cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle); - spin_unlock(&lfsck->ml_lock); + if (ns->ln_status != LS_PAUSED) { + spin_lock(&lfsck->ml_lock); + cfs_list_del_init(&com->lc_link); + cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle); + spin_unlock(&lfsck->ml_lock); + } rc = mdd_lfsck_namespace_store(env, com, false); @@ -978,11 +1874,11 @@ static struct lfsck_operations mdd_lfsck_namespace_ops = { static int mdd_lfsck_namespace_setup(const struct lu_env *env, struct md_lfsck *lfsck) { - struct mdd_device *mdd = mdd_lfsck2mdd(lfsck); - struct lfsck_component *com; - struct lfsck_namespace *ns; - struct dt_object *obj; - int rc; + struct mdd_device *mdd = mdd_lfsck2mdd(lfsck); + struct lfsck_component *com; + struct lfsck_namespace *ns; + struct dt_object *obj, *root; + int rc; ENTRY; OBD_ALLOC_PTR(com); @@ -1005,8 +1901,15 @@ static int mdd_lfsck_namespace_setup(const struct lu_env *env, if (com->lc_file_disk == NULL) GOTO(out, rc = -ENOMEM); - obj = dt_store_open(env, mdd->mdd_bottom, "", lfsck_namespace_name, - &mdd_env_info(env)->mti_fid); + root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid); + if (unlikely(IS_ERR(root))) + GOTO(out, rc = PTR_ERR(root)); + + obj = local_index_find_or_create(env, mdd->mdd_los, root, + lfsck_namespace_name, + S_IFREG | S_IRUGO | S_IWUSR, + &dt_lfsck_features); + lu_object_put(env, &root->do_lu); if (IS_ERR(obj)) GOTO(out, rc = PTR_ERR(obj)); @@ -1059,9 +1962,8 @@ out: /* helper functions for framework */ -static int object_is_client_visible(const struct lu_env *env, - struct mdd_device *mdd, - struct mdd_object *obj) +static int object_needs_lfsck(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *obj) { struct lu_fid *fid = &mdd_env_info(env)->mti_fid; int depth = 0; @@ -1076,6 +1978,13 @@ static int object_is_client_visible(const struct lu_env *env, return 1; } + /* .lustre doesn't contain "real" user objects, no need lfsck */ + if (fid_is_dot_lustre(mdo2fid(obj))) { + if (depth > 0) + mdd_object_put(env, obj); + return 0; + } + mdd_read_lock(env, obj, MOR_TGT_CHILD); if (unlikely(mdd_is_dead_obj(obj))) { mdd_read_unlock(env, obj); @@ -1115,12 +2024,17 @@ static int object_is_client_visible(const struct lu_env *env, else if (IS_ERR(obj)) return PTR_ERR(obj); - /* XXX: need more processing for remote object in the future. */ - if (!mdd_object_exists(obj) || mdd_object_remote(obj)) { + if (!mdd_object_exists(obj)) { mdd_object_put(env, obj); return 0; } + /* Currently, only client visible directory can be remote. */ + if (mdd_object_remote(obj)) { + mdd_object_put(env, obj); + return 1; + } + depth++; } return 0; @@ -1143,17 +2057,17 @@ static void mdd_lfsck_unpack_ent(struct lu_dirent *ent) /* LFSCK wrap functions */ static void mdd_lfsck_fail(const struct lu_env *env, struct md_lfsck *lfsck, - bool oit, bool new_checked) + bool new_checked) { struct lfsck_component *com; cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) { - com->lc_ops->lfsck_fail(env, com, oit, new_checked); + com->lc_ops->lfsck_fail(env, com, new_checked); } } static int mdd_lfsck_checkpoint(const struct lu_env *env, - struct md_lfsck *lfsck, bool oit) + struct md_lfsck *lfsck) { struct lfsck_component *com; int rc; @@ -1162,7 +2076,7 @@ static int mdd_lfsck_checkpoint(const struct lu_env *env, lfsck->ml_time_next_checkpoint))) return 0; - mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, oit, !oit); + mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false); cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) { rc = com->lc_ops->lfsck_checkpoint(env, com, false); if (rc != 0) @@ -1192,6 +2106,7 @@ static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck) LASSERT(lfsck->ml_obj_dir == NULL); LASSERT(lfsck->ml_di_dir == NULL); + lfsck->ml_current_oit_processed = 0; cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) { com->lc_new_checked = 0; if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN) @@ -1210,12 +2125,19 @@ static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck) /* Init otable-based iterator. */ if (pos == NULL) { rc = iops->load(env, lfsck->ml_di_oit, 0); - GOTO(out, rc = (rc >= 0 ? 0 : rc)); + if (rc > 0) { + lfsck->ml_oit_over = 1; + rc = 0; + } + + GOTO(out, rc); } rc = iops->load(env, lfsck->ml_di_oit, pos->lp_oit_cookie); if (rc < 0) GOTO(out, rc); + else if (rc > 0) + lfsck->ml_oit_over = 1; if (fid_is_zero(&pos->lp_dir_parent)) GOTO(out, rc = 0); @@ -1245,8 +2167,10 @@ static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck) if (IS_ERR(di)) GOTO(out, rc = PTR_ERR(di)); + LASSERT(pos->lp_dir_cookie < MDS_DIR_END_OFF); + rc = iops->load(env, di, pos->lp_dir_cookie); - if (rc == 0) + if ((rc == 0) || (rc > 0 && pos->lp_dir_cookie > 0)) rc = iops->next(env, di); else if (rc > 0) rc = 0; @@ -1269,10 +2193,16 @@ out: if (obj != NULL) mdd_object_put(env, obj); - if (rc != 0) - return (rc > 0 ? 0 : rc); + if (rc < 0) { + cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, + lc_link) + com->lc_ops->lfsck_post(env, com, rc, true); - mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false, false); + return rc; + } + + rc = 0; + mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true); cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) { rc = com->lc_ops->lfsck_checkpoint(env, com, true); if (rc != 0) @@ -1307,7 +2237,7 @@ static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck, cfs_list_empty(&lfsck->ml_list_dir)) RETURN(0); - rc = object_is_client_visible(env, mdd_lfsck2mdd(lfsck), obj); + rc = object_needs_lfsck(env, mdd_lfsck2mdd(lfsck), obj); if (rc <= 0) GOTO(out, rc); @@ -1345,7 +2275,7 @@ static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck, out: if (rc < 0) - mdd_lfsck_fail(env, lfsck, false, false); + mdd_lfsck_fail(env, lfsck, false); return (rc > 0 ? 0 : rc); } @@ -1370,9 +2300,9 @@ static int mdd_lfsck_post(const struct lu_env *env, struct md_lfsck *lfsck, struct lfsck_component *next; int rc; - mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true, true); + mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false); cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) { - rc = com->lc_ops->lfsck_post(env, com, result); + rc = com->lc_ops->lfsck_post(env, com, result, false); if (rc != 0) return rc; } @@ -1422,11 +2352,22 @@ static int mdd_lfsck_dir_engine(const struct lu_env *env, do { struct mdd_object *child; + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY2) && + cfs_fail_val > 0) { + struct l_wait_info lwi; + + lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), + NULL, NULL); + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread), + &lwi); + } + lfsck->ml_new_scanned++; rc = iops->rec(env, di, (struct dt_rec *)ent, lfsck->ml_args_dir); if (rc != 0) { - mdd_lfsck_fail(env, lfsck, false, true); + mdd_lfsck_fail(env, lfsck, true); if (bk->lb_param & LPF_FAILOUT) RETURN(rc); else @@ -1442,7 +2383,7 @@ static int mdd_lfsck_dir_engine(const struct lu_env *env, if (child == NULL) { goto checkpoint; } else if (IS_ERR(child)) { - mdd_lfsck_fail(env, lfsck, false, true); + mdd_lfsck_fail(env, lfsck, true); if (bk->lb_param & LPF_FAILOUT) RETURN(PTR_ERR(child)); else @@ -1457,7 +2398,7 @@ static int mdd_lfsck_dir_engine(const struct lu_env *env, RETURN(rc); checkpoint: - rc = mdd_lfsck_checkpoint(env, lfsck, false); + rc = mdd_lfsck_checkpoint(env, lfsck); if (rc != 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); @@ -1466,6 +2407,13 @@ checkpoint: if (unlikely(!thread_is_running(thread))) RETURN(0); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL2)) { + spin_lock(&lfsck->ml_lock); + thread_set_flags(thread, SVC_STOPPING); + spin_unlock(&lfsck->ml_lock); + RETURN(-EINVAL); + } + rc = iops->next(env, di); } while (rc == 0); @@ -1501,10 +2449,25 @@ static int mdd_lfsck_oit_engine(const struct lu_env *env, if (unlikely(lfsck->ml_oit_over)) RETURN(1); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY1) && + cfs_fail_val > 0) { + struct l_wait_info lwi; + + lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), + NULL, NULL); + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread), + &lwi); + } + + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH)) + RETURN(0); + + lfsck->ml_current_oit_processed = 1; lfsck->ml_new_scanned++; rc = iops->rec(env, di, (struct dt_rec *)fid, 0); if (rc != 0) { - mdd_lfsck_fail(env, lfsck, true, true); + mdd_lfsck_fail(env, lfsck, true); if (bk->lb_param & LPF_FAILOUT) RETURN(rc); else @@ -1515,7 +2478,7 @@ static int mdd_lfsck_oit_engine(const struct lu_env *env, if (target == NULL) { goto checkpoint; } else if (IS_ERR(target)) { - mdd_lfsck_fail(env, lfsck, true, true); + mdd_lfsck_fail(env, lfsck, true); if (bk->lb_param & LPF_FAILOUT) RETURN(PTR_ERR(target)); else @@ -1532,16 +2495,25 @@ static int mdd_lfsck_oit_engine(const struct lu_env *env, RETURN(rc); checkpoint: - rc = mdd_lfsck_checkpoint(env, lfsck, true); + rc = mdd_lfsck_checkpoint(env, lfsck); if (rc != 0 && bk->lb_param & LPF_FAILOUT) RETURN(rc); /* Rate control. */ mdd_lfsck_control_speed(lfsck); + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL1)) { + spin_lock(&lfsck->ml_lock); + thread_set_flags(thread, SVC_STOPPING); + spin_unlock(&lfsck->ml_lock); + RETURN(-EINVAL); + } + rc = iops->next(env, di); - if (rc > 0) + if (unlikely(rc > 0)) lfsck->ml_oit_over = 1; + else if (likely(rc == 0)) + lfsck->ml_current_oit_processed = 0; if (unlikely(!thread_is_running(thread))) RETURN(0); @@ -1614,7 +2586,8 @@ static int mdd_lfsck_main(void *args) if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan)) oit_iops->put(&env, oit_di); - rc = mdd_lfsck_post(&env, lfsck, rc); + if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH)) + rc = mdd_lfsck_post(&env, lfsck, rc); if (lfsck->ml_di_dir != NULL) mdd_lfsck_close_dir(&env, lfsck); @@ -1658,6 +2631,24 @@ int mdd_lfsck_set_speed(const struct lu_env *env, struct md_lfsck *lfsck, return rc; } +int mdd_lfsck_dump(const struct lu_env *env, struct md_lfsck *lfsck, + __u16 type, char *buf, int len) +{ + struct lfsck_component *com; + int rc; + + if (!lfsck->ml_initialized) + return -ENODEV; + + com = mdd_lfsck_component_find(lfsck, type); + if (com == NULL) + return -ENOTSUPP; + + rc = com->lc_ops->lfsck_dump(env, com, buf, len); + mdd_lfsck_component_put(env, com); + return rc; +} + int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck, struct lfsck_start *start) { @@ -1675,7 +2666,9 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck, RETURN(-ENOTSUPP); /* start == NULL means auto trigger paused LFSCK. */ - if (start == NULL && cfs_list_empty(&lfsck->ml_list_scan)) + if ((start == NULL) && + (cfs_list_empty(&lfsck->ml_list_scan) || + OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO))) RETURN(0); mutex_lock(&lfsck->ml_mutex); @@ -1756,7 +2749,8 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck, cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) { if (!(com->lc_type & start->ls_active)) { - rc = com->lc_ops->lfsck_post(env, com, 0); + rc = com->lc_ops->lfsck_post(env, com, 0, + false); if (rc != 0) GOTO(out, rc); } @@ -1828,6 +2822,9 @@ int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck, struct l_wait_info lwi = { 0 }; ENTRY; + if (!lfsck->ml_initialized) + RETURN(0); + mutex_lock(&lfsck->ml_mutex); spin_lock(&lfsck->ml_lock); if (thread_is_init(thread) || thread_is_stopped(thread)) { @@ -1861,9 +2858,11 @@ static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE, int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd) { - struct md_lfsck *lfsck = &mdd->mdd_lfsck; - struct dt_object *obj; - int rc; + struct md_lfsck *lfsck = &mdd->mdd_lfsck; + struct dt_object *obj; + struct lu_fid fid; + int rc; + ENTRY; LASSERT(!lfsck->ml_initialized); @@ -1885,27 +2884,40 @@ int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd) rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features); if (rc != 0) { if (rc == -ENOTSUPP) - rc = 0; - - RETURN(rc); + RETURN(0); + GOTO(out, rc); } - obj = dt_store_open(env, mdd->mdd_bottom, "", lfsck_bookmark_name, - &mdd_env_info(env)->mti_fid); + /* LFSCK bookmark */ + fid_zero(&fid); + rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid, + lfsck_bookmark_name, + S_IFREG | S_IRUGO | S_IWUSR, &fid); + if (rc < 0) + GOTO(out, rc); + + obj = dt_locate(env, mdd->mdd_bottom, &fid); if (IS_ERR(obj)) - RETURN(PTR_ERR(obj)); + GOTO(out, rc = PTR_ERR(obj)); + LASSERT(lu_object_exists(&obj->do_lu)); lfsck->ml_bookmark_obj = obj; + rc = mdd_lfsck_bookmark_load(env, lfsck); if (rc == -ENODATA) rc = mdd_lfsck_bookmark_init(env, lfsck); if (rc != 0) - RETURN(rc); + GOTO(out, rc); rc = mdd_lfsck_namespace_setup(env, lfsck); + if (rc < 0) + GOTO(out, rc); /* XXX: LFSCK components initialization to be added here. */ - - RETURN(rc); + RETURN(0); +out: + lu_object_put(env, &lfsck->ml_obj_oit->do_lu); + lfsck->ml_obj_oit = NULL; + return 0; } void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)