X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_lib.c;h=3e6a1d578ae4d5a12bd985a072cc56a414048b70;hb=d35997e6e100acca3effb75ca402c9df7f6252ef;hp=325f7e7dc7e6ea2030ea2a9ec4a06af2f104eea1;hpb=53380e03668325423d6ffb80f3a955ad3a16d21a;p=fs%2Flustre-release.git diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 325f7e7..3e6a1d5 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -42,6 +42,8 @@ #include "lfsck_internal.h" +#define LFSCK_CHECKPOINT_SKIP 1 + /* define lfsck thread key */ LU_KEY_INIT(lfsck, struct lfsck_thread_info); @@ -51,6 +53,7 @@ static void lfsck_key_fini(const struct lu_context *ctx, struct lfsck_thread_info *info = data; lu_buf_free(&info->lti_linkea_buf); + lu_buf_free(&info->lti_linkea_buf2); lu_buf_free(&info->lti_big_buf); OBD_FREE_PTR(info); } @@ -98,6 +101,11 @@ const char *lfsck_param_names[] = { NULL }; +enum lfsck_verify_lpf_types { + LVLT_BY_BOOKMARK = 0, + LVLT_BY_NAMEENTRY = 1, +}; + const char *lfsck_status2names(enum lfsck_status status) { if (unlikely(status < 0 || status >= LS_MAX)) @@ -143,6 +151,8 @@ static void lfsck_tgt_descs_fini(struct lfsck_tgt_descs *ltds) if (likely(ltd != NULL)) { LASSERT(list_empty(&ltd->ltd_layout_list)); LASSERT(list_empty(&ltd->ltd_layout_phase_list)); + LASSERT(list_empty(&ltd->ltd_namespace_list)); + LASSERT(list_empty(&ltd->ltd_namespace_phase_list)); ltds->ltd_tgtnr--; cfs_bitmap_clear(ltds->ltd_tgts_bitmap, idx); @@ -412,8 +422,25 @@ void lfsck_ibits_unlock(struct lustre_handle *lh, ldlm_mode_t mode) } } -static const char dot[] = "."; -static const char dotdot[] = ".."; +int lfsck_find_mdt_idx_by_fid(const struct lu_env *env, + struct lfsck_instance *lfsck, + const struct lu_fid *fid) +{ + struct seq_server_site *ss = + lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site); + struct lu_seq_range *range = &lfsck_env_info(env)->lti_range; + int rc; + + fld_range_set_mdt(range); + rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), 
range); + if (rc == 0) + rc = range->lsr_index; + + return rc; +} + +const char dot[] = "."; +const char dotdot[] = ".."; static const char dotlustre[] = ".lustre"; static const char lostfound[] = "lost+found"; @@ -440,7 +467,7 @@ static int lfsck_create_lpf_local(const struct lu_env *env, ENTRY; rc = linkea_data_new(&ldata, - &lfsck_env_info(env)->lti_linkea_buf); + &lfsck_env_info(env)->lti_linkea_buf2); if (rc != 0) RETURN(rc); @@ -584,7 +611,7 @@ static int lfsck_create_lpf_remote(const struct lu_env *env, ENTRY; rc = linkea_data_new(&ldata, - &lfsck_env_info(env)->lti_linkea_buf); + &lfsck_env_info(env)->lti_linkea_buf2); if (rc != 0) RETURN(rc); @@ -865,6 +892,488 @@ out: return rc; } +/** + * Scan .lustre/lost+found for bad name entries and remove them. + * + * The valid name entry should be "MDTxxxx", the "xxxx" is the MDT device + * index in the system. Any other formatted name is invalid and should be + * removed. + * + * \param[in] env pointer to the thread context + * \param[in] lfsck pointer to the lfsck instance + * \param[in] parent pointer to the lost+found object + * + * \retval 0 for success + * \retval negative error number on failure + */ +static int lfsck_scan_lpf_bad_entries(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *parent) +{ + struct lu_dirent *ent = + (struct lu_dirent *)lfsck_env_info(env)->lti_key; + const struct dt_it_ops *iops = &parent->do_index_ops->dio_it; + struct dt_it *it; + int rc; + ENTRY; + + it = iops->init(env, parent, LUDA_64BITHASH, BYPASS_CAPA); + if (IS_ERR(it)) + RETURN(PTR_ERR(it)); + + rc = iops->load(env, it, 0); + if (rc == 0) + rc = iops->next(env, it); + else if (rc > 0) + rc = 0; + + while (rc == 0) { + int off = 3; + + rc = iops->rec(env, it, (struct dt_rec *)ent, LUDA_64BITHASH); + if (rc != 0) + break; + + ent->lde_namelen = le16_to_cpu(ent->lde_namelen); + if (ent->lde_name[0] == '.') { + if (ent->lde_namelen == 1) + goto next; + + if (ent->lde_namelen == 2 && 
ent->lde_name[1] == '.') + goto next; + } + + /* name length must be strlen("MDTxxxx") */ + if (ent->lde_namelen != 7) + goto remove; + + if (memcmp(ent->lde_name, "MDT", off) != 0) + goto remove; + + while (off < 7 && isxdigit(ent->lde_name[off])) + off++; + + if (off != 7) { + +remove: + rc = lfsck_remove_name_entry(env, lfsck, parent, + ent->lde_name, S_IFDIR); + if (rc != 0) + break; + } + +next: + rc = iops->next(env, it); + } + + iops->put(env, it); + iops->fini(env, it); + + RETURN(rc > 0 ? 0 : rc); +} + +static int lfsck_update_lpf_entry(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *parent, + struct dt_object *child, + const char *name, + enum lfsck_verify_lpf_types type) +{ + int rc; + + if (type == LVLT_BY_BOOKMARK) { + rc = lfsck_update_name_entry(env, lfsck, parent, name, + lfsck_dto2fid(child), S_IFDIR); + } else /* if (type == LVLT_BY_NAMEENTRY) */ { + lfsck->li_bookmark_ram.lb_lpf_fid = *lfsck_dto2fid(child); + rc = lfsck_bookmark_store(env, lfsck); + + CDEBUG(D_LFSCK, "%s: update LPF fid "DFID + " in the bookmark file: rc = %d\n", + lfsck_lfsck2name(lfsck), + PFID(lfsck_dto2fid(child)), rc); + } + + return rc; +} + +/** + * Check whether the @child back references the @parent. + * + * Two cases: + * 1) The child's FID is stored in the bookmark file. If the child back + * references the parent (LU_LPF_FID object) via its ".." entry, then + * insert the name (MDTxxxx) to the .lustre/lost+found; otherwise, if + * the child back references another parent2, then: + * 1.1) If the parent2 recognizes the child, then update the bookmark file; + * 1.2) Otherwise, the LFSCK cannot know whether there will be parent3 that + * references the child. So keep them there. As the LFSCK processing, + * the parent3 may be found, then when the LFSCK run next time, the + * inconsistency can be repaired. + * + * 2) The child's FID is stored in the .lustre/lost+found/ sub-directory name + * entry (MDTxxxx). 
If the child back references the parent (LU_LPF_FID obj) + * via its ".." entry, then update the bookmark file, otherwise, if the child + * back references another parent2, then: + * 2.1) If the parent2 recognizes the child, then remove the sub-directory + * from .lustre/lost+found/; + * 2.2) Otherwise, if the parent2 does not recognize the child, trust the + * sub-directory name entry and update the child; + * 2.3) Otherwise, if we do not know whether the parent2 recognizes the child + * or not, then keep them there. + * + * \param[in] env pointer to the thread context + * \param[in] lfsck pointer to the lfsck instance + * \param[in] parent pointer to the lost+found object + * \param[in] child pointer to the lost+found sub-directory object + * \param[in] name the name for lost+found sub-directory object + * \param[out] fid pointer to the buffer to hold the FID of the object + * (call it parent2) that is referenced via the + * child's dotdot entry; it also can be the FID that + * is referenced by the name entry under the parent2. 
+ * \param[in] type to indicate where the child's FID is stored in + * + * \retval positive number for uncertain inconsistency + * \retval 0 for success + * \retval negative error number on failure + */ +static int lfsck_verify_lpf_pairs(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *parent, + struct dt_object *child, const char *name, + struct lu_fid *fid, + enum lfsck_verify_lpf_types type) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + char *name2 = info->lti_key; + struct lu_fid *fid2 = &info->lti_fid3; + struct dt_object *parent2 = NULL; + struct lustre_handle lh = { 0 }; + int rc; + ENTRY; + + fid_zero(fid); + rc = dt_lookup(env, child, (struct dt_rec *)fid, + (const struct dt_key *)dotdot, BYPASS_CAPA); + if (rc != 0) + GOTO(linkea, rc); + + if (!fid_is_sane(fid)) + GOTO(linkea, rc = -EINVAL); + + if (lu_fid_eq(fid, &LU_LPF_FID)) { + const struct lu_name *cname; + + if (lfsck->li_lpf_obj == NULL) { + lu_object_get(&child->do_lu); + lfsck->li_lpf_obj = child; + } + + cname = lfsck_name_get_const(env, name, strlen(name)); + rc = lfsck_verify_linkea(env, lfsck->li_bottom, child, cname, + &LU_LPF_FID); + if (rc == 0) + rc = lfsck_update_lpf_entry(env, lfsck, parent, child, + name, type); + + GOTO(out_done, rc); + } + + parent2 = lfsck_object_find_by_dev(env, lfsck->li_next, fid); + if (IS_ERR(parent2)) + GOTO(linkea, parent2); + + if (!dt_object_exists(parent2)) { + lu_object_put(env, &parent2->do_lu); + + GOTO(linkea, parent2 = ERR_PTR(-ENOENT)); + } + + if (!dt_try_as_dir(env, parent2)) { + lu_object_put(env, &parent2->do_lu); + + GOTO(linkea, parent2 = ERR_PTR(-ENOTDIR)); + } + +linkea: + /* To prevent rename/unlink race */ + rc = lfsck_ibits_lock(env, lfsck, child, &lh, + MDS_INODELOCK_UPDATE, LCK_PR); + if (rc != 0) + GOTO(out_put, rc); + + dt_read_lock(env, child, 0); + rc = lfsck_links_get_first(env, child, name2, fid2); + if (rc != 0) { + dt_read_unlock(env, child); + lfsck_ibits_unlock(&lh, LCK_PR); + + 
GOTO(out_put, rc = 1); + } + + /* It is almost impossible that the bookmark file (or the name entry) + * and the linkEA hit the same data corruption. Trust the linkEA. */ + if (lu_fid_eq(fid2, &LU_LPF_FID) && strcmp(name, name2) == 0) { + dt_read_unlock(env, child); + lfsck_ibits_unlock(&lh, LCK_PR); + + *fid = *fid2; + if (lfsck->li_lpf_obj == NULL) { + lu_object_get(&child->do_lu); + lfsck->li_lpf_obj = child; + } + + /* Update the child's dotdot entry */ + rc = lfsck_update_name_entry(env, lfsck, child, dotdot, + &LU_LPF_FID, S_IFDIR); + if (rc == 0) + rc = lfsck_update_lpf_entry(env, lfsck, parent, child, + name, type); + + GOTO(out_put, rc); + } + + if (parent2 == NULL || IS_ERR(parent2)) { + dt_read_unlock(env, child); + lfsck_ibits_unlock(&lh, LCK_PR); + + GOTO(out_done, rc = 1); + } + + rc = dt_lookup(env, parent2, (struct dt_rec *)fid, + (const struct dt_key *)name2, BYPASS_CAPA); + dt_read_unlock(env, child); + lfsck_ibits_unlock(&lh, LCK_PR); + if (rc != 0 && rc != -ENOENT) + GOTO(out_put, rc); + + if (rc == -ENOENT || !lu_fid_eq(fid, lfsck_dto2fid(child))) { + if (type == LVLT_BY_BOOKMARK) + GOTO(out_put, rc = 1); + + /* Trust the name entry, update the child's dotdot entry. */ + rc = lfsck_update_name_entry(env, lfsck, child, dotdot, + &LU_LPF_FID, S_IFDIR); + + GOTO(out_put, rc); + } + + if (type == LVLT_BY_BOOKMARK) { + /* Invalid FID record in the bookmark file, reset it. */ + fid_zero(&lfsck->li_bookmark_ram.lb_lpf_fid); + rc = lfsck_bookmark_store(env, lfsck); + + CDEBUG(D_LFSCK, "%s: reset invalid LPF fid "DFID + " in the bookmark file: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(child)), rc); + } else /* if (type == LVLT_BY_NAMEENTRY) */ { + /* The name entry is wrong, remove it. 
*/ + rc = lfsck_remove_name_entry(env, lfsck, parent, name, S_IFDIR); + } + + GOTO(out_put, rc); + +out_put: + if (parent2 != NULL && !IS_ERR(parent2)) + lu_object_put(env, &parent2->do_lu); + +out_done: + return rc; +} + +/** + * Verify the /ROOT/.lustre/lost+found/ directory. + * + * /ROOT/.lustre/lost+found/ is a special directory to hold the objects that + * the LFSCK does not exactly know how to handle, such as orphans. So before + * the LFSCK scanning the system, the consistency of such directory needs to + * be verified firstly to allow the users to use it during the LFSCK. + * + * \param[in] env pointer to the thread context + * \param[in] lfsck pointer to the lfsck instance + * + * \retval positive number for uncertain inconsistency + * \retval 0 for success + * \retval negative error number on failure + */ +int lfsck_verify_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lu_fid *pfid = &info->lti_fid; + struct lu_fid *cfid = &info->lti_fid2; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct dt_object *parent = NULL; + /* child1's FID is in the bookmark file. */ + struct dt_object *child1 = NULL; + /* child2's FID is in the name entry MDTxxxx. 
*/ + struct dt_object *child2 = NULL; + struct dt_device *dev = lfsck->li_bottom; + const struct lu_name *cname; + char name[8]; + int node = lfsck_dev_idx(dev); + int rc = 0; + ENTRY; + + LASSERT(lfsck->li_master); + + if (node == 0) { + parent = lfsck_object_find_by_dev(env, dev, &LU_LPF_FID); + } else { + struct lfsck_tgt_desc *ltd; + + ltd = lfsck_tgt_get(&lfsck->li_mdt_descs, 0); + if (unlikely(ltd == NULL)) + RETURN(-ENXIO); + + parent = lfsck_object_find_by_dev(env, ltd->ltd_tgt, + &LU_LPF_FID); + lfsck_tgt_put(ltd); + } + + if (IS_ERR(parent)) + RETURN(PTR_ERR(parent)); + + LASSERT(dt_object_exists(parent)); + + if (unlikely(!dt_try_as_dir(env, parent))) + GOTO(put, rc = -ENOTDIR); + + if (node == 0) { + rc = lfsck_scan_lpf_bad_entries(env, lfsck, parent); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: scan .lustre/lost+found/ " + "for bad sub-directories: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + } + + if (!fid_is_zero(&bk->lb_lpf_fid)) { + if (unlikely(!fid_is_norm(&bk->lb_lpf_fid))) { + struct lu_fid tfid = bk->lb_lpf_fid; + + /* Invalid FID record in the bookmark file, reset it. */ + fid_zero(&bk->lb_lpf_fid); + rc = lfsck_bookmark_store(env, lfsck); + + CDEBUG(D_LFSCK, "%s: reset invalid LPF fid "DFID + " in the bookmark file: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(&tfid), rc); + + if (rc != 0) + GOTO(put, rc); + } else { + child1 = lfsck_object_find_by_dev(env, dev, + &bk->lb_lpf_fid); + if (IS_ERR(child1)) + GOTO(put, rc = PTR_ERR(child1)); + + if (unlikely(!dt_object_exists(child1) || + dt_object_remote(child1)) || + !S_ISDIR(lfsck_object_type(child1))) { + /* Invalid FID record in the bookmark file, + * reset it. 
*/ + fid_zero(&bk->lb_lpf_fid); + rc = lfsck_bookmark_store(env, lfsck); + + CDEBUG(D_LFSCK, "%s: reset invalid LPF fid "DFID + " in the bookmark file: rc = %d\n", + lfsck_lfsck2name(lfsck), + PFID(lfsck_dto2fid(child1)), rc); + + if (rc != 0) + GOTO(put, rc); + + lu_object_put(env, &child1->do_lu); + child1 = NULL; + } else if (unlikely(!dt_try_as_dir(env, child1))) { + GOTO(put, rc = -ENOTDIR); + } + } + } + + snprintf(name, 8, "MDT%04x", node); + rc = dt_lookup(env, parent, (struct dt_rec *)cfid, + (const struct dt_key *)name, BYPASS_CAPA); + if (rc == -ENOENT) { + if (!fid_is_zero(&bk->lb_lpf_fid)) + goto check_child1; + + GOTO(put, rc = 0); + } + + if (rc != 0) + GOTO(put, rc); + + /* Invalid FID in the name entry, remove the name entry. */ + if (!fid_is_norm(cfid)) { + rc = lfsck_remove_name_entry(env, lfsck, parent, name, S_IFDIR); + if (rc != 0) + GOTO(put, rc); + + goto check_child1; + } + + child2 = lfsck_object_find_by_dev(env, dev, cfid); + if (IS_ERR(child2)) + GOTO(put, rc = PTR_ERR(child2)); + + if (unlikely(!dt_object_exists(child2) || + dt_object_remote(child2)) || + !S_ISDIR(lfsck_object_type(child2))) { + rc = lfsck_remove_name_entry(env, lfsck, parent, name, + S_IFDIR); + if (rc != 0) + GOTO(put, rc); + + goto check_child1; + } + + if (unlikely(!dt_try_as_dir(env, child2))) + GOTO(put, rc = -ENOTDIR); + + if (child1 == NULL) { + rc = lfsck_verify_lpf_pairs(env, lfsck, parent, child2, name, + pfid, LVLT_BY_NAMEENTRY); + } else if (!lu_fid_eq(cfid, &bk->lb_lpf_fid)) { + rc = lfsck_verify_lpf_pairs(env, lfsck, parent, child1, name, + pfid, LVLT_BY_BOOKMARK); + if (!lu_fid_eq(pfid, &LU_LPF_FID)) + rc = lfsck_verify_lpf_pairs(env, lfsck, parent, child2, + name, pfid, + LVLT_BY_NAMEENTRY); + } else { + if (lfsck->li_lpf_obj == NULL) { + lu_object_get(&child2->do_lu); + lfsck->li_lpf_obj = child2; + } + + cname = lfsck_name_get_const(env, name, strlen(name)); + rc = lfsck_verify_linkea(env, dev, child2, cname, &LU_LPF_FID); + } + + GOTO(put, rc); + 
+check_child1: + if (child1 != NULL) + rc = lfsck_verify_lpf_pairs(env, lfsck, parent, child1, name, + pfid, LVLT_BY_BOOKMARK); + + GOTO(put, rc); + +put: + if (lfsck->li_lpf_obj != NULL && + unlikely(!dt_try_as_dir(env, lfsck->li_lpf_obj))) + rc = -ENOTDIR; + + if (child2 != NULL && !IS_ERR(child2)) + lu_object_put(env, &child2->do_lu); + if (child1 != NULL && !IS_ERR(child1)) + lu_object_put(env, &child1->do_lu); + if (parent != NULL && !IS_ERR(parent)) + lu_object_put(env, &parent->do_lu); + + return rc; +} + static int lfsck_fid_init(struct lfsck_instance *lfsck) { struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; @@ -1166,9 +1675,10 @@ void lfsck_control_speed_by_self(struct lfsck_component *com) } } -struct lfsck_thread_args *lfsck_thread_args_init(struct lfsck_instance *lfsck, - struct lfsck_component *com, - struct lfsck_start_param *lsp) +static struct lfsck_thread_args * +lfsck_thread_args_init(struct lfsck_instance *lfsck, + struct lfsck_component *com, + struct lfsck_start_param *lsp) { struct lfsck_thread_args *lta; int rc; @@ -1201,6 +1711,241 @@ void lfsck_thread_args_fini(struct lfsck_thread_args *lta) OBD_FREE_PTR(lta); } +struct lfsck_assistant_data * +lfsck_assistant_data_init(struct lfsck_assistant_operations *lao, + const char *name) +{ + struct lfsck_assistant_data *lad; + + OBD_ALLOC_PTR(lad); + if (lad != NULL) { + lad->lad_bitmap = CFS_ALLOCATE_BITMAP(BITS_PER_LONG); + if (lad->lad_bitmap == NULL) { + OBD_FREE_PTR(lad); + return NULL; + } + + INIT_LIST_HEAD(&lad->lad_req_list); + spin_lock_init(&lad->lad_lock); + INIT_LIST_HEAD(&lad->lad_ost_list); + INIT_LIST_HEAD(&lad->lad_ost_phase1_list); + INIT_LIST_HEAD(&lad->lad_ost_phase2_list); + INIT_LIST_HEAD(&lad->lad_mdt_list); + INIT_LIST_HEAD(&lad->lad_mdt_phase1_list); + INIT_LIST_HEAD(&lad->lad_mdt_phase2_list); + init_waitqueue_head(&lad->lad_thread.t_ctl_waitq); + lad->lad_ops = lao; + lad->lad_name = name; + } + + return lad; +} + +/** + * Generic LFSCK asynchronous communication 
interpretor function. + * The LFSCK RPC reply for both the event notification and status + * querying will be handled here. + * + * \param[in] env pointer to the thread context + * \param[in] req pointer to the LFSCK request + * \param[in] args pointer to the lfsck_async_interpret_args + * \param[in] rc the result for handling the LFSCK request + * + * \retval 0 for success + * \retval negative error number on failure + */ +int lfsck_async_interpret_common(const struct lu_env *env, + struct ptlrpc_request *req, + void *args, int rc) +{ + struct lfsck_async_interpret_args *laia = args; + struct lfsck_component *com = laia->laia_com; + struct lfsck_assistant_data *lad = com->lc_data; + struct lfsck_tgt_descs *ltds = laia->laia_ltds; + struct lfsck_tgt_desc *ltd = laia->laia_ltd; + struct lfsck_request *lr = laia->laia_lr; + + LASSERT(com->lc_lfsck->li_master); + + switch (lr->lr_event) { + case LE_START: + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: fail to notify %s %x for %s " + "start: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), + (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", + ltd->ltd_index, lad->lad_name, rc); + + if (com->lc_type == LFSCK_TYPE_LAYOUT) { + struct lfsck_layout *lo = com->lc_file_ram; + + if (lr->lr_flags & LEF_TO_OST) + lfsck_lad_set_bitmap(env, com, + ltd->ltd_index); + else + lo->ll_flags |= LF_INCOMPLETE; + } else { + struct lfsck_namespace *ns = com->lc_file_ram; + + /* If some MDT does not join the namespace + * LFSCK, then we cannot know whether there + * is some name entry on such MDT that with + * the referenced MDT-object on this MDT or + * not. So the namespace LFSCK on this MDT + * cannot handle orphan MDT-objects properly. + * So we mark the LFSCK as LF_INCOMPLETE and + * skip orphan MDT-objects handling. 
*/ + ns->ln_flags |= LF_INCOMPLETE; + } + break; + } + + spin_lock(&ltds->ltd_lock); + if (ltd->ltd_dead) { + spin_unlock(&ltds->ltd_lock); + break; + } + + if (com->lc_type == LFSCK_TYPE_LAYOUT) { + struct list_head *list; + struct list_head *phase_list; + + if (ltd->ltd_layout_done) { + spin_unlock(&ltds->ltd_lock); + break; + } + + if (lr->lr_flags & LEF_TO_OST) { + list = &lad->lad_ost_list; + phase_list = &lad->lad_ost_phase1_list; + } else { + list = &lad->lad_mdt_list; + phase_list = &lad->lad_mdt_phase1_list; + } + + if (list_empty(&ltd->ltd_layout_list)) + list_add_tail(&ltd->ltd_layout_list, list); + if (list_empty(&ltd->ltd_layout_phase_list)) + list_add_tail(&ltd->ltd_layout_phase_list, + phase_list); + } else { + if (ltd->ltd_namespace_done) { + spin_unlock(&ltds->ltd_lock); + break; + } + + if (list_empty(&ltd->ltd_namespace_list)) + list_add_tail(&ltd->ltd_namespace_list, + &lad->lad_mdt_list); + if (list_empty(&ltd->ltd_namespace_phase_list)) + list_add_tail(&ltd->ltd_namespace_phase_list, + &lad->lad_mdt_phase1_list); + } + spin_unlock(&ltds->ltd_lock); + break; + case LE_STOP: + case LE_PHASE1_DONE: + case LE_PHASE2_DONE: + case LE_PEER_EXIT: + if (rc != 0 && rc != -EALREADY) + CDEBUG(D_LFSCK, "%s: fail to notify %s %x for %s: " + "event = %d, rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), + (lr->lr_flags & LEF_TO_OST) ? 
"OST" : "MDT", + ltd->ltd_index, lad->lad_name, lr->lr_event, rc); + break; + case LE_QUERY: { + struct lfsck_reply *reply; + struct list_head *list; + struct list_head *phase_list; + + if (com->lc_type == LFSCK_TYPE_LAYOUT) { + list = &ltd->ltd_layout_list; + phase_list = &ltd->ltd_layout_phase_list; + } else { + list = &ltd->ltd_namespace_list; + phase_list = &ltd->ltd_namespace_phase_list; + } + + if (rc != 0) { + spin_lock(&ltds->ltd_lock); + list_del_init(phase_list); + list_del_init(list); + spin_unlock(&ltds->ltd_lock); + break; + } + + reply = req_capsule_server_get(&req->rq_pill, + &RMF_LFSCK_REPLY); + if (reply == NULL) { + rc = -EPROTO; + CDEBUG(D_LFSCK, "%s: invalid query reply for %s: " + "rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), + lad->lad_name, rc); + spin_lock(&ltds->ltd_lock); + list_del_init(phase_list); + list_del_init(list); + spin_unlock(&ltds->ltd_lock); + break; + } + + switch (reply->lr_status) { + case LS_SCANNING_PHASE1: + break; + case LS_SCANNING_PHASE2: + spin_lock(&ltds->ltd_lock); + list_del_init(phase_list); + if (ltd->ltd_dead) { + spin_unlock(&ltds->ltd_lock); + break; + } + + if (com->lc_type == LFSCK_TYPE_LAYOUT) { + if (ltd->ltd_layout_done) { + spin_unlock(&ltds->ltd_lock); + break; + } + + if (lr->lr_flags & LEF_TO_OST) + list_add_tail(phase_list, + &lad->lad_ost_phase2_list); + else + list_add_tail(phase_list, + &lad->lad_mdt_phase2_list); + } else { + if (ltd->ltd_namespace_done) { + spin_unlock(&ltds->ltd_lock); + break; + } + + list_add_tail(phase_list, + &lad->lad_mdt_phase2_list); + } + spin_unlock(&ltds->ltd_lock); + break; + default: + spin_lock(&ltds->ltd_lock); + list_del_init(phase_list); + list_del_init(list); + spin_unlock(&ltds->ltd_lock); + break; + } + break; + } + default: + CDEBUG(D_LFSCK, "%s: unexpected event: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), lr->lr_event); + break; + } + + if (!laia->laia_shared) { + lfsck_tgt_put(ltd); + lfsck_component_put(env, com); + } + + return 0; +} + static void lfsck_interpret(const struct 
lu_env *env, struct lfsck_instance *lfsck, struct ptlrpc_request *req, void *args, int result) @@ -1213,17 +1958,13 @@ static void lfsck_interpret(const struct lu_env *env, spin_lock(&lfsck->li_lock); list_for_each_entry(com, &lfsck->li_list_scan, lc_link) { - if (com->lc_ops->lfsck_interpret != NULL) { - laia->laia_com = com; - com->lc_ops->lfsck_interpret(env, req, laia, result); - } + laia->laia_com = com; + lfsck_async_interpret_common(env, req, laia, result); } list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) { - if (com->lc_ops->lfsck_interpret != NULL) { - laia->laia_com = com; - com->lc_ops->lfsck_interpret(env, req, laia, result); - } + laia->laia_com = com; + lfsck_async_interpret_common(env, req, laia, result); } spin_unlock(&lfsck->li_lock); } @@ -1233,11 +1974,12 @@ static int lfsck_stop_notify(const struct lu_env *env, struct lfsck_tgt_descs *ltds, struct lfsck_tgt_desc *ltd, __u16 type) { - struct ptlrpc_request_set *set; - struct lfsck_component *com; - int rc = 0; + struct lfsck_component *com; + int rc = 0; ENTRY; + LASSERT(lfsck->li_master); + spin_lock(&lfsck->li_lock); com = __lfsck_component_find(lfsck, type, &lfsck->li_list_scan); if (com == NULL) @@ -1248,22 +1990,72 @@ static int lfsck_stop_notify(const struct lu_env *env, spin_unlock(&lfsck->li_lock); if (com != NULL) { - if (com->lc_ops->lfsck_stop_notify != NULL) { - set = ptlrpc_prep_set(); - if (set == NULL) { - lfsck_component_put(env, com); + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lfsck_async_interpret_args *laia = &info->lti_laia; + struct lfsck_request *lr = &info->lti_lr; + struct lfsck_assistant_data *lad = com->lc_data; + struct list_head *list; + struct list_head *phase_list; + struct ptlrpc_request_set *set; + + set = ptlrpc_prep_set(); + if (set == NULL) { + lfsck_component_put(env, com); - RETURN(-ENOMEM); - } + RETURN(-ENOMEM); + } - rc = com->lc_ops->lfsck_stop_notify(env, com, ltds, - ltd, set); - if (rc == 0) - rc = 
ptlrpc_set_wait(set); + if (type == LFSCK_TYPE_LAYOUT) { + list = &ltd->ltd_layout_list; + phase_list = &ltd->ltd_layout_phase_list; + } else { + list = &ltd->ltd_namespace_list; + phase_list = &ltd->ltd_namespace_phase_list; + } + spin_lock(&ltds->ltd_lock); + if (list_empty(list)) { + LASSERT(list_empty(phase_list)); + spin_unlock(&ltds->ltd_lock); ptlrpc_set_destroy(set); + + RETURN(0); } + list_del_init(phase_list); + list_del_init(list); + spin_unlock(&ltds->ltd_lock); + + memset(lr, 0, sizeof(*lr)); + lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_event = LE_PEER_EXIT; + lr->lr_active = type; + lr->lr_status = LS_CO_PAUSED; + if (ltds == &lfsck->li_ost_descs) + lr->lr_flags = LEF_TO_OST; + + laia->laia_com = com; + laia->laia_ltds = ltds; + atomic_inc(&ltd->ltd_ref); + laia->laia_ltd = ltd; + laia->laia_lr = lr; + laia->laia_shared = 0; + + rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + lfsck_async_interpret_common, + laia, LFSCK_NOTIFY); + if (rc != 0) { + CDEBUG(D_LFSCK, "%s: fail to notify %s %x for " + "co-stop for %s: rc = %d\n", + lfsck_lfsck2name(lfsck), + (lr->lr_flags & LEF_TO_OST) ? 
"OST" : "MDT", + ltd->ltd_index, lad->lad_name, rc); + lfsck_tgt_put(ltd); + } else { + rc = ptlrpc_set_wait(set); + } + + ptlrpc_set_destroy(set); lfsck_component_put(env, com); } @@ -1337,6 +2129,139 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp, return 0; } +int lfsck_start_assistant(const struct lu_env *env, struct lfsck_component *com, + struct lfsck_start_param *lsp) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_assistant_data *lad = com->lc_data; + struct ptlrpc_thread *mthread = &lfsck->li_thread; + struct ptlrpc_thread *athread = &lad->lad_thread; + struct lfsck_thread_args *lta; + struct task_struct *task; + int rc; + ENTRY; + + lad->lad_assistant_status = 0; + lad->lad_post_result = 0; + lad->lad_to_post = 0; + lad->lad_to_double_scan = 0; + lad->lad_in_double_scan = 0; + lad->lad_exit = 0; + thread_set_flags(athread, 0); + + lta = lfsck_thread_args_init(lfsck, com, lsp); + if (IS_ERR(lta)) + RETURN(PTR_ERR(lta)); + + task = kthread_run(lfsck_assistant_engine, lta, lad->lad_name); + if (IS_ERR(task)) { + rc = PTR_ERR(task); + CERROR("%s: cannot start LFSCK assistant thread for %s: " + "rc = %d\n", lfsck_lfsck2name(lfsck), lad->lad_name, rc); + lfsck_thread_args_fini(lta); + } else { + struct l_wait_info lwi = { 0 }; + + l_wait_event(mthread->t_ctl_waitq, + thread_is_running(athread) || + thread_is_stopped(athread), + &lwi); + if (unlikely(!thread_is_running(athread))) + rc = lad->lad_assistant_status; + else + rc = 0; + } + + RETURN(rc); +} + +int lfsck_checkpoint_generic(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_assistant_data *lad = com->lc_data; + struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; + struct ptlrpc_thread *athread = &lad->lad_thread; + struct l_wait_info lwi = { 0 }; + + if (com->lc_new_checked == 0) + return LFSCK_CHECKPOINT_SKIP; + + l_wait_event(mthread->t_ctl_waitq, + list_empty(&lad->lad_req_list) || + !thread_is_running(mthread) || + 
thread_is_stopped(athread), + &lwi); + + if (!thread_is_running(mthread) || thread_is_stopped(athread)) + return LFSCK_CHECKPOINT_SKIP; + + return 0; +} + +void lfsck_post_generic(const struct lu_env *env, + struct lfsck_component *com, int *result) +{ + struct lfsck_assistant_data *lad = com->lc_data; + struct ptlrpc_thread *athread = &lad->lad_thread; + struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; + struct l_wait_info lwi = { 0 }; + + lad->lad_post_result = *result; + if (*result <= 0) + lad->lad_exit = 1; + lad->lad_to_post = 1; + + wake_up_all(&athread->t_ctl_waitq); + l_wait_event(mthread->t_ctl_waitq, + (*result > 0 && list_empty(&lad->lad_req_list)) || + thread_is_stopped(athread), + &lwi); + + if (lad->lad_assistant_status < 0) + *result = lad->lad_assistant_status; +} + +int lfsck_double_scan_generic(const struct lu_env *env, + struct lfsck_component *com, int status) +{ + struct lfsck_assistant_data *lad = com->lc_data; + struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; + struct ptlrpc_thread *athread = &lad->lad_thread; + struct l_wait_info lwi = { 0 }; + + if (status != LS_SCANNING_PHASE2) + lad->lad_exit = 1; + else + lad->lad_to_double_scan = 1; + + wake_up_all(&athread->t_ctl_waitq); + l_wait_event(mthread->t_ctl_waitq, + lad->lad_in_double_scan || + thread_is_stopped(athread), + &lwi); + + if (lad->lad_assistant_status < 0) + return lad->lad_assistant_status; + + return 0; +} + +void lfsck_quit_generic(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_assistant_data *lad = com->lc_data; + struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; + struct ptlrpc_thread *athread = &lad->lad_thread; + struct l_wait_info lwi = { 0 }; + + lad->lad_exit = 1; + wake_up_all(&athread->t_ctl_waitq); + l_wait_event(mthread->t_ctl_waitq, + thread_is_init(athread) || + thread_is_stopped(athread), + &lwi); +} + /* external interfaces */ int lfsck_get_speed(struct seq_file *m, struct dt_device *key) @@ -1604,6 
+2529,7 @@ static int lfsck_start_all(const struct lu_env *env, laia->laia_ltd = ltd; ltd->ltd_layout_done = 0; + ltd->ltd_namespace_done = 0; rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, lfsck_async_interpret, laia, LFSCK_NOTIFY); @@ -1804,7 +2730,7 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, } trigger: - lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY; + lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY | LUDA_TYPE; if (bk->lb_param & LPF_DRYRUN) lfsck->li_args_dir |= LUDA_VERIFY_DRYRUN; @@ -2249,6 +3175,8 @@ int lfsck_add_target(const struct lu_env *env, struct dt_device *key, INIT_LIST_HEAD(&ltd->ltd_orphan_list); INIT_LIST_HEAD(&ltd->ltd_layout_list); INIT_LIST_HEAD(&ltd->ltd_layout_phase_list); + INIT_LIST_HEAD(&ltd->ltd_namespace_list); + INIT_LIST_HEAD(&ltd->ltd_namespace_phase_list); atomic_set(&ltd->ltd_ref, 1); ltd->ltd_index = index; @@ -2348,6 +3276,7 @@ unlock: spin_lock(&ltds->ltd_lock); ltd->ltd_dead = 1; spin_unlock(&ltds->ltd_lock); + lfsck_stop_notify(env, lfsck, ltds, ltd, LFSCK_TYPE_NAMESPACE); lfsck_stop_notify(env, lfsck, ltds, ltd, LFSCK_TYPE_LAYOUT); lfsck_tgt_put(ltd); }