X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_striped_dir.c;h=854504d7602563348021973937e296d3e4484591;hb=3274e573957e8b8a067ae28c3f7d7788d40f310e;hp=c42d23357ea3b056b9c4421ca92f3e7fa30a2727;hpb=eb8452240bce761062d49ff7cbd6398a239d431c;p=fs%2Flustre-release.git diff --git a/lustre/lfsck/lfsck_striped_dir.c b/lustre/lfsck/lfsck_striped_dir.c index c42d233..854504d 100644 --- a/lustre/lfsck/lfsck_striped_dir.c +++ b/lustre/lfsck/lfsck_striped_dir.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2014, Intel Corporation. + * Copyright (c) 2014, 2017, Intel Corporation. */ /* * lustre/lfsck/lfsck_striped_dir.c @@ -140,7 +140,6 @@ #define DEBUG_SUBSYSTEM S_LFSCK -#include #include #include #include @@ -148,7 +147,6 @@ #include #include #include -#include #include "lfsck_internal.h" @@ -170,14 +168,13 @@ void lfsck_lmv_put(const struct lu_env *env, struct lfsck_lmv *llmv) LASSERT(llmv->ll_lslr != NULL); - OBD_FREE_LARGE(llmv->ll_lslr, - sizeof(*llmv->ll_lslr) * - llmv->ll_stripes_allocated); + OBD_FREE_PTR_ARRAY_LARGE(llmv->ll_lslr, + llmv->ll_stripes_allocated); OBD_FREE_PTR(llu); } else { if (llmv->ll_lslr != NULL) - OBD_FREE_LARGE(llmv->ll_lslr, - sizeof(*llmv->ll_lslr) * + OBD_FREE_PTR_ARRAY_LARGE( + llmv->ll_lslr, llmv->ll_stripes_allocated); OBD_FREE_PTR(llmv); @@ -196,7 +193,7 @@ void lfsck_lmv_put(const struct lu_env *env, struct lfsck_lmv *llmv) * \param[in] del_lmv true if need to drop the LMV EA * * \retval positive number if nothing to be done - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ static int lfsck_disable_master_lmv(const struct lu_env *env, @@ -206,7 +203,7 @@ static int lfsck_disable_master_lmv(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_attr *la = &info->lti_la; struct lfsck_instance *lfsck = com->lc_lfsck; - struct dt_device *dev = lfsck_obj2dt_dev(obj); + struct dt_device *dev = lfsck_obj2dev(obj); struct thandle *th = NULL; int rc = 0; ENTRY; @@ -238,16 +235,16 @@ static int lfsck_disable_master_lmv(const struct lu_env *env, GOTO(unlock, rc = 0); if (del_lmv) { - rc = dt_xattr_del(env, obj, XATTR_NAME_LMV, th, BYPASS_CAPA); + rc = dt_xattr_del(env, obj, XATTR_NAME_LMV, th); if (rc != 0) GOTO(unlock, rc); } - rc = dt_attr_get(env, obj, la, BYPASS_CAPA); + rc = dt_attr_get(env, obj, la); if (rc == 0 && !(la->la_flags & LUSTRE_IMMUTABLE_FL)) { la->la_valid = LA_FLAGS; la->la_flags |= LUSTRE_IMMUTABLE_FL; - rc = dt_attr_set(env, obj, la, th, BYPASS_CAPA); + rc = dt_attr_set(env, obj, la, th); } GOTO(unlock, rc); @@ -290,18 +287,19 @@ static inline bool lfsck_is_valid_slave_lmv(struct lmv_mds_md_v1 *lmv) * * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component + * \param[in] obj pointer to the striped directory to be handled * \param[in] lnr pointer to the namespace request that contains the * striped directory to be handled and other information * * \retval positive number if nothing to be done - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ static int lfsck_remove_lmv(const struct lu_env *env, struct lfsck_component *com, + struct dt_object *obj, struct lfsck_namespace_req *lnr) { - struct dt_object *obj = lnr->lnr_obj; struct lustre_handle lh = { 0 }; int rc; @@ -341,7 +339,7 @@ static int lfsck_remove_dirent(const struct lu_env *env, snprintf(info->lti_tmpbuf2, sizeof(info->lti_tmpbuf2), DFID":%u", PFID(fid), index); - obj = lfsck_object_find_by_dev(env, com->lc_lfsck->li_bottom, fid); + obj = lfsck_object_find_bottom(env, com->lc_lfsck, fid); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -367,6 +365,7 @@ static int lfsck_remove_dirent(const struct lu_env *env, * * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component + * \param[in] dir pointer to the striped directory to be handled * \param[in] lslr pointer to lfsck_disable_master_lmv slot which content * will be replaced by the given information * \param[in] lnr contain the shard's FID to be used to fill the @@ -377,11 +376,12 @@ static int lfsck_remove_dirent(const struct lu_env *env, * \param[in] index the old shard's index in the striped directory * \param[in] flags the new shard's flags in the @lslr slot * - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ static int lfsck_replace_lmv(const struct lu_env *env, struct lfsck_component *com, + struct dt_object *dir, struct lfsck_slave_lmv_rec *lslr, struct lfsck_namespace_req *lnr, struct lmv_mds_md_v1 *lmv, @@ -390,7 +390,7 @@ static int lfsck_replace_lmv(const struct lu_env *env, struct lfsck_lmv *llmv = lnr->lnr_lmv; int rc; - rc = lfsck_remove_dirent(env, com, lnr->lnr_obj, + rc = lfsck_remove_dirent(env, com, dir, &lslr->lslr_fid, index); if (rc < 0) return rc; @@ -454,6 +454,7 @@ static int lfsck_replace_lmv(const struct lu_env *env, * * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component + * \param[in] dir pointer to the striped directory to be handled * \param[in] lnr contain the shard's FID to fill the @lslr slot, * it also records the known max filled index and * the known max stripe count @@ -472,7 +473,7 @@ static int lfsck_replace_lmv(const struct lu_env *env, * we define the max depth can be called recursively * (LFSCK_REC_LMV_MAX_DEPTH) * - * \retval zero for succeed + * \retval zero for success * \retval "-ERANGE" for invalid @shard_idx * \retval "-EEXIST" for the required lslr slot has been * occupied by other shard @@ -480,13 +481,13 @@ static int lfsck_replace_lmv(const struct lu_env *env, */ static int lfsck_record_lmv(const struct lu_env *env, struct lfsck_component *com, + struct dt_object *dir, struct lfsck_namespace_req *lnr, struct lmv_mds_md_v1 *lmv, __u32 shard_idx, __u32 flags, __u32 flags2, __u32 *depth) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_lmv *llmv = lnr->lnr_lmv; - struct dt_object *dir = lnr->lnr_obj; const struct lu_fid *fid = &lnr->lnr_fid; struct lfsck_slave_lmv_rec *lslr; struct lfsck_rec_lmv_save *lrls; @@ -508,7 +509,7 @@ static int lfsck_record_lmv(const struct lu_env *env, int new_stripes = index + 1; size_t old_size = sizeof(*lslr) * llmv->ll_stripes_allocated; - OBD_ALLOC_LARGE(new_lslr, sizeof(*new_lslr) * new_stripes); + OBD_ALLOC_PTR_ARRAY_LARGE(new_lslr, new_stripes); if (new_lslr == NULL) { llmv->ll_failed = 1; @@ -567,7 +568,7 @@ static int lfsck_record_lmv(const struct lu_env *env, * mark the master MDT-object as read-only. The * administrator can handle the conflict with * more human knowledge. */ - rc = lfsck_remove_lmv(env, com, lnr); + rc = lfsck_remove_lmv(env, com, dir, lnr); break; case LSLF_BAD_INDEX2: GOTO(out, rc = -EEXIST); @@ -586,7 +587,7 @@ no_lmvea: * as read-only. The administrator can * handle the conflict with more human * knowledge. */ - rc = lfsck_remove_lmv(env, com, lnr); + rc = lfsck_remove_lmv(env, com, dir, lnr); } else { /* Otherwise, remove the current name entry, * and add its FID in the LFSCK tracing file @@ -609,7 +610,7 @@ no_lmvea: /* The name entry claims an index that is conflict * with a valid existing name entry, then try the * index in the lmv recursively. */ - rc = lfsck_record_lmv(env, com, lnr, lmv, index, + rc = lfsck_record_lmv(env, com, dir, lnr, lmv, index, LSLF_BAD_INDEX2, lslr->lslr_flags, depth); lmv->lmv_master_mdt_index = index; if (rc == -ERANGE || rc == -EEXIST) @@ -618,7 +619,7 @@ no_lmvea: * not know how to resolve the conflict. * We will handle it as handle the case * of 'LSLF_NONE' vs 'LSLF_NONE'. */ - rc = lfsck_remove_lmv(env, com, lnr); + rc = lfsck_remove_lmv(env, com, dir, lnr); break; default: @@ -644,7 +645,7 @@ none: * as read-only. The administrator can * handle the conflict with more human * knowledge. */ - rc = lfsck_remove_lmv(env, com, lnr); + rc = lfsck_remove_lmv(env, com, dir, lnr); } else { lrls = &lfsck->li_rec_lmv_save[*depth - 1]; lrls->lrls_fid = lslr->lslr_fid; @@ -656,8 +657,8 @@ none: com, &lrls->lrls_fid, LNTF_CHECK_PARENT, true); if (rc == 0) - rc = lfsck_replace_lmv(env, com, lslr, - lnr, lmv, index, flags); + rc = lfsck_replace_lmv(env, com, dir, + lslr, lnr, lmv, index, flags); } break; @@ -678,7 +679,7 @@ none: /* The name entry claims an index that is conflict * with a valid existing name entry, then try the * index in the lmv recursively. */ - rc = lfsck_record_lmv(env, com, lnr, lmv, index, + rc = lfsck_record_lmv(env, com, dir, lnr, lmv, index, LSLF_BAD_INDEX2, lslr->lslr_flags, depth); lmv->lmv_master_mdt_index = index; if (rc == -ERANGE || rc == -EEXIST) { @@ -700,7 +701,7 @@ none: case LSLF_NO_LMVEA: /* Remove the existing dangling name entry. * Refill the lslr slot with the given LMV. */ - rc = lfsck_replace_lmv(env, com, lslr, lnr, + rc = lfsck_replace_lmv(env, com, dir, lslr, lnr, lmv, index, flags); break; case LSLF_DANGLING: @@ -714,7 +715,7 @@ none: /* The name entry claims an index that is conflict * with a valid existing name entry, then try the * index in the lmv recursively. */ - rc = lfsck_record_lmv(env, com, lnr, lmv, index, + rc = lfsck_record_lmv(env, com, dir, lnr, lmv, index, LSLF_BAD_INDEX2, lslr->lslr_flags, depth); lmv->lmv_master_mdt_index = index; if (rc == -ERANGE || rc == -EEXIST) @@ -722,7 +723,7 @@ none: * also conflict with other, then remove * the existing dangling name entry. * Refill the lslr slot with the given LMV. */ - rc = lfsck_replace_lmv(env, com, lslr, lnr, + rc = lfsck_replace_lmv(env, com, dir, lslr, lnr, lmv, shard_idx, flags); break; @@ -747,7 +748,7 @@ none: /* The existing one has another possible slot, * try it recursively. */ - rc = lfsck_record_lmv(env, com, lnr, lmv, index, + rc = lfsck_record_lmv(env, com, dir, lnr, lmv, index, LSLF_BAD_INDEX2, flags, depth); *lmv = lrls->lrls_lmv; lnr->lnr_fid = lrls->lrls_fid; @@ -787,7 +788,7 @@ conflict: * mark the master MDT-object as read-only. The * administrator can handle the conflict with * more human knowledge. */ - rc = lfsck_remove_lmv(env, com, lnr); + rc = lfsck_remove_lmv(env, com, dir, lnr); break; case LSLF_BAD_INDEX2: GOTO(out, rc = -EEXIST); @@ -803,7 +804,7 @@ conflict: /* The name entry claims an index that is conflict * with a valid existing name entry, then try the * index in the lmv recursively. */ - rc = lfsck_record_lmv(env, com, lnr, lmv, index, + rc = lfsck_record_lmv(env, com, dir, lnr, lmv, index, LSLF_BAD_INDEX2, lslr->lslr_flags, depth); lmv->lmv_master_mdt_index = index; if (rc == -ERANGE || rc == -EEXIST) @@ -812,7 +813,7 @@ conflict: * not know how to resolve the conflict. * We will handle it as handle the case * of 'LSLF_NONE' vs 'LSLF_NONE'. */ - rc = lfsck_remove_lmv(env, com, lnr); + rc = lfsck_remove_lmv(env, com, dir, lnr); break; } @@ -834,36 +835,92 @@ out: return rc > 0 ? 0 : rc; } -int lfsck_read_stripe_lmv(const struct lu_env *env, struct dt_object *obj, +/** + * Read LMV from bottom object, so it doesn't contain stripe FIDs. + * + * TODO: test migrating/foreign directory lfsck + * + * \param[in] env thread env + * \param[in] lfsck lfsck instance + * \param[in] obj dt object + * \param[out] lmv LMV data pointer + * + * \retval 0 on success + * \retval -ENODATA on no LMV, corrupt LMV, dir is dead or foreign + * -ev on other failures + */ +int lfsck_read_stripe_lmv(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *obj, struct lmv_mds_md_v1 *lmv) { - struct dt_object *bottom; - int rc; + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lu_buf *buf = &info->lti_buf; + struct lmv_foreign_md *lfm; + int rc; + + /* use bottom object to avoid reading in shard FIDs */ + obj = lfsck_object_find_bottom(env, lfsck, lu_object_fid(&obj->do_lu)); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dt_read_lock(env, obj, 0); + buf->lb_buf = lmv; + buf->lb_len = sizeof(*lmv); + rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LMV); + if (unlikely(rc == -ERANGE)) { + buf = &info->lti_big_buf; + /* this may be a foreign LMV */ + rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LMV); + if (rc > sizeof(*lmv)) { + int rc1; + + lu_buf_check_and_alloc(buf, rc); + rc1 = dt_xattr_get(env, obj, buf, XATTR_NAME_LMV); + if (rc != rc1) + rc = -ENODATA; + } else { + rc = -ENODATA; + } + } + dt_read_unlock(env, obj); - /* Currently, we only store the LMV header on disk. It is the LOD's - * duty to iterate the master MDT-object's directory to compose the - * integrated LMV EA. But here, we only want to load the LMV header, - * so we need to bypass LOD to avoid unnecessary iteration in LOD. */ - bottom = lu2dt(container_of0(obj->do_lu.lo_header->loh_layers.prev, - struct lu_object, lo_linkage)); - if (unlikely(bottom == NULL)) - return -ENOENT; - - dt_read_lock(env, bottom, 0); - rc = dt_xattr_get(env, bottom, lfsck_buf_get(env, lmv, sizeof(*lmv)), - XATTR_NAME_LMV, BYPASS_CAPA); - dt_read_unlock(env, bottom); - if (rc != sizeof(*lmv)) - return rc > 0 ? -EINVAL : rc; - - lfsck_lmv_header_le_to_cpu(lmv, lmv); - if ((lmv->lmv_magic == LMV_MAGIC && - !(lmv->lmv_hash_type & LMV_HASH_FLAG_MIGRATION)) || - (lmv->lmv_magic == LMV_MAGIC_STRIPE && - !(lmv->lmv_hash_type & LMV_HASH_FLAG_DEAD))) - return 0; - - return -ENODATA; + lfsck_object_put(env, obj); + + if (rc > offsetof(typeof(*lfm), lfm_value) && + *((__u32 *)buf->lb_buf) == LMV_MAGIC_FOREIGN) { + __u32 value_len; + + lfm = buf->lb_buf; + value_len = le32_to_cpu(lfm->lfm_length); + CDEBUG(D_INFO, + "foreign LMV EA, magic %x, len %u, type %x, flags %x, for dir "DFID"\n", + le32_to_cpu(lfm->lfm_magic), value_len, + le32_to_cpu(lfm->lfm_type), le32_to_cpu(lfm->lfm_flags), + PFID(lfsck_dto2fid(obj))); + + if (rc != value_len + offsetof(typeof(*lfm), lfm_value)) + CDEBUG(D_LFSCK, + "foreign LMV EA internal size %u does not match EA full size %d for dir "DFID"\n", + value_len, rc, PFID(lfsck_dto2fid(obj))); + + /* no further usage/decode of foreign LMV outside */ + return -ENODATA; + } + + if (rc == sizeof(*lmv)) { + rc = 0; + lfsck_lmv_header_le_to_cpu(lmv, lmv); + /* if LMV is corrupt, return -ENODATA */ + if (lmv->lmv_magic != LMV_MAGIC_V1 && + lmv->lmv_magic != LMV_MAGIC_STRIPE) + rc = -ENODATA; + } else if (rc >= 0) { + /* LMV is corrupt */ + rc = -ENODATA; + } + + return rc; } /** @@ -913,24 +970,30 @@ int lfsck_shard_name_to_index(const struct lu_env *env, const char *name, return idx; } +static inline bool lfsck_name_hash_match(struct lmv_mds_md_v1 *lmv, + const char *name, int namelen) +{ + int idx; + + idx = lmv_name_to_stripe_index_old(lmv, name, namelen); + if (idx == lmv->lmv_master_mdt_index) + return true; + + if (!lmv_hash_is_layout_changing(lmv->lmv_hash_type)) + return false; + + idx = lmv_name_to_stripe_index(lmv, name, namelen); + return (idx == lmv->lmv_master_mdt_index); +} + bool lfsck_is_valid_slave_name_entry(const struct lu_env *env, struct lfsck_lmv *llmv, const char *name, int namelen) { - struct lmv_mds_md_v1 *lmv; - int idx; - if (llmv == NULL || !llmv->ll_lmv_slave || !llmv->ll_lmv_verified) return true; - lmv = &llmv->ll_lmv; - idx = lmv_name_to_stripe_index(lmv->lmv_hash_type, - lmv->lmv_stripe_count, - name, namelen); - if (unlikely(idx != lmv->lmv_master_mdt_index)) - return false; - - return true; + return lfsck_name_hash_match(&llmv->ll_lmv, name, namelen); } /** @@ -952,15 +1015,15 @@ bool lfsck_is_valid_slave_name_entry(const struct lu_env *env, * \retval negative error number on failure */ int lfsck_namespace_check_name(const struct lu_env *env, + struct lfsck_instance *lfsck, struct dt_object *parent, struct dt_object *child, const struct lu_name *cname) { - struct lmv_mds_md_v1 *lmv = &lfsck_env_info(env)->lti_lmv; - int idx; - int rc; + struct lmv_mds_md_v1 *lmv = &lfsck_env_info(env)->lti_lmv; + int rc; - rc = lfsck_read_stripe_lmv(env, parent, lmv); + rc = lfsck_read_stripe_lmv(env, lfsck, parent, lmv); if (rc != 0) RETURN(rc == -ENODATA ? 0 : rc); @@ -968,11 +1031,8 @@ int lfsck_namespace_check_name(const struct lu_env *env, if (!lfsck_is_valid_slave_lmv(lmv)) return 0; - idx = lmv_name_to_stripe_index(lmv->lmv_hash_type, - lmv->lmv_stripe_count, - cname->ln_name, - cname->ln_namelen); - if (unlikely(idx != lmv->lmv_master_mdt_index)) + if (!lfsck_name_hash_match(lmv, cname->ln_name, + cname->ln_namelen)) return 1; } else if (lfsck_shard_name_to_index(env, cname->ln_name, cname->ln_namelen, lfsck_object_type(child), @@ -1005,7 +1065,7 @@ int lfsck_namespace_update_lmv(const struct lu_env *env, struct lmv_mds_md_v1 *lmv4 = &info->lti_lmv4; struct lu_buf *buf = &info->lti_buf; struct lfsck_instance *lfsck = com->lc_lfsck; - struct dt_device *dev = lfsck_obj2dt_dev(obj); + struct dt_device *dev = lfsck_obj2dev(obj); struct thandle *th = NULL; struct lustre_handle lh = { 0 }; int rc = 0; @@ -1049,7 +1109,7 @@ int lfsck_namespace_update_lmv(const struct lu_env *env, if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN) GOTO(unlock, rc = 0); - rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LMV, 0, th, BYPASS_CAPA); + rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LMV, 0, th); GOTO(unlock, rc); @@ -1118,7 +1178,7 @@ static int lfsck_allow_regenerate_master_lmv(const struct lu_env *env, snprintf(info->lti_tmpbuf, sizeof(info->lti_tmpbuf), DFID":%u", PFID(cfid), cidx); rc = dt_lookup(env, obj, (struct dt_rec *)tfid, - (const struct dt_key *)info->lti_tmpbuf, BYPASS_CAPA); + (const struct dt_key *)info->lti_tmpbuf); if (rc != 0) RETURN(rc); @@ -1127,7 +1187,7 @@ static int lfsck_allow_regenerate_master_lmv(const struct lu_env *env, args = lfsck->li_args_dir & ~(LUDA_VERIFY | LUDA_VERIFY_DRYRUN); iops = &obj->do_index_ops->dio_it; - di = iops->init(env, obj, args, BYPASS_CAPA); + di = iops->init(env, obj, args); if (IS_ERR(di)) RETURN(PTR_ERR(di)); @@ -1197,7 +1257,7 @@ out: * \param[in] index the MDT index on which the LFSCK instance to be notified * * \retval positive number if nothing to be done - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ static int lfsck_namespace_notify_lmv_remote(const struct lu_env *env, @@ -1233,7 +1293,7 @@ static int lfsck_namespace_notify_lmv_remote(const struct lu_env *env, lr = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); memset(lr, 0, sizeof(*lr)); lr->lr_event = event; - lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_index = lfsck_dev_idx(lfsck); lr->lr_active = LFSCK_TYPE_NAMESPACE; lr->lr_fid = *fid; lr->lr_flags = flags; @@ -1264,7 +1324,7 @@ out: * \param[in] obj pointer to the striped directory to be rescanned * * \retval positive number if nothing to be done - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env, @@ -1284,7 +1344,7 @@ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env, if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN) RETURN(0); - rc = lfsck_read_stripe_lmv(env, obj, lmv4); + rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv4); if (rc != 0) RETURN(rc); @@ -1299,7 +1359,7 @@ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env, else count = lmv4->lmv_stripe_count; - OBD_ALLOC_LARGE(lslr, sizeof(struct lfsck_slave_lmv_rec) * count); + OBD_ALLOC_PTR_ARRAY_LARGE(lslr, count); if (lslr == NULL) { OBD_FREE_PTR(llu); @@ -1326,6 +1386,7 @@ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env, lfsck_lmv_put(env, llmv); } else { ns->ln_striped_dirs_repaired++; + llmv->ll_counted = 1; spin_lock(&lfsck->li_lock); list_add_tail(&llu->llu_link, &lfsck->li_list_lmv); spin_unlock(&lfsck->li_lock); @@ -1352,19 +1413,19 @@ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env, * * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component - * \param[in] dir pointer to the object on which the LMV EA will be set + * \param[in] obj pointer to the object on which the LMV EA will be set * \param[in] lmv pointer to the buffer holding the new LMV EA * \param[in] cfid the shard's FID used for verification * \param[in] cidx the shard's index used for verification * \param[in] flags to indicate which element(s) in the LMV EA will be set * * \retval positive number if nothing to be done - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ static int lfsck_namespace_set_lmv_master(const struct lu_env *env, struct lfsck_component *com, - struct dt_object *dir, + struct dt_object *obj, struct lmv_mds_md_v1 *lmv, const struct lu_fid *cfid, __u32 cidx, __u32 flags) @@ -1373,20 +1434,12 @@ static int lfsck_namespace_set_lmv_master(const struct lu_env *env, struct lmv_mds_md_v1 *lmv3 = &info->lti_lmv3; struct lu_seq_range *range = &info->lti_range; struct lfsck_instance *lfsck = com->lc_lfsck; - struct seq_server_site *ss = - lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site); - struct dt_object *obj; + struct seq_server_site *ss = lfsck_dev_site(lfsck); struct lustre_handle lh = { 0 }; int pidx = -1; int rc = 0; ENTRY; - /* Find the bottom object to bypass LOD when set LMV EA. */ - obj = lu2dt(container_of0(dir->do_lu.lo_header->loh_layers.prev, - struct lu_object, lo_linkage)); - if (unlikely(obj == NULL)) - RETURN(-ENOENT); - fld_range_set_mdt(range); rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(lfsck_dto2fid(obj)), range); @@ -1400,7 +1453,7 @@ static int lfsck_namespace_set_lmv_master(const struct lu_env *env, if (rc != 0) GOTO(log, rc); - rc = lfsck_read_stripe_lmv(env, obj, lmv3); + rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv3); if (rc == -ENODATA) { if (!(flags & LEF_SET_LMV_ALL)) GOTO(log, rc); @@ -1418,6 +1471,7 @@ static int lfsck_namespace_set_lmv_master(const struct lu_env *env, lmv3->lmv_magic = LMV_MAGIC; lmv3->lmv_master_mdt_index = pidx; + lmv3->lmv_layout_version++; if (flags & LEF_SET_LMV_ALL) { rc = lfsck_allow_regenerate_master_lmv(env, com, obj, @@ -1481,7 +1535,7 @@ log: * \param[in] name the name of the bad name hash * * \retval positive number if nothing to be done - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ int lfsck_namespace_repair_bad_name_hash(const struct lu_env *env, @@ -1499,7 +1553,7 @@ int lfsck_namespace_repair_bad_name_hash(const struct lu_env *env, ENTRY; rc = dt_lookup(env, shard, (struct dt_rec *)pfid, - (const struct dt_key *)dotdot, BYPASS_CAPA); + (const struct dt_key *)dotdot); if (rc != 0 || !fid_is_sane(pfid)) GOTO(log, rc); @@ -1507,6 +1561,12 @@ int lfsck_namespace_repair_bad_name_hash(const struct lu_env *env, if (IS_ERR(parent)) GOTO(log, rc = PTR_ERR(parent)); + if (unlikely(!dt_object_exists(parent))) + /* The parent object was previously accessed when verifying + * the slave LMV EA. If this condition is true it is because + * the striped directory is being removed. */ + GOTO(log, rc = 1); + *lmv2 = llmv->ll_lmv; lmv2->lmv_hash_type = LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE; rc = lfsck_namespace_set_lmv_master(env, com, parent, lmv2, @@ -1520,7 +1580,7 @@ log: CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant found bad name hash " "on the MDT %x, parent "DFID", name %s, shard_%x "DFID ": rc = %d\n", - lfsck_lfsck2name(lfsck), lfsck_dev_idx(lfsck->li_bottom), + lfsck_lfsck2name(lfsck), lfsck_dev_idx(lfsck), PFID(pfid), name, llmv->ll_lmv.lmv_master_mdt_index, PFID(lfsck_dto2fid(shard)), rc); @@ -1571,7 +1631,7 @@ int lfsck_namespace_scan_shard(const struct lu_env *env, __u16 type; ENTRY; - rc = lfsck_read_stripe_lmv(env, child, lmv); + rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv); if (rc != 0) RETURN(rc == -ENODATA ? 1 : rc); @@ -1592,7 +1652,7 @@ int lfsck_namespace_scan_shard(const struct lu_env *env, args = lfsck->li_args_dir & ~(LUDA_VERIFY | LUDA_VERIFY_DRYRUN); iops = &child->do_index_ops->dio_it; - di = iops->init(env, child, args, BYPASS_CAPA); + di = iops->init(env, child, args); if (IS_ERR(di)) GOTO(out, rc = PTR_ERR(di)); @@ -1603,19 +1663,9 @@ int lfsck_namespace_scan_shard(const struct lu_env *env, rc = 0; while (rc == 0) { - if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) && - cfs_fail_val > 0) { - struct l_wait_info lwi; - - lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), - NULL, NULL); - l_wait_event(thread->t_ctl_waitq, - !thread_is_running(thread), - &lwi); - - if (unlikely(!thread_is_running(thread))) - GOTO(out, rc = 0); - } + if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val) && + unlikely(!thread_is_running(thread))) + GOTO(out, rc = 0); rc = iops->rec(env, di, (struct dt_rec *)ent, args); if (rc == 0) @@ -1637,7 +1687,7 @@ int lfsck_namespace_scan_shard(const struct lu_env *env, ns->ln_flags |= LF_INCONSISTENT; rc = lfsck_namespace_repair_bad_name_hash(env, com, child, llmv, ent->lde_name); - if (rc >= 0) + if (rc == 0) ns->ln_name_hash_repaired++; } @@ -1683,7 +1733,8 @@ out: * \param[in] obj pointer to the object which LMV EA will be checked * \param[in] llmv pointer to buffer holding the slave LMV EA * - * \retval zero for succeed + * \retval positive number if nothing to be done + * \retval zero for success * \retval negative error number on failure */ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env, @@ -1695,8 +1746,8 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env, char *name = info->lti_key; char *name2; struct lu_fid *pfid = &info->lti_fid3; - struct lu_fid *tfid = &info->lti_fid4; const struct lu_fid *cfid = lfsck_dto2fid(obj); + struct lu_fid tfid; struct lfsck_instance *lfsck = com->lc_lfsck; struct lmv_mds_md_v1 *clmv = &llmv->ll_lmv; struct lmv_mds_md_v1 *plmv = &info->lti_lmv; @@ -1712,7 +1763,7 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env, } rc = dt_lookup(env, obj, (struct dt_rec *)pfid, - (const struct dt_key *)dotdot, BYPASS_CAPA); + (const struct dt_key *)dotdot); if (rc != 0 || !fid_is_sane(pfid)) { rc = lfsck_namespace_trace_update(env, com, cfid, LNTF_UNCERTAIN_LMV, true); @@ -1720,7 +1771,9 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env, GOTO(out, rc); } - parent = lfsck_object_find(env, lfsck, pfid); + CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ENGINE_DELAY, cfs_fail_val); + + parent = lfsck_object_find_bottom(env, lfsck, pfid); if (IS_ERR(parent)) { rc = lfsck_namespace_trace_update(env, com, cfid, LNTF_UNCERTAIN_LMV, true); @@ -1728,7 +1781,13 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env, GOTO(out, rc); } - rc = lfsck_read_stripe_lmv(env, parent, plmv); + if (unlikely(!dt_object_exists(parent))) + GOTO(out, rc = 1); + + if (unlikely(!dt_try_as_dir(env, parent))) + GOTO(out, rc = -ENOTDIR); + + rc = lfsck_read_stripe_lmv(env, lfsck, parent, plmv); if (rc != 0) { int rc1; @@ -1777,16 +1836,16 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env, PFID(cfid), clmv->lmv_master_mdt_index); name2 = info->lti_tmpbuf2; - rc = lfsck_links_get_first(env, obj, name, tfid); - if (rc == 0 && strcmp(name, name2) == 0 && lu_fid_eq(pfid, tfid)) { + rc = lfsck_links_get_first(env, obj, name, &tfid); + if (rc == 0 && strcmp(name, name2) == 0 && lu_fid_eq(pfid, &tfid)) { llmv->ll_lmv_verified = 1; GOTO(out, rc); } - rc = dt_lookup(env, parent, (struct dt_rec *)tfid, - (const struct dt_key *)name2, BYPASS_CAPA); - if (rc != 0 || !lu_fid_eq(cfid, tfid)) + rc = dt_lookup(env, parent, (struct dt_rec *)&tfid, + (const struct dt_key *)name2); + if (rc != 0 || !lu_fid_eq(cfid, &tfid)) rc = lfsck_namespace_trace_update(env, com, cfid, LNTF_UNCERTAIN_LMV, true); else @@ -1822,7 +1881,7 @@ out: * \param[in] lnr pointer to the namespace request that contains the * striped directory or the shard * - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env, @@ -1835,11 +1894,12 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env, struct lfsck_lmv *llmv = lnr->lnr_lmv; struct lmv_mds_md_v1 *lmv = &llmv->ll_lmv; struct lmv_mds_md_v1 *lmv2 = &info->lti_lmv2; - struct dt_object *dir = lnr->lnr_obj; - const struct lu_fid *pfid = lfsck_dto2fid(dir); + struct lfsck_assistant_object *lso = lnr->lnr_lar.lar_parent; + const struct lu_fid *pfid = &lso->lso_fid; + struct dt_object *dir = NULL; + struct dt_object *obj = NULL; struct lu_seq_range *range = &info->lti_range; - struct seq_server_site *ss = - lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site); + struct seq_server_site *ss = lfsck_dev_site(lfsck); __u32 stripe_count; __u32 hash_type; int rc = 0; @@ -1849,8 +1909,7 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env, if (llmv->ll_lmv_slave) { if (llmv->ll_lmv_verified) { ns->ln_striped_shards_scanned++; - lfsck_namespace_trace_update(env, com, - lfsck_dto2fid(dir), + lfsck_namespace_trace_update(env, com, pfid, LNTF_UNCERTAIN_LMV | LNTF_RECHECK_NAME_HASH, false); } @@ -1899,23 +1958,31 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env, } if (llmv->ll_lmv_updated) { + if (dir == NULL) { + dir = lfsck_assistant_object_load(env, lfsck, lso); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + + RETURN(rc == -ENOENT ? 0 : rc); + } + } + lmv->lmv_layout_version++; rc = lfsck_namespace_update_lmv(env, com, dir, lmv, false); if (rc != 0) RETURN(rc); ns->ln_striped_dirs_scanned++; - ns->ln_striped_dirs_repaired++; + if (!llmv->ll_counted) + ns->ln_striped_dirs_repaired++; } fld_range_set_mdt(range); for (i = 0; i <= llmv->ll_max_filled_off; i++) { - struct dt_object *obj = NULL; struct lfsck_slave_lmv_rec *lslr = llmv->ll_lslr + i; const struct lu_fid *cfid = &lslr->lslr_fid; const struct lu_name *cname; - struct linkea_data ldata = { 0 }; - int len; + struct linkea_data ldata = { NULL }; int rc1 = 0; bool repair_linkea = false; bool repair_lmvea = false; @@ -1930,13 +1997,27 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env, if (fid_is_zero(cfid)) continue; - len = snprintf(info->lti_tmpbuf, sizeof(info->lti_tmpbuf), - DFID":%u", PFID(cfid), i); - cname = lfsck_name_get_const(env, info->lti_tmpbuf, len); - memcpy(lnr->lnr_name, info->lti_tmpbuf, len); - + lnr->lnr_fid = *cfid; + lnr->lnr_namelen = snprintf(lnr->lnr_name, + lnr->lnr_size - sizeof(*lnr), + DFID":%u", PFID(cfid), i); + cname = lfsck_name_get_const(env, lnr->lnr_name, + lnr->lnr_namelen); obj = lfsck_object_find_bottom(env, lfsck, cfid); if (IS_ERR(obj)) { + if (dir == NULL) { + dir = lfsck_assistant_object_load(env, lfsck, + lso); + if (IS_ERR(dir)) { + if (PTR_ERR(dir) == -ENOENT) + RETURN(0); + + dir = NULL; + } + } else if (lfsck_is_dead_obj(dir)) { + GOTO(out, rc = 0); + } + rc1 = PTR_ERR(obj); goto next; } @@ -1974,7 +2055,7 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env, break; } - rc1 = lfsck_links_read(env, obj, &ldata); + rc1 = lfsck_links_read_with_rec(env, obj, &ldata); if (rc1 == -ENOENT) { create = true; goto repair; @@ -1999,7 +2080,21 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env, repair: if (create) { - rc1 = lfsck_namespace_repair_dangling(env, com, + if (dir == NULL) { + dir = lfsck_assistant_object_load(env, lfsck, + lso); + if (IS_ERR(dir)) { + rc1 = PTR_ERR(dir); + + if (rc1 == -ENOENT) + GOTO(out, rc = 0); + + dir = NULL; + goto next; + } + } + + rc1 = lfsck_namespace_repair_dangling(env, com, dir, obj, lnr); if (rc1 >= 0) { create_repaired = true; @@ -2064,6 +2159,20 @@ repair: } if (rename) { + if (dir == NULL) { + dir = lfsck_assistant_object_load(env, lfsck, + lso); + if (IS_ERR(dir)) { + rc1 = PTR_ERR(dir); + + if (rc1 == -ENOENT) + GOTO(out, rc = 0); + + dir = NULL; + goto next; + } + } + rc1 = lfsck_namespace_repair_dirent(env, com, dir, obj, info->lti_tmpbuf2, lnr->lnr_name, lnr->lnr_type, true, false); @@ -2084,23 +2193,34 @@ repair: if (repair_linkea) { struct lustre_handle lh = { 0 }; - rc1 = linkea_data_new(&ldata, &info->lti_big_buf); - if (rc1 != 0) - goto next; + if (dir == NULL) { + dir = lfsck_assistant_object_load(env, lfsck, + lso); + if (IS_ERR(dir)) { + rc1 = PTR_ERR(dir); + + if (rc1 == -ENOENT) + GOTO(out, rc = 0); - rc1 = linkea_add_buf(&ldata, cname, lfsck_dto2fid(dir)); + dir = NULL; + goto next; + } + } + + rc1 = linkea_links_new(&ldata, &info->lti_big_buf, + cname, lfsck_dto2fid(dir)); if (rc1 != 0) goto next; rc1 = lfsck_ibits_lock(env, lfsck, obj, &lh, MDS_INODELOCK_UPDATE | MDS_INODELOCK_XATTR, LCK_EX); - lfsck_ibits_unlock(&lh, LCK_EX); if (rc1 != 0) goto next; rc1 = lfsck_namespace_rebuild_linkea(env, com, obj, &ldata); + lfsck_ibits_unlock(&lh, LCK_EX); if (rc1 >= 0) { linkea_repaired = true; if (rc1 > 0) @@ -2109,20 +2229,27 @@ repair: } next: - CDEBUG(D_LFSCK, "%s: namespace LFSCK repair the shard " - "%d "DFID" of the striped directory "DFID" with " - "dangling %s/%s, rename %s/%s, llinkea %s/%s, " - "repair_lmvea %s/%s: rc = %d\n", lfsck_lfsck2name(lfsck), - i, PFID(cfid), PFID(&lnr->lnr_fid), - create ? "yes" : "no", create_repaired ? "yes" : "no", - rename ? "yes" : "no", rename_repaired ? "yes" : "no", - repair_linkea ? "yes" : "no", - linkea_repaired ? "yes" : "no", - repair_lmvea ? "yes" : "no", - lmvea_repaired ? "yes" : "no", rc1); - - if (obj != NULL && !IS_ERR(obj)) + if (create || rename || repair_linkea || repair_lmvea) { + CDEBUG(D_LFSCK, "%s: namespace LFSCK repair the shard " + "%d "DFID" of the striped directory "DFID" with " + "dangling %s/%s, rename %s/%s, llinkea %s/%s, " + "repair_lmvea %s/%s: rc = %d\n", + lfsck_lfsck2name(lfsck), + i, PFID(cfid), PFID(pfid), + create ? "yes" : "no", + create_repaired ? "yes" : "no", + rename ? "yes" : "no", + rename_repaired ? "yes" : "no", + repair_linkea ? "yes" : "no", + linkea_repaired ? "yes" : "no", + repair_lmvea ? "yes" : "no", + lmvea_repaired ? "yes" : "no", rc1); + } + + if (obj != NULL && !IS_ERR(obj)) { lfsck_object_put(env, obj); + obj = NULL; + } if (rc1 < 0) { rc = rc1; @@ -2130,7 +2257,16 @@ next: } } - RETURN(rc); + GOTO(out, rc); + +out: + if (obj != NULL && !IS_ERR(obj)) + lfsck_object_put(env, obj); + + if (dir != NULL && !IS_ERR(dir)) + lfsck_object_put(env, dir); + + return rc; } /** @@ -2179,7 +2315,7 @@ next: * \param[in] lnr pointer to the namespace request that contains the * shard's name, parent object, parent's LMV, and ect. * - * \retval zero for succeed + * \retval zero for success * \retval negative error number on failure */ int lfsck_namespace_handle_striped_master(const struct lu_env *env, @@ -2191,8 +2327,9 @@ int lfsck_namespace_handle_striped_master(const struct lu_env *env, struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_namespace *ns = com->lc_file_ram; struct lfsck_lmv *llmv = lnr->lnr_lmv; - struct dt_object *dir = lnr->lnr_obj; - const struct lu_fid *pfid = lfsck_dto2fid(dir); + struct lfsck_assistant_object *lso = lnr->lnr_lar.lar_parent; + const struct lu_fid *pfid = &lso->lso_fid; + struct dt_object *dir; struct dt_object *obj = NULL; struct dt_device *dev = NULL; int shard_idx = 0; @@ -2206,15 +2343,22 @@ int lfsck_namespace_handle_striped_master(const struct lu_env *env, if (unlikely(llmv->ll_ignore)) RETURN(0); + dir = lfsck_assistant_object_load(env, lfsck, lso); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + + RETURN(rc == -ENOENT ? 0 : rc); + } + shard_idx = lfsck_find_mdt_idx_by_fid(env, lfsck, &lnr->lnr_fid); if (shard_idx < 0) GOTO(fail_lmv, rc = shard_idx); - if (shard_idx == lfsck_dev_idx(lfsck->li_bottom)) { + if (shard_idx == lfsck_dev_idx(lfsck)) { if (unlikely(strcmp(lnr->lnr_name, dotdot) == 0)) GOTO(out, rc = 0); - dev = lfsck->li_next; + dev = lfsck->li_bottom; } else { struct lfsck_tgt_desc *ltd; @@ -2228,7 +2372,7 @@ int lfsck_namespace_handle_striped_master(const struct lu_env *env, GOTO(out, rc); } - ltd = LTD_TGT(&lfsck->li_mdt_descs, shard_idx); + ltd = lfsck_ltd2tgt(&lfsck->li_mdt_descs, shard_idx); if (unlikely(ltd == NULL)) { CDEBUG(D_LFSCK, "%s: cannot talk with MDT %x which " "did not join the namespace LFSCK\n", @@ -2242,8 +2386,12 @@ int lfsck_namespace_handle_striped_master(const struct lu_env *env, } obj = lfsck_object_find_by_dev(env, dev, &lnr->lnr_fid); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { + if (lfsck_is_dead_obj(dir)) + RETURN(0); + GOTO(fail_lmv, rc = PTR_ERR(obj)); + } if (!dt_object_exists(obj)) { stripe = lfsck_shard_name_to_index(env, lnr->lnr_name, @@ -2259,7 +2407,7 @@ dangling: if (rc == 0) { memset(lmv, 0, sizeof(*lmv)); lmv->lmv_magic = LMV_MAGIC; - rc = lfsck_record_lmv(env, com, lnr, lmv, stripe, + rc = lfsck_record_lmv(env, com, dir, lnr, lmv, stripe, LSLF_DANGLING, LSLF_NONE, &depth); } @@ -2275,17 +2423,17 @@ dangling: GOTO(out, rc = 0); } - rc = lfsck_read_stripe_lmv(env, obj, lmv); + rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv); if (unlikely(rc == -ENOENT)) /* It may happen when the remote object has been removed, * but the local MDT does not aware of that. */ goto dangling; if (rc == -ENODATA) - rc = lfsck_record_lmv(env, com, lnr, lmv, stripe, + rc = lfsck_record_lmv(env, com, dir, lnr, lmv, stripe, LSLF_NO_LMVEA, LSLF_NONE, &depth); else if (rc == 0) - rc = lfsck_record_lmv(env, com, lnr, lmv, stripe, + rc = lfsck_record_lmv(env, com, dir, lnr, lmv, stripe, lmv->lmv_master_mdt_index != stripe ? LSLF_BAD_INDEX1 : LSLF_NONE, LSLF_NONE, &depth); @@ -2322,13 +2470,12 @@ out: CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant fail to handle " "the shard: "DFID", parent "DFID", name %.*s: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(&lnr->lnr_fid), - PFID(lfsck_dto2fid(lnr->lnr_obj)), - lnr->lnr_namelen, lnr->lnr_name, rc); + PFID(pfid), lnr->lnr_namelen, lnr->lnr_name, rc); if ((rc == -ENOTCONN || rc == -ESHUTDOWN || rc == -EREMCHG || rc == -ETIMEDOUT || rc == -EHOSTDOWN || rc == -EHOSTUNREACH || rc == -EINPROGRESS) && - dev != NULL && dev != lfsck->li_next) + dev != NULL && dev != lfsck->li_bottom) lfsck_lad_set_bitmap(env, com, shard_idx); if (!(lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT)) @@ -2355,5 +2502,7 @@ out: if (obj != NULL && !IS_ERR(obj)) lfsck_object_put(env, obj); + lfsck_object_put(env, dir); + return rc; }