X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_namespace.c;h=5a5eb48a224403aef2b536bfae063c2efc9fc0e5;hp=3f9d3d78a4620c6bb4097a0d89e247cc2099c82e;hb=85be1fae82b515094b60bb20eb48f88989ccc6e9;hpb=9ff2d957982160103b5d885c9a532ad45bdf8d4d diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index 3f9d3d7..5a5eb48 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2012, 2013, Intel Corporation. + * Copyright (c) 2013, 2014, Intel Corporation. */ /* * lustre/lfsck/lfsck_namespace.c @@ -64,8 +64,8 @@ lfsck_namespace_assistant_req_init(struct lfsck_instance *lfsck, return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&lnr->lnr_lar.lar_list); - lu_object_get(&lfsck->li_obj_dir->do_lu); - lnr->lnr_obj = lfsck->li_obj_dir; + lnr->lnr_obj = lfsck_object_get(lfsck->li_obj_dir); + lnr->lnr_lmv = lfsck_lmv_get(lfsck->li_lmv); lnr->lnr_fid = ent->lde_fid; lnr->lnr_oit_cookie = lfsck->li_pos_current.lp_oit_cookie; lnr->lnr_dir_cookie = ent->lde_hash; @@ -84,6 +84,9 @@ static void lfsck_namespace_assistant_req_fini(const struct lu_env *env, struct lfsck_namespace_req *lnr = container_of0(lar, struct lfsck_namespace_req, lnr_lar); + if (lnr->lnr_lmv != NULL) + lfsck_lmv_put(env, lnr->lnr_lmv); + lu_object_put(env, &lnr->lnr_obj->do_lu); OBD_FREE(lnr, lnr->lnr_size); } @@ -131,6 +134,29 @@ static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *dst, dst->ln_bad_type_repaired = le64_to_cpu(src->ln_bad_type_repaired); dst->ln_lost_dirent_repaired = le64_to_cpu(src->ln_lost_dirent_repaired); + dst->ln_striped_dirs_scanned = + le64_to_cpu(src->ln_striped_dirs_scanned); + dst->ln_striped_dirs_repaired = + le64_to_cpu(src->ln_striped_dirs_repaired); + dst->ln_striped_dirs_failed = + le64_to_cpu(src->ln_striped_dirs_failed); + dst->ln_striped_dirs_disabled = + le64_to_cpu(src->ln_striped_dirs_disabled); + 
dst->ln_striped_dirs_skipped = + le64_to_cpu(src->ln_striped_dirs_skipped); + dst->ln_striped_shards_scanned = + le64_to_cpu(src->ln_striped_shards_scanned); + dst->ln_striped_shards_repaired = + le64_to_cpu(src->ln_striped_shards_repaired); + dst->ln_striped_shards_failed = + le64_to_cpu(src->ln_striped_shards_failed); + dst->ln_striped_shards_skipped = + le64_to_cpu(src->ln_striped_shards_skipped); + dst->ln_name_hash_repaired = le64_to_cpu(src->ln_name_hash_repaired); + dst->ln_local_lpf_scanned = le64_to_cpu(src->ln_local_lpf_scanned); + dst->ln_local_lpf_moved = le64_to_cpu(src->ln_local_lpf_moved); + dst->ln_local_lpf_skipped = le64_to_cpu(src->ln_local_lpf_skipped); + dst->ln_local_lpf_failed = le64_to_cpu(src->ln_local_lpf_failed); dst->ln_bitmap_size = le32_to_cpu(src->ln_bitmap_size); } @@ -177,6 +203,29 @@ static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst, dst->ln_bad_type_repaired = cpu_to_le64(src->ln_bad_type_repaired); dst->ln_lost_dirent_repaired = cpu_to_le64(src->ln_lost_dirent_repaired); + dst->ln_striped_dirs_scanned = + cpu_to_le64(src->ln_striped_dirs_scanned); + dst->ln_striped_dirs_repaired = + cpu_to_le64(src->ln_striped_dirs_repaired); + dst->ln_striped_dirs_failed = + cpu_to_le64(src->ln_striped_dirs_failed); + dst->ln_striped_dirs_disabled = + cpu_to_le64(src->ln_striped_dirs_disabled); + dst->ln_striped_dirs_skipped = + cpu_to_le64(src->ln_striped_dirs_skipped); + dst->ln_striped_shards_scanned = + cpu_to_le64(src->ln_striped_shards_scanned); + dst->ln_striped_shards_repaired = + cpu_to_le64(src->ln_striped_shards_repaired); + dst->ln_striped_shards_failed = + cpu_to_le64(src->ln_striped_shards_failed); + dst->ln_striped_shards_skipped = + cpu_to_le64(src->ln_striped_shards_skipped); + dst->ln_name_hash_repaired = cpu_to_le64(src->ln_name_hash_repaired); + dst->ln_local_lpf_scanned = cpu_to_le64(src->ln_local_lpf_scanned); + dst->ln_local_lpf_moved = cpu_to_le64(src->ln_local_lpf_moved); + dst->ln_local_lpf_skipped = 
cpu_to_le64(src->ln_local_lpf_skipped); + dst->ln_local_lpf_failed = cpu_to_le64(src->ln_local_lpf_failed); dst->ln_bitmap_size = cpu_to_le32(src->ln_bitmap_size); } @@ -202,7 +251,7 @@ static void lfsck_namespace_record_failure(const struct lu_env *env, } /** - * Load the MDT bitmap from the lfsck_namespace tracing file. + * Load the MDT bitmap from the lfsck_namespace trace file. * * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component @@ -270,9 +319,19 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env, } /** - * \retval +ve: the lfsck_namespace is broken, the caller should reset it. - * \retval 0: succeed. - * \retval -ve: failed cases. + * Load namespace LFSCK statistics information from the trace file. + * + * For old release (Lustre-2.6 or older), the statistics information was + * stored as XATTR_NAME_LFSCK_NAMESPACE_OLD EA. But in Lustre-2.7, we need + * more statistics information. To avoid confusing old MDT when downgrade, + * Lustre-2.7 stores the namespace LFSCK statistics information as new + * XATTR_NAME_LFSCK_NAMESPACE EA. + * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * + * \retval 0 for success + * \retval negative error number on failure */ static int lfsck_namespace_load(const struct lu_env *env, struct lfsck_component *com) @@ -292,7 +351,7 @@ static int lfsck_namespace_load(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: invalid lfsck_namespace magic " "%#x != %#x\n", lfsck_lfsck2name(com->lc_lfsck), ns->ln_magic, LFSCK_NAMESPACE_MAGIC); - rc = 1; + rc = -ESTALE; } else { rc = 0; } @@ -301,13 +360,22 @@ static int lfsck_namespace_load(const struct lu_env *env, "expected = %d: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), len, rc); if (rc >= 0) - rc = 1; + rc = -ESTALE; + } else { + /* Check whether it is old trace file or not. + * If yes, it should be reset via returning -ESTALE. 
*/ + rc = dt_xattr_get(env, com->lc_obj, + lfsck_buf_get(env, com->lc_file_disk, len), + XATTR_NAME_LFSCK_NAMESPACE_OLD, BYPASS_CAPA); + if (rc >= 0) + rc = -ESTALE; } + return rc; } static int lfsck_namespace_store(const struct lu_env *env, - struct lfsck_component *com) + struct lfsck_component *com, bool init) { struct dt_object *obj = com->lc_obj; struct lfsck_instance *lfsck = com->lc_lfsck; @@ -318,6 +386,9 @@ static int lfsck_namespace_store(const struct lu_env *env, __u32 nbits = 0; int len = com->lc_file_size; int rc; +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0) + struct lu_buf tbuf = { &len, sizeof(len) }; +#endif ENTRY; if (lad != NULL) { @@ -349,6 +420,20 @@ static int lfsck_namespace_store(const struct lu_env *env, GOTO(out, rc); } +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0) + /* To be compatible with old Lustre-2.x MDT (x <= 6), generate dummy + * XATTR_NAME_LFSCK_NAMESPACE_OLD EA, then when downgrade to Lustre-2.x, + * the old LFSCK will find "invalid" XATTR_NAME_LFSCK_NAMESPACE_OLD EA, + * then reset the namespace LFSCK trace file. 
*/ + if (init) { + rc = dt_declare_xattr_set(env, obj, &tbuf, + XATTR_NAME_LFSCK_NAMESPACE_OLD, + LU_XATTR_CREATE, handle); + if (rc != 0) + GOTO(out, rc); + } +#endif + rc = dt_trans_start_local(env, lfsck->li_bottom, handle); if (rc != 0) GOTO(out, rc); @@ -362,6 +447,13 @@ static int lfsck_namespace_store(const struct lu_env *env, XATTR_NAME_LFSCK_BITMAP, 0, handle, BYPASS_CAPA); +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0) + if (rc == 0 && init) + rc = dt_xattr_set(env, obj, &tbuf, + XATTR_NAME_LFSCK_NAMESPACE_OLD, + LU_XATTR_CREATE, handle, BYPASS_CAPA); +#endif + GOTO(out, rc); out: @@ -384,18 +476,18 @@ static int lfsck_namespace_init(const struct lu_env *env, ns->ln_magic = LFSCK_NAMESPACE_MAGIC; ns->ln_status = LS_INIT; down_write(&com->lc_sem); - rc = lfsck_namespace_store(env, com); + rc = lfsck_namespace_store(env, com, true); up_write(&com->lc_sem); return rc; } /** - * Update the namespace LFSCK tracing file for the given @fid + * Update the namespace LFSCK trace file for the given @fid * * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component * \param[in] fid the fid which flags to be updated in the lfsck - * tracing file + * trace file * \param[in] add true if add new flags, otherwise remove flags * * \retval 0 for succeed or nothing to be done @@ -488,7 +580,7 @@ log: dt_trans_stop(env, dev, th); CDEBUG(D_LFSCK, "%s: namespace LFSCK %s flags for "DFID" in the " - "tracing file, flags %x, old %x, new %x: rc = %d\n", + "trace file, flags %x, old %x, new %x: rc = %d\n", lfsck_lfsck2name(lfsck), add ? 
"add" : "del", PFID(fid), (__u32)flags, (__u32)old, (__u32)new, rc); @@ -498,9 +590,9 @@ unlock: return rc; } -static int lfsck_namespace_check_exist(const struct lu_env *env, - struct dt_object *dir, - struct dt_object *obj, const char *name) +int lfsck_namespace_check_exist(const struct lu_env *env, + struct dt_object *dir, + struct dt_object *obj, const char *name) { struct lu_fid *fid = &lfsck_env_info(env)->lti_fid; int rc; @@ -720,6 +812,8 @@ static int lfsck_namespace_filter_linkea_entry(struct linkea_data *ldata, * type "O": The MDT-object has no linkEA, and there is no name * entry that references the MDT-object. * + * type "S": The orphan MDT-object is a shard of a striped directory + * * \see lfsck_layout_recreate_parent() for more types. * * The orphan name will be like: @@ -742,6 +836,7 @@ static int lfsck_namespace_insert_orphan(const struct lu_env *env, struct lu_name *cname = &info->lti_name; struct dt_insert_rec *rec = &info->lti_dt_rec; struct lu_fid *tfid = &info->lti_fid5; + struct lu_attr *la = &info->lti_la3; const struct lu_fid *cfid = lfsck_dto2fid(orphan); const struct lu_fid *pfid; struct lfsck_instance *lfsck = com->lc_lfsck; @@ -845,6 +940,13 @@ static int lfsck_namespace_insert_orphan(const struct lu_env *env, } } + memset(la, 0, sizeof(*la)); + la->la_ctime = cfs_time_current_sec(); + la->la_valid = LA_CTIME; + rc = dt_declare_attr_set(env, orphan, la, th); + if (rc != 0) + GOTO(stop, rc); + rc = dt_trans_start_local(env, dev, th); if (rc != 0) GOTO(stop, rc); @@ -895,6 +997,9 @@ static int lfsck_namespace_insert_orphan(const struct lu_env *env, } } + if (rc == 0) + rc = dt_attr_set(env, orphan, la, th, BYPASS_CAPA); + GOTO(stop, rc = (rc == 0 ? 
1 : rc)); unlock: @@ -994,6 +1099,10 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); + rc = dt_declare_attr_set(env, child, la, th); + if (rc != 0) + GOTO(stop, rc); + rc = dt_trans_start_local(env, dev, th); if (rc != 0) GOTO(stop, rc); @@ -1013,6 +1122,10 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env, la->la_ctime = cfs_time_current_sec(); rc = dt_attr_set(env, parent, la, th, BYPASS_CAPA); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_attr_set(env, child, la, th, BYPASS_CAPA); GOTO(stop, rc = (rc == 0 ? 1 : rc)); @@ -1059,6 +1172,8 @@ log: * * \see lfsck_layout_recreate_parent() for more types. * + * \param[in] lmv pointer to master LMV EA that will be set to the orphan + * * \retval positive number for repaired cases * \retval 0 if needs to repair nothing * \retval negative error number on failure @@ -1066,7 +1181,8 @@ log: static int lfsck_namespace_create_orphan_remote(const struct lu_env *env, struct lfsck_component *com, struct dt_object *orphan, - __u32 type) + __u32 type, + struct lmv_mds_md_v1 *lmv) { struct lfsck_thread_info *info = lfsck_env_info(env); struct lfsck_request *lr = &info->lti_lr; @@ -1115,15 +1231,25 @@ static int lfsck_namespace_create_orphan_remote(const struct lu_env *env, lr->lr_active = LFSCK_TYPE_NAMESPACE; lr->lr_fid = *fid; lr->lr_type = type; + if (lmv != NULL) { + lr->lr_hash_type = lmv->lmv_hash_type; + lr->lr_stripe_count = lmv->lmv_stripe_count; + lr->lr_layout_version = lmv->lmv_layout_version; + memcpy(lr->lr_pool_name, lmv->lmv_pool_name, + sizeof(lr->lr_pool_name)); + } ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); ptlrpc_req_finished(req); - if (rc == 0) + if (rc == 0) { + orphan->do_lu.lo_header->loh_attr |= LOHA_EXISTS; rc = 1; - else if (rc == -EEXIST) + } else if (rc == -EEXIST) { + orphan->do_lu.lo_header->loh_attr |= LOHA_EXISTS; rc = 0; + } GOTO(out, rc); @@ -1158,13 +1284,16 @@ out: * * \see lfsck_layout_recreate_parent() for 
more types. * + * \param[in] lmv pointer to master LMV EA that will be set to the orphan + * * \retval positive number for repaired cases * \retval negative error number on failure */ static int lfsck_namespace_create_orphan_local(const struct lu_env *env, struct lfsck_component *com, struct dt_object *orphan, - __u32 type) + __u32 type, + struct lmv_mds_md_v1 *lmv) { struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_attr *la = &info->lti_la; @@ -1173,6 +1302,7 @@ static int lfsck_namespace_create_orphan_local(const struct lu_env *env, struct lu_name *cname = &info->lti_name2; struct dt_insert_rec *rec = &info->lti_dt_rec; struct lu_fid *tfid = &info->lti_fid; + struct lmv_mds_md_v1 *lmv2 = &info->lti_lmv2; const struct lu_fid *cfid = lfsck_dto2fid(orphan); const struct lu_fid *pfid; struct lfsck_instance *lfsck = com->lc_lfsck; @@ -1183,6 +1313,7 @@ static int lfsck_namespace_create_orphan_local(const struct lu_env *env, struct lustre_handle lh = { 0 }; struct linkea_data ldata = { 0 }; struct lu_buf linkea_buf; + struct lu_buf lmv_buf; char name[32]; int namelen; int idx = 0; @@ -1259,6 +1390,17 @@ static int lfsck_namespace_create_orphan_local(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); + if (lmv != NULL) { + lmv->lmv_magic = LMV_MAGIC; + lmv->lmv_master_mdt_index = lfsck_dev_idx(dev); + lfsck_lmv_header_cpu_to_le(lmv2, lmv); + lfsck_buf_init(&lmv_buf, lmv2, sizeof(*lmv2)); + rc = dt_declare_xattr_set(env, child, &lmv_buf, + XATTR_NAME_LMV, 0, th); + if (rc != 0) + GOTO(stop, rc); + } + lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf, ldata.ld_leh->leh_len); rc = dt_declare_xattr_set(env, child, &linkea_buf, @@ -1308,6 +1450,13 @@ static int lfsck_namespace_create_orphan_local(const struct lu_env *env, GOTO(unlock2, rc); } + if (lmv != NULL) { + rc = dt_xattr_set(env, child, &lmv_buf, XATTR_NAME_LMV, 0, + th, BYPASS_CAPA); + if (rc != 0) + GOTO(unlock2, rc); + } + rc = dt_xattr_set(env, child, &linkea_buf, XATTR_NAME_LINK, 0, th, 
BYPASS_CAPA); dt_write_unlock(env, child); @@ -1365,23 +1514,26 @@ log: * * \see lfsck_layout_recreate_parent() for more types. * + * \param[in] lmv pointer to master LMV EA that will be set to the orphan + * * \retval positive number for repaired cases * \retval 0 if needs to repair nothing * \retval negative error number on failure */ static int lfsck_namespace_create_orphan(const struct lu_env *env, struct lfsck_component *com, - struct dt_object *orphan) + struct dt_object *orphan, + struct lmv_mds_md_v1 *lmv) { struct lfsck_namespace *ns = com->lc_file_ram; int rc; if (dt_object_remote(orphan)) rc = lfsck_namespace_create_orphan_remote(env, com, orphan, - S_IFDIR); + S_IFDIR, lmv); else rc = lfsck_namespace_create_orphan_local(env, com, orphan, - S_IFDIR); + S_IFDIR, lmv); if (rc != 0) ns->ln_flags |= LF_INCONSISTENT; @@ -1821,8 +1973,6 @@ int lfsck_namespace_rebuild_linkea(const struct lu_env *env, int rc = 0; ENTRY; - LASSERT(!dt_object_remote(obj)); - th = dt_trans_create(env, dev); if (IS_ERR(th)) GOTO(log, rc = PTR_ERR(th)); @@ -1992,7 +2142,7 @@ stop: dt_trans_stop(env, dev, th); /* We are not sure whether the child will become orphan or not. - * Record it in the LFSCK tracing file for further checking in + * Record it in the LFSCK trace file for further checking in * the second-stage scanning. 
*/ if (!update && !dec && rc == 0) lfsck_namespace_trace_update(env, com, cfid, @@ -2235,6 +2385,7 @@ lfsck_namespace_dsd_single(const struct lu_env *env, struct lfsck_namespace *ns = com->lc_file_ram; struct lfsck_instance *lfsck = com->lc_lfsck; struct dt_object *parent = NULL; + struct lmv_mds_md_v1 *lmv; int rc = 0; ENTRY; @@ -2279,8 +2430,33 @@ lfsck_namespace_dsd_single(const struct lu_env *env, lfsck_ibits_unlock(lh, LCK_EX); lost_parent: + lmv = &info->lti_lmv; + rc = lfsck_read_stripe_lmv(env, child, lmv); + if (rc != 0 && rc != -ENODATA) + GOTO(out, rc); + + if (rc == -ENODATA || lmv->lmv_magic != LMV_MAGIC_STRIPE) { + lmv = NULL; + } else if (lfsck_shard_name_to_index(env, + cname->ln_name, cname->ln_namelen, + S_IFDIR, cfid) < 0) { + /* It is an invalid name entry, we + * cannot trust the parent also. */ + rc = lfsck_namespace_shrink_linkea(env, com, child, + ldata, cname, tfid, true); + if (rc < 0) + GOTO(out, rc); + + snprintf(info->lti_tmpbuf, sizeof(info->lti_tmpbuf), + "-"DFID, PFID(pfid)); + rc = lfsck_namespace_insert_orphan(env, com, child, + info->lti_tmpbuf, "S", NULL); + + GOTO(out, rc); + } + /* Create the lost parent as an orphan. */ - rc = lfsck_namespace_create_orphan(env, com, parent); + rc = lfsck_namespace_create_orphan(env, com, parent, lmv); if (rc >= 0) { /* Add the missing name entry to the parent. */ rc = lfsck_namespace_insert_normal(env, com, parent, @@ -2342,6 +2518,28 @@ lost_parent: } lfsck_ibits_unlock(lh, LCK_EX); + rc = lfsck_namespace_check_name(env, parent, child, cname); + if (rc == -ENOENT) + goto lost_parent; + + if (rc < 0) + GOTO(out, rc); + + /* It is an invalid name entry, drop it. 
*/ + if (unlikely(rc > 0)) { + rc = lfsck_namespace_shrink_linkea(env, com, child, + ldata, cname, tfid, true); + if (rc >= 0) { + snprintf(info->lti_tmpbuf, + sizeof(info->lti_tmpbuf), + "-"DFID, PFID(pfid)); + rc = lfsck_namespace_insert_orphan(env, com, + child, info->lti_tmpbuf, "D", NULL); + } + + GOTO(out, rc); + } + /* Add the missing name entry back to the namespace. */ rc = lfsck_namespace_insert_normal(env, com, parent, child, cname->ln_name); @@ -2398,6 +2596,9 @@ lost_parent: GOTO(out, rc); } + if (fid_is_zero(pfid)) + GOTO(out, rc = 0); + /* The ".." name entry is wrong, update it. */ if (!lu_fid_eq(pfid, lfsck_dto2fid(parent))) { if (!lustre_handle_is_used(lh) && retry != NULL) { @@ -2463,7 +2664,8 @@ lfsck_namespace_dsd_multiple(const struct lu_env *env, struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct dt_object *parent = NULL; struct linkea_data ldata_new = { 0 }; - int count = 0; + int dirent_count = 0; + int linkea_count = 0; int rc = 0; bool once = true; ENTRY; @@ -2477,6 +2679,7 @@ again: /* Drop invalid linkEA entry. */ if (!fid_is_sane(tfid)) { linkea_del_buf(ldata, cname); + linkea_count++; continue; } @@ -2510,6 +2713,7 @@ again: * child to be visible via other parent, then * remove this linkEA entry. */ linkea_del_buf(ldata, cname); + linkea_count++; continue; } @@ -2520,6 +2724,7 @@ again: if (unlikely(!dt_try_as_dir(env, parent))) { lfsck_object_put(env, parent); linkea_del_buf(ldata, cname); + linkea_count++; continue; } @@ -2567,6 +2772,7 @@ rebuild: RETURN(rc); linkea_del_buf(ldata, cname); + linkea_count++; linkea_first_entry(ldata); /* There may be some invalid dangling name entries under * other parent directories, remove all of them. 
*/ @@ -2603,13 +2809,13 @@ rebuild: goto next; } - count += rc; + dirent_count += rc; next: linkea_del_buf(ldata, cname); } - ns->ln_dirent_repaired += count; + ns->ln_dirent_repaired += dirent_count; RETURN(rc); } @@ -2630,10 +2836,15 @@ next: linkea_del_buf(ldata, cname); } + linkea_first_entry(ldata); if (ldata->ld_leh->leh_reccount == 1) { rc = lfsck_namespace_dsd_single(env, com, child, pfid, ldata, lh, type, NULL); + if (rc == 0 && fid_is_zero(pfid) && linkea_count > 0) + rc = lfsck_namespace_rebuild_linkea(env, com, child, + ldata); + RETURN(rc); } @@ -2646,7 +2857,6 @@ next: RETURN(rc); } - linkea_first_entry(ldata); /* If the dangling name entry for the orphan directory object has * been remvoed, then just check whether the directory object is * still under the .lustre/lost+found/MDTxxxx/ or not. */ @@ -2672,13 +2882,13 @@ next: * If all the known name entries have been verified, then the object's hard * link attribute should match the object's linkEA entries count unless the * object's has too much hard link to be recorded in the linkEA. Such cases - * should have been marked in the LFSCK tracing file. Otherwise, trust the + * should have been marked in the LFSCK trace file. Otherwise, trust the * linkEA to update the object's nlink attribute. 
* * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component * \param[in] obj pointer to the dt_object to be handled - * \param[in,out] nlink pointer to buffer to object's hard lock count before + * \param[in,out] la pointer to buffer to object's attribute before * and after the repairing * * \retval positive number for repaired cases @@ -2687,10 +2897,10 @@ next: */ static int lfsck_namespace_repair_nlink(const struct lu_env *env, struct lfsck_component *com, - struct dt_object *obj, __u32 *nlink) + struct dt_object *obj, + struct lu_attr *la) { struct lfsck_thread_info *info = lfsck_env_info(env); - struct lu_attr *la = &info->lti_la3; struct lu_fid *tfid = &info->lti_fid3; struct lfsck_namespace *ns = com->lc_file_ram; struct lfsck_instance *lfsck = com->lc_lfsck; @@ -2700,7 +2910,7 @@ static int lfsck_namespace_repair_nlink(const struct lu_env *env, struct thandle *th = NULL; struct linkea_data ldata = { 0 }; struct lustre_handle lh = { 0 }; - __u32 old = *nlink; + __u32 old = la->la_nlink; int rc = 0; __u8 flags; ENTRY; @@ -2751,17 +2961,19 @@ static int lfsck_namespace_repair_nlink(const struct lu_env *env, if (flags & LNTF_SKIP_NLINK) GOTO(unlock, rc = 0); - rc = lfsck_links_read2(env, child, &ldata); - if (rc == -ENODATA) - GOTO(unlock, rc = 0); + rc = dt_attr_get(env, child, la, BYPASS_CAPA); + if (rc != 0) + GOTO(unlock, rc = (rc == -ENOENT ? 0 : rc)); + rc = lfsck_links_read2(env, child, &ldata); if (rc != 0) - GOTO(unlock, rc); + GOTO(unlock, rc = (rc == -ENODATA ? 
0 : rc)); - if (*nlink == ldata.ld_leh->leh_reccount) + if (la->la_nlink == ldata.ld_leh->leh_reccount || + unlikely(la->la_nlink == 0)) GOTO(unlock, rc = 0); - la->la_nlink = *nlink = ldata.ld_leh->leh_reccount; + la->la_nlink = ldata.ld_leh->leh_reccount; if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN) GOTO(unlock, rc = 1); @@ -2782,7 +2994,7 @@ log: CDEBUG(D_LFSCK, "%s: namespace LFSCK repaired the object "DFID"'s " "nlink count from %u to %u: rc = %d\n", - lfsck_lfsck2name(lfsck), PFID(cfid), old, *nlink, rc); + lfsck_lfsck2name(lfsck), PFID(cfid), old, la->la_nlink, rc); if (rc != 0) ns->ln_flags |= LF_INCONSISTENT; @@ -2847,6 +3059,22 @@ static int lfsck_namespace_double_scan_dir(const struct lu_env *env, LASSERT(!dt_object_remote(child)); + if (flags & LNTF_UNCERTAIN_LMV) { + if (flags & LNTF_RECHECK_NAME_HASH) { + rc = lfsck_namespace_scan_shard(env, com, child); + if (rc < 0) + RETURN(rc); + + ns->ln_striped_shards_scanned++; + } else { + ns->ln_striped_shards_skipped++; + } + } + + flags &= ~(LNTF_RECHECK_NAME_HASH | LNTF_UNCERTAIN_LMV); + if (flags == 0) + RETURN(0); + if (flags & (LNTF_CHECK_LINKEA | LNTF_CHECK_PARENT) && !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) { CDEBUG(D_LFSCK, "%s: some MDT(s) maybe NOT take part in the" @@ -2899,6 +3127,8 @@ lock: } else if (lfsck->li_lpf_obj != NULL && lu_fid_eq(pfid, lfsck_dto2fid(lfsck->li_lpf_obj))) { lpf = true; + } else if (unlikely(!fid_is_sane(pfid))) { + fid_zero(pfid); } rc = lfsck_links_read(env, child, &ldata); @@ -3143,7 +3373,7 @@ lost_parent: /* Create the lost parent as an orphan. */ rc = lfsck_namespace_create_orphan(env, com, - parent); + parent, NULL); if (rc < 0) { lfsck_object_put(env, parent); @@ -3274,6 +3504,30 @@ lost_parent: GOTO(out, rc = 0); } + rc = lfsck_namespace_check_name(env, parent, child, cname); + if (rc == -ENOENT) + goto lost_parent; + + if (rc < 0) { + lfsck_object_put(env, parent); + + GOTO(out, rc); + } + + /* It is an invalid name entry, drop it. 
*/ + if (unlikely(rc > 0)) { + lfsck_object_put(env, parent); + rc = lfsck_namespace_shrink_linkea(env, com, child, + &ldata, cname, pfid, true); + if (rc < 0) + GOTO(out, rc); + + if (rc > 0) + repaired = true; + + continue; + } + /* Add the missing name entry back to the namespace. */ rc = lfsck_namespace_insert_normal(env, com, parent, child, cname->ln_name); @@ -3318,37 +3572,38 @@ out: count = ldata.ld_leh->leh_reccount; } - /* If the LFSCK is marked as LF_INCOMPLETE, then means some - * MDT has ever tried to verify some remote MDT-object that - * resides on this MDT, but this MDT failed to respond such - * request. So means there may be some remote name entry on - * other MDT that references this object with another name, - * so we cannot know whether this linkEA is valid or not. - * So keep it there and maybe resolved when next LFSCK run. */ - if (count == 0 && !(ns->ln_flags & LF_INCOMPLETE)) { - /* If the child becomes orphan, then insert it into - * the global .lustre/lost+found/MDTxxxx directory. */ - rc = lfsck_namespace_insert_orphan(env, com, child, "", "O", - &count); - if (rc < 0) - return rc; + if (count == 0) { + /* If the LFSCK is marked as LF_INCOMPLETE, then means some + * MDT has ever tried to verify some remote MDT-object that + * resides on this MDT, but this MDT failed to respond such + * request. So means there may be some remote name entry on + * other MDT that references this object with another name, + * so we cannot know whether this linkEA is valid or not. + * So keep it there and maybe resolved when next LFSCK run. */ + if (!(ns->ln_flags & LF_INCOMPLETE)) { + /* If the child becomes orphan, then insert it into + * the global .lustre/lost+found/MDTxxxx directory. 
*/ + rc = lfsck_namespace_insert_orphan(env, com, child, + "", "O", &count); + if (rc < 0) + return rc; - if (rc > 0) { - ns->ln_mul_ref_repaired++; - repaired = true; + if (rc > 0) { + ns->ln_mul_ref_repaired++; + repaired = true; + } } - } - - rc = dt_attr_get(env, child, la, BYPASS_CAPA); - if (rc != 0) - return rc; + } else { + rc = dt_attr_get(env, child, la, BYPASS_CAPA); + if (rc != 0) + return rc; - if (la->la_nlink != count) { - rc = lfsck_namespace_repair_nlink(env, com, child, - &la->la_nlink); - if (rc > 0) { - ns->ln_objs_nlink_repaired++; - rc = 0; + if (la->la_nlink != 0 && la->la_nlink != count) { + rc = lfsck_namespace_repair_nlink(env, com, child, la); + if (rc > 0) { + ns->ln_objs_nlink_repaired++; + rc = 0; + } } } @@ -3388,6 +3643,20 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m, "multiple_referenced_repaired: "LPU64"\n" "bad_file_type_repaired: "LPU64"\n" "lost_dirent_repaired: "LPU64"\n" + "local_lost_found_scanned: "LPU64"\n" + "local_lost_found_moved: "LPU64"\n" + "local_lost_found_skipped: "LPU64"\n" + "local_lost_found_failed: "LPU64"\n" + "striped_dirs_scanned: "LPU64"\n" + "striped_dirs_repaired: "LPU64"\n" + "striped_dirs_failed: "LPU64"\n" + "striped_dirs_disabled: "LPU64"\n" + "striped_dirs_skipped: "LPU64"\n" + "striped_shards_scanned: "LPU64"\n" + "striped_shards_repaired: "LPU64"\n" + "striped_shards_failed: "LPU64"\n" + "striped_shards_skipped: "LPU64"\n" + "name_hash_repaired: "LPU64"\n" "success_count: %u\n" "run_time_phase1: %u seconds\n" "run_time_phase2: %u seconds\n", @@ -3409,11 +3678,48 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m, ns->ln_mul_ref_repaired, ns->ln_bad_type_repaired, ns->ln_lost_dirent_repaired, + ns->ln_local_lpf_scanned, + ns->ln_local_lpf_moved, + ns->ln_local_lpf_skipped, + ns->ln_local_lpf_failed, + ns->ln_striped_dirs_scanned, + ns->ln_striped_dirs_repaired, + ns->ln_striped_dirs_failed, + ns->ln_striped_dirs_disabled, + ns->ln_striped_dirs_skipped, + 
ns->ln_striped_shards_scanned, + ns->ln_striped_shards_repaired, + ns->ln_striped_shards_failed, + ns->ln_striped_shards_skipped, + ns->ln_name_hash_repaired, ns->ln_success_count, time_phase1, time_phase2); } +static void lfsck_namespace_release_lmv(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_namespace *ns = com->lc_file_ram; + + while (!list_empty(&lfsck->li_list_lmv)) { + struct lfsck_lmv_unit *llu; + struct lfsck_lmv *llmv; + + llu = list_entry(lfsck->li_list_lmv.next, + struct lfsck_lmv_unit, llu_link); + llmv = &llu->llu_lmv; + + LASSERTF(atomic_read(&llmv->ll_ref) == 1, + "still in using: %u\n", + atomic_read(&llmv->ll_ref)); + + ns->ln_striped_dirs_skipped++; + lfsck_lmv_put(env, llmv); + } +} + /* namespace APIs */ static int lfsck_namespace_reset(const struct lu_env *env, @@ -3470,7 +3776,7 @@ static int lfsck_namespace_reset(const struct lu_env *env, lad->lad_incomplete = 0; CFS_RESET_BITMAP(lad->lad_bitmap); - rc = lfsck_namespace_store(env, com); + rc = lfsck_namespace_store(env, com, true); GOTO(out, rc); @@ -3498,6 +3804,90 @@ lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com, up_write(&com->lc_sem); } +static void lfsck_namespace_close_dir(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_assistant_data *lad = com->lc_data; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_lmv *llmv = lfsck->li_lmv; + struct lfsck_namespace_req *lnr; + __u32 size = + sizeof(*lnr) + LFSCK_TMPBUF_LEN; + bool wakeup = false; + ENTRY; + + if (llmv == NULL) + RETURN_EXIT; + + OBD_ALLOC(lnr, size); + if (lnr == NULL) { + ns->ln_striped_dirs_skipped++; + + RETURN_EXIT; + } + + /* Generate a dummy request to indicate that all shards' name entry + * in this striped directory has been scanned for the first time. 
*/ + INIT_LIST_HEAD(&lnr->lnr_lar.lar_list); + lnr->lnr_obj = lfsck_object_get(lfsck->li_obj_dir); + lnr->lnr_lmv = lfsck_lmv_get(llmv); + lnr->lnr_fid = *lfsck_dto2fid(lfsck->li_obj_dir); + lnr->lnr_oit_cookie = lfsck->li_pos_current.lp_oit_cookie; + lnr->lnr_dir_cookie = MDS_DIR_END_OFF; + lnr->lnr_size = size; + + spin_lock(&lad->lad_lock); + if (lad->lad_assistant_status < 0) { + spin_unlock(&lad->lad_lock); + lfsck_namespace_assistant_req_fini(env, &lnr->lnr_lar); + ns->ln_striped_dirs_skipped++; + + RETURN_EXIT; + } + + list_add_tail(&lnr->lnr_lar.lar_list, &lad->lad_req_list); + if (lad->lad_prefetched == 0) + wakeup = true; + + lad->lad_prefetched++; + spin_unlock(&lad->lad_lock); + if (wakeup) + wake_up_all(&lad->lad_thread.t_ctl_waitq); + + EXIT; +} + +static int lfsck_namespace_open_dir(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_lmv *llmv = lfsck->li_lmv; + int rc = 0; + ENTRY; + + if (llmv == NULL) + RETURN(0); + + if (llmv->ll_lmv_master) { + struct lmv_mds_md_v1 *lmv = &llmv->ll_lmv; + + if (lmv->lmv_master_mdt_index != + lfsck_dev_idx(lfsck->li_bottom)) { + lmv->lmv_master_mdt_index = + lfsck_dev_idx(lfsck->li_bottom); + ns->ln_flags |= LF_INCONSISTENT; + llmv->ll_lmv_updated = 1; + } + } else { + rc = lfsck_namespace_verify_stripe_slave(env, com, + lfsck->li_obj_dir, llmv); + } + + RETURN(rc > 0 ? 
0 : rc); +} + static int lfsck_namespace_checkpoint(const struct lu_env *env, struct lfsck_component *com, bool init) { @@ -3523,7 +3913,7 @@ static int lfsck_namespace_checkpoint(const struct lu_env *env, com->lc_new_checked = 0; } - rc = lfsck_namespace_store(env, com); + rc = lfsck_namespace_store(env, com, false); up_write(&com->lc_sem); log: @@ -3594,6 +3984,16 @@ static int lfsck_namespace_prep(const struct lu_env *env, ns->ln_mul_ref_repaired = 0; ns->ln_bad_type_repaired = 0; ns->ln_lost_dirent_repaired = 0; + ns->ln_striped_dirs_scanned = 0; + ns->ln_striped_dirs_repaired = 0; + ns->ln_striped_dirs_failed = 0; + ns->ln_striped_dirs_disabled = 0; + ns->ln_striped_dirs_skipped = 0; + ns->ln_striped_shards_scanned = 0; + ns->ln_striped_shards_repaired = 0; + ns->ln_striped_shards_failed = 0; + ns->ln_striped_shards_skipped = 0; + ns->ln_name_hash_repaired = 0; fid_zero(&ns->ln_fid_latest_scanned_phase2); if (list_empty(&com->lc_link_dir)) list_add_tail(&com->lc_link_dir, @@ -3673,7 +4073,7 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env, /* zero-linkEA object may be orphan, but it also maybe because * of upgrading. Currently, we cannot record it for double scan. - * Because it may cause the LFSCK tracing file to be too large. */ + * Because it may cause the LFSCK trace file to be too large. */ if (rc == -ENODATA) { if (S_ISDIR(lfsck_object_type(obj))) GOTO(out, rc = 0); @@ -3682,7 +4082,10 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env, if (rc != 0) GOTO(out, rc); - if (la->la_nlink > 1) + /* "la_ctime" == 1 means that it has ever been removed from + * backend /lost+found directory but not been added back to + * the normal namespace yet. 
*/ + if (la->la_nlink > 1 || unlikely(la->la_ctime == 1)) rc = lfsck_namespace_trace_update(env, com, fid, LNTF_CHECK_LINKEA, true); @@ -3721,7 +4124,10 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env, if (rc != 0) GOTO(out, rc); - if (la->la_nlink > 1) + /* "la_ctime" == 1 means that it has ever been + * removed from backend /lost+found directory but + * not been added back to the normal namespace yet. */ + if (la->la_nlink > 1 || unlikely(la->la_ctime == 1)) rc = lfsck_namespace_trace_update(env, com, fid, LNTF_CHECK_LINKEA, true); } @@ -3745,9 +4151,28 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env, struct lfsck_component *com, struct lu_dirent *ent, __u16 type) { - struct lfsck_assistant_data *lad = com->lc_data; + struct lfsck_assistant_data *lad = com->lc_data; + struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_namespace_req *lnr; - bool wakeup = false; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct ptlrpc_thread *mthread = &lfsck->li_thread; + struct ptlrpc_thread *athread = &lad->lad_thread; + struct l_wait_info lwi = { 0 }; + bool wakeup = false; + + l_wait_event(mthread->t_ctl_waitq, + bk->lb_async_windows == 0 || + lad->lad_prefetched < bk->lb_async_windows || + !thread_is_running(mthread) || + thread_is_stopped(athread), + &lwi); + + if (unlikely(!thread_is_running(mthread)) || + thread_is_stopped(athread)) + return 0; + + if (unlikely(lfsck_is_dead_obj(lfsck->li_obj_dir))) + return 0; lnr = lfsck_namespace_assistant_req_init(com->lc_lfsck, ent, type); if (IS_ERR(lnr)) { @@ -3792,6 +4217,8 @@ static int lfsck_namespace_post(const struct lu_env *env, lfsck_post_generic(env, com, &result); down_write(&com->lc_sem); + lfsck_namespace_release_lmv(env, com); + spin_lock(&lfsck->li_lock); if (!init) ns->ln_pos_last_checkpoint = lfsck->li_pos_checkpoint; @@ -3825,7 +4252,7 @@ static int lfsck_namespace_post(const struct lu_env *env, com->lc_new_checked = 0; } - rc = lfsck_namespace_store(env, com); + 
rc = lfsck_namespace_store(env, com, false); up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: namespace LFSCK post done: rc = %d\n", @@ -3897,7 +4324,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, lfsck->li_time_last_checkpoint; __u64 checked = ns->ln_items_checked + com->lc_new_checked; __u64 speed = checked; - __u64 new_checked = com->lc_new_checked * HZ; + __u64 new_checked = com->lc_new_checked * + msecs_to_jiffies(MSEC_PER_SEC); __u32 rtime = ns->ln_run_time_phase1 + cfs_duration_sec(duration + HALF_SEC); @@ -3951,7 +4379,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, com->lc_new_checked; __u64 speed1 = ns->ln_items_checked; __u64 speed2 = checked; - __u64 new_checked = com->lc_new_checked * HZ; + __u64 new_checked = com->lc_new_checked * + msecs_to_jiffies(MSEC_PER_SEC); __u32 rtime = ns->ln_run_time_phase2 + cfs_duration_sec(duration + HALF_SEC); @@ -4040,6 +4469,7 @@ static void lfsck_namespace_data_release(const struct lu_env *env, LASSERT(list_empty(&lad->lad_req_list)); com->lc_data = NULL; + lfsck_namespace_release_lmv(env, com); spin_lock(<ds->ltd_lock); list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase1_list, @@ -4078,6 +4508,8 @@ static void lfsck_namespace_quit(const struct lu_env *env, thread_is_stopped(&lad->lad_thread)); LASSERT(list_empty(&lad->lad_req_list)); + lfsck_namespace_release_lmv(env, com); + spin_lock(<ds->ltd_lock); list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase1_list, ltd_namespace_phase_list) { @@ -4107,6 +4539,7 @@ static int lfsck_namespace_in_notify(const struct lu_env *env, switch (lr->lr_event) { case LE_CREATE_ORPHAN: { struct dt_object *orphan = NULL; + struct lmv_mds_md_v1 *lmv; CDEBUG(D_LFSCK, "%s: namespace LFSCK handling notify from " "MDT %x to create orphan"DFID" with type %o\n", @@ -4120,8 +4553,20 @@ static int lfsck_namespace_in_notify(const struct lu_env *env, if (dt_object_exists(orphan)) GOTO(out_create, rc = -EEXIST); + if 
(lr->lr_stripe_count > 0) { + lmv = &lfsck_env_info(env)->lti_lmv; + memset(lmv, 0, sizeof(*lmv)); + lmv->lmv_hash_type = lr->lr_hash_type; + lmv->lmv_stripe_count = lr->lr_stripe_count; + lmv->lmv_layout_version = lr->lr_layout_version; + memcpy(lmv->lmv_pool_name, lr->lr_pool_name, + sizeof(lmv->lmv_pool_name)); + } else { + lmv = NULL; + } + rc = lfsck_namespace_create_orphan_local(env, com, orphan, - lr->lr_type); + lr->lr_type, lmv); GOTO(out_create, rc = (rc == 1) ? 0 : rc); @@ -4200,6 +4645,28 @@ log: return 0; } + case LE_SET_LMV_MASTER: { + struct dt_object *obj; + + obj = lfsck_object_find_by_dev(env, lfsck->li_bottom, + &lr->lr_fid); + if (IS_ERR(obj)) + RETURN(PTR_ERR(obj)); + + rc = lfsck_namespace_notify_lmv_master_local(env, com, obj); + lfsck_object_put(env, obj); + + RETURN(rc > 0 ? 0 : rc); + } + case LE_SET_LMV_SLAVE: { + if (!(lr->lr_flags & LEF_RECHECK_NAME_HASH)) + ns->ln_striped_shards_repaired++; + + rc = lfsck_namespace_trace_update(env, com, &lr->lr_fid, + LNTF_RECHECK_NAME_HASH, true); + + RETURN(rc > 0 ? 
0 : rc); + } case LE_PHASE1_DONE: case LE_PHASE2_DONE: case LE_PEER_EXIT: @@ -4289,6 +4756,8 @@ static int lfsck_namespace_query(const struct lu_env *env, static struct lfsck_operations lfsck_namespace_ops = { .lfsck_reset = lfsck_namespace_reset, .lfsck_fail = lfsck_namespace_fail, + .lfsck_close_dir = lfsck_namespace_close_dir, + .lfsck_open_dir = lfsck_namespace_open_dir, .lfsck_checkpoint = lfsck_namespace_checkpoint, .lfsck_prep = lfsck_namespace_prep, .lfsck_exec_oit = lfsck_namespace_exec_oit, @@ -4336,11 +4805,13 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, struct dt_allocation_hint *hint = &info->lti_hint; struct dt_object_format *dof = &info->lti_dof; struct dt_insert_rec *rec = &info->lti_dt_rec; + struct lmv_mds_md_v1 *lmv2 = &info->lti_lmv2; struct dt_object *parent = lnr->lnr_obj; const struct lu_name *cname; struct linkea_data ldata = { 0 }; struct lustre_handle lh = { 0 }; struct lu_buf linkea_buf; + struct lu_buf lmv_buf; struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct dt_device *dev = lfsck_obj2dt_dev(child); @@ -4428,9 +4899,31 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, rc = dt_declare_ref_add(env, child, th); if (rc != 0) GOTO(stop, rc); + + /* 5a. generate slave LMV EA. */ + if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) { + int idx; + + idx = lfsck_shard_name_to_index(env, + lnr->lnr_name, lnr->lnr_namelen, + type, lfsck_dto2fid(child)); + if (unlikely(idx < 0)) + GOTO(stop, rc = idx); + + *lmv2 = lnr->lnr_lmv->ll_lmv; + lmv2->lmv_magic = LMV_MAGIC_STRIPE; + lmv2->lmv_master_mdt_index = idx; + + lfsck_lmv_header_cpu_to_le(lmv2, lmv2); + lfsck_buf_init(&lmv_buf, lmv2, sizeof(*lmv2)); + rc = dt_declare_xattr_set(env, child, &lmv_buf, + XATTR_NAME_LMV, 0, th); + if (rc != 0) + GOTO(stop, rc); + } } - /* 5a. insert linkEA for child */ + /* 6a. 
insert linkEA for child */ lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf, ldata.ld_leh->leh_len); rc = dt_declare_xattr_set(env, child, &linkea_buf, @@ -4472,9 +4965,17 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, rc = dt_ref_add(env, child, th); if (rc != 0) GOTO(unlock, rc); + + /* 5b. generate slave LMV EA. */ + if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) { + rc = dt_xattr_set(env, child, &lmv_buf, XATTR_NAME_LMV, + 0, th, BYPASS_CAPA); + if (rc != 0) + GOTO(unlock, rc); + } } - /* 5b. insert linkEA for child. */ + /* 6b. insert linkEA for child. */ rc = dt_xattr_set(env, child, &linkea_buf, XATTR_NAME_LINK, 0, th, BYPASS_CAPA); @@ -4529,6 +5030,7 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env, bool remove; bool newdata; bool log = false; + bool bad_hash = false; int idx = 0; int count = 0; int rc; @@ -4555,11 +5057,23 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env, GOTO(out, rc); } + if (unlikely(lnr->lnr_dir_cookie == MDS_DIR_END_OFF)) { + rc = lfsck_namespace_striped_dir_rescan(env, com, lnr); + + RETURN(rc); + } + if (lnr->lnr_name[0] == '.' 
&& (lnr->lnr_namelen == 1 || fid_seq_is_dot(fid_seq(&lnr->lnr_fid)))) GOTO(out, rc = 0); - idx = lfsck_find_mdt_idx_by_fid(env, lfsck, &lnr->lnr_fid); + if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) { + rc = lfsck_namespace_handle_striped_master(env, com, lnr); + + RETURN(rc); + } + + idx = lfsck_find_mdt_idx_by_fid(env, lfsck, &lnr->lnr_fid); if (idx < 0) GOTO(out, rc = idx); @@ -4604,6 +5118,13 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env, dangling: rc = lfsck_namespace_check_exist(env, dir, obj, lnr->lnr_name); if (rc == 0) { + if (!lfsck_is_valid_slave_name_entry(env, lnr->lnr_lmv, + lnr->lnr_name, lnr->lnr_namelen)) { + type = LNIT_BAD_DIRENT; + + GOTO(out, rc); + } + type = LNIT_DANGLING; rc = lfsck_namespace_repair_dangling(env, com, obj, lnr); @@ -4674,6 +5195,16 @@ again: ns->ln_flags |= LF_INCONSISTENT; + /* If the name entry hash does not match the slave striped + * directory, and the name entry does not match also, then + * it is quite possible that name entry is corrupted. */ + if (!lfsck_is_valid_slave_name_entry(env, lnr->lnr_lmv, + lnr->lnr_name, lnr->lnr_namelen)) { + type = LNIT_BAD_DIRENT; + + GOTO(stop, rc = 0); + } + /* If the file type stored in the name entry does not match * the file type claimed by the object, and the object does * not recognize the name entry, then it is quite possible @@ -4828,6 +5359,19 @@ stop: out: lfsck_ibits_unlock(&lh, LCK_EX); + if (!name_is_dot_or_dotdot(lnr->lnr_name, lnr->lnr_namelen) && + !lfsck_is_valid_slave_name_entry(env, lnr->lnr_lmv, + lnr->lnr_name, lnr->lnr_namelen) && + type != LNIT_BAD_DIRENT) { + ns->ln_flags |= LF_INCONSISTENT; + + log = false; + rc = lfsck_namespace_repair_bad_name_hash(env, com, dir, + lnr->lnr_lmv, lnr->lnr_name); + if (rc >= 0) + bad_hash = true; + } + if (rc >= 0) { switch (type) { case LNIT_BAD_TYPE: @@ -4905,6 +5449,21 @@ out: false); } + if (bad_hash) { + ns->ln_name_hash_repaired++; + + /* Not count repeatedly. 
*/ + if (!repaired) + ns->ln_items_repaired++; + + if (bk->lb_param & LPF_DRYRUN && + lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) + lfsck_pos_fill(env, lfsck, + &ns->ln_pos_first_inconsistent, + false); + } + + rc = 0; } up_write(&com->lc_sem); @@ -4915,6 +5474,442 @@ out: return rc; } +/** + * Handle one orphan under the backend /lost+found directory + * + * Insert the orphan FID into the namespace LFSCK trace file for further + * processing (via the subsequent namespace LFSCK second-stage scanning). + * At the same time, remove the orphan name entry from backend /lost+found + * directory. There is an interval between the orphan name entry removed + * from the backend /lost+found directory and the orphan FID in the LFSCK + * trace file handled. In such interval, the LFSCK can be reset, then + * all the FIDs recorded in the namespace LFSCK trace file will be dropped. + * To guarantee that the orphans can be found when LFSCK run next time + * without e2fsck again, when remove the orphan name entry, the LFSCK + * will set the orphan's ctime attribute as 1. Since normal applications + * cannot change the object's ctime attribute as 1. Then when LFSCK run + * next time, it can record the object (that ctime is 1) in the namespace + * LFSCK trace file during the first-stage scanning. 
+ * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * \param[in] parent pointer to the object for the backend /lost+found + * \param[in] ent pointer to the name entry for the target under the + * backend /lost+found + * + * \retval positive for repaired + * \retval 0 if needs to repair nothing + * \retval negative error number on failure + */ +static int lfsck_namespace_scan_local_lpf_one(const struct lu_env *env, + struct lfsck_component *com, + struct dt_object *parent, + struct lu_dirent *ent) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lu_fid *key = &info->lti_fid; + struct lu_attr *la = &info->lti_la; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct dt_object *obj = com->lc_obj; + struct dt_device *dev = lfsck->li_bottom; + struct dt_object *child = NULL; + struct thandle *th = NULL; + int rc = 0; + __u8 flags = 0; + bool exist = false; + ENTRY; + + child = lfsck_object_find_by_dev(env, dev, &ent->lde_fid); + if (IS_ERR(child)) + RETURN(PTR_ERR(child)); + + LASSERT(dt_object_exists(child)); + LASSERT(!dt_object_remote(child)); + + fid_cpu_to_be(key, &ent->lde_fid); + rc = dt_lookup(env, obj, (struct dt_rec *)&flags, + (const struct dt_key *)key, BYPASS_CAPA); + if (rc == 0) { + exist = true; + flags |= LNTF_CHECK_ORPHAN; + } else if (rc == -ENOENT) { + flags = LNTF_CHECK_ORPHAN; + } else { + GOTO(out, rc); + } + + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + GOTO(out, rc = PTR_ERR(th)); + + /* a1. remove name entry from backend /lost+found */ + rc = dt_declare_delete(env, parent, + (const struct dt_key *)ent->lde_name, th); + if (rc != 0) + GOTO(stop, rc); + + if (S_ISDIR(lfsck_object_type(child))) { + /* a2. decrease parent's nlink */ + rc = dt_declare_ref_del(env, parent, th); + if (rc != 0) + GOTO(stop, rc); + } + + if (exist) { + /* a3. remove child's FID from the LFSCK trace file. 
*/ + rc = dt_declare_delete(env, obj, + (const struct dt_key *)key, th); + if (rc != 0) + GOTO(stop, rc); + } else { + /* a4. set child's ctime as 1 */ + memset(la, 0, sizeof(*la)); + la->la_ctime = 1; + la->la_valid = LA_CTIME; + rc = dt_declare_attr_set(env, child, la, th); + if (rc != 0) + GOTO(stop, rc); + } + + /* a5. insert child's FID into the LFSCK trace file. */ + rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags, + (const struct dt_key *)key, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc != 0) + GOTO(stop, rc); + + /* b1. remove name entry from backend /lost+found */ + rc = dt_delete(env, parent, (const struct dt_key *)ent->lde_name, th, + BYPASS_CAPA); + if (rc != 0) + GOTO(stop, rc); + + if (S_ISDIR(lfsck_object_type(child))) { + /* b2. decrease parent's nlink */ + dt_write_lock(env, parent, 0); + rc = dt_ref_del(env, parent, th); + dt_write_unlock(env, parent); + if (rc != 0) + GOTO(stop, rc); + } + + if (exist) { + /* b3. remove child's FID from the LFSCK trace file. */ + rc = dt_delete(env, obj, (const struct dt_key *)key, th, + BYPASS_CAPA); + if (rc != 0) + GOTO(stop, rc); + } else { + /* b4. set child's ctime as 1 */ + rc = dt_attr_set(env, child, la, th, BYPASS_CAPA); + if (rc != 0) + GOTO(stop, rc); + } + + /* b5. insert child's FID into the LFSCK trace file. */ + rc = dt_insert(env, obj, (const struct dt_rec *)&flags, + (const struct dt_key *)key, th, BYPASS_CAPA, 1); + + GOTO(stop, rc = (rc == 0 ? 1 : rc)); + +stop: + dt_trans_stop(env, dev, th); + +out: + lu_object_put(env, &child->do_lu); + + return rc; +} + +/** + * Handle orphans under the backend /lost+found directory + * + * Some backend checker, such as e2fsck for ldiskfs may find some orphans + * and put them under the backend /lost+found directory that is invisible + * to client. 
The LFSCK will scan such directory, for the original client + * visible orphans, add their fids into the namespace LFSCK trace file, + * then the subsequent namespace LFSCK second-stage scanning can handle + * them as other objects to be double scanned: either move back to normal + * namespace, or to the global visible orphan directory: + * /ROOT/.lustre/lost+found/MDTxxxx/ + * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + */ +static void lfsck_namespace_scan_local_lpf(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lu_dirent *ent = + (struct lu_dirent *)info->lti_key; + struct lu_seq_range *range = &info->lti_range; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct ptlrpc_thread *thread = &lfsck->li_thread; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct dt_device *dev = lfsck->li_bottom; + struct lfsck_namespace *ns = com->lc_file_ram; + struct dt_object *parent; + const struct dt_it_ops *iops; + struct dt_it *di; + struct seq_server_site *ss = + lu_site2seq(dev->dd_lu_dev.ld_site); + __u64 cookie; + int rc = 0; + __u16 type; + ENTRY; + + parent = lfsck_object_find_by_dev(env, dev, &LU_BACKEND_LPF_FID); + if (IS_ERR(parent)) { + CERROR("%s: fail to find backend /lost+found: rc = %ld\n", + lfsck_lfsck2name(lfsck), PTR_ERR(parent)); + RETURN_EXIT; + } + + /* It is normal that the /lost+found does not exist for ZFS backend. 
*/ + if (!dt_object_exists(parent)) + GOTO(out, rc = 0); + + if (unlikely(!dt_try_as_dir(env, parent))) + GOTO(out, rc = -ENOTDIR); + + CDEBUG(D_LFSCK, "%s: start to scan backend /lost+found\n", + lfsck_lfsck2name(lfsck)); + + com->lc_new_scanned = 0; + iops = &parent->do_index_ops->dio_it; + di = iops->init(env, parent, LUDA_64BITHASH | LUDA_TYPE, BYPASS_CAPA); + if (IS_ERR(di)) + GOTO(out, rc = PTR_ERR(di)); + + rc = iops->load(env, di, 0); + if (rc == 0) + rc = iops->next(env, di); + else if (rc > 0) + rc = 0; + + while (rc == 0) { + if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) && + cfs_fail_val > 0) { + struct l_wait_info lwi; + + lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), + NULL, NULL); + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread), + &lwi); + + if (unlikely(!thread_is_running(thread))) + break; + } + + rc = iops->rec(env, di, (struct dt_rec *)ent, + LUDA_64BITHASH | LUDA_TYPE); + if (rc == 0) + rc = lfsck_unpack_ent(ent, &cookie, &type); + + if (unlikely(rc != 0)) { + CDEBUG(D_LFSCK, "%s: fail to iterate backend " + "/lost+found: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + + goto skip; + } + + /* skip dot and dotdot entries */ + if (name_is_dot_or_dotdot(ent->lde_name, ent->lde_namelen)) + goto next; + + if (!fid_seq_in_fldb(fid_seq(&ent->lde_fid))) + goto skip; + + if (fid_is_norm(&ent->lde_fid)) { + fld_range_set_mdt(range); + rc = fld_local_lookup(env, ss->ss_server_fld, + fid_seq(&ent->lde_fid), range); + if (rc != 0) + goto skip; + } else if (lfsck_dev_idx(dev) != 0) { + /* If the returned FID is IGIF, then there are three + * possible cases: + * + * 1) The object is upgraded from old Lustre-1.8 with + * IGIF assigned to such object. + * 2) The object is a backend local object and is + * invisible to client. + * 3) The object lost its LMV EA, and since there is + * no FID-in-dirent for the orphan in the backend + * /lost+found directory, then the low layer will + * return IGIF for such object. 
+ * + * For MDTx (x != 0), it is either case 2) or case 3), + * but from the LFSCK view, they are indistinguishable. + * To be safe, the LFSCK will keep it there and report + * some message, then the administrator can handle that + * further. + * + * For MDT0, it is more possible the case 1). The LFSCK + * will handle the orphan as an upgraded object. */ + CDEBUG(D_LFSCK, "%s: the orphan %.*s with IGIF "DFID + "in the backend /lost+found on the MDT %04x, " + "to be safe, skip it.\n", + lfsck_lfsck2name(lfsck), ent->lde_namelen, + ent->lde_name, PFID(&ent->lde_fid), + lfsck_dev_idx(dev)); + goto skip; + } + + rc = lfsck_namespace_scan_local_lpf_one(env, com, parent, ent); + +skip: + down_write(&com->lc_sem); + com->lc_new_scanned++; + ns->ln_local_lpf_scanned++; + if (rc > 0) + ns->ln_local_lpf_moved++; + else if (rc == 0) + ns->ln_local_lpf_skipped++; + else + ns->ln_local_lpf_failed++; + up_write(&com->lc_sem); + + if (rc < 0 && bk->lb_param & LPF_FAILOUT) + break; + +next: + lfsck_control_speed_by_self(com); + if (unlikely(!thread_is_running(thread))) { + rc = 0; + break; + } + + rc = iops->next(env, di); + } + + iops->put(env, di); + iops->fini(env, di); + + EXIT; + +out: + CDEBUG(D_LFSCK, "%s: stop to scan backend /lost+found: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + + lu_object_put(env, &parent->do_lu); +} + +/** + * Rescan the striped directory after the master LMV EA reset. + * + * Sometimes, the master LMV EA of the striped directory maybe lost, so when + * the namespace LFSCK engine scan the striped directory for the first time, + * it will be regarded as a normal directory. As the LFSCK processing, some + * other LFSCK instance on other MDT will find the shard of this striped dir, + * and find that the master MDT-object of the striped directory lost its LMV + * EA, then such remote LFSCK instance will regenerate the master LMV EA and + * notify the LFSCK instance on this MDT to rescan the striped directory. 
+ * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * \param[in] llu the lfsck_lmv_unit that contains the striped directory + * to be rescanned. + * + * \retval positive number for success + * \retval 0 for LFSCK stopped/paused + * \retval negative error number on failure + */ +static int lfsck_namespace_rescan_striped_dir(const struct lu_env *env, + struct lfsck_component *com, + struct lfsck_lmv_unit *llu) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_assistant_data *lad = com->lc_data; + struct dt_object *dir; + const struct dt_it_ops *iops; + struct dt_it *di; + struct lu_dirent *ent = + (struct lu_dirent *)info->lti_key; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct ptlrpc_thread *thread = &lfsck->li_thread; + struct lfsck_namespace_req *lnr; + struct lfsck_assistant_req *lar; + int rc; + __u16 type; + ENTRY; + + LASSERT(list_empty(&lad->lad_req_list)); + + lfsck->li_lmv = &llu->llu_lmv; + lfsck->li_obj_dir = lfsck_object_get(llu->llu_obj); + rc = lfsck_open_dir(env, lfsck, 0); + if (rc != 0) + RETURN(rc); + + dir = lfsck->li_obj_dir; + di = lfsck->li_di_dir; + iops = &dir->do_index_ops->dio_it; + do { + rc = iops->rec(env, di, (struct dt_rec *)ent, + lfsck->li_args_dir); + if (rc == 0) + rc = lfsck_unpack_ent(ent, &lfsck->li_cookie_dir, + &type); + + if (rc != 0) { + if (bk->lb_param & LPF_FAILOUT) + GOTO(out, rc); + + goto next; + } + + if (name_is_dot_or_dotdot(ent->lde_name, ent->lde_namelen)) + goto next; + + lnr = lfsck_namespace_assistant_req_init(lfsck, ent, type); + if (IS_ERR(lnr)) { + if (bk->lb_param & LPF_FAILOUT) + GOTO(out, rc = PTR_ERR(lnr)); + + goto next; + } + + lar = &lnr->lnr_lar; + rc = lfsck_namespace_assistant_handler_p1(env, com, lar); + lfsck_namespace_assistant_req_fini(env, lar); + if (rc != 0 && bk->lb_param & LPF_FAILOUT) + GOTO(out, rc); + + if (unlikely(!thread_is_running(thread))) + 
GOTO(out, rc = 0); + +next: + rc = iops->next(env, di); + } while (rc == 0); + +out: + lfsck_close_dir(env, lfsck, rc); + if (rc <= 0) + RETURN(rc); + + /* The close_dir() may insert a dummy lnr in the lad->lad_req_list. */ + if (list_empty(&lad->lad_req_list)) + RETURN(1); + + spin_lock(&lad->lad_lock); + lar = list_entry(lad->lad_req_list.next, struct lfsck_assistant_req, + lar_list); + list_del_init(&lar->lar_list); + spin_unlock(&lad->lad_lock); + + rc = lfsck_namespace_assistant_handler_p1(env, com, lar); + lfsck_namespace_assistant_req_fini(env, lar); + + RETURN(rc == 0 ? 1 : rc); +} + static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env, struct lfsck_component *com) { @@ -4932,9 +5927,25 @@ static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env, __u8 flags = 0; ENTRY; + while (!list_empty(&lfsck->li_list_lmv)) { + struct lfsck_lmv_unit *llu; + + spin_lock(&lfsck->li_lock); + llu = list_entry(lfsck->li_list_lmv.next, + struct lfsck_lmv_unit, llu_link); + list_del_init(&llu->llu_link); + spin_unlock(&lfsck->li_lock); + + rc = lfsck_namespace_rescan_striped_dir(env, com, llu); + if (rc <= 0) + RETURN(rc); + } + CDEBUG(D_LFSCK, "%s: namespace LFSCK phase2 scan start\n", lfsck_lfsck2name(lfsck)); + lfsck_namespace_scan_local_lpf(env, com); + com->lc_new_checked = 0; com->lc_new_scanned = 0; com->lc_time_last_checkpoint = cfs_time_current(); @@ -4977,7 +5988,7 @@ static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env, goto checkpoint; } - target = lfsck_object_find(env, lfsck, &fid); + target = lfsck_object_find_by_dev(env, lfsck->li_bottom, &fid); if (IS_ERR(target)) { rc = PTR_ERR(target); goto checkpoint; @@ -5019,7 +6030,7 @@ checkpoint: ns->ln_time_last_checkpoint = cfs_time_current_sec(); ns->ln_objs_checked_phase2 += com->lc_new_checked; com->lc_new_checked = 0; - rc = lfsck_namespace_store(env, com); + rc = lfsck_namespace_store(env, com, false); up_write(&com->lc_sem); if (rc != 0) GOTO(put, rc); @@ 
-5101,7 +6112,7 @@ static int lfsck_namespace_double_scan_result(const struct lu_env *env, ns->ln_status = LS_FAILED; } - rc = lfsck_namespace_store(env, com); + rc = lfsck_namespace_store(env, com, false); up_write(&com->lc_sem); return rc; @@ -5552,10 +6563,10 @@ int lfsck_namespace_setup(const struct lu_env *env, GOTO(out, rc); rc = lfsck_namespace_load(env, com); - if (rc > 0) - rc = lfsck_namespace_reset(env, com, true); - else if (rc == -ENODATA) + if (rc == -ENODATA) rc = lfsck_namespace_init(env, com); + else if (rc < 0) + rc = lfsck_namespace_reset(env, com, true); if (rc != 0) GOTO(out, rc);