X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_namespace.c;h=973ca5c76273a47f2f89a094303b4f91c43ff297;hp=fa448bba3e13af886b07d0105137653b90949f57;hb=59842b15b028246d9d20fb9b8d276e16fffc908c;hpb=afcf3026c6ad203b9882eaeac76326357f26fe71 diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index fa448bb..973ca5c 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2013, 2015, Intel Corporation. + * Copyright (c) 2013, 2017, Intel Corporation. */ /* * lustre/lfsck/lfsck_namespace.c @@ -30,26 +30,25 @@ #define DEBUG_SUBSYSTEM S_LFSCK -#include #include #include #include #include #include #include -#include #include "lfsck_internal.h" #define LFSCK_NAMESPACE_MAGIC_V1 0xA0629D03 #define LFSCK_NAMESPACE_MAGIC_V2 0xA0621A0B +#define LFSCK_NAMESPACE_MAGIC_V3 0xA06249FF /* For Lustre-2.x (x <= 6), the namespace LFSCK used LFSCK_NAMESPACE_MAGIC_V1 * as the trace file magic. When downgrade to such old release, the old LFSCK * will not recognize the new LFSCK_NAMESPACE_MAGIC_V2 in the new trace file, * then it will reset the whole LFSCK, and will not cause start failure. The * similar case will happen when upgrade from such old release. */ -#define LFSCK_NAMESPACE_MAGIC LFSCK_NAMESPACE_MAGIC_V2 +#define LFSCK_NAMESPACE_MAGIC LFSCK_NAMESPACE_MAGIC_V3 enum lfsck_nameentry_check { LFSCK_NAMEENTRY_DEAD = 1, /* The object has been unlinked. 
*/ @@ -104,8 +103,8 @@ static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *dst, dst->ln_status = le32_to_cpu(src->ln_status); dst->ln_flags = le32_to_cpu(src->ln_flags); dst->ln_success_count = le32_to_cpu(src->ln_success_count); - dst->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1); - dst->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2); + dst->ln_run_time_phase1 = le64_to_cpu(src->ln_run_time_phase1); + dst->ln_run_time_phase2 = le64_to_cpu(src->ln_run_time_phase2); dst->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete); dst->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start); dst->ln_time_last_checkpoint = @@ -164,6 +163,11 @@ static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *dst, dst->ln_local_lpf_skipped = le64_to_cpu(src->ln_local_lpf_skipped); dst->ln_local_lpf_failed = le64_to_cpu(src->ln_local_lpf_failed); dst->ln_bitmap_size = le32_to_cpu(src->ln_bitmap_size); + dst->ln_time_latest_reset = le64_to_cpu(src->ln_time_latest_reset); + dst->ln_linkea_overflow_cleared = + le64_to_cpu(src->ln_linkea_overflow_cleared); + dst->ln_agent_entries_repaired = + le64_to_cpu(src->ln_agent_entries_repaired); } static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst, @@ -173,8 +177,8 @@ static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst, dst->ln_status = cpu_to_le32(src->ln_status); dst->ln_flags = cpu_to_le32(src->ln_flags); dst->ln_success_count = cpu_to_le32(src->ln_success_count); - dst->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1); - dst->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2); + dst->ln_run_time_phase1 = cpu_to_le64(src->ln_run_time_phase1); + dst->ln_run_time_phase2 = cpu_to_le64(src->ln_run_time_phase2); dst->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete); dst->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start); dst->ln_time_last_checkpoint = @@ -233,6 +237,11 @@ static void lfsck_namespace_cpu_to_le(struct 
lfsck_namespace *dst, dst->ln_local_lpf_skipped = cpu_to_le64(src->ln_local_lpf_skipped); dst->ln_local_lpf_failed = cpu_to_le64(src->ln_local_lpf_failed); dst->ln_bitmap_size = cpu_to_le32(src->ln_bitmap_size); + dst->ln_time_latest_reset = cpu_to_le64(src->ln_time_latest_reset); + dst->ln_linkea_overflow_cleared = + cpu_to_le64(src->ln_linkea_overflow_cleared); + dst->ln_agent_entries_repaired = + cpu_to_le64(src->ln_agent_entries_repaired); } static void lfsck_namespace_record_failure(const struct lu_env *env, @@ -248,7 +257,7 @@ static void lfsck_namespace_record_failure(const struct lu_env *env, ns->ln_pos_first_inconsistent = pos; CDEBUG(D_LFSCK, "%s: namespace LFSCK hit first non-repaired " - "inconsistency at the pos ["LPU64", "DFID", "LPX64"]\n", + "inconsistency at the pos [%llu, "DFID", %#llx]\n", lfsck_lfsck2name(lfsck), ns->ln_pos_first_inconsistent.lp_oit_cookie, PFID(&ns->ln_pos_first_inconsistent.lp_dir_parent), @@ -271,7 +280,7 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env, struct dt_object *obj = com->lc_obj; struct lfsck_assistant_data *lad = com->lc_data; struct lfsck_namespace *ns = com->lc_file_ram; - cfs_bitmap_t *bitmap = lad->lad_bitmap; + struct cfs_bitmap *bitmap = lad->lad_bitmap; ssize_t size; __u32 nbits; int rc; @@ -288,7 +297,7 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env, if (nbits > bitmap->size) { __u32 new_bits = bitmap->size; - cfs_bitmap_t *new_bitmap; + struct cfs_bitmap *new_bitmap; while (new_bits < nbits) new_bits <<= 1; @@ -303,7 +312,7 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env, } if (ns->ln_bitmap_size == 0) { - lad->lad_incomplete = 0; + clear_bit(LAD_INCOMPLETE, &lad->lad_flags); CFS_RESET_BITMAP(bitmap); RETURN(0); @@ -317,9 +326,9 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env, RETURN(rc >= 0 ? 
-EINVAL : rc); if (cfs_bitmap_check_empty(bitmap)) - lad->lad_incomplete = 0; + clear_bit(LAD_INCOMPLETE, &lad->lad_flags); else - lad->lad_incomplete = 1; + set_bit(LAD_INCOMPLETE, &lad->lad_flags); RETURN(0); } @@ -327,12 +336,6 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env, /** * Load namespace LFSCK statistics information from the trace file. * - * For old release (Lustre-2.6 or older), the statistics information was - * stored as XATTR_NAME_LFSCK_NAMESPACE_OLD EA. But in Lustre-2.7, we need - * more statistics information. To avoid confusing old MDT when downgrade, - * Lustre-2.7 stores the namespace LFSCK statistics information as new - * XATTR_NAME_LFSCK_NAMESPACE EA. - * * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component * @@ -367,35 +370,24 @@ static int lfsck_namespace_load(const struct lu_env *env, lfsck_lfsck2name(com->lc_lfsck), len, rc); if (rc >= 0) rc = -ESTALE; - } else { - /* Check whether it is old trace file or not. - * If yes, it should be reset via returning -ESTALE. 
*/ - rc = dt_xattr_get(env, com->lc_obj, - lfsck_buf_get(env, com->lc_file_disk, len), - XATTR_NAME_LFSCK_NAMESPACE_OLD); - if (rc >= 0) - rc = -ESTALE; } return rc; } static int lfsck_namespace_store(const struct lu_env *env, - struct lfsck_component *com, bool init) + struct lfsck_component *com) { struct dt_object *obj = com->lc_obj; struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_namespace *ns = com->lc_file_ram; struct lfsck_assistant_data *lad = com->lc_data; struct dt_device *dev = lfsck_obj2dev(obj); - cfs_bitmap_t *bitmap = NULL; + struct cfs_bitmap *bitmap = NULL; struct thandle *handle; __u32 nbits = 0; int len = com->lc_file_size; int rc; -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0) - struct lu_buf tbuf = { &len, sizeof(len) }; -#endif ENTRY; if (lad != NULL) { @@ -427,20 +419,6 @@ static int lfsck_namespace_store(const struct lu_env *env, GOTO(out, rc); } -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0) - /* To be compatible with old Lustre-2.x MDT (x <= 6), generate dummy - * XATTR_NAME_LFSCK_NAMESPACE_OLD EA, then when downgrade to Lustre-2.x, - * the old LFSCK will find "invalid" XATTR_NAME_LFSCK_NAMESPACE_OLD EA, - * then reset the namespace LFSCK trace file. 
*/ - if (init) { - rc = dt_declare_xattr_set(env, obj, &tbuf, - XATTR_NAME_LFSCK_NAMESPACE_OLD, - LU_XATTR_CREATE, handle); - if (rc != 0) - GOTO(out, rc); - } -#endif - rc = dt_trans_start_local(env, dev, handle); if (rc != 0) GOTO(out, rc); @@ -453,13 +431,6 @@ static int lfsck_namespace_store(const struct lu_env *env, lfsck_buf_get(env, bitmap->data, nbits >> 3), XATTR_NAME_LFSCK_BITMAP, 0, handle); -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0) - if (rc == 0 && init) - rc = dt_xattr_set(env, obj, &tbuf, - XATTR_NAME_LFSCK_NAMESPACE_OLD, - LU_XATTR_CREATE, handle); -#endif - GOTO(out, rc); out: @@ -472,64 +443,6 @@ log: return rc; } -static struct dt_object * -lfsck_namespace_load_one_trace_file(const struct lu_env *env, - struct lfsck_component *com, - struct dt_object *parent, - const char *name, bool reset) -{ - struct lfsck_instance *lfsck = com->lc_lfsck; - struct dt_object *obj; - int rc; - - if (reset) { - rc = local_object_unlink(env, lfsck->li_bottom, parent, name); - if (rc != 0 && rc != -ENOENT) - return ERR_PTR(rc); - } - - obj = local_index_find_or_create(env, lfsck->li_los, parent, name, - S_IFREG | S_IRUGO | S_IWUSR, - &dt_lfsck_features); - - return obj; -} - -static int lfsck_namespace_load_sub_trace_files(const struct lu_env *env, - struct lfsck_component *com, - bool reset) -{ - char *name = lfsck_env_info(env)->lti_key; - struct lfsck_sub_trace_obj *lsto; - struct dt_object *obj; - int rc; - int i; - - for (i = 0, lsto = &com->lc_sub_trace_objs[0]; - i < LFSCK_STF_COUNT; i++, lsto++) { - snprintf(name, NAME_MAX, "%s_%02d", LFSCK_NAMESPACE, i); - if (lsto->lsto_obj != NULL) { - if (!reset) - continue; - - lfsck_object_put(env, lsto->lsto_obj); - lsto->lsto_obj = NULL; - } - - obj = lfsck_namespace_load_one_trace_file(env, com, - com->lc_lfsck->li_lfsck_dir, name, reset); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - lsto->lsto_obj = obj; - rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_features); - if (rc != 0) - return rc; - } - 
- return 0; -} - static int lfsck_namespace_init(const struct lu_env *env, struct lfsck_component *com) { @@ -539,11 +452,13 @@ static int lfsck_namespace_init(const struct lu_env *env, memset(ns, 0, sizeof(*ns)); ns->ln_magic = LFSCK_NAMESPACE_MAGIC; ns->ln_status = LS_INIT; + ns->ln_time_latest_reset = ktime_get_real_seconds(); down_write(&com->lc_sem); - rc = lfsck_namespace_store(env, com, true); - up_write(&com->lc_sem); + rc = lfsck_namespace_store(env, com); if (rc == 0) - rc = lfsck_namespace_load_sub_trace_files(env, com, true); + rc = lfsck_load_sub_trace_files(env, com, + &dt_lfsck_namespace_features, LFSCK_NAMESPACE, true); + up_write(&com->lc_sem); return rc; } @@ -582,9 +497,15 @@ int lfsck_namespace_trace_update(const struct lu_env *env, RETURN(0); idx = lfsck_sub_trace_file_fid2idx(fid); + mutex_lock(&com->lc_sub_trace_objs[idx].lsto_mutex); obj = com->lc_sub_trace_objs[idx].lsto_obj; + if (unlikely(obj == NULL)) { + mutex_unlock(&com->lc_sub_trace_objs[idx].lsto_mutex); + RETURN(0); + } + + lfsck_object_get(obj); dev = lfsck_obj2dev(obj); - mutex_lock(&com->lc_sub_trace_objs[idx].lsto_mutex); fid_cpu_to_be(key, fid); rc = dt_lookup(env, obj, (struct dt_rec *)&old, (const struct dt_key *)key); @@ -641,7 +562,7 @@ int lfsck_namespace_trace_update(const struct lu_env *env, if (new != 0) { rc = dt_insert(env, obj, (const struct dt_rec *)&new, - (const struct dt_key *)key, th, 1); + (const struct dt_key *)key, th); if (rc != 0) GOTO(log, rc); } @@ -659,6 +580,7 @@ log: unlock: mutex_unlock(&com->lc_sub_trace_objs[idx].lsto_mutex); + lfsck_object_put(env, obj); return rc; } @@ -694,20 +616,23 @@ static int lfsck_declare_namespace_exec_dir(const struct lu_env *env, { int rc; + /* For remote updating LINKEA, there may be further LFSCK action + * on remote MDT after the updating, so update the LINKEA ASAP. */ + if (dt_object_remote(obj)) + handle->th_sync = 1; + /* For destroying all invalid linkEA entries. 
*/ rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle); - if (rc != 0) - return rc; - - /* For insert new linkEA entry. */ - rc = dt_declare_xattr_set(env, obj, - lfsck_buf_get_const(env, NULL, DEFAULT_LINKEA_SIZE), + if (rc == 0) + /* For insert new linkEA entry. */ + rc = dt_declare_xattr_set(env, obj, + lfsck_buf_get_const(env, NULL, MAX_LINKEA_SIZE), XATTR_NAME_LINK, 0, handle); return rc; } int __lfsck_links_read(const struct lu_env *env, struct dt_object *obj, - struct linkea_data *ldata) + struct linkea_data *ldata, bool with_rec) { int rc; @@ -721,7 +646,10 @@ int __lfsck_links_read(const struct lu_env *env, struct dt_object *obj, if (rc == -ERANGE) { /* Buf was too small, figure out what we need. */ rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LINK); - if (rc <= 0) + if (unlikely(rc == 0)) + return -ENODATA; + + if (rc < 0) return rc; lu_buf_realloc(ldata->ld_buf, rc); @@ -731,8 +659,15 @@ int __lfsck_links_read(const struct lu_env *env, struct dt_object *obj, rc = dt_xattr_get(env, obj, ldata->ld_buf, XATTR_NAME_LINK); } - if (rc > 0) - rc = linkea_init(ldata); + if (unlikely(rc == 0)) + return -ENODATA; + + if (rc > 0) { + if (with_rec) + rc = linkea_init_with_rec(ldata); + else + rc = linkea_init(ldata); + } return rc; } @@ -807,23 +742,97 @@ log: static int lfsck_links_write(const struct lu_env *env, struct dt_object *obj, struct linkea_data *ldata, struct thandle *handle) { - const struct lu_buf *buf = lfsck_buf_get_const(env, - ldata->ld_buf->lb_buf, - ldata->ld_leh->leh_len); + struct lu_buf buf; + int rc; + + lfsck_buf_init(&buf, ldata->ld_buf->lb_buf, ldata->ld_leh->leh_len); + +again: + rc = dt_xattr_set(env, obj, &buf, XATTR_NAME_LINK, 0, handle); + if (unlikely(rc == -ENOSPC)) { + rc = linkea_overflow_shrink(ldata); + if (likely(rc > 0)) { + buf.lb_len = rc; + goto again; + } + } + + return rc; +} + +static inline bool linkea_reclen_is_valid(const struct linkea_data *ldata) +{ + if (ldata->ld_reclen <= 0) + return false; + + if 
((char *)ldata->ld_lee + ldata->ld_reclen > + (char *)ldata->ld_leh + ldata->ld_leh->leh_len) + return false; + + return true; +} + +static inline bool linkea_entry_is_valid(const struct linkea_data *ldata, + const struct lu_name *cname, + const struct lu_fid *pfid) +{ + if (!linkea_reclen_is_valid(ldata)) + return false; + + if (cname->ln_namelen <= 0 || cname->ln_namelen > NAME_MAX) + return false; - return dt_xattr_set(env, obj, buf, XATTR_NAME_LINK, 0, handle); + if (!fid_is_sane(pfid)) + return false; + + return true; } -static void lfsck_namespace_unpack_linkea_entry(struct linkea_data *ldata, - struct lu_name *cname, - struct lu_fid *pfid, - char *buf) +static int lfsck_namespace_unpack_linkea_entry(struct linkea_data *ldata, + struct lu_name *cname, + struct lu_fid *pfid, + char *buf, const int buflen) { linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, cname, pfid); + if (unlikely(!linkea_entry_is_valid(ldata, cname, pfid))) + return -EINVAL; + /* To guarantee the 'name' is terminated with '0'. */ memcpy(buf, cname->ln_name, cname->ln_namelen); buf[cname->ln_namelen] = 0; cname->ln_name = buf; + + return 0; +} + +static void lfsck_linkea_del_buf(struct linkea_data *ldata, + const struct lu_name *lname) +{ + LASSERT(ldata->ld_leh != NULL && ldata->ld_lee != NULL); + + /* If current record is corrupted, all the subsequent + * records will be dropped. 
*/ + if (unlikely(!linkea_reclen_is_valid(ldata))) { + void *ptr = ldata->ld_lee; + + ldata->ld_leh->leh_len = sizeof(struct link_ea_header); + ldata->ld_leh->leh_reccount = 0; + linkea_first_entry(ldata); + while (ldata->ld_lee != NULL && + (char *)ldata->ld_lee < (char *)ptr) { + int reclen = (ldata->ld_lee->lee_reclen[0] << 8) | + ldata->ld_lee->lee_reclen[1]; + + ldata->ld_leh->leh_len += reclen; + ldata->ld_leh->leh_reccount++; + ldata->ld_lee = (struct link_ea_entry *) + ((char *)ldata->ld_lee + reclen); + } + + ldata->ld_lee = NULL; + } else { + linkea_del_buf(ldata, lname); + } } static int lfsck_namespace_filter_linkea_entry(struct linkea_data *ldata, @@ -841,13 +850,16 @@ static int lfsck_namespace_filter_linkea_entry(struct linkea_data *ldata, while (ldata->ld_lee != NULL) { ldata->ld_reclen = (ldata->ld_lee->lee_reclen[0] << 8) | ldata->ld_lee->lee_reclen[1]; - if (unlikely(ldata->ld_reclen == oldlen && + if (unlikely(!linkea_reclen_is_valid(ldata))) { + lfsck_linkea_del_buf(ldata, NULL); + LASSERT(ldata->ld_lee == NULL); + } else if (unlikely(ldata->ld_reclen == oldlen && memcmp(ldata->ld_lee, oldlee, oldlen) == 0)) { repeated++; if (!remove) break; - linkea_del_buf(ldata, cname); + lfsck_linkea_del_buf(ldata, cname); } else { linkea_next_entry(ldata); } @@ -915,7 +927,7 @@ static int lfsck_namespace_insert_orphan(const struct lu_env *env, struct thandle *th = NULL; struct lfsck_lock_handle *pllh = &info->lti_llh; struct lustre_handle clh = { 0 }; - struct linkea_data ldata = { NULL }; + struct linkea_data ldata2 = { NULL }; struct lu_buf linkea_buf; int namelen; int idx = 0; @@ -968,11 +980,8 @@ again: cname->ln_name = info->lti_key; cname->ln_namelen = namelen; - rc = linkea_data_new(&ldata, &info->lti_linkea_buf2); - if (rc != 0) - GOTO(log, rc); - - rc = linkea_add_buf(&ldata, cname, pfid); + rc = linkea_links_new(&ldata2, &info->lti_linkea_buf2, + cname, pfid); if (rc != 0) GOTO(log, rc); @@ -982,8 +991,8 @@ again: if (rc != 0) GOTO(log, rc); - 
lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf, - ldata.ld_leh->leh_len); + lfsck_buf_init(&linkea_buf, ldata2.ld_buf->lb_buf, + ldata2.ld_leh->leh_len); th = dt_trans_create(env, dev); if (IS_ERR(th)) GOTO(log, rc = PTR_ERR(th)); @@ -1024,7 +1033,7 @@ again: } memset(la, 0, sizeof(*la)); - la->la_ctime = cfs_time_current_sec(); + la->la_ctime = ktime_get_real_seconds(); la->la_valid = LA_CTIME; rc = dt_declare_attr_set(env, orphan, la, th); if (rc != 0) @@ -1035,9 +1044,8 @@ again: GOTO(stop, rc); dt_write_lock(env, orphan, 0); - rc = lfsck_links_read(env, orphan, &ldata); - if (likely((rc == -ENODATA) || (rc == -EINVAL) || - (rc == 0 && ldata.ld_leh->leh_reccount == 0))) { + rc = lfsck_links_read2_with_rec(env, orphan, &ldata2); + if (likely(rc == -ENODATA || rc == -EINVAL)) { if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN) GOTO(unlock, rc = 1); @@ -1050,7 +1058,7 @@ again: rec->rec_type = S_IFDIR; rec->rec_fid = pfid; rc = dt_insert(env, orphan, (const struct dt_rec *)rec, - (const struct dt_key *)dotdot, th, 1); + (const struct dt_key *)dotdot, th); if (rc != 0) GOTO(unlock, rc); } @@ -1059,7 +1067,7 @@ again: th); } else { if (rc == 0 && count != NULL) - *count = ldata.ld_leh->leh_reccount; + *count = ldata2.ld_leh->leh_reccount; GOTO(unlock, rc); } @@ -1069,7 +1077,7 @@ again: rec->rec_type = lfsck_object_type(orphan) & S_IFMT; rec->rec_fid = cfid; rc = dt_insert(env, parent, (const struct dt_rec *)rec, - (const struct dt_key *)cname->ln_name, th, 1); + (const struct dt_key *)cname->ln_name, th); if (rc == 0 && S_ISDIR(rec->rec_type)) { dt_write_lock(env, parent, 0); rc = dt_ref_add(env, parent, th); @@ -1105,6 +1113,99 @@ log: return rc; } +static int lfsck_lmv_set(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *obj, + struct lmv_mds_md_v1 *lmv) +{ + struct dt_device *dev = lfsck->li_next; + struct thandle *th = NULL; + struct lu_buf buf = { lmv, sizeof(*lmv) }; + int rc; + + ENTRY; + + th = dt_trans_create(env, dev); + if 
(IS_ERR(th)) + RETURN(PTR_ERR(th)); + + rc = dt_declare_xattr_set(env, obj, &buf, XATTR_NAME_LMV".set", 0, th); + if (rc) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_xattr_set(env, obj, &buf, XATTR_NAME_LMV".set", 0, th); + if (rc) + GOTO(stop, rc); + + EXIT; +stop: + dt_trans_stop(env, dev, th); + + return rc; +} + +static int lfsck_lmv_delete(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *obj) +{ + struct dt_device *dev = lfsck->li_next; + struct thandle *th = NULL; + int rc; + + ENTRY; + + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LMV, th); + if (rc) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_xattr_del(env, obj, XATTR_NAME_LMV, th); + if (rc) + GOTO(stop, rc); + + EXIT; +stop: + dt_trans_stop(env, dev, th); + + return rc; +} + +static inline int lfsck_object_is_shard(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *obj, + const struct lu_name *lname) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lmv_mds_md_v1 *lmv = &info->lti_lmv; + int rc; + + rc = lfsck_shard_name_to_index(env, lname->ln_name, lname->ln_namelen, + lfsck_object_type(obj), + lfsck_dto2fid(obj)); + if (rc < 0) + return 0; + + rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv); + if (rc == -ENODATA) + return 0; + + if (!rc && lmv->lmv_magic == LMV_MAGIC_STRIPE) + return 1; + + return rc; +} + /** * Add the specified name entry back to namespace. * @@ -1115,13 +1216,17 @@ log: * it is quite possible that the name entry is lost. Then the LFSCK * should add the name entry back to the namespace. * + * If \a child is shard, which means \a parent is a striped directory, + * if \a parent has LMV, we need to delete it before insertion because + * now parent's striping is broken and can't be parsed correctly. 
+ * * \param[in] env pointer to the thread context * \param[in] com pointer to the lfsck component * \param[in] parent pointer to the directory under which the name entry * will be inserted into * \param[in] child pointer to the object referenced by the name entry * that to be inserted into the parent - * \param[in] name the name for the child in the parent directory + * \param[in] lname the name for the child in the parent directory * * \retval positive number for repaired cases * \retval 0 if nothing to be repaired @@ -1131,19 +1236,26 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env, struct lfsck_component *com, struct dt_object *parent, struct dt_object *child, - const char *name) + const struct lu_name *lname) { - struct lfsck_thread_info *info = lfsck_env_info(env); - struct lu_attr *la = &info->lti_la; - struct dt_insert_rec *rec = &info->lti_dt_rec; - struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lu_attr *la = &info->lti_la; + struct dt_insert_rec *rec = &info->lti_dt_rec; + struct lfsck_instance *lfsck = com->lc_lfsck; /* The child and its name may be on different MDTs. 
*/ - const struct lu_fid *pfid = lfsck_dto2fid(parent); - const struct lu_fid *cfid = lfsck_dto2fid(child); - struct dt_device *dev = lfsck->li_next; - struct thandle *th = NULL; - struct lfsck_lock_handle *llh = &info->lti_llh; - int rc = 0; + const struct lu_fid *pfid = lfsck_dto2fid(parent); + const struct lu_fid *cfid = lfsck_dto2fid(child); + struct dt_device *dev = lfsck->li_next; + struct thandle *th = NULL; + struct lfsck_lock_handle *llh = &info->lti_llh; + struct lmv_mds_md_v1 *lmv = &info->lti_lmv; + struct lu_buf buf = { lmv, sizeof(*lmv) }; + /* whether parent's LMV is deleted before insertion */ + bool parent_lmv_deleted = false; + /* whether parent's LMV is missing */ + bool parent_lmv_lost = false; + int rc = 0; + ENTRY; /* @parent/@child may be based on lfsck->li_bottom, @@ -1153,9 +1265,6 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env, if (IS_ERR(parent)) GOTO(log, rc = PTR_ERR(parent)); - if (unlikely(!dt_try_as_dir(env, parent))) - GOTO(log, rc = -ENOTDIR); - child = lfsck_object_locate(dev, child); if (IS_ERR(child)) GOTO(log, rc = PTR_ERR(child)); @@ -1163,11 +1272,57 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env, if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN) GOTO(log, rc = 1); - rc = lfsck_lock(env, lfsck, parent, name, llh, - MDS_INODELOCK_UPDATE, LCK_PW); - if (rc != 0) + rc = lfsck_lock(env, lfsck, parent, lname->ln_name, llh, + MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE | + MDS_INODELOCK_XATTR, LCK_EX); + if (rc) GOTO(log, rc); + rc = lfsck_object_is_shard(env, lfsck, child, lname); + if (rc < 0) + GOTO(unlock, rc); + + if (rc == 1) { + rc = lfsck_read_stripe_lmv(env, lfsck, parent, lmv); + if (!rc) { + /* + * To add a shard, we need to convert parent to a + * plain directory by deleting its LMV, and after + * insertion set it back. 
+ */ + rc = lfsck_lmv_delete(env, lfsck, parent); + if (rc) + GOTO(unlock, rc); + parent_lmv_deleted = true; + lmv->lmv_layout_version++; + lfsck_lmv_header_cpu_to_le(lmv, lmv); + } else if (rc == -ENODATA) { + struct lu_seq_range *range = &info->lti_range; + struct seq_server_site *ss = lfsck_dev_site(lfsck); + + rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv); + if (rc) + GOTO(unlock, rc); + + fld_range_set_mdt(range); + rc = fld_server_lookup(env, ss->ss_server_fld, + fid_seq(lfsck_dto2fid(parent)), range); + if (rc) + GOTO(unlock, rc); + + parent_lmv_lost = true; + lmv->lmv_magic = LMV_MAGIC; + lmv->lmv_master_mdt_index = range->lsr_index; + lmv->lmv_layout_version++; + lfsck_lmv_header_cpu_to_le(lmv, lmv); + } else { + GOTO(unlock, rc); + } + } + + if (unlikely(!dt_try_as_dir(env, parent))) + GOTO(unlock, rc = -ENOTDIR); + th = dt_trans_create(env, dev); if (IS_ERR(th)) GOTO(unlock, rc = PTR_ERR(th)); @@ -1175,7 +1330,7 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env, rec->rec_type = lfsck_object_type(child) & S_IFMT; rec->rec_fid = cfid; rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec, - (const struct dt_key *)name, th); + (const struct dt_key *)lname->ln_name, th); if (rc != 0) GOTO(stop, rc); @@ -1185,8 +1340,14 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env, GOTO(stop, rc); } - memset(la, 0, sizeof(*la)); - la->la_ctime = cfs_time_current_sec(); + if (parent_lmv_lost) { + rc = dt_declare_xattr_set(env, parent, &buf, + XATTR_NAME_LMV".set", 0, th); + if (rc) + GOTO(stop, rc); + } + + la->la_ctime = ktime_get_real_seconds(); la->la_valid = LA_CTIME; rc = dt_declare_attr_set(env, parent, la, th); if (rc != 0) @@ -1201,7 +1362,7 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env, GOTO(stop, rc); rc = dt_insert(env, parent, (const struct dt_rec *)rec, - (const struct dt_key *)name, th, 1); + (const struct dt_key *)lname->ln_name, th); if (rc != 0) GOTO(stop, rc); @@ -1213,7 +1374,13 
@@ static int lfsck_namespace_insert_normal(const struct lu_env *env, GOTO(stop, rc); } - la->la_ctime = cfs_time_current_sec(); + if (parent_lmv_lost) { + rc = dt_xattr_set(env, parent, &buf, XATTR_NAME_LMV".set", 0, + th); + if (rc) + GOTO(stop, rc); + } + rc = dt_attr_set(env, parent, la, th); if (rc != 0) GOTO(stop, rc); @@ -1226,12 +1393,15 @@ stop: dt_trans_stop(env, dev, th); unlock: + if (parent_lmv_deleted) + lfsck_lmv_set(env, lfsck, parent, lmv); + lfsck_unlock(llh); log: CDEBUG(D_LFSCK, "%s: namespace LFSCK insert object "DFID" with " "the name %s and type %o to the parent "DFID": rc = %d\n", - lfsck_lfsck2name(lfsck), PFID(cfid), name, + lfsck_lfsck2name(lfsck), PFID(cfid), lname->ln_name, lfsck_object_type(child) & S_IFMT, PFID(pfid), rc); if (rc != 0) { @@ -1371,11 +1541,8 @@ again: memset(dof, 0, sizeof(*dof)); dof->dof_type = dt_mode_to_dft(S_IFDIR); - rc = linkea_data_new(&ldata, &info->lti_linkea_buf2); - if (rc != 0) - GOTO(unlock1, rc); - - rc = linkea_add_buf(&ldata, cname, lfsck_dto2fid(parent)); + rc = linkea_links_new(&ldata, &info->lti_linkea_buf2, + cname, lfsck_dto2fid(parent)); if (rc != 0) GOTO(unlock1, rc); @@ -1454,13 +1621,13 @@ again: rec->rec_fid = cfid; rc = dt_insert(env, orphan, (const struct dt_rec *)rec, - (const struct dt_key *)dot, th, 1); + (const struct dt_key *)dot, th); if (rc != 0) GOTO(unlock2, rc); rec->rec_fid = lfsck_dto2fid(parent); rc = dt_insert(env, orphan, (const struct dt_rec *)rec, - (const struct dt_key *)dotdot, th, 1); + (const struct dt_key *)dotdot, th); if (rc != 0) GOTO(unlock2, rc); @@ -1478,7 +1645,7 @@ again: rec->rec_fid = cfid; rc = dt_insert(env, parent, (const struct dt_rec *)rec, - (const struct dt_key *)name, th, 1); + (const struct dt_key *)name, th); if (rc == 0) { dt_write_lock(env, parent, 0); rc = dt_ref_add(env, parent, th); @@ -1548,6 +1715,7 @@ static int lfsck_namespace_shrink_linkea(const struct lu_env *env, struct lustre_handle lh = { 0 }; struct linkea_data ldata_new = { NULL }; 
struct lu_buf linkea_buf; + int buflen = 0; int rc = 0; ENTRY; @@ -1558,20 +1726,27 @@ static int lfsck_namespace_shrink_linkea(const struct lu_env *env, GOTO(log, rc); if (next) - linkea_del_buf(ldata, cname); + lfsck_linkea_del_buf(ldata, cname); else lfsck_namespace_filter_linkea_entry(ldata, cname, pfid, true); - lfsck_buf_init(&linkea_buf, ldata->ld_buf->lb_buf, - ldata->ld_leh->leh_len); + if (ldata->ld_leh->leh_reccount > 0 || + unlikely(ldata->ld_leh->leh_overflow_time)) { + lfsck_buf_init(&linkea_buf, ldata->ld_buf->lb_buf, + ldata->ld_leh->leh_len); + buflen = linkea_buf.lb_len; + } again: th = dt_trans_create(env, dev); if (IS_ERR(th)) GOTO(unlock1, rc = PTR_ERR(th)); - rc = dt_declare_xattr_set(env, obj, &linkea_buf, - XATTR_NAME_LINK, 0, th); + if (buflen != 0) + rc = dt_declare_xattr_set(env, obj, &linkea_buf, + XATTR_NAME_LINK, 0, th); + else + rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LINK, th); if (rc != 0) GOTO(stop, rc); @@ -1583,9 +1758,9 @@ again: if (unlikely(lfsck_is_dead_obj(obj))) GOTO(unlock2, rc = -ENOENT); - rc = lfsck_links_read2(env, obj, &ldata_new); - if (rc != 0) - GOTO(unlock2, rc); + rc = lfsck_links_read2_with_rec(env, obj, &ldata_new); + if (rc) + GOTO(unlock2, rc = (rc == -ENODATA ? 0 : rc)); /* The specified linkEA entry has been removed by race. */ rc = linkea_links_find(&ldata_new, cname, pfid); @@ -1596,22 +1771,30 @@ again: GOTO(unlock2, rc = 1); if (next) - linkea_del_buf(&ldata_new, cname); + lfsck_linkea_del_buf(&ldata_new, cname); else lfsck_namespace_filter_linkea_entry(&ldata_new, cname, pfid, true); - if (linkea_buf.lb_len < ldata_new.ld_leh->leh_len) { + /* + * linkea may change because it doesn't take lock in the first read, if + * it becomes larger, restart from beginning. 
+ */ + if ((ldata_new.ld_leh->leh_reccount > 0 || + unlikely(ldata_new.ld_leh->leh_overflow_time)) && + buflen < ldata_new.ld_leh->leh_len) { dt_write_unlock(env, obj); dt_trans_stop(env, dev, th); lfsck_buf_init(&linkea_buf, ldata_new.ld_buf->lb_buf, ldata_new.ld_leh->leh_len); + buflen = linkea_buf.lb_len; goto again; } - lfsck_buf_init(&linkea_buf, ldata_new.ld_buf->lb_buf, - ldata_new.ld_leh->leh_len); - rc = dt_xattr_set(env, obj, &linkea_buf, XATTR_NAME_LINK, 0, th); + if (buflen) + rc = lfsck_links_write(env, obj, &ldata_new, th); + else + rc = dt_xattr_del(env, obj, XATTR_NAME_LINK, th); GOTO(unlock2, rc = (rc == 0 ? 1 : rc)); @@ -1853,7 +2036,7 @@ static int lfsck_namespace_replace_cond(const struct lu_env *env, replace: dt_read_lock(env, child, 0); - rc = lfsck_links_read2(env, child, &ldata); + rc = lfsck_links_read2_with_rec(env, child, &ldata); dt_read_unlock(env, child); /* Someone changed the child, no need to replace. */ @@ -1908,7 +2091,7 @@ replace: GOTO(stop, rc); rc = dt_insert(env, parent, (const struct dt_rec *)rec, - (const struct dt_key *)name, th, 1); + (const struct dt_key *)name, th); GOTO(stop, rc = (rc == 0 ? 
1 : rc)); @@ -2035,7 +2218,7 @@ int lfsck_namespace_repair_dirent(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct dt_insert_rec *rec = &info->lti_dt_rec; const struct lu_fid *pfid = lfsck_dto2fid(parent); - const struct lu_fid *cfid = lfsck_dto2fid(child); + struct lu_fid cfid = {0}; struct lu_fid tfid; struct lfsck_instance *lfsck = com->lc_lfsck; struct dt_device *dev = lfsck->li_next; @@ -2045,6 +2228,8 @@ int lfsck_namespace_repair_dirent(const struct lu_env *env, int rc = 0; ENTRY; + if (child) + cfid = *lfsck_dto2fid(child); parent = lfsck_object_locate(dev, parent); if (IS_ERR(parent)) GOTO(log, rc = PTR_ERR(parent)); @@ -2071,7 +2256,8 @@ int lfsck_namespace_repair_dirent(const struct lu_env *env, if (update) { rec->rec_type = lfsck_object_type(child) & S_IFMT; - rec->rec_fid = cfid; + LASSERT(!fid_is_zero(&cfid)); + rec->rec_fid = &cfid; rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec, (const struct dt_key *)name2, th); @@ -2079,7 +2265,7 @@ int lfsck_namespace_repair_dirent(const struct lu_env *env, GOTO(stop, rc); } - if (dec) { + if (dec && S_ISDIR(type)) { rc = dt_declare_ref_del(env, parent, th); if (rc != 0) GOTO(stop, rc); @@ -2102,7 +2288,7 @@ int lfsck_namespace_repair_dirent(const struct lu_env *env, /* Someone has removed the bad name entry and reused it for other * object by race. */ - if (!lu_fid_eq(&tfid, cfid)) + if (!lu_fid_eq(&tfid, &cfid)) GOTO(unlock2, rc = 0); if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN) @@ -2115,12 +2301,12 @@ int lfsck_namespace_repair_dirent(const struct lu_env *env, if (update) { rc = dt_insert(env, parent, (const struct dt_rec *)rec, - (const struct dt_key *)name2, th, 1); + (const struct dt_key *)name2, th); if (rc != 0) GOTO(unlock2, rc); } - if (dec) { + if (dec && S_ISDIR(type)) { rc = dt_ref_del(env, parent, th); if (rc != 0) GOTO(unlock2, rc); @@ -2137,8 +2323,8 @@ stop: /* We are not sure whether the child will become orphan or not. 
* Record it in the LFSCK trace file for further checking in * the second-stage scanning. */ - if (!update && !dec && rc == 0) - lfsck_namespace_trace_update(env, com, cfid, + if (!update && !dec && child && rc == 0) + lfsck_namespace_trace_update(env, com, &cfid, LNTF_CHECK_LINKEA, true); unlock1: @@ -2151,7 +2337,7 @@ log: "entry for: parent "DFID", child "DFID", name %s, type " "in name entry %o, type claimed by child %o. repair it " "by %s with new name2 %s: rc = %d\n", - lfsck_lfsck2name(lfsck), PFID(pfid), PFID(cfid), + lfsck_lfsck2name(lfsck), PFID(pfid), PFID(&cfid), name, type, update ? lfsck_object_type(child) : 0, update ? "updating" : "removing", name2, rc); @@ -2201,11 +2387,7 @@ static int lfsck_namespace_repair_unmatched_pairs(const struct lu_env *env, LASSERT(!dt_object_remote(obj)); LASSERT(S_ISDIR(lfsck_object_type(obj))); - rc = linkea_data_new(&ldata, &info->lti_big_buf); - if (rc != 0) - GOTO(log, rc); - - rc = linkea_add_buf(&ldata, cname, pfid); + rc = linkea_links_new(&ldata, &info->lti_big_buf, cname, pfid); if (rc != 0) GOTO(log, rc); @@ -2247,12 +2429,11 @@ static int lfsck_namespace_repair_unmatched_pairs(const struct lu_env *env, dt_delete(env, obj, (const struct dt_key *)dotdot, th); rc = dt_insert(env, obj, (const struct dt_rec *)rec, - (const struct dt_key *)dotdot, th, 1); + (const struct dt_key *)dotdot, th); if (rc != 0) GOTO(unlock, rc); - rc = dt_xattr_set(env, obj, &linkea_buf, - XATTR_NAME_LINK, 0, th); + rc = lfsck_links_write(env, obj, &ldata, th); GOTO(unlock, rc = (rc == 0 ? 
1 : rc)); @@ -2357,6 +2538,7 @@ lfsck_namespace_dsd_orphan(const struct lu_env *env, * \param[out] type to tell the caller what the inconsistency is * \param[in] retry if found inconsistency, but the caller does not hold * ldlm lock on the @child, then set @retry as true + * \param[in] unknown set if does not know how to repair the inconsistency * * \retval positive number for repaired cases * \retval 0 if nothing to be repaired @@ -2370,7 +2552,7 @@ lfsck_namespace_dsd_single(const struct lu_env *env, struct linkea_data *ldata, struct lustre_handle *lh, enum lfsck_namespace_inconsistency_type *type, - bool *retry) + bool *retry, bool *unknown) { struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_name *cname = &info->lti_name; @@ -2383,9 +2565,11 @@ lfsck_namespace_dsd_single(const struct lu_env *env, int rc = 0; ENTRY; - lfsck_namespace_unpack_linkea_entry(ldata, cname, &tfid, info->lti_key); + rc = lfsck_namespace_unpack_linkea_entry(ldata, cname, &tfid, + info->lti_key, + sizeof(info->lti_key)); /* The unique linkEA entry with bad parent will be handled as orphan. */ - if (!fid_is_sane(&tfid)) { + if (rc != 0) { if (!lustre_handle_is_used(lh) && retry != NULL) *retry = true; else @@ -2425,7 +2609,7 @@ lfsck_namespace_dsd_single(const struct lu_env *env, lost_parent: lmv = &info->lti_lmv; - rc = lfsck_read_stripe_lmv(env, child, lmv); + rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv); if (rc != 0 && rc != -ENODATA) GOTO(out, rc); @@ -2454,7 +2638,7 @@ lost_parent: if (rc >= 0) { /* Add the missing name entry to the parent. */ rc = lfsck_namespace_insert_normal(env, com, parent, - child, cname->ln_name); + child, cname); if (unlikely(rc == -EEXIST)) { /* Unfortunately, someone reused the name * under the parent by race. So we have @@ -2479,7 +2663,7 @@ lost_parent: } GOTO(out, rc); - } + } /* !dt_object_exists(parent) */ /* The unique linkEA entry with bad parent will be handled as orphan. 
*/ if (unlikely(!dt_try_as_dir(env, parent))) { @@ -2512,7 +2696,8 @@ lost_parent: } lfsck_ibits_unlock(lh, LCK_EX); - rc = lfsck_namespace_check_name(env, parent, child, cname); + rc = lfsck_namespace_check_name(env, lfsck, parent, child, + cname); if (rc == -ENOENT) goto lost_parent; @@ -2536,7 +2721,7 @@ lost_parent: /* Add the missing name entry back to the namespace. */ rc = lfsck_namespace_insert_normal(env, com, parent, child, - cname->ln_name); + cname); if (unlikely(rc == -ESTALE)) /* It may happen when the remote object has been * removed, but the local MDT is not aware of that. */ @@ -2565,7 +2750,7 @@ lost_parent: } GOTO(out, rc); - } + } /* rc == -ENOENT */ if (rc != 0) GOTO(out, rc); @@ -2590,8 +2775,18 @@ lost_parent: GOTO(out, rc); } - if (fid_is_zero(pfid)) + /* Zero FID may because the remote directroy object has invalid linkEA, + * or lost linkEA. Under such case, the LFSCK on this MDT does not know + * how to repair the inconsistency, but the namespace LFSCK on the MDT + * where its name entry resides may has more information (name, FID) to + * repair such inconsistency. So here, keep the inconsistency to avoid + * some imporper repairing. */ + if (fid_is_zero(pfid)) { + if (unknown) + *unknown = true; + GOTO(out, rc = 0); + } /* The ".." name entry is wrong, update it. */ if (!lu_fid_eq(pfid, lfsck_dto2fid(parent))) { @@ -2633,6 +2828,7 @@ out: * \param[in,out] lh ldlm lock handler for the given @child * \param[out] type to tell the caller what the inconsistency is * \param[in] lpf true if the ".." 
entry is under lost+found/MDTxxxx/ + * \param[in] unknown set if does not know how to repair the inconsistency * * \retval positive number for repaired cases * \retval 0 if nothing to be repaired @@ -2646,7 +2842,7 @@ lfsck_namespace_dsd_multiple(const struct lu_env *env, struct linkea_data *ldata, struct lustre_handle *lh, enum lfsck_namespace_inconsistency_type *type, - bool lpf) + bool lpf, bool *unknown) { struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_name *cname = &info->lti_name; @@ -2659,24 +2855,24 @@ lfsck_namespace_dsd_multiple(const struct lu_env *env, struct dt_object *parent = NULL; struct linkea_data ldata_new = { NULL }; int dirent_count = 0; - int linkea_count = 0; int rc = 0; bool once = true; ENTRY; again: while (ldata->ld_lee != NULL) { - lfsck_namespace_unpack_linkea_entry(ldata, cname, &tfid, - info->lti_key); - /* Drop repeated linkEA entries. */ - lfsck_namespace_filter_linkea_entry(ldata, cname, &tfid, true); + rc = lfsck_namespace_unpack_linkea_entry(ldata, cname, &tfid, + info->lti_key, + sizeof(info->lti_key)); /* Drop invalid linkEA entry. */ - if (!fid_is_sane(&tfid)) { - linkea_del_buf(ldata, cname); - linkea_count++; + if (rc != 0) { + lfsck_linkea_del_buf(ldata, cname); continue; } + /* Drop repeated linkEA entries. */ + lfsck_namespace_filter_linkea_entry(ldata, cname, &tfid, true); + /* If current dotdot is the .lustre/lost+found/MDTxxxx/, * then it is possible that: the directry object has ever * been lost, but its name entry was there. In the former @@ -2690,7 +2886,8 @@ again: * When the LFSCK runs again, if the dangling name is still * there, the LFSCK should move the orphan directory object * back to the normal namespace. 
*/ - if (!lpf && !lu_fid_eq(pfid, &tfid) && once) { + if (!lpf && !fid_is_zero(pfid) && + !lu_fid_eq(pfid, &tfid) && once) { linkea_next_entry(ldata); continue; } @@ -2706,8 +2903,7 @@ again: * there is still other chance to make the * child to be visible via other parent, then * remove this linkEA entry. */ - linkea_del_buf(ldata, cname); - linkea_count++; + lfsck_linkea_del_buf(ldata, cname); continue; } @@ -2717,8 +2913,7 @@ again: /* The linkEA entry with bad parent will be removed. */ if (unlikely(!dt_try_as_dir(env, parent))) { lfsck_object_put(env, parent); - linkea_del_buf(ldata, cname); - linkea_count++; + lfsck_linkea_del_buf(ldata, cname); continue; } @@ -2739,7 +2934,11 @@ again: if (lu_fid_eq(&tfid, cfid)) { lfsck_object_put(env, parent); - if (!lu_fid_eq(pfid, pfid2)) { + /* If the parent (that is declared via linkEA entry) + * directory contains the specified child, but such + * parent does not match the dotdot name entry, then + * trust the linkEA. */ + if (!fid_is_zero(pfid) && !lu_fid_eq(pfid, pfid2)) { *type = LNIT_UNMATCHED_PAIRS; rc = lfsck_namespace_repair_unmatched_pairs(env, com, child, pfid2, cname); @@ -2751,11 +2950,8 @@ rebuild: /* It is the most common case that we find the * name entry corresponding to the linkEA entry * that matches the ".." name entry. */ - rc = linkea_data_new(&ldata_new, &info->lti_big_buf); - if (rc != 0) - RETURN(rc); - - rc = linkea_add_buf(&ldata_new, cname, pfid2); + rc = linkea_links_new(&ldata_new, &info->lti_big_buf, + cname, pfid2); if (rc != 0) RETURN(rc); @@ -2764,15 +2960,15 @@ rebuild: if (rc < 0) RETURN(rc); - linkea_del_buf(ldata, cname); - linkea_count++; + lfsck_linkea_del_buf(ldata, cname); linkea_first_entry(ldata); /* There may be some invalid dangling name entries under * other parent directories, remove all of them. 
*/ while (ldata->ld_lee != NULL) { - lfsck_namespace_unpack_linkea_entry(ldata, - cname, &tfid, info->lti_key); - if (!fid_is_sane(&tfid)) + rc = lfsck_namespace_unpack_linkea_entry(ldata, + cname, &tfid, info->lti_key, + sizeof(info->lti_key)); + if (rc != 0) goto next; parent = lfsck_object_find_bottom(env, lfsck, @@ -2805,13 +3001,13 @@ rebuild: dirent_count += rc; next: - linkea_del_buf(ldata, cname); + lfsck_linkea_del_buf(ldata, cname); } ns->ln_dirent_repaired += dirent_count; RETURN(rc); - } + } /* lu_fid_eq(&tfid, lfsck_dto2fid(child)) */ lfsck_ibits_unlock(lh, LCK_EX); /* The name entry references another MDT-object that may be @@ -2826,17 +3022,17 @@ next: if (rc > 0) goto rebuild; - linkea_del_buf(ldata, cname); - } + lfsck_linkea_del_buf(ldata, cname); + } /* while (ldata->ld_lee != NULL) */ + + /* If there is still linkEA overflow, return. */ + if (unlikely(ldata->ld_leh->leh_overflow_time)) + RETURN(0); linkea_first_entry(ldata); if (ldata->ld_leh->leh_reccount == 1) { rc = lfsck_namespace_dsd_single(env, com, child, pfid, ldata, - lh, type, NULL); - - if (rc == 0 && fid_is_zero(pfid) && linkea_count > 0) - rc = lfsck_namespace_rebuild_linkea(env, com, child, - ldata); + lh, type, NULL, unknown); RETURN(rc); } @@ -2874,7 +3070,7 @@ next: * * If all the known name entries have been verified, then the object's hard * link attribute should match the object's linkEA entries count unless the - * object's has too much hard link to be recorded in the linkEA. Such cases + * object's has too many hard link to be recorded in the linkEA. Such cases * should have been marked in the LFSCK trace file. Otherwise, trust the * linkEA to update the object's nlink attribute. 
* @@ -2893,8 +3089,6 @@ static int lfsck_namespace_repair_nlink(const struct lu_env *env, struct dt_object *obj, struct lu_attr *la) { - struct lfsck_thread_info *info = lfsck_env_info(env); - struct lu_fid *tfid = &info->lti_fid3; struct lfsck_namespace *ns = com->lc_file_ram; struct lfsck_instance *lfsck = com->lc_lfsck; struct dt_device *dev = lfsck_obj2dev(obj); @@ -2903,13 +3097,10 @@ static int lfsck_namespace_repair_nlink(const struct lu_env *env, struct linkea_data ldata = { NULL }; struct lustre_handle lh = { 0 }; __u32 old = la->la_nlink; - int idx; int rc = 0; - __u8 flags; ENTRY; LASSERT(!dt_object_remote(obj)); - LASSERT(S_ISREG(lfsck_object_type(obj))); rc = lfsck_ibits_lock(env, lfsck, obj, &lh, MDS_INODELOCK_UPDATE, LCK_PW); @@ -2940,26 +3131,20 @@ static int lfsck_namespace_repair_nlink(const struct lu_env *env, if (ns->ln_flags & LF_INCOMPLETE) GOTO(unlock, rc = 0); - fid_cpu_to_be(tfid, cfid); - idx = lfsck_sub_trace_file_fid2idx(cfid); - rc = dt_lookup(env, com->lc_sub_trace_objs[idx].lsto_obj, - (struct dt_rec *)&flags, (const struct dt_key *)tfid); - if (rc != 0) - GOTO(unlock, rc); - - if (flags & LNTF_SKIP_NLINK) - GOTO(unlock, rc = 0); - rc = dt_attr_get(env, obj, la); if (rc != 0) GOTO(unlock, rc = (rc == -ENOENT ? 0 : rc)); - rc = lfsck_links_read2(env, obj, &ldata); - if (rc != 0) + rc = lfsck_links_read2_with_rec(env, obj, &ldata); + if (rc) GOTO(unlock, rc = (rc == -ENODATA ? 0 : rc)); - if (la->la_nlink == ldata.ld_leh->leh_reccount || - unlikely(la->la_nlink == 0)) + /* XXX: Currently, we only update the nlink attribute if the known + * linkEA entries is larger than the nlink attribute. That is + * safe action. 
*/ + if (la->la_nlink >= ldata.ld_leh->leh_reccount || + unlikely(la->la_nlink == 0 || + ldata.ld_leh->leh_overflow_time)) GOTO(unlock, rc = 0); la->la_nlink = ldata.ld_leh->leh_reccount; @@ -3170,13 +3355,13 @@ lock: } GOTO(out, rc); - } + } /* rc != 0 */ linkea_first_entry(&ldata); /* This is the most common case: the object has unique linkEA entry. */ if (ldata.ld_leh->leh_reccount == 1) { rc = lfsck_namespace_dsd_single(env, com, child, pfid, &ldata, - &lh, &type, &retry); + &lh, &type, &retry, &unknown); if (retry) { LASSERT(!lustre_handle_is_used(&lh)); @@ -3208,7 +3393,7 @@ lock: * but the LFSCK cannot aware that at that time, then it adds * the bad linkEA entry for further processing. */ rc = lfsck_namespace_dsd_multiple(env, com, child, pfid, &ldata, - &lh, &type, lpf); + &lh, &type, lpf, &unknown); GOTO(out, rc); @@ -3236,6 +3421,251 @@ out: return rc; } +static inline bool +lfsck_namespace_linkea_stale_overflow(struct linkea_data *ldata, + struct lfsck_namespace *ns) +{ + /* Both the leh_overflow_time and ln_time_latest_reset are + * local time based, so need NOT to care about clock drift + * among the servers. */ + return ldata->ld_leh->leh_overflow_time && + ldata->ld_leh->leh_overflow_time < ns->ln_time_latest_reset; +} + +/** + * Clear the object's linkEA overflow timestamp. + * + * If the MDT-object has too many hard links as to the linkEA cannot hold + * all of them, then overflow timestamp will be set in the linkEA header. + * If some hard links are removed after that, then it is possible to hold + * other missed linkEA entries. If the namespace LFSCK have added all the + * related linkEA entries, then it will remove the overflow timestamp. 
+ * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * \param[in] ldata pointer to the linkEA data for the given @obj + * \param[in] obj pointer to the dt_object to be handled + * + * \retval positive number for repaired cases + * \retval 0 if nothing to be repaired + * \retval negative error number on failure + */ +static int lfsck_namespace_linkea_clear_overflow(const struct lu_env *env, + struct lfsck_component *com, + struct linkea_data *ldata, + struct dt_object *obj) +{ + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct dt_device *dev = lfsck_obj2dev(obj); + struct thandle *th = NULL; + struct lustre_handle lh = { 0 }; + struct lu_buf linkea_buf; + int rc = 0; + ENTRY; + + LASSERT(!dt_object_remote(obj)); + + rc = lfsck_ibits_lock(env, lfsck, obj, &lh, + MDS_INODELOCK_UPDATE, LCK_PW); + if (rc != 0) + GOTO(log, rc); + + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + GOTO(log, rc = PTR_ERR(th)); + + rc = dt_declare_xattr_set(env, obj, + lfsck_buf_get_const(env, NULL, MAX_LINKEA_SIZE), + XATTR_NAME_LINK, 0, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc != 0) + GOTO(stop, rc); + + dt_write_lock(env, obj, 0); + rc = lfsck_links_read(env, obj, ldata); + if (rc != 0) + GOTO(unlock, rc); + + if (unlikely(!lfsck_namespace_linkea_stale_overflow(ldata, ns))) + GOTO(unlock, rc = 0); + + ldata->ld_leh->leh_overflow_time = 0; + if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN) + GOTO(unlock, rc = 1); + + /* If all known entries are in the linkEA, then the 'leh_reccount' + * should NOT be zero. 
*/ + LASSERT(ldata->ld_leh->leh_reccount > 0); + + lfsck_buf_init(&linkea_buf, ldata->ld_buf->lb_buf, + ldata->ld_leh->leh_len); + rc = dt_xattr_set(env, obj, &linkea_buf, XATTR_NAME_LINK, 0, th); + if (unlikely(rc == -ENOSPC)) + rc = 0; + else if (!rc) + rc = 1; + + GOTO(unlock, rc); + +unlock: + dt_write_unlock(env, obj); + +stop: + dt_trans_stop(env, dev, th); + +log: + lfsck_ibits_unlock(&lh, LCK_PW); + CDEBUG(D_LFSCK, "%s: clear linkea overflow timestamp for the object " + DFID": rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(obj)), rc); + + return rc; +} + +/** + * Verify the object's agent entry. + * + * If the object claims to have agent entry but the linkEA does not contain + * remote parent, then remove the agent entry. Otherwise, if the object has + * no agent entry but its linkEA contains remote parent, then will generate + * agent entry for it. + * + * \param[in] env pointer to the thread context + * \param[in] com pointer to the lfsck component + * \param[in] obj pointer to the dt_object to be handled + * + * \retval positive number for repaired cases + * \retval 0 if nothing to be repaired + * \retval negative error number on failure + */ +static int lfsck_namespace_check_agent_entry(const struct lu_env *env, + struct lfsck_component *com, + struct dt_object *obj) +{ + struct linkea_data ldata = { NULL }; + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lu_fid *pfid = &info->lti_fid2; + struct lu_name *cname = &info->lti_name; + struct lu_seq_range *range = &info->lti_range; + struct seq_server_site *ss = lfsck_dev_site(lfsck); + __u32 idx = lfsck_dev_idx(lfsck); + int rc; + bool remote = false; + ENTRY; + + if (!(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) + RETURN(0); + + rc = lfsck_links_read_with_rec(env, obj, &ldata); + if (rc == -ENOENT || rc == -ENODATA) + RETURN(0); + + if (rc && rc != -EINVAL) + GOTO(out, rc); + + 
/* We check the agent entry again after verifying the linkEA + * successfully. So invalid linkEA should be dryrun mode. */ + if (rc == -EINVAL || unlikely(!ldata.ld_leh->leh_reccount)) + RETURN(0); + + linkea_first_entry(&ldata); + while (ldata.ld_lee != NULL && !remote) { + linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, + cname, pfid); + if (!linkea_entry_is_valid(&ldata, cname, pfid)) + GOTO(out, rc = 0); + + fld_range_set_mdt(range); + rc = fld_server_lookup(env, ss->ss_server_fld, + fid_seq(pfid), range); + if (rc) + GOTO(out, rc = (rc == -ENOENT ? 0 : rc)); + + if (range->lsr_index != idx) + remote = true; + else + linkea_next_entry(&ldata); + } + + if ((lu_object_has_agent_entry(&obj->do_lu) && !remote) || + (!lu_object_has_agent_entry(&obj->do_lu) && remote)) { + struct dt_device *dev = lfsck_obj2dev(obj); + struct linkea_data ldata2 = { NULL }; + struct lustre_handle lh = { 0 }; + struct lu_buf linkea_buf; + struct thandle *handle; + + if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN) + GOTO(out, rc = 1); + + rc = lfsck_ibits_lock(env, lfsck, obj, &lh, + MDS_INODELOCK_UPDATE | + MDS_INODELOCK_XATTR, LCK_EX); + if (rc) + GOTO(out, rc); + + handle = dt_trans_create(env, dev); + if (IS_ERR(handle)) + GOTO(unlock, rc = PTR_ERR(handle)); + + lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf, + ldata.ld_leh->leh_len); + rc = dt_declare_xattr_set(env, obj, &linkea_buf, + XATTR_NAME_LINK, LU_XATTR_REPLACE, handle); + if (rc) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, handle); + if (rc) + GOTO(stop, rc); + + dt_write_lock(env, obj, 0); + rc = lfsck_links_read2_with_rec(env, obj, &ldata2); + if (rc) { + if (rc == -ENOENT || rc == -ENODATA) + rc = 0; + GOTO(unlock2, rc); + } + + /* If someone changed linkEA by race, then the agent + * entry will be updated by lower layer automatically. 
*/ + if (ldata.ld_leh->leh_len != ldata2.ld_leh->leh_len || + memcmp(ldata.ld_buf->lb_buf, ldata2.ld_buf->lb_buf, + ldata.ld_leh->leh_len) != 0) + GOTO(unlock2, rc = 0); + + rc = dt_xattr_set(env, obj, &linkea_buf, XATTR_NAME_LINK, + LU_XATTR_REPLACE, handle); + if (!rc) + rc = 1; + + GOTO(unlock2, rc); + +unlock2: + dt_write_unlock(env, obj); +stop: + dt_trans_stop(env, dev, handle); +unlock: + lfsck_ibits_unlock(&lh, LCK_EX); + } + + GOTO(out, rc); + +out: + if (rc > 0) + ns->ln_agent_entries_repaired++; + if (rc) + CDEBUG(D_LFSCK, "%s: repair agent entry for "DFID": rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(obj)), rc); + return rc; +} + /** * Double scan the MDT-object for namespace LFSCK. * @@ -3283,47 +3713,76 @@ static int lfsck_namespace_double_scan_one(const struct lu_env *env, if (S_ISDIR(lfsck_object_type(child))) { dt_read_unlock(env, child); rc = lfsck_namespace_double_scan_dir(env, com, child, flags); + if (!rc && flags & LNTF_CHECK_AGENT_ENTRY) + rc = lfsck_namespace_check_agent_entry(env, com, child); RETURN(rc); } rc = lfsck_links_read(env, child, &ldata); dt_read_unlock(env, child); + + if (rc == -EINVAL) { + struct lustre_handle lh = { 0 }; + + rc = lfsck_ibits_lock(env, com->lc_lfsck, child, &lh, + MDS_INODELOCK_UPDATE | + MDS_INODELOCK_XATTR, LCK_EX); + if (rc == 0) { + rc = lfsck_namespace_links_remove(env, com, child); + lfsck_ibits_unlock(&lh, LCK_EX); + } + + GOTO(out, rc); + } + if (rc != 0) GOTO(out, rc); + if (!(ns->ln_flags & LF_INCOMPLETE) && + unlikely(lfsck_namespace_linkea_stale_overflow(&ldata, ns))) { + rc = lfsck_namespace_linkea_clear_overflow(env, com, &ldata, + child); + if (rc < 0) + GOTO(out, rc); + + if (rc > 0) + ns->ln_linkea_overflow_cleared++; + } + linkea_first_entry(&ldata); while (ldata.ld_lee != NULL) { - lfsck_namespace_unpack_linkea_entry(&ldata, cname, pfid, - info->lti_key); - rc = lfsck_namespace_filter_linkea_entry(&ldata, cname, pfid, - false); - /* Found repeated linkEA entries */ - if (rc > 
0) { + rc = lfsck_namespace_unpack_linkea_entry(&ldata, cname, pfid, + info->lti_key, + sizeof(info->lti_key)); + /* Invalid PFID in the linkEA entry. */ + if (rc != 0) { rc = lfsck_namespace_shrink_linkea(env, com, child, - &ldata, cname, pfid, false); + &ldata, cname, pfid, true); if (rc < 0) GOTO(out, rc); - if (rc == 0) - continue; - - repaired = true; + if (rc > 0) + repaired = true; - /* fall through */ + continue; } - /* Invalid PFID in the linkEA entry. */ - if (!fid_is_sane(pfid)) { + rc = lfsck_namespace_filter_linkea_entry(&ldata, cname, pfid, + false); + /* Found repeated linkEA entries */ + if (rc > 0) { rc = lfsck_namespace_shrink_linkea(env, com, child, - &ldata, cname, pfid, true); + &ldata, cname, pfid, false); if (rc < 0) GOTO(out, rc); - if (rc > 0) - repaired = true; + if (rc == 0) + continue; - continue; + repaired = true; + + /* fall through */ } parent = lfsck_object_find_bottom(env, lfsck, pfid); @@ -3371,7 +3830,7 @@ lost_parent: /* Add the missing name entry to the parent. */ rc = lfsck_namespace_insert_normal(env, com, - parent, child, cname->ln_name); + parent, child, cname); if (unlikely(rc == -EEXIST)) /* Unfortunately, someone reused the * name under the parent by race. So we @@ -3396,7 +3855,7 @@ lost_parent: repaired = true; continue; - } + } /* !dt_object_exists(parent) */ /* The linkEA entry with bad parent will be removed. */ if (unlikely(!dt_try_as_dir(env, parent))) { @@ -3456,12 +3915,14 @@ lost_parent: continue; } + /* The following handles -ENOENT case */ + rc = dt_attr_get(env, child, la); if (rc != 0) GOTO(out, rc); /* If there is no name entry in the parent dir and the object - * link count is less than the linkea entries count, then the + * link count is fewer than the linkea entries count, then the * linkea entry should be removed. 
*/ if (ldata.ld_leh->leh_reccount > la->la_nlink) { rc = lfsck_namespace_shrink_linkea_cond(env, com, @@ -3489,7 +3950,8 @@ lost_parent: GOTO(out, rc = 0); } - rc = lfsck_namespace_check_name(env, parent, child, cname); + rc = lfsck_namespace_check_name(env, lfsck, parent, child, + cname); if (rc == -ENOENT) goto lost_parent; @@ -3515,7 +3977,7 @@ lost_parent: /* Add the missing name entry back to the namespace. */ rc = lfsck_namespace_insert_normal(env, com, parent, child, - cname->ln_name); + cname); if (unlikely(rc == -ESTALE)) /* It may happen when the remote object has been * removed, but the local MDT is not aware of that. */ @@ -3551,11 +4013,8 @@ out: if (rc < 0 && rc != -ENODATA) return rc; - if (rc == 0) { - LASSERT(ldata.ld_leh != NULL); - + if (rc == 0 && ldata.ld_leh != NULL) count = ldata.ld_leh->leh_reccount; - } if (count == 0) { /* If the LFSCK is marked as LF_INCOMPLETE, then means some @@ -3565,7 +4024,9 @@ out: * other MDT that references this object with another name, * so we cannot know whether this linkEA is valid or not. * So keep it there and maybe resolved when next LFSCK run. */ - if (!(ns->ln_flags & LF_INCOMPLETE)) { + if (!(ns->ln_flags & LF_INCOMPLETE) && + (ldata.ld_leh == NULL || + !ldata.ld_leh->leh_overflow_time)) { /* If the child becomes orphan, then insert it into * the global .lustre/lost+found/MDTxxxx directory. 
*/ rc = lfsck_namespace_insert_orphan(env, com, child, @@ -3584,10 +4045,24 @@ out: return rc; if (la->la_nlink != 0 && la->la_nlink != count) { - rc = lfsck_namespace_repair_nlink(env, com, child, la); - if (rc > 0) { - ns->ln_objs_nlink_repaired++; - rc = 0; + if (unlikely(!S_ISREG(lfsck_object_type(child)) && + !S_ISLNK(lfsck_object_type(child)))) { + CDEBUG(D_LFSCK, "%s: namespace LFSCK finds " + "the object "DFID"'s nlink count %d " + "does not match linkEA count %d, " + "type %o, skip it.\n", + lfsck_lfsck2name(lfsck), + PFID(lfsck_dto2fid(child)), + la->la_nlink, count, + lfsck_object_type(child)); + } else if (la->la_nlink < count && + likely(!ldata.ld_leh->leh_overflow_time)) { + rc = lfsck_namespace_repair_nlink(env, com, + child, la); + if (rc > 0) { + ns->ln_objs_nlink_repaired++; + rc = 0; + } } } } @@ -3600,6 +4075,9 @@ out: rc = 1; } + if (!rc && flags & LNTF_CHECK_AGENT_ENTRY) + rc = lfsck_namespace_check_agent_entry(env, com, child); + return rc; } @@ -3607,79 +4085,88 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m, struct lfsck_namespace *ns, __u64 checked_phase1, __u64 checked_phase2, - __u32 time_phase1, - __u32 time_phase2) + time64_t time_phase1, + time64_t time_phase2, bool dryrun) { - seq_printf(m, "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "updated_phase1: "LPU64"\n" - "updated_phase2: "LPU64"\n" - "failed_phase1: "LPU64"\n" - "failed_phase2: "LPU64"\n" - "directories: "LPU64"\n" - "dirent_repaired: "LPU64"\n" - "linkea_repaired: "LPU64"\n" - "nlinks_repaired: "LPU64"\n" - "multiple_linked_checked: "LPU64"\n" - "multiple_linked_repaired: "LPU64"\n" - "unknown_inconsistency: "LPU64"\n" - "unmatched_pairs_repaired: "LPU64"\n" - "dangling_repaired: "LPU64"\n" - "multiple_referenced_repaired: "LPU64"\n" - "bad_file_type_repaired: "LPU64"\n" - "lost_dirent_repaired: "LPU64"\n" - "local_lost_found_scanned: "LPU64"\n" - "local_lost_found_moved: "LPU64"\n" - "local_lost_found_skipped: "LPU64"\n" - 
"local_lost_found_failed: "LPU64"\n" - "striped_dirs_scanned: "LPU64"\n" - "striped_dirs_repaired: "LPU64"\n" - "striped_dirs_failed: "LPU64"\n" - "striped_dirs_disabled: "LPU64"\n" - "striped_dirs_skipped: "LPU64"\n" - "striped_shards_scanned: "LPU64"\n" - "striped_shards_repaired: "LPU64"\n" - "striped_shards_failed: "LPU64"\n" - "striped_shards_skipped: "LPU64"\n" - "name_hash_repaired: "LPU64"\n" - "success_count: %u\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n", - checked_phase1, - checked_phase2, - ns->ln_items_repaired, - ns->ln_objs_repaired_phase2, - ns->ln_items_failed, - ns->ln_objs_failed_phase2, - ns->ln_dirs_checked, - ns->ln_dirent_repaired, - ns->ln_linkea_repaired, - ns->ln_objs_nlink_repaired, - ns->ln_mul_linked_checked, - ns->ln_mul_linked_repaired, - ns->ln_unknown_inconsistency, - ns->ln_unmatched_pairs_repaired, - ns->ln_dangling_repaired, - ns->ln_mul_ref_repaired, - ns->ln_bad_type_repaired, - ns->ln_lost_dirent_repaired, - ns->ln_local_lpf_scanned, - ns->ln_local_lpf_moved, - ns->ln_local_lpf_skipped, - ns->ln_local_lpf_failed, - ns->ln_striped_dirs_scanned, - ns->ln_striped_dirs_repaired, - ns->ln_striped_dirs_failed, - ns->ln_striped_dirs_disabled, - ns->ln_striped_dirs_skipped, - ns->ln_striped_shards_scanned, - ns->ln_striped_shards_repaired, - ns->ln_striped_shards_failed, - ns->ln_striped_shards_skipped, - ns->ln_name_hash_repaired, - ns->ln_success_count, - time_phase1, - time_phase2); + const char *postfix = dryrun ? 
"inconsistent" : "repaired"; + + seq_printf(m, "checked_phase1: %llu\n" + "checked_phase2: %llu\n" + "%s_phase1: %llu\n" + "%s_phase2: %llu\n" + "failed_phase1: %llu\n" + "failed_phase2: %llu\n" + "directories: %llu\n" + "dirent_%s: %llu\n" + "linkea_%s: %llu\n" + "nlinks_%s: %llu\n" + "multiple_linked_checked: %llu\n" + "multiple_linked_%s: %llu\n" + "unknown_inconsistency: %llu\n" + "unmatched_pairs_%s: %llu\n" + "dangling_%s: %llu\n" + "multiple_referenced_%s: %llu\n" + "bad_file_type_%s: %llu\n" + "lost_dirent_%s: %llu\n" + "local_lost_found_scanned: %llu\n" + "local_lost_found_moved: %llu\n" + "local_lost_found_skipped: %llu\n" + "local_lost_found_failed: %llu\n" + "striped_dirs_scanned: %llu\n" + "striped_dirs_%s: %llu\n" + "striped_dirs_failed: %llu\n" + "striped_dirs_disabled: %llu\n" + "striped_dirs_skipped: %llu\n" + "striped_shards_scanned: %llu\n" + "striped_shards_%s: %llu\n" + "striped_shards_failed: %llu\n" + "striped_shards_skipped: %llu\n" + "name_hash_%s: %llu\n" + "linkea_overflow_%s: %llu\n" + "agent_entries_%s: %llu\n" + "success_count: %u\n" + "run_time_phase1: %lld seconds\n" + "run_time_phase2: %lld seconds\n", + checked_phase1, + checked_phase2, + dryrun ? "inconsistent" : "updated", + ns->ln_items_repaired, + dryrun ? 
"inconsistent" : "updated", + ns->ln_objs_repaired_phase2, + ns->ln_items_failed, + ns->ln_objs_failed_phase2, + ns->ln_dirs_checked, + postfix, ns->ln_dirent_repaired, + postfix, ns->ln_linkea_repaired, + postfix, ns->ln_objs_nlink_repaired, + ns->ln_mul_linked_checked, + postfix, ns->ln_mul_linked_repaired, + ns->ln_unknown_inconsistency, + postfix, ns->ln_unmatched_pairs_repaired, + postfix, ns->ln_dangling_repaired, + postfix, ns->ln_mul_ref_repaired, + postfix, ns->ln_bad_type_repaired, + postfix, ns->ln_lost_dirent_repaired, + ns->ln_local_lpf_scanned, + ns->ln_local_lpf_moved, + ns->ln_local_lpf_skipped, + ns->ln_local_lpf_failed, + ns->ln_striped_dirs_scanned, + postfix, ns->ln_striped_dirs_repaired, + ns->ln_striped_dirs_failed, + ns->ln_striped_dirs_disabled, + ns->ln_striped_dirs_skipped, + ns->ln_striped_shards_scanned, + postfix, ns->ln_striped_shards_repaired, + ns->ln_striped_shards_failed, + ns->ln_striped_shards_skipped, + postfix, ns->ln_name_hash_repaired, + dryrun ? "inconsistent" : "cleared", + ns->ln_linkea_overflow_cleared, + postfix, ns->ln_agent_entries_repaired, + ns->ln_success_count, + time_phase1, + time_phase2); } static void lfsck_namespace_release_lmv(const struct lu_env *env, @@ -3741,7 +4228,6 @@ static int lfsck_namespace_reset(const struct lu_env *env, struct lfsck_namespace *ns = com->lc_file_ram; struct lfsck_assistant_data *lad = com->lc_data; struct dt_object *root; - struct dt_object *dto; int rc; ENTRY; @@ -3757,7 +4243,7 @@ static int lfsck_namespace_reset(const struct lu_env *env, memset(ns, 0, sizeof(*ns)); } else { __u32 count = ns->ln_success_count; - __u64 last_time = ns->ln_time_last_complete; + time64_t last_time = ns->ln_time_last_complete; memset(ns, 0, sizeof(*ns)); ns->ln_success_count = count; @@ -3765,23 +4251,23 @@ static int lfsck_namespace_reset(const struct lu_env *env, } ns->ln_magic = LFSCK_NAMESPACE_MAGIC; ns->ln_status = LS_INIT; + ns->ln_time_latest_reset = ktime_get_real_seconds(); - 
lfsck_object_put(env, com->lc_obj); - com->lc_obj = NULL; - dto = lfsck_namespace_load_one_trace_file(env, com, root, - LFSCK_NAMESPACE, true); - if (IS_ERR(dto)) - GOTO(out, rc = PTR_ERR(dto)); + rc = lfsck_load_one_trace_file(env, com, root, &com->lc_obj, + &dt_lfsck_namespace_features, + LFSCK_NAMESPACE, true); + if (rc) + GOTO(out, rc); - com->lc_obj = dto; - rc = lfsck_namespace_load_sub_trace_files(env, com, true); + rc = lfsck_load_sub_trace_files(env, com, &dt_lfsck_namespace_features, + LFSCK_NAMESPACE, true); if (rc != 0) GOTO(out, rc); - lad->lad_incomplete = 0; + clear_bit(LAD_INCOMPLETE, &lad->lad_flags); CFS_RESET_BITMAP(lad->lad_bitmap); - rc = lfsck_namespace_store(env, com, true); + rc = lfsck_namespace_store(env, com); GOTO(out, rc); @@ -3818,14 +4304,19 @@ static void lfsck_namespace_close_dir(const struct lu_env *env, struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_lmv *llmv = lfsck->li_lmv; struct lfsck_namespace_req *lnr; - __u32 size = - sizeof(*lnr) + LFSCK_TMPBUF_LEN; - bool wakeup = false; + struct lu_attr *la = &lfsck_env_info(env)->lti_la2; + __u32 size = sizeof(*lnr) + LFSCK_TMPBUF_LEN; + int rc; + bool wakeup = false; ENTRY; if (llmv == NULL) RETURN_EXIT; + rc = dt_attr_get(env, lfsck->li_obj_dir, la); + if (rc) + RETURN_EXIT; + OBD_ALLOC(lnr, size); if (lnr == NULL) { ns->ln_striped_dirs_skipped++; @@ -3834,7 +4325,7 @@ static void lfsck_namespace_close_dir(const struct lu_env *env, } lso = lfsck_assistant_object_init(env, lfsck_dto2fid(lfsck->li_obj_dir), - NULL, lfsck->li_pos_current.lp_oit_cookie, true); + la, lfsck->li_pos_current.lp_oit_cookie, true); if (IS_ERR(lso)) { OBD_FREE(lnr, size); ns->ln_striped_dirs_skipped++; @@ -3850,9 +4341,12 @@ static void lfsck_namespace_close_dir(const struct lu_env *env, lnr->lnr_fid = *lfsck_dto2fid(lfsck->li_obj_dir); lnr->lnr_dir_cookie = MDS_DIR_END_OFF; lnr->lnr_size = size; + lnr->lnr_type = lso->lso_attr.la_mode; spin_lock(&lad->lad_lock); - if (lad->lad_assistant_status < 0) 
{ + if (lad->lad_assistant_status < 0 || + unlikely(!thread_is_running(&lfsck->li_thread) || + !thread_is_running(&lad->lad_thread))) { spin_unlock(&lad->lad_lock); lfsck_namespace_assistant_req_fini(env, &lnr->lnr_lar); ns->ln_striped_dirs_skipped++; @@ -3919,19 +4413,19 @@ static int lfsck_namespace_checkpoint(const struct lu_env *env, ns->ln_pos_latest_start = lfsck->li_pos_checkpoint; } else { ns->ln_pos_last_checkpoint = lfsck->li_pos_checkpoint; - ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); - ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_run_time_phase1 += ktime_get_seconds() - + lfsck->li_time_last_checkpoint; + ns->ln_time_last_checkpoint = ktime_get_real_seconds(); ns->ln_items_checked += com->lc_new_checked; com->lc_new_checked = 0; } - rc = lfsck_namespace_store(env, com, false); + rc = lfsck_namespace_store(env, com); up_write(&com->lc_sem); log: - CDEBUG(D_LFSCK, "%s: namespace LFSCK checkpoint at the pos ["LPU64 - ", "DFID", "LPX64"], status = %d: rc = %d\n", + CDEBUG(D_LFSCK, "%s: namespace LFSCK checkpoint at the pos [%llu" + ", "DFID", %#llx], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck), lfsck->li_pos_current.lp_oit_cookie, PFID(&lfsck->li_pos_current.lp_dir_parent), lfsck->li_pos_current.lp_dir_cookie, ns->ln_status, rc); @@ -3963,7 +4457,7 @@ static int lfsck_namespace_prep(const struct lu_env *env, } down_write(&com->lc_sem); - ns->ln_time_latest_start = cfs_time_current_sec(); + ns->ln_time_latest_start = ktime_get_real_seconds(); spin_lock(&lfsck->li_lock); if (ns->ln_flags & LF_SCANNED_ONCE) { @@ -4032,8 +4526,8 @@ static int lfsck_namespace_prep(const struct lu_env *env, rc = lfsck_start_assistant(env, com, lsp); - CDEBUG(D_LFSCK, "%s: namespace LFSCK prep done, start pos ["LPU64", " - DFID", "LPX64"]: rc = %d\n", + CDEBUG(D_LFSCK, "%s: namespace LFSCK prep done, start pos [%llu, " + DFID", %#llx]: rc = %d\n", lfsck_lfsck2name(lfsck), pos->lp_oit_cookie, 
PFID(&pos->lp_dir_parent), pos->lp_dir_cookie, rc); @@ -4044,19 +4538,32 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env, struct lfsck_component *com, struct dt_object *obj) { - struct lfsck_thread_info *info = lfsck_env_info(env); - struct lfsck_namespace *ns = com->lc_file_ram; - struct lfsck_instance *lfsck = com->lc_lfsck; - const struct lu_fid *fid = lfsck_dto2fid(obj); - struct lu_fid *pfid = &info->lti_fid2; - struct lu_name *cname = &info->lti_name; - struct lu_seq_range *range = &info->lti_range; - struct seq_server_site *ss = lfsck_dev_site(lfsck); - struct linkea_data ldata = { NULL }; - __u32 idx = lfsck_dev_idx(lfsck); - int rc; + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_instance *lfsck = com->lc_lfsck; + const struct lu_fid *fid = lfsck_dto2fid(obj); + struct lu_fid *pfid = &info->lti_fid2; + struct lu_name *cname = &info->lti_name; + struct lu_seq_range *range = &info->lti_range; + struct seq_server_site *ss = lfsck_dev_site(lfsck); + struct linkea_data ldata = { NULL }; + __u32 idx = lfsck_dev_idx(lfsck); + struct lu_attr la = { .la_valid = 0 }; + bool remote = false; + int rc; ENTRY; + rc = dt_attr_get(env, obj, &la); + if (unlikely(rc || (la.la_valid & LA_FLAGS && + la.la_flags & LUSTRE_ORPHAN_FL))) { + CDEBUG(D_INFO, + "%s: skip orphan "DFID", %llx/%x: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(fid), + la.la_valid, la.la_flags, rc); + + return rc; + } + rc = lfsck_links_read(env, obj, &ldata); if (rc == -ENOENT) GOTO(out, rc = 0); @@ -4081,14 +4588,47 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env, GOTO(out, rc = (rc == -ENOENT ? 
0 : rc)); } - if (rc == -ENODATA) { + if (rc && rc != -ENODATA) + GOTO(out, rc); + + if (rc == -ENODATA || unlikely(!ldata.ld_leh->leh_reccount)) { rc = lfsck_namespace_check_for_double_scan(env, com, obj); GOTO(out, rc); } - if (rc != 0) - GOTO(out, rc); + linkea_first_entry(&ldata); + while (ldata.ld_lee != NULL) { + linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, + cname, pfid); + if (!fid_is_sane(pfid)) { + rc = lfsck_namespace_trace_update(env, com, fid, + LNTF_CHECK_PARENT, true); + } else if (!linkea_entry_is_valid(&ldata, cname, pfid)) { + GOTO(out, rc); + } else { + fld_range_set_mdt(range); + rc = fld_server_lookup(env, ss->ss_server_fld, + fid_seq(pfid), range); + if ((rc == -ENOENT) || + (!rc && range->lsr_index != idx)) { + remote = true; + break; + } + } + if (rc) + GOTO(out, rc); + + linkea_next_entry(&ldata); + } + + if ((lu_object_has_agent_entry(&obj->do_lu) && !remote) || + (!lu_object_has_agent_entry(&obj->do_lu) && remote)) { + rc = lfsck_namespace_trace_update(env, com, fid, + LNTF_CHECK_AGENT_ENTRY, true); + if (rc) + GOTO(out, rc); + } /* Record multiple-linked object. 
*/ if (ldata.ld_leh->leh_reccount > 1) { @@ -4098,23 +4638,11 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env, GOTO(out, rc); } - linkea_first_entry(&ldata); - linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname, pfid); - if (!fid_is_sane(pfid)) { + if (remote) rc = lfsck_namespace_trace_update(env, com, fid, - LNTF_CHECK_PARENT, true); - } else { - fld_range_set_mdt(range); - rc = fld_local_lookup(env, ss->ss_server_fld, - fid_seq(pfid), range); - if ((rc == -ENOENT) || - (rc == 0 && range->lsr_index != idx)) - rc = lfsck_namespace_trace_update(env, com, fid, - LNTF_CHECK_LINKEA, true); - else - rc = lfsck_namespace_check_for_double_scan(env, com, - obj); - } + LNTF_CHECK_LINKEA, true); + else + rc = lfsck_namespace_check_for_double_scan(env, com, obj); GOTO(out, rc); @@ -4146,11 +4674,11 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env, l_wait_event(mthread->t_ctl_waitq, lad->lad_prefetched < bk->lb_async_windows || !thread_is_running(mthread) || - thread_is_stopped(athread), + !thread_is_running(athread), &lwi); - if (unlikely(!thread_is_running(mthread)) || - thread_is_stopped(athread)) + if (unlikely(!thread_is_running(mthread) || + !thread_is_running(athread))) return 0; if (unlikely(lfsck_is_dead_obj(lfsck->li_obj_dir))) @@ -4165,7 +4693,9 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env, } spin_lock(&lad->lad_lock); - if (lad->lad_assistant_status < 0) { + if (lad->lad_assistant_status < 0 || + unlikely(!thread_is_running(mthread) || + !thread_is_running(athread))) { spin_unlock(&lad->lad_lock); lfsck_namespace_assistant_req_fini(env, &lnr->lnr_lar); return lad->lad_assistant_status; @@ -4227,14 +4757,14 @@ static int lfsck_namespace_post(const struct lu_env *env, spin_unlock(&lfsck->li_lock); if (!init) { - ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); - ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_run_time_phase1 += 
ktime_get_seconds() - + lfsck->li_time_last_checkpoint; + ns->ln_time_last_checkpoint = ktime_get_real_seconds(); ns->ln_items_checked += com->lc_new_checked; com->lc_new_checked = 0; } - rc = lfsck_namespace_store(env, com, false); + rc = lfsck_namespace_store(env, com); up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: namespace LFSCK post done: rc = %d\n", @@ -4243,14 +4773,13 @@ static int lfsck_namespace_post(const struct lu_env *env, RETURN(rc); } -static int +static void lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, struct seq_file *m) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_namespace *ns = com->lc_file_ram; - int rc; down_read(&com->lc_sem); seq_printf(m, "name: lfsck_namespace\n" @@ -4259,182 +4788,163 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, "status: %s\n", ns->ln_magic, bk->lb_version, - lfsck_status2names(ns->ln_status)); + lfsck_status2name(ns->ln_status)); - rc = lfsck_bits_dump(m, ns->ln_flags, lfsck_flags_names, "flags"); - if (rc < 0) - goto out; + lfsck_bits_dump(m, ns->ln_flags, lfsck_flags_names, "flags"); - rc = lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param"); - if (rc < 0) - goto out; + lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param"); - rc = lfsck_time_dump(m, ns->ln_time_last_complete, - "last_completed"); - if (rc < 0) - goto out; + lfsck_time_dump(m, ns->ln_time_last_complete, "last_completed"); - rc = lfsck_time_dump(m, ns->ln_time_latest_start, - "latest_start"); - if (rc < 0) - goto out; + lfsck_time_dump(m, ns->ln_time_latest_start, "latest_start"); - rc = lfsck_time_dump(m, ns->ln_time_last_checkpoint, - "last_checkpoint"); - if (rc < 0) - goto out; + lfsck_time_dump(m, ns->ln_time_last_checkpoint, "last_checkpoint"); - rc = lfsck_pos_dump(m, &ns->ln_pos_latest_start, - "latest_start_position"); - if (rc < 0) - goto out; + lfsck_pos_dump(m, &ns->ln_pos_latest_start, 
"latest_start_position"); - rc = lfsck_pos_dump(m, &ns->ln_pos_last_checkpoint, - "last_checkpoint_position"); - if (rc < 0) - goto out; + lfsck_pos_dump(m, &ns->ln_pos_last_checkpoint, + "last_checkpoint_position"); - rc = lfsck_pos_dump(m, &ns->ln_pos_first_inconsistent, - "first_failure_position"); - if (rc < 0) - goto out; + lfsck_pos_dump(m, &ns->ln_pos_first_inconsistent, + "first_failure_position"); if (ns->ln_status == LS_SCANNING_PHASE1) { struct lfsck_position pos; - const struct dt_it_ops *iops; - cfs_duration_t duration = cfs_time_current() - - lfsck->li_time_last_checkpoint; - __u64 checked = ns->ln_items_checked + com->lc_new_checked; - __u64 speed = checked; - __u64 new_checked = com->lc_new_checked * - msecs_to_jiffies(MSEC_PER_SEC); - __u32 rtime = ns->ln_run_time_phase1 + - cfs_duration_sec(duration + HALF_SEC); + time64_t duration = ktime_get_seconds() - + lfsck->li_time_last_checkpoint; + u64 checked = ns->ln_items_checked + com->lc_new_checked; + u64 speed = checked; + u64 new_checked = com->lc_new_checked; + time64_t rtime = ns->ln_run_time_phase1 + duration; if (duration != 0) - do_div(new_checked, duration); + new_checked = div64_s64(new_checked, duration); if (rtime != 0) - do_div(speed, rtime); - - lfsck_namespace_dump_statistics(m, ns, checked, 0, rtime, 0); - seq_printf(m, "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: N/A\n" - "average_speed_total: "LPU64" items/sec\n" - "real_time_speed_phase1: "LPU64" items/sec\n" - "real_time_speed_phase2: N/A\n", - speed, - speed, - new_checked); - - LASSERT(lfsck->li_di_oit != NULL); - - iops = &lfsck->li_obj_oit->do_index_ops->dio_it; - - /* The low layer otable-based iteration position may NOT - * exactly match the namespace-based directory traversal - * cookie. Generally, it is not a serious issue. But the - * caller should NOT make assumption on that. 
*/ - pos.lp_oit_cookie = iops->store(env, lfsck->li_di_oit); - if (!lfsck->li_current_oit_processed) - pos.lp_oit_cookie--; - - spin_lock(&lfsck->li_lock); - if (lfsck->li_di_dir != NULL) { - pos.lp_dir_cookie = lfsck->li_cookie_dir; - if (pos.lp_dir_cookie >= MDS_DIR_END_OFF) { + speed = div64_s64(speed, rtime); + + lfsck_namespace_dump_statistics(m, ns, checked, 0, rtime, 0, + bk->lb_param & LPF_DRYRUN); + seq_printf(m, "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: N/A\n" + "average_speed_total: %llu items/sec\n" + "real_time_speed_phase1: %llu items/sec\n" + "real_time_speed_phase2: N/A\n", + speed, + speed, + new_checked); + + if (likely(lfsck->li_di_oit)) { + const struct dt_it_ops *iops = + &lfsck->li_obj_oit->do_index_ops->dio_it; + + /* The low layer otable-based iteration position may NOT + * exactly match the namespace-based directory traversal + * cookie. Generally, it is not a serious issue. But the + * caller should NOT make assumption on that. */ + pos.lp_oit_cookie = iops->store(env, lfsck->li_di_oit); + if (!lfsck->li_current_oit_processed) + pos.lp_oit_cookie--; + + spin_lock(&lfsck->li_lock); + if (lfsck->li_di_dir) { + pos.lp_dir_cookie = lfsck->li_cookie_dir; + if (pos.lp_dir_cookie >= MDS_DIR_END_OFF) { + fid_zero(&pos.lp_dir_parent); + pos.lp_dir_cookie = 0; + } else { + pos.lp_dir_parent = + *lfsck_dto2fid(lfsck->li_obj_dir); + } + } else { fid_zero(&pos.lp_dir_parent); pos.lp_dir_cookie = 0; - } else { - pos.lp_dir_parent = - *lfsck_dto2fid(lfsck->li_obj_dir); } + spin_unlock(&lfsck->li_lock); } else { - fid_zero(&pos.lp_dir_parent); - pos.lp_dir_cookie = 0; + pos = ns->ln_pos_last_checkpoint; } - spin_unlock(&lfsck->li_lock); + lfsck_pos_dump(m, &pos, "current_position"); } else if (ns->ln_status == LS_SCANNING_PHASE2) { - cfs_duration_t duration = cfs_time_current() - - com->lc_time_last_checkpoint; + time64_t duration = ktime_get_seconds() - + com->lc_time_last_checkpoint; __u64 checked = ns->ln_objs_checked_phase2 + 
com->lc_new_checked; __u64 speed1 = ns->ln_items_checked; __u64 speed2 = checked; __u64 speed0 = speed1 + speed2; - __u64 new_checked = com->lc_new_checked * - msecs_to_jiffies(MSEC_PER_SEC); - __u32 rtime = ns->ln_run_time_phase2 + - cfs_duration_sec(duration + HALF_SEC); - __u32 time0 = ns->ln_run_time_phase1 + rtime; + __u64 new_checked = com->lc_new_checked; + time64_t rtime = ns->ln_run_time_phase2 + duration; + time64_t time0 = ns->ln_run_time_phase1 + rtime; if (duration != 0) - do_div(new_checked, duration); + new_checked = div64_s64(new_checked, duration); if (ns->ln_run_time_phase1 != 0) - do_div(speed1, ns->ln_run_time_phase1); + speed1 = div64_s64(speed1, ns->ln_run_time_phase1); else if (ns->ln_items_checked != 0) time0++; if (rtime != 0) - do_div(speed2, rtime); + speed2 = div64_s64(speed2, rtime); else if (checked != 0) time0++; if (time0 != 0) - do_div(speed0, time0); + speed0 = div64_s64(speed0, time0); lfsck_namespace_dump_statistics(m, ns, ns->ln_items_checked, checked, - ns->ln_run_time_phase1, rtime); - seq_printf(m, "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" objs/sec\n" - "average_speed_total: "LPU64" items/sec\n" - "real_time_speed_phase1: N/A\n" - "real_time_speed_phase2: "LPU64" objs/sec\n" - "current_position: "DFID"\n", - speed1, - speed2, - speed0, - new_checked, - PFID(&ns->ln_fid_latest_scanned_phase2)); + ns->ln_run_time_phase1, rtime, + bk->lb_param & LPF_DRYRUN); + seq_printf(m, "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: %llu objs/sec\n" + "average_speed_total: %llu items/sec\n" + "real_time_speed_phase1: N/A\n" + "real_time_speed_phase2: %llu objs/sec\n" + "current_position: "DFID"\n", + speed1, + speed2, + speed0, + new_checked, + PFID(&ns->ln_fid_latest_scanned_phase2)); } else { __u64 speed1 = ns->ln_items_checked; __u64 speed2 = ns->ln_objs_checked_phase2; __u64 speed0 = speed1 + speed2; - __u32 time0 = ns->ln_run_time_phase1 + ns->ln_run_time_phase2; + time64_t time0 = 
ns->ln_run_time_phase1 + ns->ln_run_time_phase2; if (ns->ln_run_time_phase1 != 0) - do_div(speed1, ns->ln_run_time_phase1); + speed1 = div64_s64(speed1, ns->ln_run_time_phase1); else if (ns->ln_items_checked != 0) time0++; if (ns->ln_run_time_phase2 != 0) - do_div(speed2, ns->ln_run_time_phase2); + speed2 = div64_s64(speed2, ns->ln_run_time_phase2); else if (ns->ln_objs_checked_phase2 != 0) time0++; if (time0 != 0) - do_div(speed0, time0); + speed0 = div64_s64(speed0, time0); lfsck_namespace_dump_statistics(m, ns, ns->ln_items_checked, ns->ln_objs_checked_phase2, ns->ln_run_time_phase1, - ns->ln_run_time_phase2); - seq_printf(m, "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" objs/sec\n" - "average_speed_total: "LPU64" items/sec\n" - "real_time_speed_phase1: N/A\n" - "real_time_speed_phase2: N/A\n" - "current_position: N/A\n", - speed1, - speed2, - speed0); + ns->ln_run_time_phase2, + bk->lb_param & LPF_DRYRUN); + seq_printf(m, "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: %llu objs/sec\n" + "average_speed_total: %llu items/sec\n" + "real_time_speed_phase1: N/A\n" + "real_time_speed_phase2: N/A\n" + "current_position: N/A\n", + speed1, + speed2, + speed0); } -out: + up_read(&com->lc_sem); - return 0; } static int lfsck_namespace_double_scan(const struct lu_env *env, @@ -4532,98 +5042,18 @@ static void lfsck_namespace_quit(const struct lu_env *env, static int lfsck_namespace_in_notify(const struct lu_env *env, struct lfsck_component *com, - struct lfsck_request *lr, - struct thandle *th) + struct lfsck_request *lr) { - struct lfsck_instance *lfsck = com->lc_lfsck; - struct lfsck_namespace *ns = com->lc_file_ram; - struct lfsck_assistant_data *lad = com->lc_data; - struct lfsck_tgt_descs *ltds = &lfsck->li_mdt_descs; - struct lfsck_tgt_desc *ltd; - int rc = 0; - bool fail = false; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_assistant_data *lad = 
com->lc_data; + struct lfsck_tgt_descs *ltds = &lfsck->li_mdt_descs; + struct lfsck_tgt_desc *ltd; + int rc = 0; + bool fail = false; ENTRY; switch (lr->lr_event) { - case LE_SKIP_NLINK_DECLARE: { - struct dt_object *obj; - struct lu_fid *key = &lfsck_env_info(env)->lti_fid3; - int idx; - __u8 flags = 0; - - LASSERT(th != NULL); - - idx = lfsck_sub_trace_file_fid2idx(&lr->lr_fid); - obj = com->lc_sub_trace_objs[idx].lsto_obj; - fid_cpu_to_be(key, &lr->lr_fid); - mutex_lock(&com->lc_sub_trace_objs[idx].lsto_mutex); - rc = dt_declare_delete(env, obj, - (const struct dt_key *)key, th); - if (rc == 0) - rc = dt_declare_insert(env, obj, - (const struct dt_rec *)&flags, - (const struct dt_key *)key, th); - mutex_unlock(&com->lc_sub_trace_objs[idx].lsto_mutex); - - RETURN(rc); - } - case LE_SKIP_NLINK: { - struct dt_object *obj; - struct lu_fid *key = &lfsck_env_info(env)->lti_fid3; - int idx; - __u8 flags = 0; - bool exist = false; - ENTRY; - - LASSERT(th != NULL); - - idx = lfsck_sub_trace_file_fid2idx(&lr->lr_fid); - obj = com->lc_sub_trace_objs[idx].lsto_obj; - fid_cpu_to_be(key, &lr->lr_fid); - mutex_lock(&com->lc_sub_trace_objs[idx].lsto_mutex); - rc = dt_lookup(env, obj, (struct dt_rec *)&flags, - (const struct dt_key *)key); - if (rc == 0) { - if (flags & LNTF_SKIP_NLINK) { - mutex_unlock( - &com->lc_sub_trace_objs[idx].lsto_mutex); - - RETURN(0); - } - - exist = true; - } else if (rc != -ENOENT) { - GOTO(log, rc); - } - - flags |= LNTF_SKIP_NLINK; - if (exist) { - rc = dt_delete(env, obj, (const struct dt_key *)key, - th); - if (rc != 0) - GOTO(log, rc); - } - - rc = dt_insert(env, obj, (const struct dt_rec *)&flags, - (const struct dt_key *)key, th, 1); - - GOTO(log, rc); - -log: - mutex_unlock(&com->lc_sub_trace_objs[idx].lsto_mutex); - CDEBUG(D_LFSCK, "%s: RPC service thread mark the "DFID - " to be skipped for namespace double scan: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(&lr->lr_fid), rc); - - if (rc != 0) - /* If we cannot record this object in 
the LFSCK tracing, - * we have to mark the LFSC as LF_INCOMPLETE, then the - * LFSCK will skip nlink attribute verification for - * all objects. */ - ns->ln_flags |= LF_INCOMPLETE; - - return 0; - } case LE_SET_LMV_MASTER: { struct dt_object *obj; @@ -4726,12 +5156,81 @@ log: RETURN(0); } +static void lfsck_namespace_repaired(struct lfsck_namespace *ns, __u64 *count) +{ + *count += ns->ln_objs_nlink_repaired; + *count += ns->ln_dirent_repaired; + *count += ns->ln_linkea_repaired; + *count += ns->ln_mul_linked_repaired; + *count += ns->ln_unmatched_pairs_repaired; + *count += ns->ln_dangling_repaired; + *count += ns->ln_mul_ref_repaired; + *count += ns->ln_bad_type_repaired; + *count += ns->ln_lost_dirent_repaired; + *count += ns->ln_striped_dirs_disabled; + *count += ns->ln_striped_dirs_repaired; + *count += ns->ln_striped_shards_repaired; + *count += ns->ln_name_hash_repaired; + *count += ns->ln_local_lpf_moved; +} + +static int lfsck_namespace_query_all(const struct lu_env *env, + struct lfsck_component *com, + __u32 *mdts_count, __u64 *repaired) +{ + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_tgt_descs *ltds = &com->lc_lfsck->li_mdt_descs; + struct lfsck_tgt_desc *ltd; + int idx; + int rc; + ENTRY; + + rc = lfsck_query_all(env, com); + if (rc != 0) + RETURN(rc); + + down_read(&ltds->ltd_rw_sem); + cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { + ltd = lfsck_ltd2tgt(ltds, idx); + LASSERT(ltd != NULL); + + mdts_count[ltd->ltd_namespace_status]++; + *repaired += ltd->ltd_namespace_repaired; + } + up_read(&ltds->ltd_rw_sem); + + down_read(&com->lc_sem); + mdts_count[ns->ln_status]++; + lfsck_namespace_repaired(ns, repaired); + up_read(&com->lc_sem); + + RETURN(0); +} + static int lfsck_namespace_query(const struct lu_env *env, - struct lfsck_component *com) + struct lfsck_component *com, + struct lfsck_request *req, + struct lfsck_reply *rep, + struct lfsck_query *que, int idx) { struct lfsck_namespace *ns = com->lc_file_ram; + int rc = 0; + + if (que 
!= NULL) { + LASSERT(com->lc_lfsck->li_master); - return ns->ln_status; + rc = lfsck_namespace_query_all(env, com, + que->lu_mdts_count[idx], + &que->lu_repaired[idx]); + } else { + down_read(&com->lc_sem); + rep->lr_status = ns->ln_status; + if (req->lr_flags & LEF_QUERY_ALL) + lfsck_namespace_repaired(ns, &rep->lr_repaired); + up_read(&com->lc_sem); + } + + return rc; } static struct lfsck_operations lfsck_namespace_ops = { @@ -4784,26 +5283,28 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, struct dt_object *child, struct lfsck_namespace_req *lnr) { - struct lfsck_thread_info *info = lfsck_env_info(env); - struct lu_attr *la = &info->lti_la; - struct dt_allocation_hint *hint = &info->lti_hint; - struct dt_object_format *dof = &info->lti_dof; - struct dt_insert_rec *rec = &info->lti_dt_rec; - struct lmv_mds_md_v1 *lmv2 = &info->lti_lmv2; - const struct lu_name *cname; - const struct lu_fid *pfid = lfsck_dto2fid(parent); - const struct lu_fid *cfid = lfsck_dto2fid(child); - struct linkea_data ldata = { NULL }; - struct lfsck_lock_handle *llh = &info->lti_llh; - struct lu_buf linkea_buf; - struct lu_buf lmv_buf; - struct lfsck_instance *lfsck = com->lc_lfsck; - struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; - struct dt_device *dev = lfsck->li_next; - struct thandle *th = NULL; - int rc = 0; - __u16 type = lnr->lnr_type; - bool create; + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lu_attr *la = &info->lti_la; + struct dt_allocation_hint *hint = &info->lti_hint; + struct dt_object_format *dof = &info->lti_dof; + struct dt_insert_rec *rec = &info->lti_dt_rec; + struct lmv_mds_md_v1 *lmv2 = &info->lti_lmv2; + const struct lu_name *cname; + const struct lu_fid *pfid = lfsck_dto2fid(parent); + const struct lu_fid *cfid = lfsck_dto2fid(child); + struct linkea_data ldata = { NULL }; + struct lfsck_lock_handle *llh = &info->lti_llh; + struct lustre_handle rlh = { 0 }; + struct lustre_handle clh = { 0 }; + struct lu_buf linkea_buf; + 
struct lu_buf lmv_buf; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct dt_device *dev = lfsck->li_next; + struct thandle *th = NULL; + int rc = 0; + __u16 type = lnr->lnr_type; + bool create; ENTRY; cname = lfsck_name_get_const(env, lnr->lnr_name, lnr->lnr_namelen); @@ -4829,16 +5330,13 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, if (IS_ERR(child)) GOTO(log, rc = PTR_ERR(child)); - rc = linkea_data_new(&ldata, &info->lti_linkea_buf2); - if (rc != 0) - GOTO(log, rc); - - rc = linkea_add_buf(&ldata, cname, pfid); + rc = linkea_links_new(&ldata, &info->lti_linkea_buf2, + cname, pfid); if (rc != 0) GOTO(log, rc); rc = lfsck_lock(env, lfsck, parent, lnr->lnr_name, llh, - MDS_INODELOCK_UPDATE, LCK_PR); + MDS_INODELOCK_UPDATE, LCK_PW); if (rc != 0) GOTO(log, rc); @@ -4846,17 +5344,38 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, if (rc != 0) GOTO(log, rc); + if (dt_object_remote(child)) { + rc = lfsck_remote_lookup_lock(env, lfsck, parent, child, &rlh, + LCK_EX); + if (rc != 0) + GOTO(log, rc); + } + + rc = lfsck_ibits_lock(env, lfsck, child, &clh, + MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP | + MDS_INODELOCK_XATTR, LCK_EX); + if (rc != 0) + GOTO(unlock_remote_lookup, rc); + /* Set the ctime as zero, then others can know it is created for * repairing dangling name entry by LFSCK. And if the LFSCK made * wrong decision and the real MDT-object has been found later, * then the LFSCK has chance to fix the incosistency properly. 
*/ memset(la, 0, sizeof(*la)); - la->la_mode = (type & S_IFMT) | 0600; - la->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID | - LA_ATIME | LA_MTIME | LA_CTIME; - - child->do_ops->do_ah_init(env, hint, parent, child, - la->la_mode & S_IFMT); + if (S_ISDIR(type)) + la->la_mode = (type & S_IFMT) | 0700; + else + la->la_mode = (type & S_IFMT) | 0600; + la->la_valid = LA_TYPE | LA_MODE | LA_CTIME; + + /* + * if it's directory, skip do_ah_init() to create a plain directory + * because it may have shards already, which will be inserted back + * later, besides, it may be remote, and creating stripe directory + * remotely is not supported. + */ + if (S_ISREG(type)) + child->do_ops->do_ah_init(env, hint, parent, child, type); memset(dof, 0, sizeof(*dof)); dof->dof_type = dt_mode_to_dft(type); @@ -4866,7 +5385,7 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, th = dt_trans_create(env, dev); if (IS_ERR(th)) - GOTO(log, rc = PTR_ERR(th)); + GOTO(unlock_child, rc = PTR_ERR(th)); /* 1a. create child. */ rc = dt_declare_create(env, child, la, hint, dof, th); @@ -4916,7 +5435,7 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, lfsck_lmv_header_cpu_to_le(lmv2, lmv2); lfsck_buf_init(&lmv_buf, lmv2, sizeof(*lmv2)); rc = dt_declare_xattr_set(env, child, &lmv_buf, - XATTR_NAME_LMV, 0, th); + XATTR_NAME_LMV".set", 0, th); if (rc != 0) GOTO(stop, rc); } @@ -4930,6 +5449,21 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); + /* 7a. if child is remote, delete and insert to generate local agent */ + if (dt_object_remote(child)) { + rc = dt_declare_delete(env, parent, + (const struct dt_key *)lnr->lnr_name, + th); + if (rc) + GOTO(stop, rc); + + rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec, + (const struct dt_key *)lnr->lnr_name, + th); + if (rc) + GOTO(stop, rc); + } + rc = dt_trans_start_local(env, dev, th); if (rc != 0) GOTO(stop, rc = (rc == -EEXIST ? 
1 : rc)); @@ -4950,21 +5484,21 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, rec->rec_type = S_IFDIR; rec->rec_fid = cfid; rc = dt_insert(env, child, (const struct dt_rec *)rec, - (const struct dt_key *)dot, th, 1); + (const struct dt_key *)dot, th); if (rc != 0) GOTO(unlock, rc); /* 4b. insert dotdot into child dir */ rec->rec_fid = pfid; rc = dt_insert(env, child, (const struct dt_rec *)rec, - (const struct dt_key *)dotdot, th, 1); + (const struct dt_key *)dotdot, th); if (rc != 0) GOTO(unlock, rc); /* 5b. generate slave LMV EA. */ if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) { - rc = dt_xattr_set(env, child, &lmv_buf, XATTR_NAME_LMV, - 0, th); + rc = dt_xattr_set(env, child, &lmv_buf, + XATTR_NAME_LMV".set", 0, th); if (rc != 0) GOTO(unlock, rc); } @@ -4973,6 +5507,23 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env, /* 6b. insert linkEA for child. */ rc = dt_xattr_set(env, child, &linkea_buf, XATTR_NAME_LINK, 0, th); + if (rc) + GOTO(unlock, rc); + + /* 7b. 
if child is remote, delete and insert to generate local agent */ + if (dt_object_remote(child)) { + rc = dt_delete(env, parent, + (const struct dt_key *)lnr->lnr_name, th); + if (rc) + GOTO(unlock, rc); + + rec->rec_type = type; + rec->rec_fid = cfid; + rc = dt_insert(env, parent, (const struct dt_rec *)rec, + (const struct dt_key *)lnr->lnr_name, th); + if (rc) + GOTO(unlock, rc); + } GOTO(unlock, rc); @@ -4982,6 +5533,11 @@ unlock: stop: dt_trans_stop(env, dev, th); +unlock_child: + lfsck_ibits_unlock(&clh, LCK_EX); +unlock_remote_lookup: + if (dt_object_remote(child)) + lfsck_ibits_unlock(&rlh, LCK_EX); log: lfsck_unlock(llh); CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant found dangling " @@ -5288,6 +5844,8 @@ nodata: goto again; } + LASSERT(handle != NULL); + if (dir == NULL) { dir = lfsck_assistant_object_load(env, lfsck, lso); if (IS_ERR(dir)) { @@ -5311,7 +5869,7 @@ nodata: LASSERT(newdata); rc = dt_xattr_del(env, obj, XATTR_NAME_LINK, handle); - if (rc != 0) + if (rc != 0 && rc != -ENOENT && rc != -ENODATA) GOTO(stop, rc); } @@ -5323,37 +5881,8 @@ nodata: } rc = linkea_add_buf(&ldata, cname, pfid); - if (rc != 0) - GOTO(stop, rc); - - rc = lfsck_links_write(env, obj, &ldata, handle); - if (unlikely(rc == -ENOSPC) && - S_ISREG(lfsck_object_type(obj)) && !dt_object_remote(obj)) { - if (handle != NULL) { - LASSERT(dt_write_locked(env, obj)); - - dt_write_unlock(env, obj); - dtlocked = false; - - dt_trans_stop(env, dev, handle); - handle = NULL; - - lfsck_ibits_unlock(&lh, LCK_EX); - } - - rc = lfsck_namespace_trace_update(env, com, - &lnr->lnr_fid, LNTF_SKIP_NLINK, true); - if (rc != 0) - /* If we cannot record this object in the - * LFSCK tracing, we have to mark the LFSCK - * as LF_INCOMPLETE, then the LFSCK will - * skip nlink attribute verification for - * all objects. 
*/ - ns->ln_flags |= LF_INCOMPLETE; - - GOTO(out, rc = 0); - } - + if (rc == 0) + rc = lfsck_links_write(env, obj, &ldata, handle); if (rc != 0) GOTO(stop, rc); @@ -5435,7 +5964,8 @@ out: break; } - if (count == 1 && S_ISREG(lfsck_object_type(obj))) + if (obj != NULL && count == 1 && + S_ISREG(lfsck_object_type(obj))) dt_attr_get(env, obj, la); } @@ -5457,15 +5987,15 @@ trace: if (!(bk->lb_param & LPF_FAILOUT)) rc = 0; } else { - if (log) - CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant " - "repaired the entry: "DFID", parent "DFID - ", name %.*s\n", lfsck_lfsck2name(lfsck), - PFID(&lnr->lnr_fid), PFID(pfid), - lnr->lnr_namelen, lnr->lnr_name); - if (repaired) { ns->ln_items_repaired++; + if (log) + CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant " + "repaired the entry: "DFID", parent "DFID + ", name %.*s, type %d\n", + lfsck_lfsck2name(lfsck), + PFID(&lnr->lnr_fid), PFID(pfid), + lnr->lnr_namelen, lnr->lnr_name, type); switch (type) { case LNIT_DANGLING: @@ -5492,8 +6022,17 @@ trace: ns->ln_name_hash_repaired++; /* Not count repeatedly. */ - if (!repaired) + if (!repaired) { ns->ln_items_repaired++; + if (log) + CDEBUG(D_LFSCK, "%s: namespace LFSCK " + "assistant repaired the entry: " + DFID", parent "DFID + ", name %.*s\n", + lfsck_lfsck2name(lfsck), + PFID(&lnr->lnr_fid), PFID(pfid), + lnr->lnr_namelen, lnr->lnr_name); + } if (bk->lb_param & LPF_DRYRUN && lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) @@ -5659,7 +6198,7 @@ static int lfsck_namespace_scan_local_lpf_one(const struct lu_env *env, /* b5. insert child's FID into the LFSCK trace file. */ rc = dt_insert(env, obj, (const struct dt_rec *)&flags, - (const struct dt_key *)key, th, 1); + (const struct dt_key *)key, th); GOTO(stop, rc = (rc == 0 ? 
1 : rc)); @@ -6044,8 +6583,9 @@ checkpoint: down_write(&com->lc_sem); com->lc_new_checked++; com->lc_new_scanned++; - if (rc >= 0 && fid_is_sane(&fid)) + if (rc >= 0) ns->ln_fid_latest_scanned_phase2 = fid; + if (rc > 0) ns->ln_objs_repaired_phase2++; else if (rc < 0) @@ -6055,25 +6595,22 @@ checkpoint: if (rc < 0 && bk->lb_param & LPF_FAILOUT) GOTO(put, rc); - if (unlikely(cfs_time_beforeq(com->lc_time_next_checkpoint, - cfs_time_current())) && + if (unlikely(com->lc_time_next_checkpoint <= + ktime_get_seconds()) && com->lc_new_checked != 0) { down_write(&com->lc_sem); - ns->ln_run_time_phase2 += - cfs_duration_sec(cfs_time_current() + - HALF_SEC - com->lc_time_last_checkpoint); - ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_run_time_phase2 += ktime_get_seconds() - + com->lc_time_last_checkpoint; + ns->ln_time_last_checkpoint = ktime_get_real_seconds(); ns->ln_objs_checked_phase2 += com->lc_new_checked; com->lc_new_checked = 0; - rc = lfsck_namespace_store(env, com, false); + lfsck_namespace_store(env, com); up_write(&com->lc_sem); - if (rc != 0) - GOTO(put, rc); - com->lc_time_last_checkpoint = cfs_time_current(); + com->lc_time_last_checkpoint = ktime_get_seconds(); com->lc_time_next_checkpoint = com->lc_time_last_checkpoint + - cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + LFSCK_CHECKPOINT_INTERVAL; } lfsck_control_speed_by_self(com); @@ -6124,9 +6661,9 @@ static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env, com->lc_new_checked = 0; com->lc_new_scanned = 0; - com->lc_time_last_checkpoint = cfs_time_current(); + com->lc_time_last_checkpoint = ktime_get_seconds(); com->lc_time_next_checkpoint = com->lc_time_last_checkpoint + - cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + LFSCK_CHECKPOINT_INTERVAL; i = lfsck_sub_trace_file_fid2idx(&ns->ln_fid_latest_scanned_phase2); rc = lfsck_namespace_double_scan_one_trace_file(env, com, @@ -6148,6 +6685,10 @@ static void lfsck_namespace_assistant_fill_pos(const struct lu_env *env, 
struct lfsck_assistant_data *lad = com->lc_data; struct lfsck_namespace_req *lnr; + if (((struct lfsck_namespace *)(com->lc_file_ram))->ln_status != + LS_SCANNING_PHASE1) + return; + if (list_empty(&lad->lad_req_list)) return; @@ -6167,9 +6708,9 @@ static int lfsck_namespace_double_scan_result(const struct lu_env *env, struct lfsck_namespace *ns = com->lc_file_ram; down_write(&com->lc_sem); - ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - com->lc_time_last_checkpoint); - ns->ln_time_last_checkpoint = cfs_time_current_sec(); + ns->ln_run_time_phase2 += ktime_get_seconds() - + com->lc_time_last_checkpoint; + ns->ln_time_last_checkpoint = ktime_get_real_seconds(); ns->ln_objs_checked_phase2 += com->lc_new_checked; com->lc_new_checked = 0; @@ -6178,8 +6719,9 @@ static int lfsck_namespace_double_scan_result(const struct lu_env *env, ns->ln_status = LS_PARTIAL; else ns->ln_status = LS_COMPLETED; + ns->ln_flags &= ~LF_SCANNED_ONCE; if (!(lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)) - ns->ln_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT); + ns->ln_flags &= ~LF_INCONSISTENT; ns->ln_time_last_complete = ns->ln_time_last_checkpoint; ns->ln_success_count++; } else if (rc == 0) { @@ -6191,7 +6733,7 @@ static int lfsck_namespace_double_scan_result(const struct lu_env *env, ns->ln_status = LS_FAILED; } - rc = lfsck_namespace_store(env, com, false); + rc = lfsck_namespace_store(env, com); up_write(&com->lc_sem); return rc; @@ -6249,7 +6791,7 @@ static void lfsck_namespace_assistant_sync_failures(const struct lu_env *env, int rc = 0; ENTRY; - if (!lad->lad_incomplete) + if (!test_bit(LAD_INCOMPLETE, &lad->lad_flags)) RETURN_EXIT; set = ptlrpc_prep_set(); @@ -6263,7 +6805,8 @@ static void lfsck_namespace_assistant_sync_failures(const struct lu_env *env, down_read(<ds->ltd_rw_sem); cfs_foreach_bit(lad->lad_bitmap, idx) { ltd = lfsck_ltd2tgt(ltds, idx); - LASSERT(ltd != NULL); + if (unlikely(!ltd)) + continue; laia->laia_ltd = ltd; rc = 
lfsck_async_request(env, ltd->ltd_exp, lr, set, @@ -6276,7 +6819,7 @@ static void lfsck_namespace_assistant_sync_failures(const struct lu_env *env, } up_read(<ds->ltd_rw_sem); - rc = ptlrpc_set_wait(set); + rc = ptlrpc_set_wait(env, set); ptlrpc_set_destroy(set); GOTO(out, rc); @@ -6327,7 +6870,7 @@ int lfsck_verify_linkea(const struct lu_env *env, struct dt_object *obj, LASSERT(S_ISDIR(lfsck_object_type(obj))); - rc = lfsck_links_read(env, obj, &ldata); + rc = lfsck_links_read_with_rec(env, obj, &ldata); if (rc == -ENODATA) { dirty = true; } else if (rc == 0) { @@ -6344,11 +6887,8 @@ int lfsck_verify_linkea(const struct lu_env *env, struct dt_object *obj, if (!dirty) RETURN(rc); - rc = linkea_data_new(&ldata, &lfsck_env_info(env)->lti_linkea_buf); - if (rc != 0) - RETURN(rc); - - rc = linkea_add_buf(&ldata, cname, pfid); + rc = linkea_links_new(&ldata, &lfsck_env_info(env)->lti_linkea_buf, + cname, pfid); if (rc != 0) RETURN(rc); @@ -6399,15 +6939,15 @@ int lfsck_links_get_first(const struct lu_env *env, struct dt_object *obj, struct linkea_data ldata = { NULL }; int rc; - rc = lfsck_links_read(env, obj, &ldata); - if (rc != 0) + rc = lfsck_links_read_with_rec(env, obj, &ldata); + if (rc) return rc; linkea_first_entry(&ldata); - if (ldata.ld_lee == NULL) - return -ENODATA; - linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname, pfid); + if (!linkea_entry_is_valid(&ldata, cname, pfid)) + return -EINVAL; + /* To guarantee the 'name' is terminated with '0'. 
*/ memcpy(name, cname->ln_name, cname->ln_namelen); name[cname->ln_namelen] = 0; @@ -6481,7 +7021,7 @@ int lfsck_update_name_entry(const struct lu_env *env, GOTO(stop, rc); rc = dt_insert(env, dir, (const struct dt_rec *)rec, - (const struct dt_key *)name, th, 1); + (const struct dt_key *)name, th); if (rc == 0 && S_ISDIR(type) && !exists) { dt_write_lock(env, dir, 0); rc = dt_ref_add(env, dir, th); @@ -6554,18 +7094,22 @@ int lfsck_namespace_setup(const struct lu_env *env, obj = local_index_find_or_create(env, lfsck->li_los, root, LFSCK_NAMESPACE, S_IFREG | S_IRUGO | S_IWUSR, - &dt_lfsck_features); + &dt_lfsck_namespace_features); if (IS_ERR(obj)) GOTO(out, rc = PTR_ERR(obj)); com->lc_obj = obj; rc = lfsck_namespace_load(env, com); - if (rc == -ENODATA) + if (rc == -ENODATA) { rc = lfsck_namespace_init(env, com); - else if (rc < 0) + } else if (rc < 0) { rc = lfsck_namespace_reset(env, com, true); - else - rc = lfsck_namespace_load_sub_trace_files(env, com, false); + } else { + rc = lfsck_load_sub_trace_files(env, com, + &dt_lfsck_namespace_features, LFSCK_NAMESPACE, false); + if (rc) + rc = lfsck_namespace_reset(env, com, true); + } if (rc != 0) GOTO(out, rc);