X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_layout.c;h=6cdbd83716afab6977f1355e16471bbeb158973c;hb=9667225cdcf7308402893ff7216fce26df7ee04a;hp=c35553942ebb3c5b87210cf1d89935535fd5a691;hpb=e2cdf469b0224e631e7d86046a2de5d92e80b7ca;p=fs%2Flustre-release.git diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index c355539..6cdbd83 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2014, 2016, Intel Corporation. + * Copyright (c) 2014, 2017, Intel Corporation. */ /* * lustre/lfsck/lfsck_layout.c @@ -36,13 +36,11 @@ #include #include -#include #include #include #include #include #include -#include #include #include @@ -50,8 +48,10 @@ #define LFSCK_LAYOUT_MAGIC_V1 0xB173AE14 #define LFSCK_LAYOUT_MAGIC_V2 0xB1734D76 +#define LFSCK_LAYOUT_MAGIC_V3 0xB17371B9 +#define LFSCK_LAYOUT_MAGIC_V4 0xB1732FED -#define LFSCK_LAYOUT_MAGIC LFSCK_LAYOUT_MAGIC_V2 +#define LFSCK_LAYOUT_MAGIC LFSCK_LAYOUT_MAGIC_V4 struct lfsck_layout_seq { struct list_head lls_list; @@ -86,7 +86,7 @@ struct lfsck_layout_slave_data { __u64 llsd_touch_gen; struct dt_object *llsd_rb_obj; struct rb_root llsd_rb_root; - rwlock_t llsd_rb_lock; + struct rw_semaphore llsd_rb_rwsem; unsigned int llsd_rbtree_valid:1; }; @@ -287,7 +287,8 @@ static void lfsck_layout_assistant_sync_failures(const struct lu_env *env, down_read(<ds->ltd_rw_sem); cfs_foreach_bit(lad->lad_bitmap, idx) { ltd = lfsck_ltd2tgt(ltds, idx); - LASSERT(ltd != NULL); + if (unlikely(!ltd)) + continue; laia->laia_ltd = ltd; rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, @@ -307,7 +308,7 @@ static void lfsck_layout_assistant_sync_failures(const struct lu_env *env, up_read(<ds->ltd_rw_sem); if (rc == 0 && atomic_read(&count) > 0) - rc = ptlrpc_set_wait(set); + rc = ptlrpc_set_wait(env, set); ptlrpc_set_destroy(set); @@ -327,7 +328,8 @@ out: } static int lfsck_layout_verify_header_v1v3(struct dt_object *obj, - struct lov_mds_md_v1 *lmm) + struct lov_mds_md_v1 *lmm, + __u64 start, __u32 comp_id) { __u32 magic; __u32 pattern; @@ -352,10 +354,24 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj, } pattern = le32_to_cpu(lmm->lmm_pattern); - /* XXX: currently, we only support LOV_PATTERN_RAID0. */ + +#if 0 + /* XXX: DoM file verification will be supportted via LU-11081. */ + if (lov_pattern(pattern) == LOV_PATTERN_MDT) { + if (start != 0) { + CDEBUG(D_LFSCK, "The DoM entry for "DFID" is not " + "the first component in the mirror %x/%llu\n", + PFID(lfsck_dto2fid(obj)), comp_id, start); + + return -EINVAL; + } + } +#endif + if (lov_pattern(pattern) != LOV_PATTERN_RAID0) { CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u for the file " - DFID"\n", pattern, PFID(lfsck_dto2fid(obj))); + DFID" in the component %x\n", + pattern, PFID(lfsck_dto2fid(obj)), comp_id); return -EOPNOTSUPP; } @@ -381,14 +397,15 @@ static int lfsck_layout_verify_header(struct dt_object *obj, return -EINVAL; } - for (i = 0; i < count; i++) { + for (i = 0; i < count && !rc; i++) { struct lov_comp_md_entry_v1 *lcme = &lcm->lcm_entries[i]; __u64 start = le64_to_cpu(lcme->lcme_extent.e_start); __u64 end = le64_to_cpu(lcme->lcme_extent.e_end); __u32 comp_id = le32_to_cpu(lcme->lcme_id); - if (unlikely(comp_id == 0 || comp_id > LCME_ID_MAX)) { + if (unlikely(comp_id == LCME_ID_INVAL || + comp_id > LCME_ID_MAX)) { CDEBUG(D_LFSCK, "found invalid FPL ID %u " "for the file "DFID" at idx %d\n", comp_id, PFID(lfsck_dto2fid(obj)), i); @@ -409,13 +426,12 @@ static int lfsck_layout_verify_header(struct dt_object *obj, } rc = lfsck_layout_verify_header_v1v3(obj, - (struct lov_mds_md_v1 *)((char *)lmm + - le32_to_cpu(lcme->lcme_offset))); - if (rc) - return rc; + (struct lov_mds_md_v1 *)((char *)lmm + + le32_to_cpu(lcme->lcme_offset)), start, + comp_id); } } else { - rc = lfsck_layout_verify_header_v1v3(obj, lmm); + rc = lfsck_layout_verify_header_v1v3(obj, lmm, 1, 0); } return rc; @@ -432,7 +448,7 @@ again: if (rc == -ERANGE) { rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV); if (rc <= 0) - return rc; + return !rc ? -ENODATA : rc; lu_buf_realloc(buf, rc); if (buf->lb_buf == NULL) @@ -441,11 +457,8 @@ again: goto again; } - if (rc == -ENODATA) - rc = 0; - if (rc <= 0) - return rc; + return !rc ? -ENODATA : rc; if (unlikely(buf->lb_buf == NULL)) { lu_buf_alloc(buf, rc); @@ -656,9 +669,9 @@ static void lfsck_rbtree_cleanup(const struct lu_env *env, lfsck->li_bottom->dd_record_fid_accessed = 0; /* Invalid the rbtree, then no others will use it. */ - write_lock(&llsd->llsd_rb_lock); + down_write(&llsd->llsd_rb_rwsem); llsd->llsd_rbtree_valid = 0; - write_unlock(&llsd->llsd_rb_lock); + up_write(&llsd->llsd_rb_rwsem); while (node != NULL) { next = rb_next(node); @@ -695,7 +708,7 @@ static void lfsck_rbtree_update_bitmap(const struct lu_env *env, if (!fid_is_idif(fid) && !fid_is_norm(fid)) RETURN_EXIT; - read_lock(&llsd->llsd_rb_lock); + down_read(&llsd->llsd_rb_rwsem); if (!llsd->llsd_rbtree_valid) GOTO(unlock, rc = 0); @@ -705,13 +718,13 @@ static void lfsck_rbtree_update_bitmap(const struct lu_env *env, LASSERT(!insert); - read_unlock(&llsd->llsd_rb_lock); + up_read(&llsd->llsd_rb_rwsem); tmp = lfsck_rbtree_new(env, fid); if (IS_ERR(tmp)) GOTO(out, rc = PTR_ERR(tmp)); insert = true; - write_lock(&llsd->llsd_rb_lock); + down_write(&llsd->llsd_rb_rwsem); if (!llsd->llsd_rbtree_valid) { lfsck_rbtree_free(tmp); GOTO(unlock, rc = 0); @@ -733,9 +746,9 @@ static void lfsck_rbtree_update_bitmap(const struct lu_env *env, unlock: if (insert) - write_unlock(&llsd->llsd_rb_lock); + up_write(&llsd->llsd_rb_rwsem); else - read_unlock(&llsd->llsd_rb_lock); + up_read(&llsd->llsd_rb_rwsem); out: if (rc != 0 && accessed) { struct lfsck_layout *lo = com->lc_file_ram; @@ -792,8 +805,8 @@ static void lfsck_layout_le_to_cpu(struct lfsck_layout *des, des->ll_status = le32_to_cpu(src->ll_status); des->ll_flags = le32_to_cpu(src->ll_flags); des->ll_success_count = le32_to_cpu(src->ll_success_count); - des->ll_run_time_phase1 = le32_to_cpu(src->ll_run_time_phase1); - des->ll_run_time_phase2 = le32_to_cpu(src->ll_run_time_phase2); + des->ll_run_time_phase1 = le64_to_cpu(src->ll_run_time_phase1); + des->ll_run_time_phase2 = le64_to_cpu(src->ll_run_time_phase2); des->ll_time_last_complete = le64_to_cpu(src->ll_time_last_complete); des->ll_time_latest_start = le64_to_cpu(src->ll_time_latest_start); des->ll_time_last_checkpoint = @@ -824,8 +837,8 @@ static void lfsck_layout_cpu_to_le(struct lfsck_layout *des, des->ll_status = cpu_to_le32(src->ll_status); des->ll_flags = cpu_to_le32(src->ll_flags); des->ll_success_count = cpu_to_le32(src->ll_success_count); - des->ll_run_time_phase1 = cpu_to_le32(src->ll_run_time_phase1); - des->ll_run_time_phase2 = cpu_to_le32(src->ll_run_time_phase2); + des->ll_run_time_phase1 = cpu_to_le64(src->ll_run_time_phase1); + des->ll_run_time_phase2 = cpu_to_le64(src->ll_run_time_phase2); des->ll_time_last_complete = cpu_to_le64(src->ll_time_last_complete); des->ll_time_latest_start = cpu_to_le64(src->ll_time_latest_start); des->ll_time_last_checkpoint = @@ -1438,10 +1451,13 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env, struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_layout *lo = com->lc_file_ram; + CDEBUG(D_LFSCK, "%s: layout LFSCK double scan: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + down_write(&com->lc_sem); - lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - com->lc_time_last_checkpoint); - lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_run_time_phase2 += ktime_get_seconds() - + com->lc_time_last_checkpoint; + lo->ll_time_last_checkpoint = ktime_get_real_seconds(); lo->ll_objs_checked_phase2 += com->lc_new_checked; if (rc > 0) { @@ -1459,8 +1475,9 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env, lo->ll_status = LS_COMPLETED; } } + lo->ll_flags &= ~LF_SCANNED_ONCE; if (!(lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)) - lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT); + lo->ll_flags &= ~LF_INCONSISTENT; lo->ll_time_last_complete = lo->ll_time_last_checkpoint; lo->ll_success_count++; } else if (rc == 0) { @@ -1475,6 +1492,9 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env, rc = lfsck_layout_store(env, com); up_write(&com->lc_sem); + CDEBUG(D_LFSCK, "%s: layout LFSCK double scan result %u: rc = %d\n", + lfsck_lfsck2name(lfsck), lo->ll_status, rc); + return rc; } @@ -1540,7 +1560,7 @@ static int lfsck_layout_ins_dangling_rec(const struct lu_env *env, GOTO(unlock, rc); rc = dt_insert(env, obj, (const struct dt_rec *)rec, - (const struct dt_key *)key, th, 1); + (const struct dt_key *)key, th); GOTO(unlock, rc); @@ -1779,12 +1799,13 @@ static int lfsck_layout_new_v1_lovea(const struct lu_env *env, } static int lfsck_layout_new_comp_lovea(const struct lu_env *env, - struct ost_layout *ol, - struct dt_object *parent, - struct lu_buf *buf, __u32 ea_off, - struct lov_mds_md_v1 **lmm, - struct lov_ost_data_v1 **objs) + struct lu_orphan_rec_v3 *rec, + struct dt_object *parent, + struct lu_buf *buf, __u32 ea_off, + struct lov_mds_md_v1 **lmm, + struct lov_ost_data_v1 **objs) { + struct ost_layout *ol = &rec->lor_layout; struct lov_comp_md_v1 *lcm; struct lov_comp_md_entry_v1 *lcme; __u32 pattern = LOV_PATTERN_RAID0; @@ -1799,9 +1820,22 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env, lcm = buf->lb_buf; lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1); lcm->lcm_size = cpu_to_le32(size); - lcm->lcm_layout_gen = cpu_to_le32(1); - lcm->lcm_flags = 0; + if (rec->lor_range) { + lcm->lcm_layout_gen = cpu_to_le32(rec->lor_layout_version + + rec->lor_range); + lcm->lcm_flags = cpu_to_le16(LCM_FL_WRITE_PENDING); + } else if (rec->lor_layout_version) { + lcm->lcm_layout_gen = cpu_to_le32(rec->lor_layout_version + + rec->lor_range); + lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE); + } else { + lcm->lcm_layout_gen = cpu_to_le32(1); + lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE); + } lcm->lcm_entry_count = cpu_to_le16(1); + /* Currently, we do not know how many mirrors will be, set it as zero + * at the beginning. It will be updated when more mirrors are found. */ + lcm->lcm_mirror_count = 0; lcme = &lcm->lcm_entries[0]; lcme->lcme_id = cpu_to_le32(ol->ol_comp_id); @@ -1810,6 +1844,7 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env, lcme->lcme_extent.e_end = cpu_to_le64(ol->ol_comp_end); lcme->lcme_offset = cpu_to_le32(offset); lcme->lcme_size = cpu_to_le32(lcme_size); + lcme->lcme_layout_gen = lcm->lcm_layout_gen; if (ol->ol_stripe_count > 1) pattern |= LOV_PATTERN_F_HOLE; @@ -1821,15 +1856,66 @@ static int lfsck_layout_new_comp_lovea(const struct lu_env *env, return size; } -static int lfsck_layout_add_comp_comp(const struct lu_env *env, - struct lfsck_instance *lfsck, - struct thandle *handle, - struct ost_layout *ol, - struct dt_object *parent, - const struct lu_fid *cfid, - struct lu_buf *buf, __u32 ost_idx, - __u32 ea_off, int pos) +static void lfsck_layout_update_lcm(struct lov_comp_md_v1 *lcm, + struct lov_comp_md_entry_v1 *lcme, + __u32 version, __u32 range) { + struct lov_comp_md_entry_v1 *tmp; + __u64 start = le64_to_cpu(lcme->lcme_extent.e_start); + __u64 end = le64_to_cpu(lcme->lcme_extent.e_end); + __u32 gen = version + range; + __u32 tmp_gen; + int i; + __u16 count = le16_to_cpu(lcm->lcm_entry_count); + __u16 flags = le16_to_cpu(lcm->lcm_flags); + + if (!gen) + gen = 1; + lcme->lcme_layout_gen = cpu_to_le32(gen); + if (le32_to_cpu(lcm->lcm_layout_gen) < gen) + lcm->lcm_layout_gen = cpu_to_le32(gen); + + if (range) + lcm->lcm_flags = cpu_to_le16(LCM_FL_WRITE_PENDING); + else if (flags == LCM_FL_NONE && le16_to_cpu(lcm->lcm_mirror_count) > 0) + lcm->lcm_flags = cpu_to_le16(LCM_FL_RDONLY); + + for (i = 0; i < count; i++) { + tmp = &lcm->lcm_entries[i]; + if (le64_to_cpu(tmp->lcme_extent.e_end) <= start) + continue; + + if (le64_to_cpu(tmp->lcme_extent.e_start) >= end) + continue; + + if (le32_to_cpu(tmp->lcme_flags) & LCME_FL_STALE) + continue; + + tmp_gen = le32_to_cpu(tmp->lcme_layout_gen); + /* "lcme_layout_gen == 0" but without LCME_FL_STALE flag, + * then it should be the latest version of all mirrors. */ + if (tmp_gen == 0 || tmp_gen > gen) { + lcme->lcme_flags = cpu_to_le32( + le32_to_cpu(lcme->lcme_flags) | LCME_FL_STALE); + break; + } + + if (tmp_gen < gen) + tmp->lcme_flags = cpu_to_le32( + le32_to_cpu(tmp->lcme_flags) | LCME_FL_STALE); + } +} + +static int lfsck_layout_add_comp(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct thandle *handle, + struct lu_orphan_rec_v3 *rec, + struct dt_object *parent, + const struct lu_fid *cfid, + struct lu_buf *buf, __u32 ost_idx, + __u32 ea_off, int pos, bool new_mirror) +{ + struct ost_layout *ol = &rec->lor_layout; struct lov_comp_md_v1 *lcm = buf->lb_buf; struct lov_comp_md_entry_v1 *lcme; struct lov_mds_md_v1 *lmm; @@ -1849,8 +1935,9 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env, * have reallocated the buf. */ lcm = buf->lb_buf; lcm->lcm_size = cpu_to_le32(size); - le32_add_cpu(&lcm->lcm_layout_gen, 1); lcm->lcm_entry_count = cpu_to_le16(count + 1); + if (new_mirror) + le16_add_cpu(&lcm->lcm_mirror_count, 1); /* 1. Move the component bodies from [pos, count-1] to [pos+1, count] * with distance of 'added'. */ @@ -1915,6 +2002,10 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env, ol->ol_stripe_size, ea_off, pattern, ol->ol_stripe_count); + /* 6. Update mirror related flags and version. */ + lfsck_layout_update_lcm(lcm, lcme, rec->lor_layout_version, + rec->lor_range); + rc = lfsck_layout_refill_lovea(env, lfsck, handle, parent, cfid, buf, lmm, objs, LU_XATTR_REPLACE, ost_idx, le32_to_cpu(lcm->lcm_size)); @@ -1922,10 +2013,12 @@ static int lfsck_layout_add_comp_comp(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: layout LFSCK assistant add new COMP for " DFID": parent "DFID", OST-index %u, stripe-index %u, " "stripe_size %u, stripe_count %u, comp_id %u, comp_start %llu, " - "comp_end %llu, %s LOV EA hole: rc = %d\n", + "comp_end %llu, layout version %u, range %u, " + "%s LOV EA hole: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)), ost_idx, ea_off, ol->ol_stripe_size, ol->ol_stripe_count, ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, + rec->lor_layout_version, rec->lor_range, le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_HOLE ? "with" : "without", rc); @@ -2005,24 +2098,25 @@ static int lfsck_layout_extend_v1v3_lovea(const struct lu_env *env, static int lfsck_layout_update_lovea(const struct lu_env *env, struct lfsck_instance *lfsck, struct thandle *handle, - struct ost_layout *ol, + struct lu_orphan_rec_v3 *rec, struct dt_object *parent, const struct lu_fid *cfid, struct lu_buf *buf, int fl, __u32 ost_idx, __u32 ea_off) { + struct ost_layout *ol = &rec->lor_layout; struct lov_mds_md_v1 *lmm = NULL; struct lov_ost_data_v1 *objs = NULL; int rc = 0; ENTRY; if (ol->ol_comp_id != 0) - rc = lfsck_layout_new_comp_lovea(env, ol, parent, buf, ea_off, - &lmm, &objs); + rc = lfsck_layout_new_comp_lovea(env, rec, parent, buf, ea_off, + &lmm, &objs); else - rc = lfsck_layout_new_v1_lovea(env, lfsck, ol, parent, buf, - ea_off, &lmm, &objs); - + rc = lfsck_layout_new_v1_lovea(env, lfsck, &rec->lor_layout, + parent, buf, ea_off, &lmm, + &objs); if (rc > 0) rc = lfsck_layout_refill_lovea(env, lfsck, handle, parent, cfid, buf, lmm, objs, fl, ost_idx, rc); @@ -2030,10 +2124,12 @@ static int lfsck_layout_update_lovea(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: layout LFSCK assistant created layout EA for " DFID": parent "DFID", OST-index %u, stripe-index %u, " "stripe_size %u, stripe_count %u, comp_id %u, comp_start %llu, " - "comp_end %llu, fl %d, %s LOV EA hole: rc = %d\n", + "comp_end %llu, layout version %u, range %u, fl %d, " + "%s LOV EA hole: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)), ost_idx, ea_off, ol->ol_stripe_size, ol->ol_stripe_count, - ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, fl, + ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, + rec->lor_layout_version, rec->lor_range, fl, le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_HOLE ? "with" : "without", rc); @@ -2043,7 +2139,8 @@ static int lfsck_layout_update_lovea(const struct lu_env *env, static int __lfsck_layout_update_pfid(const struct lu_env *env, struct dt_object *child, const struct lu_fid *pfid, - const struct ost_layout *ol, __u32 offset) + const struct ost_layout *ol, __u32 offset, + __u32 version, __u32 range) { struct dt_device *dev = lfsck_obj2dev(child); struct filter_fid *ff = &lfsck_env_info(env)->lti_ff; @@ -2058,6 +2155,8 @@ static int __lfsck_layout_update_pfid(const struct lu_env *env, * parent MDT-object's layout EA. */ ff->ff_parent.f_stripe_idx = cpu_to_le32(offset); ost_layout_cpu_to_le(&ff->ff_layout, ol); + ff->ff_layout_version = cpu_to_le32(version); + ff->ff_range = cpu_to_le32(range); lfsck_buf_init(&buf, ff, sizeof(*ff)); handle = dt_trans_create(env, dev); @@ -2092,7 +2191,7 @@ static int lfsck_layout_update_pfid(const struct lu_env *env, struct dt_object *parent, struct lu_fid *cfid, struct dt_device *cdev, - struct ost_layout *ol, __u32 ea_off) + struct lu_orphan_rec_v3 *rec, __u32 ea_off) { struct dt_object *child; int rc = 0; @@ -2104,7 +2203,9 @@ static int lfsck_layout_update_pfid(const struct lu_env *env, rc = __lfsck_layout_update_pfid(env, child, lu_object_fid(&parent->do_lu), - ol, ea_off); + &rec->lor_layout, ea_off, + rec->lor_layout_version, + rec->lor_range); lfsck_object_put(env, child); RETURN(rc == 0 ? 1 : rc); @@ -2181,7 +2282,7 @@ static int lfsck_lovea_size(struct ost_layout *ol, __u32 ea_off) static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, - struct lu_orphan_rec_v2 *rec, + struct lu_orphan_rec_v3 *rec, struct lu_fid *cfid, const char *infix, const char *type, @@ -2193,7 +2294,6 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lu_attr *la = &info->lti_la2; struct dt_object_format *dof = &info->lti_dof; struct lfsck_instance *lfsck = com->lc_lfsck; - struct ost_layout *ol = &rec->lor_layout; struct lu_fid *pfid = &rec->lor_rec.lor_fid; struct lu_fid *tfid = &info->lti_fid3; struct dt_device *dev = lfsck->li_bottom; @@ -2255,7 +2355,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, * the stripe(s). The LFSCK will specify the LOV EA via * lfsck_layout_update_lovea(). */ - size = lfsck_lovea_size(ol, ea_off); + size = lfsck_lovea_size(&rec->lor_layout, ea_off); if (ea_buf->lb_len < size) { lu_buf_realloc(ea_buf, size); if (ea_buf->lb_buf == NULL) @@ -2332,14 +2432,14 @@ again: dt_write_lock(env, pobj, 0); rc = dt_create(env, pobj, la, NULL, dof, th); if (rc == 0) - rc = lfsck_layout_update_lovea(env, lfsck, th, ol, pobj, cfid, + rc = lfsck_layout_update_lovea(env, lfsck, th, rec, pobj, cfid, &lov_buf, LU_XATTR_CREATE, ltd->ltd_index, ea_off); dt_write_unlock(env, pobj); if (rc < 0) GOTO(stop, rc); rc = dt_insert(env, lpf, (const struct dt_rec *)dtrec, - (const struct dt_key *)name, th, 1); + (const struct dt_key *)name, th); if (rc != 0) GOTO(stop, rc); @@ -2349,7 +2449,10 @@ again: th = NULL; /* The 2nd transaction. */ - rc = __lfsck_layout_update_pfid(env, cobj, pfid, ol, ea_off); + rc = __lfsck_layout_update_pfid(env, cobj, pfid, + &rec->lor_layout, ea_off, + rec->lor_layout_version, + rec->lor_range); } GOTO(stop, rc); @@ -2477,10 +2580,10 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, memset(policy, 0, sizeof(*policy)); policy->l_extent.end = OBD_OBJECT_EOF; ost_fid_build_resid(fid, resid); - rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_EXTENT, - policy, LCK_EX, &flags, ldlm_blocking_ast, - ldlm_completion_ast, NULL, NULL, 0, - LVB_T_NONE, NULL, &lh); + rc = ldlm_cli_enqueue_local(env, lfsck->li_namespace, resid, + LDLM_EXTENT, policy, LCK_EX, &flags, + ldlm_blocking_ast, ldlm_completion_ast, + NULL, NULL, 0, LVB_T_NONE, NULL, &lh); if (rc != ELDLM_OK) GOTO(put, rc = -EIO); @@ -2552,7 +2655,7 @@ put: static int lfsck_layout_conflict_create(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, - struct lu_orphan_rec_v2 *rec, + struct lu_orphan_rec_v3 *rec, struct dt_object *parent, struct lu_fid *cfid, struct lu_buf *ea_buf, @@ -2656,7 +2759,7 @@ out: static int lfsck_layout_recreate_lovea(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, - struct lu_orphan_rec_v2 *rec, + struct lu_orphan_rec_v3 *rec, struct dt_object *parent, struct lu_fid *cfid, __u32 ost_idx, __u32 ea_off) @@ -2682,8 +2785,10 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env, int rc = 0; int rc1; int i; - __u16 count; - bool locked = false; + int pos = 0; + __u16 count; + bool locked = false; + bool new_mirror = true; ENTRY; rc = lfsck_ibits_lock(env, lfsck, parent, &lh, @@ -2693,11 +2798,12 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: layout LFSCK assistant failed to recreate " "LOV EA for "DFID": parent "DFID", OST-index %u, " "stripe-index %u, comp_id %u, comp_start %llu, " - "comp_end %llu: rc = %d\n", + "comp_end %llu, layout version %u, range %u: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(cfid), PFID(lfsck_dto2fid(parent)), ost_idx, ea_off, ol->ol_comp_id, ol->ol_comp_start, - ol->ol_comp_end, rc); + ol->ol_comp_end, rec->lor_layout_version, + rec->lor_range, rc); RETURN(rc); } @@ -2768,7 +2874,7 @@ again: LASSERT(buf->lb_len >= lovea_size); - rc = lfsck_layout_update_lovea(env, lfsck, handle, ol, parent, + rc = lfsck_layout_update_lovea(env, lfsck, handle, rec, parent, cfid, buf, fl, ost_idx, ea_off); GOTO(unlock_parent, rc); @@ -2784,28 +2890,44 @@ again: LASSERT(buf->lb_len >= lovea_size); - rc = lfsck_layout_update_lovea(env, lfsck, handle, ol, parent, + rc = lfsck_layout_update_lovea(env, lfsck, handle, rec, parent, cfid, buf, fl, ost_idx, ea_off); GOTO(unlock_parent, rc); } /* For other unknown magic/pattern, keep the current LOV EA. */ - if (rc1 != 0) + if (rc1 == -EOPNOTSUPP) + GOTO(unlock_parent, rc1 = 0); + + if (rc1) GOTO(unlock_parent, rc = rc1); magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_COMP_V1) { __u64 start; __u64 end; + __u16 mirror_id0 = mirror_id_of(ol->ol_comp_id); + __u16 mirror_id1; + + if (bk->lb_param & LPF_DRYRUN) + GOTO(unlock_parent, rc = 1); lcm = buf->lb_buf; count = le16_to_cpu(lcm->lcm_entry_count); - for (i = 0; i < count; i++) { + for (i = 0; i < count; pos = ++i) { lcme = &lcm->lcm_entries[i]; start = le64_to_cpu(lcme->lcme_extent.e_start); end = le64_to_cpu(lcme->lcme_extent.e_end); + mirror_id1 = mirror_id_of(le32_to_cpu(lcme->lcme_id)); + if (mirror_id0 > mirror_id1) + continue; + + if (mirror_id0 < mirror_id1) + break; + + new_mirror = false; if (end <= ol->ol_comp_start) continue; @@ -2818,8 +2940,8 @@ again: goto further; } - rc = lfsck_layout_add_comp_comp(env, lfsck, handle, ol, parent, - cfid, buf, ost_idx, ea_off, i); + rc = lfsck_layout_add_comp(env, lfsck, handle, rec, parent, + cfid, buf, ost_idx, ea_off, pos, new_mirror); GOTO(unlock_parent, rc); } @@ -2842,8 +2964,14 @@ further: goto again; } - if (lcme && !(flags & LCME_FL_INIT)) + if (lcm) { + LASSERT(lcme); + lcme->lcme_flags = cpu_to_le32(flags | LCME_FL_INIT); + lfsck_layout_update_lcm(lcm, lcme, + rec->lor_layout_version, + rec->lor_range); + } rc = lfsck_layout_extend_v1v3_lovea(env, lfsck, handle, ol, parent, cfid, buf, ost_idx, ea_off); @@ -2906,11 +3034,12 @@ further: GOTO(unlock_parent, rc = -EINVAL); } - le32_add_cpu(&lcm->lcm_layout_gen, 1); lovea_size = le32_to_cpu(lcm->lcm_size); - if (!(flags & LCME_FL_INIT)) - lcme->lcme_flags = cpu_to_le32(flags | - LCME_FL_INIT); + lcme->lcme_flags = cpu_to_le32(flags | + LCME_FL_INIT); + lfsck_layout_update_lcm(lcm, lcme, + rec->lor_layout_version, + rec->lor_range); } LASSERTF(buf->lb_len >= lovea_size, @@ -2960,7 +3089,7 @@ further: lfsck_ibits_unlock(&lh, LCK_EX); rc = lfsck_layout_update_pfid(env, com, parent, cfid, ltd->ltd_tgt, - ol, i); + rec, i); CDEBUG(D_LFSCK, "%s layout LFSCK assistant " "updated OST-object's pfid for "DFID @@ -3010,7 +3139,7 @@ unlock_layout: static int lfsck_layout_scan_orphan_one(const struct lu_env *env, struct lfsck_component *com, struct lfsck_tgt_desc *ltd, - struct lu_orphan_rec_v2 *rec, + struct lu_orphan_rec_v3 *rec, struct lu_fid *cfid) { struct lfsck_layout *lo = com->lc_file_ram; @@ -3092,7 +3221,6 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_thread_info *info = lfsck_env_info(env); - struct ost_id *oi = &info->lti_oi; struct lu_fid *fid = &info->lti_fid; struct dt_object *obj; const struct dt_it_ops *iops; @@ -3112,11 +3240,8 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, RETURN(0); } - ostid_set_seq(oi, FID_SEQ_IDIF); - ostid_set_id(oi, 0); - rc = ostid_to_fid(fid, oi, ltd->ltd_index); - if (rc != 0) - GOTO(log, rc); + fid->f_seq = fid_idif_seq(0, ltd->ltd_index); + fid->f_oid = fid->f_ver = 0; obj = lfsck_object_find_by_dev(env, ltd->ltd_tgt, fid); if (unlikely(IS_ERR(obj))) @@ -3153,7 +3278,7 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, do { struct dt_key *key; - struct lu_orphan_rec_v2 *rec = &info->lti_rec; + struct lu_orphan_rec_v3 *rec = &info->lti_rec; if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val) && unlikely(!thread_is_running(&lfsck->li_thread))) @@ -3192,9 +3317,10 @@ log: return rc > 0 ? 0 : rc; } -static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol, +static int lfsck_lov2layout(struct lov_mds_md_v1 *lmm, struct filter_fid *ff, __u32 comp_id) { + struct ost_layout *ol = &ff->ff_layout; __u32 magic = le32_to_cpu(lmm->lmm_magic); int rc = 0; ENTRY; @@ -3205,9 +3331,11 @@ static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol, ol->ol_comp_start = 0; ol->ol_comp_end = 0; ol->ol_comp_id = 0; + ff->ff_layout_version = 0; + ff->ff_range = 0; } else if (magic == LOV_MAGIC_COMP_V1) { struct lov_comp_md_v1 *lcm = (struct lov_comp_md_v1 *)lmm; - struct lov_comp_md_entry_v1 *lcme; + struct lov_comp_md_entry_v1 *lcme = NULL; __u16 count = le16_to_cpu(lcm->lcm_entry_count); int i; @@ -3226,11 +3354,13 @@ static int lfsck_lmm2layout(struct lov_mds_md_v1 *lmm, struct ost_layout *ol, GOTO(out, rc = 1); lmm = (void *)lmm + le32_to_cpu(lcme->lcme_offset); - ol->ol_stripe_size = lmm->lmm_stripe_size; - ol->ol_stripe_count = lmm->lmm_stripe_count; - ol->ol_comp_start = lcme->lcme_extent.e_start; - ol->ol_comp_end = lcme->lcme_extent.e_end; - ol->ol_comp_id = lcme->lcme_id; + ol->ol_stripe_size = le32_to_cpu(lmm->lmm_stripe_size); + ol->ol_stripe_count = le32_to_cpu(lmm->lmm_stripe_count); + ol->ol_comp_start = le64_to_cpu(lcme->lcme_extent.e_start); + ol->ol_comp_end = le64_to_cpu(lcme->lcme_extent.e_end); + ol->ol_comp_id = le32_to_cpu(lcme->lcme_id); + ff->ff_layout_version = le32_to_cpu(lcme->lcme_layout_gen); + ff->ff_range = 0; } else { GOTO(out, rc = -EINVAL); } @@ -3274,7 +3404,6 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env, { struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid *ff = &info->lti_ff; - struct ost_layout *ol = &ff->ff_layout; struct dt_object_format *dof = &info->lti_dof; struct lu_attr *la = &info->lti_la; struct lfsck_instance *lfsck = com->lc_lfsck; @@ -3314,10 +3443,12 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env, ff->ff_parent.f_stripe_idx = cpu_to_le32(ea_off); rc = lfsck_layout_get_lovea(env, parent, tbuf); - if (rc < 0) + if (unlikely(rc == -ENODATA)) + rc = 0; + if (rc <= 0) GOTO(unlock1, rc); - rc = lfsck_lmm2layout(tbuf->lb_buf, ol, comp_id); + rc = lfsck_lov2layout(tbuf->lb_buf, ff, comp_id); if (rc) GOTO(unlock1, rc); @@ -3354,6 +3485,8 @@ static int __lfsck_layout_repair_dangling(const struct lu_env *env, int idx2; rc = lfsck_layout_get_lovea(env, parent, lovea); + if (unlikely(rc == -ENODATA)) + rc = 0; if (rc <= 0) GOTO(unlock2, rc); @@ -3526,7 +3659,6 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, { struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid *ff = &info->lti_ff; - struct ost_layout *ol = &ff->ff_layout; struct dt_object *child = llr->llr_child; struct dt_device *dev = lfsck_obj2dev(child); const struct lu_fid *tfid = lu_object_fid(&parent->do_lu); @@ -3551,10 +3683,12 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, ff->ff_parent.f_stripe_idx = cpu_to_le32(llr->llr_lov_idx); rc = lfsck_layout_get_lovea(env, parent, tbuf); - if (rc < 0) + if (unlikely(rc == -ENODATA)) + rc = 0; + if (rc <= 0) GOTO(unlock1, rc); - rc = lfsck_lmm2layout(tbuf->lb_buf, ol, llr->llr_comp_id); + rc = lfsck_lov2layout(tbuf->lb_buf, ff, llr->llr_comp_id); if (rc) GOTO(unlock1, rc); @@ -3747,8 +3881,10 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, GOTO(unlock, rc = 0); rc = lfsck_layout_get_lovea(env, parent, buf); - if (unlikely(!rc || rc == -ENODATA)) - GOTO(unlock, rc = 0); + if (unlikely(rc == -ENODATA)) + rc = 0; + if (rc <= 0) + GOTO(unlock, rc); lmm = buf->lb_buf; magic = le32_to_cpu(lmm->lmm_magic); @@ -3843,6 +3979,7 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, struct dt_device *dev = lfsck_obj2dev(child); struct thandle *handle; int rc; + dt_obj_version_t version; ENTRY; tla->la_uid = pla->la_uid; @@ -3865,14 +4002,18 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock, rc = 1); + version = dt_version_get(env, child); + if (version == -EOPNOTSUPP) + version = 0; + /* Get the latest parent's owner. */ rc = dt_attr_get(env, parent, pla); if (rc != 0) GOTO(unlock, rc); /* Some others chown/chgrp during the LFSCK, needs to do nothing. */ - if (unlikely(tla->la_uid != pla->la_uid || - tla->la_gid != pla->la_gid)) + if (unlikely((!version && tla->la_ctime == 0) || + tla->la_uid != pla->la_uid || tla->la_gid != pla->la_gid)) rc = 1; else rc = dt_attr_set(env, child, tla, handle); @@ -3950,9 +4091,12 @@ static int lfsck_layout_check_parent(const struct lu_env *env, * is in such layout. If yes, it is multiple referenced, otherwise it * is unmatched referenced case. */ rc = lfsck_layout_get_lovea(env, tobj, buf); - if (rc == 0 || rc == -ENOENT) + if (rc == 0 || rc == -ENODATA || rc == -ENOENT) GOTO(out, rc = LLIT_UNMATCHED_PAIR); + if (unlikely(rc == -EOPNOTSUPP)) + GOTO(out, rc = LLIT_NONE); + if (rc < 0) GOTO(out, rc); @@ -4094,7 +4238,7 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, if (lso->lso_dead) RETURN(0); - CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ASSISTANT_DIRECT, cfs_fail_val); + CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ENGINE_DELAY, cfs_fail_val); rc = dt_attr_get(env, child, cla); if (rc == -ENOENT) { @@ -4330,23 +4474,22 @@ checkpoint: if (rc < 0 && bk->lb_param & LPF_FAILOUT) GOTO(put, rc); - if (unlikely(cfs_time_beforeq(com->lc_time_next_checkpoint, - cfs_time_current())) && + if (unlikely(com->lc_time_next_checkpoint <= + ktime_get_seconds()) && com->lc_new_checked != 0) { down_write(&com->lc_sem); - lo->ll_run_time_phase2 += - cfs_duration_sec(cfs_time_current() + - HALF_SEC - com->lc_time_last_checkpoint); - lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_run_time_phase2 += ktime_get_seconds() - + com->lc_time_last_checkpoint; + lo->ll_time_last_checkpoint = ktime_get_real_seconds(); lo->ll_objs_checked_phase2 += com->lc_new_checked; com->lc_new_checked = 0; lfsck_layout_store(env, com); up_write(&com->lc_sem); - com->lc_time_last_checkpoint = cfs_time_current(); + com->lc_time_last_checkpoint = ktime_get_seconds(); com->lc_time_next_checkpoint = com->lc_time_last_checkpoint + - cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + LFSCK_CHECKPOINT_INTERVAL; } lfsck_control_speed_by_self(com); @@ -4412,9 +4555,9 @@ static int lfsck_layout_assistant_handler_p2(const struct lu_env *env, com->lc_new_checked = 0; com->lc_new_scanned = 0; - com->lc_time_last_checkpoint = cfs_time_current(); + com->lc_time_last_checkpoint = ktime_get_seconds(); com->lc_time_next_checkpoint = com->lc_time_last_checkpoint + - cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + LFSCK_CHECKPOINT_INTERVAL; i = lfsck_sub_trace_file_fid2idx( &lo->ll_lldk_latest_scanned_phase2.lldk_fid); @@ -4441,12 +4584,12 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, void *args, int rc) { struct lfsck_layout_slave_async_args *llsaa = args; - struct obd_export *exp = llsaa->llsaa_exp; - struct lfsck_component *com = llsaa->llsaa_com; - struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst; - struct lfsck_layout_slave_data *llsd = com->lc_data; - struct lfsck_reply *lr = NULL; - bool done = false; + struct obd_export *exp = llsaa->llsaa_exp; + struct lfsck_component *com = llsaa->llsaa_com; + struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst; + struct lfsck_layout_slave_data *llsd = com->lc_data; + struct lfsck_reply *lr = NULL; + bool done = false; if (rc != 0) { /* It is probably caused by network trouble, or target crash, @@ -4520,6 +4663,7 @@ static int lfsck_layout_async_query(const struct lu_env *env, llsaa->llsaa_llst = llst; req->rq_interpret_reply = lfsck_layout_slave_async_interpret; req->rq_allow_intr = 1; + req->rq_no_delay = 1; ptlrpc_set_add_req(set, req); RETURN(0); @@ -4549,6 +4693,7 @@ static int lfsck_layout_async_notify(const struct lu_env *env, *tmp = *lr; ptlrpc_request_set_replen(req); req->rq_allow_intr = 1; + req->rq_no_delay = 1; ptlrpc_set_add_req(set, req); RETURN(0); @@ -4615,7 +4760,7 @@ lfsck_layout_slave_query_master(const struct lu_env *env, } spin_unlock(&llsd->llsd_lock); - rc = ptlrpc_set_wait(set); + rc = ptlrpc_set_wait(env, set); ptlrpc_set_destroy(set); GOTO(log, rc = (rc1 != 0 ? rc1 : rc)); @@ -4693,7 +4838,7 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, } spin_unlock(&llsd->llsd_lock); - ptlrpc_set_wait(set); + ptlrpc_set_wait(env, set); ptlrpc_set_destroy(set); RETURN_EXIT; @@ -4739,9 +4884,6 @@ static int lfsck_layout_master_check_pairs(const struct lu_env *env, if (rc < 0) GOTO(unlock, rc); - if (rc == 0) - GOTO(unlock, rc = -ENODATA); - lmm = buf->lb_buf; magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_COMP_V1) { @@ -4894,6 +5036,8 @@ static int lfsck_layout_slave_repair_pfid(const struct lu_env *env, rc = __lfsck_layout_update_pfid(env, obj, &lrl->lrl_ff_client.ff_parent, &lrl->lrl_ff_client.ff_layout, + lrl->lrl_ff_client.ff_layout_version, + lrl->lrl_ff_client.ff_range, lrl->lrl_ff_client.ff_parent.f_ver); GOTO(unlock, rc); @@ -4926,7 +5070,7 @@ static int lfsck_layout_reset(const struct lu_env *env, memset(lo, 0, com->lc_file_size); } else { __u32 count = lo->ll_success_count; - __u64 last_time = lo->ll_time_last_complete; + time64_t last_time = lo->ll_time_last_complete; memset(lo, 0, com->lc_file_size); lo->ll_success_count = count; @@ -4987,9 +5131,9 @@ static int lfsck_layout_master_checkpoint(const struct lu_env *env, } else { lo->ll_pos_last_checkpoint = lfsck->li_pos_checkpoint.lp_oit_cookie; - lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); - lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_run_time_phase1 += ktime_get_seconds() - + lfsck->li_time_last_checkpoint; + lo->ll_time_last_checkpoint = ktime_get_real_seconds(); lo->ll_objs_checked_phase1 += com->lc_new_checked; com->lc_new_checked = 0; } @@ -5021,9 +5165,9 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env, } else { lo->ll_pos_last_checkpoint = lfsck->li_pos_checkpoint.lp_oit_cookie; - lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); - lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_run_time_phase1 += ktime_get_seconds() - + lfsck->li_time_last_checkpoint; + lo->ll_time_last_checkpoint = ktime_get_real_seconds(); lo->ll_objs_checked_phase1 += com->lc_new_checked; com->lc_new_checked = 0; } @@ -5067,7 +5211,7 @@ static int lfsck_layout_prep(const struct lu_env *env, } down_write(&com->lc_sem); - lo->ll_time_latest_start = cfs_time_current_sec(); + lo->ll_time_latest_start = ktime_get_real_seconds(); spin_lock(&lfsck->li_lock); if (lo->ll_flags & LF_SCANNED_ONCE) { if (!lfsck->li_drop_dryrun || @@ -5135,9 +5279,9 @@ static int lfsck_layout_slave_prep(const struct lu_env *env, if (rc == 0 && start != NULL && start->ls_flags & LPF_OST_ORPHAN) { LASSERT(!llsd->llsd_rbtree_valid); - write_lock(&llsd->llsd_rb_lock); + down_write(&llsd->llsd_rb_rwsem); rc = lfsck_rbtree_setup(env, com); - write_unlock(&llsd->llsd_rb_lock); + up_write(&llsd->llsd_rb_rwsem); } CDEBUG(D_LFSCK, "%s: layout LFSCK slave prep done, start pos [" @@ -5285,13 +5429,8 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, * cause the parent object cannot be purged, then cause the * child object cannot be purged also. So the LFSCK thread * will fall into deadlock. - * - * We introduce non-blocked version lu_object_find() to allow - * the LFSCK thread to return failure immediately (instead of - * wait) when it finds dying (child) object, then the LFSCK - * thread can check whether the parent object is dying or not. - * So avoid above deadlock. LU-5395 */ - cobj = lfsck_object_find_by_dev_nowait(env, tgt->ltd_tgt, fid); + */ + cobj = lfsck_object_find_by_dev(env, tgt->ltd_tgt, fid); if (IS_ERR(cobj)) { if (lfsck_is_dead_obj(parent)) { lfsck_tgt_put(tgt); @@ -5303,16 +5442,13 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, goto next; } - if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_ASSISTANT_DIRECT)) { - rc = dt_declare_attr_get(env, cobj); - if (rc != 0) - goto next; + rc = dt_declare_attr_get(env, cobj); + if (rc) + goto next; - rc = dt_declare_xattr_get(env, cobj, &buf, - XATTR_NAME_FID); - if (rc != 0) - goto next; - } + rc = dt_declare_xattr_get(env, cobj, &buf, XATTR_NAME_FID); + if (rc) + goto next; if (lso == NULL) { struct lu_attr *attr = &info->lti_la; @@ -5436,10 +5572,12 @@ again: GOTO(out, rc = 0); rc = lfsck_layout_get_lovea(env, obj, buf); - if (rc <= 0) + if (rc == -EINVAL || rc == -ENODATA || rc == -EOPNOTSUPP) /* Skip bad lov EA during the 1st cycle scanning, and * try to recover it via orphan in the 2nd scanning. */ - GOTO(out, rc = (rc == -EINVAL ? 0 : rc)); + rc = 0; + if (rc <= 0) + GOTO(out, rc); size = rc; lmm = buf->lb_buf; @@ -5729,9 +5867,9 @@ static int lfsck_layout_master_post(const struct lu_env *env, spin_unlock(&lfsck->li_lock); if (!init) { - lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); - lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_run_time_phase1 += ktime_get_seconds() - + lfsck->li_time_last_checkpoint; + lo->ll_time_last_checkpoint = ktime_get_real_seconds(); lo->ll_objs_checked_phase1 += com->lc_new_checked; com->lc_new_checked = 0; } @@ -5797,9 +5935,9 @@ static int lfsck_layout_slave_post(const struct lu_env *env, LE_LASTID_REBUILT); if (!init) { - lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); - lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_run_time_phase1 += ktime_get_seconds() - + lfsck->li_time_last_checkpoint; + lo->ll_time_last_checkpoint = ktime_get_real_seconds(); lo->ll_objs_checked_phase1 += com->lc_new_checked; com->lc_new_checked = 0; } @@ -5821,8 +5959,14 @@ static void lfsck_layout_dump(const struct lu_env *env, struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_layout *lo = com->lc_file_ram; + const char *prefix; down_read(&com->lc_sem); + if (bk->lb_param & LPF_DRYRUN) + prefix = "inconsistent"; + else + prefix = "repaired"; + seq_printf(m, "name: lfsck_layout\n" "magic: %#x\n" "version: %d\n" @@ -5849,46 +5993,44 @@ static void lfsck_layout_dump(const struct lu_env *env, lo->ll_pos_first_inconsistent); seq_printf(m, "success_count: %u\n" - "repaired_dangling: %llu\n" - "repaired_unmatched_pair: %llu\n" - "repaired_multiple_referenced: %llu\n" - "repaired_orphan: %llu\n" - "repaired_inconsistent_owner: %llu\n" - "repaired_others: %llu\n" + "%s_dangling: %llu\n" + "%s_unmatched_pair: %llu\n" + "%s_multiple_referenced: %llu\n" + "%s_orphan: %llu\n" + "%s_inconsistent_owner: %llu\n" + "%s_others: %llu\n" "skipped: %llu\n" "failed_phase1: %llu\n" "failed_phase2: %llu\n", lo->ll_success_count, - lo->ll_objs_repaired[LLIT_DANGLING - 1], - lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1], - lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1], - lo->ll_objs_repaired[LLIT_ORPHAN - 1], - lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1], - lo->ll_objs_repaired[LLIT_OTHERS - 1], + prefix, lo->ll_objs_repaired[LLIT_DANGLING - 1], + prefix, lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1], + prefix, lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1], + prefix, lo->ll_objs_repaired[LLIT_ORPHAN - 1], + prefix, lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1], + prefix, lo->ll_objs_repaired[LLIT_OTHERS - 1], lo->ll_objs_skipped, lo->ll_objs_failed_phase1, lo->ll_objs_failed_phase2); if (lo->ll_status == LS_SCANNING_PHASE1) { - __u64 pos; - cfs_duration_t duration = cfs_time_current() - - lfsck->li_time_last_checkpoint; - __u64 checked = lo->ll_objs_checked_phase1 + - com->lc_new_checked; - __u64 speed = checked; - __u64 new_checked = com->lc_new_checked * - msecs_to_jiffies(MSEC_PER_SEC); - __u32 rtime = lo->ll_run_time_phase1 + - cfs_duration_sec(duration + HALF_SEC); + time64_t duration = ktime_get_seconds() - + lfsck->li_time_last_checkpoint; + u64 checked = lo->ll_objs_checked_phase1 + + com->lc_new_checked; + u64 speed = checked; + u64 new_checked = com->lc_new_checked; + time64_t rtime = lo->ll_run_time_phase1 + duration; + u64 pos; if (duration != 0) - do_div(new_checked, duration); + new_checked = div64_s64(new_checked, duration); if (rtime != 0) - do_div(speed, rtime); + speed = div64_s64(speed, rtime); seq_printf(m, "checked_phase1: %llu\n" "checked_phase2: %llu\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" + "run_time_phase1: %lld seconds\n" + "run_time_phase2: %lld seconds\n" "average_speed_phase1: %llu items/sec\n" "average_speed_phase2: N/A\n" "real-time_speed_phase1: %llu items/sec\n" @@ -5917,27 +6059,25 @@ static void lfsck_layout_dump(const struct lu_env *env, seq_printf(m, "current_position: %llu\n", pos); } else if (lo->ll_status == LS_SCANNING_PHASE2) { - cfs_duration_t duration = cfs_time_current() - - com->lc_time_last_checkpoint; - __u64 checked = lo->ll_objs_checked_phase2 + - com->lc_new_checked; - __u64 speed1 = lo->ll_objs_checked_phase1; - __u64 speed2 = checked; - __u64 new_checked = com->lc_new_checked * - msecs_to_jiffies(MSEC_PER_SEC); - __u32 rtime = lo->ll_run_time_phase2 + - cfs_duration_sec(duration + HALF_SEC); + time64_t duration = ktime_get_seconds() - + com->lc_time_last_checkpoint; + u64 checked = lo->ll_objs_checked_phase2 + + com->lc_new_checked; + u64 speed1 = lo->ll_objs_checked_phase1; + u64 speed2 = checked; + u64 new_checked = com->lc_new_checked; + time64_t rtime = lo->ll_run_time_phase2 + duration; if (duration != 0) - do_div(new_checked, duration); + new_checked = div64_s64(new_checked, duration); if (lo->ll_run_time_phase1 != 0) - do_div(speed1, lo->ll_run_time_phase1); + speed1 = div64_s64(speed1, lo->ll_run_time_phase1); if (rtime != 0) - do_div(speed2, rtime); + speed2 = div64_s64(speed2, rtime); seq_printf(m, "checked_phase1: %llu\n" "checked_phase2: %llu\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" + "run_time_phase1: %lld seconds\n" + "run_time_phase2: %lld seconds\n" "average_speed_phase1: %llu items/sec\n" "average_speed_phase2: %llu items/sec\n" "real-time_speed_phase1: N/A\n" @@ -5956,13 +6096,13 @@ static void lfsck_layout_dump(const struct lu_env *env, __u64 speed2 = lo->ll_objs_checked_phase2; if (lo->ll_run_time_phase1 != 0) - do_div(speed1, lo->ll_run_time_phase1); + speed1 = div64_s64(speed1, lo->ll_run_time_phase1); if (lo->ll_run_time_phase2 != 0) - do_div(speed2, lo->ll_run_time_phase2); + speed2 = div64_s64(speed2, lo->ll_run_time_phase2); seq_printf(m, "checked_phase1: %llu\n" "checked_phase2: %llu\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" + "run_time_phase1: %lld seconds\n" + "run_time_phase2: %lld seconds\n" "average_speed_phase1: %llu items/sec\n" "average_speed_phase2: %llu objs/sec\n" "real-time_speed_phase1: N/A\n" @@ -6037,9 +6177,9 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env, com->lc_new_checked = 0; com->lc_new_scanned = 0; - com->lc_time_last_checkpoint = cfs_time_current(); + com->lc_time_last_checkpoint = ktime_get_seconds(); com->lc_time_next_checkpoint = com->lc_time_last_checkpoint + - cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + LFSCK_CHECKPOINT_INTERVAL; while (1) { struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(30), @@ -6307,12 +6447,12 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, break; case LE_PHASE2_DONE: ltd->ltd_layout_done = 1; - if (!list_empty(<d->ltd_layout_list)) { + if (!list_empty(<d->ltd_layout_list)) list_del_init(<d->ltd_layout_list); - if (lr->lr_flags2 & LF_INCOMPLETE) { - lfsck_lad_set_bitmap(env, com, ltd->ltd_index); - fail = true; - } + + if (lr->lr_flags2 & LF_INCOMPLETE) { + lfsck_lad_set_bitmap(env, com, ltd->ltd_index); + fail = true; } break; @@ -6687,7 +6827,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) INIT_LIST_HEAD(&llsd->llsd_master_list); spin_lock_init(&llsd->llsd_lock); llsd->llsd_rb_root = RB_ROOT; - rwlock_init(&llsd->llsd_rb_lock); + init_rwsem(&llsd->llsd_rb_rwsem); com->lc_data = llsd; } com->lc_file_size = sizeof(*lo); @@ -6714,14 +6854,17 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) com->lc_obj = obj; rc = lfsck_layout_load(env, com); - if (rc > 0) + if (rc > 0) { rc = lfsck_layout_reset(env, com, true); - else if (rc == -ENOENT) + } else if (rc == -ENOENT) { rc = lfsck_layout_init(env, com); - else if (lfsck->li_master) + } else if (lfsck->li_master) { rc = lfsck_load_sub_trace_files(env, com, &dt_lfsck_layout_dangling_features, LFSCK_LAYOUT, false); + if (rc) + rc = lfsck_layout_reset(env, com, true); + } if (rc != 0) GOTO(out, rc); @@ -6788,7 +6931,7 @@ struct lfsck_orphan_it { struct lfsck_rbtree_node *loi_lrn; struct lfsck_layout_slave_target *loi_llst; struct lu_fid loi_key; - struct lu_orphan_rec_v2 loi_rec; + struct lu_orphan_rec_v3 loi_rec; __u64 loi_hash; unsigned int loi_over:1; }; @@ -6893,8 +7036,7 @@ static int lfsck_orphan_index_insert(const struct lu_env *env, struct dt_object *dt, const struct dt_rec *rec, const struct dt_key *key, - struct thandle *handle, - int ignore_quota) + struct thandle *handle) { return -EOPNOTSUPP; } @@ -6955,7 +7097,7 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, if (dev->dd_record_fid_accessed) { /* The first iteration against the rbtree, scan the whole rbtree * to remove the nodes which do NOT need to be handled. */ - write_lock(&llsd->llsd_rb_lock); + down_write(&llsd->llsd_rb_rwsem); if (dev->dd_record_fid_accessed) { struct rb_node *node; struct rb_node *next; @@ -6977,11 +7119,11 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, node = next; } } - write_unlock(&llsd->llsd_rb_lock); + up_write(&llsd->llsd_rb_rwsem); } /* read lock the rbtree when init, and unlock when fini */ - read_lock(&llsd->llsd_rb_lock); + down_read(&llsd->llsd_rb_rwsem); it->loi_com = com; com = NULL; @@ -7018,7 +7160,7 @@ static void lfsck_orphan_it_fini(const struct lu_env *env, lfsck_lfsck2name(com->lc_lfsck)); llsd = com->lc_data; - read_unlock(&llsd->llsd_rb_lock); + up_read(&llsd->llsd_rb_rwsem); llst = it->loi_llst; LASSERT(llst != NULL); @@ -7044,7 +7186,7 @@ static int lfsck_orphan_it_next(const struct lu_env *env, struct lu_attr *la = &info->lti_la; struct lfsck_orphan_it *it = (struct lfsck_orphan_it *)di; struct lu_fid *key = &it->loi_key; - struct lu_orphan_rec_v2 *rec = &it->loi_rec; + struct lu_orphan_rec_v3 *rec = &it->loi_rec; struct ost_layout *ol = &rec->lor_layout; struct lfsck_component *com = it->loi_com; struct lfsck_instance *lfsck = com->lc_lfsck; @@ -7185,6 +7327,8 @@ again1: rec->lor_rec.lor_uid = la->la_uid; rec->lor_rec.lor_gid = la->la_gid; memset(ol, 0, sizeof(*ol)); + rec->lor_layout_version = 0; + rec->lor_range = 0; GOTO(out, rc = 0); } @@ -7220,13 +7364,18 @@ again1: rec->lor_rec.lor_uid = la->la_uid; rec->lor_rec.lor_gid = la->la_gid; ost_layout_le_to_cpu(ol, &ff->ff_layout); + rec->lor_layout_version = + le32_to_cpu(ff->ff_layout_version & ~LU_LAYOUT_RESYNC); + rec->lor_range = le32_to_cpu(ff->ff_range); CDEBUG(D_LFSCK, "%s: return orphan "DFID", PFID "DFID", owner %u:%u, " "stripe size %u, stripe count %u, COMP id %u, COMP start %llu, " - "COMP end %llu\n", lfsck_lfsck2name(com->lc_lfsck), PFID(key), + "COMP end %llu, layout version %u, range %u\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(key), PFID(&rec->lor_rec.lor_fid), rec->lor_rec.lor_uid, rec->lor_rec.lor_gid, ol->ol_stripe_size, ol->ol_stripe_count, - ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end); + ol->ol_comp_id, ol->ol_comp_start, ol->ol_comp_end, + rec->lor_layout_version, rec->lor_range); GOTO(out, rc = 0); @@ -7289,7 +7438,7 @@ static int lfsck_orphan_it_rec(const struct lu_env *env, { struct lfsck_orphan_it *it = (struct lfsck_orphan_it *)di; - *(struct lu_orphan_rec_v2 *)rec = it->loi_rec; + *(struct lu_orphan_rec_v3 *)rec = it->loi_rec; return 0; }