X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_layout.c;h=8ce60862d83587ae52ce8ee3167a2ed5f5352a6e;hp=00ef9cff3c09adaafe544942479268ecc9f26fda;hb=cffd726304667ea3d84b4b3c1a9b66565e5a5566;hpb=094030bab406b0ea5d45f711549327829b68c9cd diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 00ef9cf..8ce6086 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2014, Intel Corporation. + * Copyright (c) 2014, 2016, Intel Corporation. */ /* * lustre/lfsck/lfsck_layout.c @@ -72,6 +72,8 @@ struct lfsck_layout_slave_target { __u64 llst_gen; atomic_t llst_ref; __u32 llst_index; + /* How many times we have failed to get the master status. */ + int llst_failures; }; struct lfsck_layout_slave_data { @@ -88,50 +90,12 @@ struct lfsck_layout_slave_data { unsigned int llsd_rbtree_valid:1; }; -struct lfsck_layout_object { - struct lu_attr llo_attr; - atomic_t llo_ref; - __u64 llo_cookie; -}; - -struct lfsck_layout_req { - struct lfsck_assistant_req llr_lar; - struct lfsck_layout_object *llr_parent; - struct dt_object *llr_child; - __u32 llr_ost_idx; - __u32 llr_lov_idx; /* offset in LOV EA */ -}; - struct lfsck_layout_slave_async_args { struct obd_export *llsaa_exp; struct lfsck_component *llsaa_com; struct lfsck_layout_slave_target *llsaa_llst; }; -static struct lfsck_layout_object * -lfsck_layout_object_init(const struct lu_env *env, struct dt_object *obj, - __u64 cookie) -{ - struct lfsck_layout_object *llo; - int rc; - - OBD_ALLOC_PTR(llo); - if (llo == NULL) - return ERR_PTR(-ENOMEM); - - rc = dt_attr_get(env, obj, &llo->llo_attr, BYPASS_CAPA); - if (rc != 0) { - OBD_FREE_PTR(llo); - - return ERR_PTR(rc); - } - - llo->llo_cookie = cookie; - atomic_set(&llo->llo_ref, 1); - - return llo; -} - static inline void lfsck_layout_llst_put(struct lfsck_layout_slave_target *llst) { @@ -215,16 +179,8 @@ 
lfsck_layout_llst_find_and_del(struct lfsck_layout_slave_data *llsd, return NULL; } -static inline void lfsck_layout_object_put(const struct lu_env *env, - struct lfsck_layout_object *llo) -{ - if (atomic_dec_and_test(&llo->llo_ref)) - OBD_FREE_PTR(llo); -} - static struct lfsck_layout_req * -lfsck_layout_assistant_req_init(struct lfsck_layout_object *parent, - const struct lu_fid *pfid, +lfsck_layout_assistant_req_init(struct lfsck_assistant_object *lso, struct dt_object *child, __u32 ost_idx, __u32 lov_idx) { @@ -235,10 +191,7 @@ lfsck_layout_assistant_req_init(struct lfsck_layout_object *parent, return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&llr->llr_lar.lar_list); - llr->llr_lar.lar_fid = *pfid; - - atomic_inc(&parent->llo_ref); - llr->llr_parent = parent; + llr->llr_lar.lar_parent = lfsck_assistant_object_get(lso); llr->llr_child = child; llr->llr_ost_idx = ost_idx; llr->llr_lov_idx = lov_idx; @@ -253,7 +206,7 @@ static void lfsck_layout_assistant_req_fini(const struct lu_env *env, container_of0(lar, struct lfsck_layout_req, llr_lar); lfsck_object_put(env, llr->llr_child); - lfsck_layout_object_put(env, llr->llr_parent); + lfsck_assistant_object_put(env, lar->lar_parent); OBD_FREE_PTR(llr); } @@ -327,7 +280,7 @@ static void lfsck_layout_assistant_sync_failures(const struct lu_env *env, down_read(&ltds->ltd_rw_sem); cfs_foreach_bit(lad->lad_bitmap, idx) { - ltd = LTD_TGT(ltds, idx); + ltd = lfsck_ltd2tgt(ltds, idx); LASSERT(ltd != NULL); laia->laia_ltd = ltd; @@ -373,10 +326,9 @@ static int lfsck_layout_get_lovea(const struct lu_env *env, int rc; again: - rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV, BYPASS_CAPA); + rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV); if (rc == -ERANGE) { - rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV, - BYPASS_CAPA); + rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV); if (rc <= 0) return rc; @@ -445,7 +397,7 @@ static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm) return 0; } -#define 
LFSCK_RBTREE_BITMAP_SIZE PAGE_CACHE_SIZE +#define LFSCK_RBTREE_BITMAP_SIZE PAGE_SIZE #define LFSCK_RBTREE_BITMAP_WIDTH (LFSCK_RBTREE_BITMAP_SIZE << 3) #define LFSCK_RBTREE_BITMAP_MASK (LFSCK_RBTREE_BITMAP_WIDTH - 1) @@ -764,6 +716,8 @@ static void lfsck_layout_le_to_cpu(struct lfsck_layout *des, le64_to_cpu(src->ll_objs_repaired[i]); des->ll_objs_skipped = le64_to_cpu(src->ll_objs_skipped); des->ll_bitmap_size = le32_to_cpu(src->ll_bitmap_size); + fid_le_to_cpu(&des->ll_fid_latest_scanned_phase2, + &src->ll_fid_latest_scanned_phase2); } static void lfsck_layout_cpu_to_le(struct lfsck_layout *des, @@ -794,6 +748,8 @@ static void lfsck_layout_cpu_to_le(struct lfsck_layout *des, cpu_to_le64(src->ll_objs_repaired[i]); des->ll_objs_skipped = cpu_to_le64(src->ll_objs_skipped); des->ll_bitmap_size = cpu_to_le32(src->ll_bitmap_size); + fid_cpu_to_le(&des->ll_fid_latest_scanned_phase2, + &src->ll_fid_latest_scanned_phase2); } /** @@ -811,7 +767,7 @@ static int lfsck_layout_load_bitmap(const struct lu_env *env, struct dt_object *obj = com->lc_obj; struct lfsck_assistant_data *lad = com->lc_data; struct lfsck_layout *lo = com->lc_file_ram; - cfs_bitmap_t *bitmap = lad->lad_bitmap; + struct cfs_bitmap *bitmap = lad->lad_bitmap; loff_t pos = com->lc_file_size; ssize_t size; __u32 nbits; @@ -829,7 +785,7 @@ static int lfsck_layout_load_bitmap(const struct lu_env *env, if (nbits > bitmap->size) { __u32 new_bits = bitmap->size; - cfs_bitmap_t *new_bitmap; + struct cfs_bitmap *new_bitmap; while (new_bits < nbits) new_bits <<= 1; @@ -937,7 +893,7 @@ static int lfsck_layout_store(const struct lu_env *env, struct lfsck_layout *lo = com->lc_file_disk; struct thandle *th; struct dt_device *dev = lfsck_obj2dev(obj); - cfs_bitmap_t *bitmap = NULL; + struct cfs_bitmap *bitmap = NULL; loff_t pos; ssize_t size = com->lc_file_size; __u32 nbits = 0; @@ -1013,6 +969,9 @@ static int lfsck_layout_init(const struct lu_env *env, lo->ll_status = LS_INIT; down_write(&com->lc_sem); rc = 
lfsck_layout_store(env, com); + if (rc == 0 && com->lc_lfsck->li_master) + rc = lfsck_load_sub_trace_files(env, com, + &dt_lfsck_layout_dangling_features, LFSCK_LAYOUT, true); up_write(&com->lc_sem); return rc; @@ -1038,14 +997,14 @@ static int fid_is_for_ostobj(const struct lu_env *env, lma = &lfsck_env_info(env)->lti_lma; rc = dt_xattr_get(env, obj, lfsck_buf_get(env, lma, sizeof(*lma)), - XATTR_NAME_LMA, BYPASS_CAPA); + XATTR_NAME_LMA); if (rc == sizeof(*lma)) { lustre_lma_swab(lma); return lma->lma_compat & LMAC_FID_ON_OST ? 1 : 0; } - rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_FID, BYPASS_CAPA); + rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_FID); return rc > 0; } @@ -1143,7 +1102,7 @@ stop: log: CDEBUG(D_LFSCK, "%s: layout LFSCK will create LAST_ID for " - LPX64": rc = %d\n", + "%#llx: rc = %d\n", lfsck_lfsck2name(lfsck), fid_seq(lfsck_dto2fid(obj)), rc); return rc; @@ -1180,8 +1139,8 @@ lfsck_layout_lastid_reload(const struct lu_env *env, lo->ll_flags |= LF_CRASHED_LASTID; CDEBUG(D_LFSCK, "%s: layout LFSCK finds crashed " - "LAST_ID file (1) for the sequence "LPX64 - ", old value "LPU64", known value "LPU64"\n", + "LAST_ID file (1) for the sequence %#llx" + ", old value %llu, known value %llu\n", lfsck_lfsck2name(lfsck), lls->lls_seq, lastid, lls->lls_lastid); } @@ -1214,7 +1173,7 @@ lfsck_layout_lastid_store(const struct lu_env *env, continue; CDEBUG(D_LFSCK, "%s: layout LFSCK will sync the LAST_ID for " - " "LPX64" as "LPU64"\n", + " %#llx as %llu\n", lfsck_lfsck2name(lfsck), lls->lls_seq, lls->lls_lastid); if (bk->lb_param & LPF_DRYRUN) { @@ -1226,7 +1185,7 @@ lfsck_layout_lastid_store(const struct lu_env *env, if (IS_ERR(th)) { rc1 = PTR_ERR(th); CDEBUG(D_LFSCK, "%s: layout LFSCK failed to store " - "the LAST_ID for "LPX64"(1): rc = %d\n", + "the LAST_ID for %#llx(1): rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc1); continue; @@ -1257,7 +1216,7 @@ stop: if (rc != 0) { rc1 = rc; CDEBUG(D_LFSCK, "%s: layout LFSCK failed to 
store " - "the LAST_ID for "LPX64"(2): rc = %d\n", + "the LAST_ID for %#llx(2): rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc1); } @@ -1294,7 +1253,7 @@ lfsck_layout_lastid_load(const struct lu_env *env, lo->ll_flags |= LF_CRASHED_LASTID; CDEBUG(D_LFSCK, "%s: layout LFSCK cannot find the " - "LAST_ID file for sequence "LPX64"\n", + "LAST_ID file for sequence %#llx\n", lfsck_lfsck2name(lfsck), lls->lls_seq); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY4) && @@ -1337,7 +1296,7 @@ lfsck_layout_lastid_load(const struct lu_env *env, lo->ll_flags |= LF_CRASHED_LASTID; CDEBUG(D_LFSCK, "%s: layout LFSCK finds invalid " - "LAST_ID file for the sequence "LPX64 + "LAST_ID file for the sequence %#llx" ": rc = %d\n", lfsck_lfsck2name(lfsck), lls->lls_seq, rc); } @@ -1371,7 +1330,7 @@ static void lfsck_layout_record_failure(const struct lu_env *env, lo->ll_pos_first_inconsistent = cookie; CDEBUG(D_LFSCK, "%s: layout LFSCK hit first non-repaired " - "inconsistency at the pos ["LPU64"]\n", + "inconsistency at the pos [%llu]\n", lfsck_lfsck2name(lfsck), lo->ll_pos_first_inconsistent); } @@ -1386,7 +1345,7 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env, down_write(&com->lc_sem); lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); + HALF_SEC - com->lc_time_last_checkpoint); lo->ll_time_last_checkpoint = cfs_time_current_sec(); lo->ll_objs_checked_phase2 += com->lc_new_checked; @@ -1430,12 +1389,122 @@ static int lfsck_layout_trans_stop(const struct lu_env *env, { int rc; + /* XXX: If there is something wrong or nothing needs to be repaired, + * then notify the lower to stop the modification. Currently, + * we use th_result for such purpose, that may be replaced by + * some rollback mechanism in the future. */ handle->th_result = result; rc = dt_trans_stop(env, dev, handle); - if (rc > 0) - rc = 0; - else if (rc == 0) - rc = 1; + if (result != 0) + return result > 0 ? 
0 : result; + + return rc == 0 ? 1 : rc; +} + +static int lfsck_layout_ins_dangling_rec(const struct lu_env *env, + struct lfsck_component *com, + const struct lu_fid *pfid, + const struct lu_fid *cfid, + __u32 ea_off, __u32 ost_idx) +{ + struct lu_fid *key = &lfsck_env_info(env)->lti_fid3; + struct lu_fid *rec = &lfsck_env_info(env)->lti_fid4; + struct dt_device *dev; + struct dt_object *obj; + struct thandle *th = NULL; + int idx; + int rc = 0; + ENTRY; + + idx = lfsck_sub_trace_file_fid2idx(pfid); + obj = com->lc_sub_trace_objs[idx].lsto_obj; + dev = lfsck_obj2dev(obj); + fid_cpu_to_be(key, pfid); + key->f_ver = cpu_to_be32(ea_off); + fid_cpu_to_be(rec, cfid); + rec->f_ver = cpu_to_be32(ost_idx); + + mutex_lock(&com->lc_sub_trace_objs[idx].lsto_mutex); + + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + GOTO(unlock, rc = PTR_ERR(th)); + + rc = dt_declare_insert(env, obj, + (const struct dt_rec *)rec, + (const struct dt_key *)key, th); + if (rc) + GOTO(unlock, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc) + GOTO(unlock, rc); + + rc = dt_insert(env, obj, (const struct dt_rec *)rec, + (const struct dt_key *)key, th, 1); + + GOTO(unlock, rc); + +unlock: + if (th != NULL && !IS_ERR(th)) + dt_trans_stop(env, dev, th); + + mutex_unlock(&com->lc_sub_trace_objs[idx].lsto_mutex); + + CDEBUG(D_LFSCK, "%s: insert the paris "DFID" => "DFID", ea_off = %u, " + "ost_idx = %u, into the trace file for further dangling check: " + "rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), + PFID(pfid), PFID(cfid), ea_off, ost_idx, rc); + + return rc; +} + +static int lfsck_layout_del_dangling_rec(const struct lu_env *env, + struct lfsck_component *com, + const struct lu_fid *fid, + __u32 ea_off) +{ + struct lu_fid *key = &lfsck_env_info(env)->lti_fid3; + struct dt_device *dev; + struct dt_object *obj; + struct thandle *th = NULL; + int idx; + int rc = 0; + ENTRY; + + idx = lfsck_sub_trace_file_fid2idx(fid); + obj = com->lc_sub_trace_objs[idx].lsto_obj; + dev = 
lfsck_obj2dev(obj); + fid_cpu_to_be(key, fid); + key->f_ver = cpu_to_be32(ea_off); + + mutex_lock(&com->lc_sub_trace_objs[idx].lsto_mutex); + + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + GOTO(unlock, rc = PTR_ERR(th)); + + rc = dt_declare_delete(env, obj, (const struct dt_key *)key, th); + if (rc) + GOTO(unlock, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc) + GOTO(unlock, rc); + + rc = dt_delete(env, obj, (const struct dt_key *)key, th); + + GOTO(unlock, rc); + +unlock: + if (th != NULL && !IS_ERR(th)) + dt_trans_stop(env, dev, th); + + mutex_unlock(&com->lc_sub_trace_objs[idx].lsto_mutex); + + CDEBUG(D_LFSCK, "%s: delete the dangling record for "DFID + ", ea_off = %u from the trace file: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), PFID(fid), ea_off, rc); return rc; } @@ -1464,7 +1533,7 @@ static int lfsck_layout_get_def_stripesize(const struct lu_env *env, /* Get the default stripe size via xattr_get on the backend root. */ rc = dt_xattr_get(env, root, lfsck_buf_get(env, lum, sizeof(*lum)), - XATTR_NAME_LOV, BYPASS_CAPA); + XATTR_NAME_LOV); if (rc > 0) { /* The lum->lmm_stripe_size is LE mode. The *size also * should be LE mode. So it is unnecessary to convert. 
*/ @@ -1528,8 +1597,7 @@ static int lfsck_layout_refill_lovea(const struct lu_env *env, } lfsck_buf_init(&ea_buf, lmm, lov_mds_md_size(count, magic)); - rc = dt_xattr_set(env, parent, &ea_buf, XATTR_NAME_LOV, fl, handle, - BYPASS_CAPA); + rc = dt_xattr_set(env, parent, &ea_buf, XATTR_NAME_LOV, fl, handle); if (rc == 0) rc = 1; @@ -1650,8 +1718,7 @@ static int __lfsck_layout_update_pfid(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); - rc = dt_xattr_set(env, child, &buf, XATTR_NAME_FID, 0, handle, - BYPASS_CAPA); + rc = dt_xattr_set(env, child, &buf, XATTR_NAME_FID, 0, handle); GOTO(stop, rc); @@ -1754,7 +1821,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct dt_insert_rec *dtrec = &info->lti_dt_rec; char *name = info->lti_key; - struct lu_attr *la = &info->lti_la; + struct lu_attr *la = &info->lti_la2; struct dt_object_format *dof = &info->lti_dof; struct lfsck_instance *lfsck = com->lc_lfsck; struct lu_fid *pfid = &rec->lor_fid; @@ -1766,7 +1833,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct thandle *th = NULL; struct lu_buf *ea_buf = &info->lti_big_buf; struct lu_buf lov_buf; - struct lustre_handle lh = { 0 }; + struct lfsck_lock_handle *llh = &info->lti_llh; struct linkea_data ldata = { NULL }; struct lu_buf linkea_buf; const struct lu_name *pname; @@ -1806,25 +1873,6 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, LASSERT(infix != NULL); LASSERT(type != NULL); - do { - snprintf(name, NAME_MAX, DFID"%s-%s-%d", PFID(pfid), infix, - type, idx++); - rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, - (const struct dt_key *)name, BYPASS_CAPA); - if (rc != 0 && rc != -ENOENT) - GOTO(log, rc); - } while (rc == 0); - - rc = linkea_data_new(&ldata, - &lfsck_env_info(env)->lti_linkea_buf); - if (rc != 0) - GOTO(log, rc); - - pname = lfsck_name_get_const(env, name, strlen(name)); - rc = linkea_add_buf(&ldata, pname, 
lfsck_dto2fid(lfsck->li_lpf_obj)); - if (rc != 0) - GOTO(log, rc); - memset(la, 0, sizeof(*la)); la->la_uid = rec->lor_uid; la->la_gid = rec->lor_gid; @@ -1844,17 +1892,39 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, GOTO(log, rc = -ENOMEM); } - /* Hold update lock on the .lustre/lost+found/MDTxxxx/. - * - * XXX: Currently, we do not grab the PDO lock as normal create cases, - * because creating MDT-object for orphan OST-object is rare, we - * do not much care about the performance. It can be improved in - * the future when needed. */ - rc = lfsck_ibits_lock(env, lfsck, lpf, &lh, - MDS_INODELOCK_UPDATE, LCK_EX); +again: + do { + snprintf(name, NAME_MAX, DFID"%s-%s-%d", PFID(pfid), infix, + type, idx++); + rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, + (const struct dt_key *)name); + if (rc != 0 && rc != -ENOENT) + GOTO(log, rc); + } while (rc == 0); + + rc = lfsck_lock(env, lfsck, lfsck->li_lpf_obj, name, llh, + MDS_INODELOCK_UPDATE, LCK_PW); if (rc != 0) GOTO(log, rc); + /* Re-check whether the name conflicts with others after taking + * the ldlm lock. */ + rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, + (const struct dt_key *)name); + if (unlikely(rc == 0)) { + lfsck_unlock(llh); + goto again; + } + + if (rc != -ENOENT) + GOTO(unlock, rc); + + pname = lfsck_name_get_const(env, name, strlen(name)); + rc = linkea_links_new(&ldata, &lfsck_env_info(env)->lti_linkea_buf, + pname, lfsck_dto2fid(lfsck->li_lpf_obj)); + if (rc != 0) + GOTO(unlock, rc); + /* The 1st transaction. 
*/ th = dt_trans_create(env, dev); if (IS_ERR(th)) @@ -1899,12 +1969,11 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, GOTO(stop, rc); rc = dt_insert(env, lpf, (const struct dt_rec *)dtrec, - (const struct dt_key *)name, th, BYPASS_CAPA, 1); + (const struct dt_key *)name, th, 1); if (rc != 0) GOTO(stop, rc); - rc = dt_xattr_set(env, pobj, &linkea_buf, - XATTR_NAME_LINK, 0, th, BYPASS_CAPA); + rc = dt_xattr_set(env, pobj, &linkea_buf, XATTR_NAME_LINK, 0, th); if (rc == 0 && cobj != NULL) { dt_trans_stop(env, dev, th); th = NULL; @@ -1920,7 +1989,7 @@ stop: dt_trans_stop(env, dev, th); unlock: - lfsck_ibits_unlock(&lh, LCK_EX); + lfsck_unlock(llh); log: if (cobj != NULL && !IS_ERR(cobj)) @@ -1999,7 +2068,7 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, { struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_attr *la = &info->lti_la; - ldlm_policy_data_t *policy = &info->lti_policy; + union ldlm_policy_data *policy = &info->lti_policy; struct ldlm_res_id *resid = &info->lti_resid; struct lfsck_instance *lfsck = com->lc_lfsck; struct dt_device *dev = lfsck->li_bottom; @@ -2024,7 +2093,7 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, } /* Get obj's attr without lock firstly. */ - rc = dt_attr_get(env, obj, la, BYPASS_CAPA); + rc = dt_attr_get(env, obj, la); dt_read_unlock(env, obj); if (rc != 0) GOTO(put, rc); @@ -2047,7 +2116,7 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, dt_write_lock(env, obj, 0); /* Get obj's attr within lock again. 
*/ - rc = dt_attr_get(env, obj, la, BYPASS_CAPA); + rc = dt_attr_get(env, obj, la); if (rc != 0) GOTO(unlock, rc); @@ -2131,12 +2200,16 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, int rc = 0; ENTRY; + while (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val)) { + if (unlikely(!thread_is_running(&com->lc_lfsck->li_thread))) + RETURN(0); + } + ostid_le_to_cpu(&slot->l_ost_oi, oi); rc = ostid_to_fid(cfid2, oi, ost_idx2); if (rc != 0) GOTO(out, rc); - /* Hold layout lock on the parent to prevent others to access. */ rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, LCK_EX); @@ -2289,10 +2362,9 @@ again: dt_write_lock(env, parent, 0); locked = true; - rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV, BYPASS_CAPA); + rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV); if (rc == -ERANGE) { - rc = dt_xattr_get(env, parent, &LU_BUF_NULL, XATTR_NAME_LOV, - BYPASS_CAPA); + rc = dt_xattr_get(env, parent, &LU_BUF_NULL, XATTR_NAME_LOV); LASSERT(rc != 0); goto again; } else if (rc == -ENODATA || rc == 0) { @@ -2525,6 +2597,12 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, if (!S_ISREG(lu_object_attr(&parent->do_lu))) GOTO(put, rc = -EISDIR); + /* The orphan OST-object claims to be the parent's stripe, then + * related dangling record in the trace file is meaningless. 
*/ + rc = lfsck_layout_del_dangling_rec(env, com, pfid, ea_off); + if (rc != 0 && rc != -ENOENT) + GOTO(put, rc); + rc = lfsck_layout_recreate_lovea(env, com, ltd, rec, parent, cfid, ltd->ltd_index, ea_off); @@ -2590,12 +2668,13 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, if (unlikely(IS_ERR(obj))) GOTO(log, rc = PTR_ERR(obj)); - rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_orphan_features); + rc = obj->do_ops->do_index_try(env, obj, + &dt_lfsck_layout_orphan_features); if (rc != 0) GOTO(put, rc); iops = &obj->do_index_ops->dio_it; - di = iops->init(env, obj, 0, BYPASS_CAPA); + di = iops->init(env, obj, 0); if (IS_ERR(di)) GOTO(put, rc = PTR_ERR(di)); @@ -2657,70 +2736,80 @@ log: return rc > 0 ? 0 : rc; } -/* For the MDT-object with dangling reference, we need to repare the - * inconsistency according to the LFSCK sponsor's requirement: +/** + * Repair the MDT-object with dangling LOV EA reference. + * + * we need to repair the inconsistency according to the users' requirement: * * 1) Keep the inconsistency there and report the inconsistency case, * then give the chance to the application to find related issues, * and the users can make the decision about how to handle it with * more human knownledge. (by default) * - * 2) Re-create the missing OST-object with the FID/owner information. */ -static int lfsck_layout_repair_dangling(const struct lu_env *env, - struct lfsck_component *com, - struct dt_object *parent, - struct lfsck_layout_req *llr, - const struct lu_attr *pla) + * 2) Re-create the missing OST-object with the FID/owner information. 
+ * + * \param[in] env pointer to the thread context + * \param[in] com the layout LFSCK component + * \param[in] parent the MDT-object with dangling LOV EA reference + * \param[in] child the OST-object to be created + * \param[in] ea_off the offset of the OST-object in the LOV EA + * \param[in] ost_idx the index of OST on which the OST-object resides + * + * \retval +1 for repair successfully + * \retval 0 for did nothing + * \retval negative error number on failure + */ +static int __lfsck_layout_repair_dangling(const struct lu_env *env, + struct lfsck_component *com, + struct dt_object *parent, + struct dt_object *child, + __u32 ea_off, __u32 ost_idx, bool log) { - struct lfsck_thread_info *info = lfsck_env_info(env); - struct filter_fid *pfid = &info->lti_new_pfid; - struct dt_object_format *dof = &info->lti_dof; - struct lu_attr *cla = &info->lti_la2; - struct dt_object *child = llr->llr_child; - struct dt_device *dev = lfsck_obj2dev(child); - const struct lu_fid *tfid = lu_object_fid(&parent->do_lu); - struct thandle *handle; - struct lu_buf *buf; - struct lustre_handle lh = { 0 }; - int rc; - bool create; + struct lfsck_thread_info *info = lfsck_env_info(env); + struct filter_fid *ff = &info->lti_new_pfid; + struct dt_object_format *dof = &info->lti_dof; + struct lu_attr *la = &info->lti_la; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct dt_device *dev = lfsck_obj2dev(child); + const struct lu_fid *pfid = lfsck_dto2fid(parent); + const struct lu_fid *cfid = lfsck_dto2fid(child); + struct thandle *handle; + struct lu_buf *buf; + struct lustre_handle lh = { 0 }; + int rc; ENTRY; - if (com->lc_lfsck->li_bookmark_ram.lb_param & LPF_CREATE_OSTOBJ) - create = true; - else - create = false; - - if (!create) + if (!(lfsck->li_bookmark_ram.lb_param & LPF_CREATE_OSTOBJ)) GOTO(log, rc = 1); - memset(cla, 0, sizeof(*cla)); - cla->la_uid = pla->la_uid; - cla->la_gid = pla->la_gid; - cla->la_mode = S_IFREG | 0666; - cla->la_valid = LA_TYPE | LA_MODE | LA_UID | 
LA_GID | - LA_ATIME | LA_MTIME | LA_CTIME; - memset(dof, 0, sizeof(*dof)); - - rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, + rc = lfsck_ibits_lock(env, lfsck, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, LCK_EX); if (rc != 0) GOTO(log, rc); - pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq); - pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid); + rc = dt_attr_get(env, parent, la); + if (rc != 0) + GOTO(unlock1, rc); + + la->la_mode = S_IFREG | 0666; + la->la_atime = la->la_mtime = la->la_ctime = 0; + la->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID | + LA_ATIME | LA_MTIME | LA_CTIME; + memset(dof, 0, sizeof(*dof)); + ff->ff_parent.f_seq = cpu_to_le64(pfid->f_seq); + ff->ff_parent.f_oid = cpu_to_le32(pfid->f_oid); /* Currently, the filter_fid::ff_parent::f_ver is not the real parent * MDT-object's FID::f_ver, instead it is the OST-object index in its * parent MDT-object's layout EA. */ - pfid->ff_parent.f_stripe_idx = cpu_to_le32(llr->llr_lov_idx); - buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid)); + ff->ff_parent.f_stripe_idx = cpu_to_le32(ea_off); + buf = lfsck_buf_get(env, ff, sizeof(struct filter_fid)); handle = dt_trans_create(env, dev); if (IS_ERR(handle)) GOTO(unlock1, rc = PTR_ERR(handle)); - rc = dt_declare_create(env, child, cla, NULL, dof, handle); + rc = dt_declare_create(env, child, la, NULL, dof, handle); if (rc != 0) GOTO(stop, rc); @@ -2735,14 +2824,60 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, dt_read_lock(env, parent, 0); if (unlikely(lfsck_is_dead_obj(parent))) - GOTO(unlock2, rc = 1); + GOTO(unlock2, rc = 0); + + if (lfsck->li_bookmark_ram.lb_param & LPF_DELAY_CREATE_OSTOBJ) { + struct ost_id *oi = &info->lti_oi; + struct lu_fid *tfid = &info->lti_fid2; + struct lu_buf *lovea = &info->lti_big_buf; + struct lov_mds_md_v1 *lmm; + struct lov_ost_data_v1 *objs; + __u32 magic; + int count; + int idx2; + + rc = lfsck_layout_get_lovea(env, parent, lovea); + if (rc <= 0) + GOTO(unlock2, rc); + + 
lmm = lovea->lb_buf; + rc = lfsck_layout_verify_header(lmm); + if (unlikely(rc != 0)) + GOTO(unlock2, rc); + + count = le16_to_cpu(lmm->lmm_stripe_count); + /* Someone changed the LOV EA, do nothing. */ + if (count <= ea_off) + GOTO(unlock2, rc = 0); + + /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which + * has been verified in lfsck_layout_verify_header() already. + * If some new magic introduced in the future, then the layout + * LFSCK needs to be updated also. */ + magic = le32_to_cpu(lmm->lmm_magic); + if (magic == LOV_MAGIC_V1) { + objs = &lmm->lmm_objects[ea_off]; + } else { + LASSERT(magic == LOV_MAGIC_V3); - rc = dt_create(env, child, cla, NULL, dof, handle); + objs = &((struct lov_mds_md_v3 *)lmm)->\ + lmm_objects[ea_off]; + } + + ostid_le_to_cpu(&objs->l_ost_oi, oi); + idx2 = le32_to_cpu(objs->l_ost_idx); + rc = ostid_to_fid(tfid, oi, idx2); + /* Someone changed the LOV EA, do nothing. */ + if (rc != 0 || !lu_fid_eq(tfid, cfid)) + GOTO(unlock2, rc); + } + + rc = dt_create(env, child, la, NULL, dof, handle); if (rc != 0) GOTO(unlock2, rc); rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, LU_XATTR_CREATE, - handle, BYPASS_CAPA); + handle); GOTO(unlock2, rc); @@ -2756,14 +2891,90 @@ unlock1: lfsck_ibits_unlock(&lh, LCK_EX); log: - CDEBUG(D_LFSCK, "%s: layout LFSCK assistant found dangling " - "reference for: parent "DFID", child "DFID", OST-index %u, " - "stripe-index %u, owner %u/%u. %s: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), - PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, - llr->llr_lov_idx, pla->la_uid, pla->la_gid, - create ? 
"Create the lost OST-object as required" : - "Keep the MDT-object there by default", rc); + if (rc != 0 && log) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant found " + "dangling reference for: parent "DFID", child " + DFID", ea_off %u, ost_idx %u, %s: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(pfid), PFID(cfid), + ea_off, ost_idx, + (lfsck->li_bookmark_ram.lb_param & LPF_CREATE_OSTOBJ) ? + "Create the lost OST-object as required" : + "Keep the MDT-object there by default", rc); + + return rc; +} + +/** + * Repair the MDT-object with dangling LOV EA reference. + * + * Prepare parameters and call __lfsck_layout_repair_dangling() + * to repair the dangling LOV EA reference. + * + * \param[in] env pointer to the thread context + * \param[in] com the layout LFSCK component + * \param[in] pfid the MDT-object's FID + * \param[in] cfid the FID for the OST-object to be created + * \param[in] ea_off the offset of the OST-object in the LOV EA + * \param[in] ost_idx the index of OST on which the OST-object resides + * + * \retval +1 for repair successfully + * \retval 0 for did nothing + * \retval negative error number on failure + */ +static int lfsck_layout_repair_dangling(const struct lu_env *env, + struct lfsck_component *com, + const struct lu_fid *pfid, + const struct lu_fid *cfid, + __u32 ea_off, __u32 ost_idx) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct dt_object *parent = NULL; + struct dt_object *child = NULL; + struct lfsck_tgt_desc *ltd; + int rc; + ENTRY; + + parent = lfsck_object_find_bottom(env, lfsck, pfid); + if (IS_ERR(parent)) + GOTO(log, rc = PTR_ERR(parent)); + + /* The MDT-object has been removed. */ + if (dt_object_exists(parent) == 0) + GOTO(log, rc = 0); + + ltd = lfsck_ltd2tgt(&lfsck->li_ost_descs, ost_idx); + if (unlikely(ltd == NULL)) + GOTO(log, rc = -ENODEV); + + child = lfsck_object_find_by_dev(env, ltd->ltd_tgt, cfid); + if (IS_ERR(child)) + GOTO(log, rc = PTR_ERR(child)); + + /* The OST-object has been created. 
*/ + if (unlikely(dt_object_exists(child) != 0)) + GOTO(log, rc = 0); + + rc = __lfsck_layout_repair_dangling(env, com, parent, child, + ea_off, ost_idx, false); + + GOTO(log, rc); + +log: + if (child != NULL && !IS_ERR(child)) + lfsck_object_put(env, child); + + if (parent != NULL && !IS_ERR(parent)) + lfsck_object_put(env, parent); + + if (rc != 0) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant found " + "dangling reference for: parent "DFID", child " + DFID", ea_off %u, ost_idx %u, %s: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(pfid), PFID(cfid), + ea_off, ost_idx, + (lfsck->li_bookmark_ram.lb_param & LPF_CREATE_OSTOBJ) ? + "Create the lost OST-object as required" : + "Keep the MDT-object there by default", rc); return rc; } @@ -2775,11 +2986,10 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, struct lfsck_component *com, struct dt_object *parent, struct lfsck_layout_req *llr, - const struct lu_attr *pla) + struct lu_attr *la) { struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid *pfid = &info->lti_new_pfid; - struct lu_attr *tla = &info->lti_la3; struct dt_object *child = llr->llr_child; struct dt_device *dev = lfsck_obj2dev(child); const struct lu_fid *tfid = lu_object_fid(&parent->do_lu); @@ -2811,10 +3021,12 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); - tla->la_valid = LA_UID | LA_GID; - tla->la_uid = pla->la_uid; - tla->la_gid = pla->la_gid; - rc = dt_declare_attr_set(env, child, tla, handle); + rc = dt_attr_get(env, parent, la); + if (rc != 0) + GOTO(stop, rc); + + la->la_valid = LA_UID | LA_GID; + rc = dt_declare_attr_set(env, child, la, handle); if (rc != 0) GOTO(stop, rc); @@ -2826,18 +3038,17 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock2, rc = 1); - rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle, - BYPASS_CAPA); + rc = dt_xattr_set(env, child, buf, 
XATTR_NAME_FID, 0, handle); if (rc != 0) GOTO(unlock2, rc); /* Get the latest parent's owner. */ - rc = dt_attr_get(env, parent, tla, BYPASS_CAPA); + rc = dt_attr_get(env, parent, la); if (rc != 0) GOTO(unlock2, rc); - tla->la_valid = LA_UID | LA_GID; - rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA); + la->la_valid = LA_UID | LA_GID; + rc = dt_attr_set(env, child, la, handle); GOTO(unlock2, rc); @@ -2851,12 +3062,16 @@ unlock1: lfsck_ibits_unlock(&lh, LCK_EX); log: - CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired unmatched " - "MDT-OST pair for: parent "DFID", child "DFID", OST-index %u, " - "stripe-index %u, owner %u/%u: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), - PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, llr->llr_lov_idx, - pla->la_uid, pla->la_gid, rc); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired " + "unmatched MDT-OST pair for: parent "DFID + ", child "DFID", OST-index %u, stripe-index %u, " + "owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), + PFID(lfsck_dto2fid(parent)), + PFID(lfsck_dto2fid(child)), + llr->llr_ost_idx, llr->llr_lov_idx, + la->la_uid, la->la_gid, rc); return rc; } @@ -2876,7 +3091,7 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, struct dt_object_format *dof = &info->lti_dof; struct ost_id *oi = &info->lti_oi; struct lfsck_instance *lfsck = com->lc_lfsck; - struct dt_device *dev = lfsck->li_bottom; + struct dt_device *dev; struct lu_device *d = &lfsck_obj2dev(llr->llr_child)->dd_lu_dev; struct lu_object *o; @@ -2919,9 +3134,14 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, child = container_of(n, struct dt_object, do_lu); memset(hint, 0, sizeof(*hint)); + rc = dt_attr_get(env, parent, la); + if (rc != 0) + GOTO(log, rc); + la->la_valid = LA_UID | LA_GID; memset(dof, 0, sizeof(*dof)); + dev = lfsck_obj2dev(child); handle = dt_trans_create(env, dev); if (IS_ERR(handle)) GOTO(log, rc = 
PTR_ERR(handle)); @@ -2980,7 +3200,7 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock, rc = 0); - rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV, BYPASS_CAPA); + rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV); if (unlikely(rc == 0 || rc == -ENODATA || rc == -ERANGE)) GOTO(unlock, rc = 0); @@ -3014,7 +3234,7 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, lov_mds_md_size(le16_to_cpu(lmm->lmm_stripe_count), magic)); rc = dt_xattr_set(env, parent, &ea_buf, XATTR_NAME_LOV, - LU_XATTR_REPLACE, handle, BYPASS_CAPA); + LU_XATTR_REPLACE, handle); GOTO(unlock, rc = (rc == 0 ? 1 : rc)); @@ -3030,10 +3250,12 @@ log: if (child != NULL) lfsck_object_put(env, child); - CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired multiple " - "references for: parent "DFID", OST-index %u, stripe-index %u, " - "owner %u/%u: rc = %d\n", lfsck_lfsck2name(lfsck), PFID(pfid), - llr->llr_ost_idx, llr->llr_lov_idx, la->la_uid, la->la_gid, rc); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired " + "multiple references for: parent "DFID", OST-index %u, " + "stripe-index %u, owner %u/%u: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(pfid), llr->llr_ost_idx, + llr->llr_lov_idx, la->la_uid, la->la_gid, rc); return rc; } @@ -3046,23 +3268,24 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, struct lfsck_component *com, struct dt_object *parent, struct lfsck_layout_req *llr, - struct lu_attr *pla) + struct lu_attr *pla, + const struct lu_attr *cla) { struct lfsck_thread_info *info = lfsck_env_info(env); - struct lu_attr *tla = &info->lti_la3; + struct lu_attr *tla = &info->lti_la2; struct dt_object *child = llr->llr_child; struct dt_device *dev = lfsck_obj2dev(child); struct thandle *handle; int rc; ENTRY; + tla->la_uid = pla->la_uid; + tla->la_gid = pla->la_gid; + tla->la_valid = LA_UID | LA_GID; handle = dt_trans_create(env, dev); if 
(IS_ERR(handle)) GOTO(log, rc = PTR_ERR(handle)); - tla->la_uid = pla->la_uid; - tla->la_gid = pla->la_gid; - tla->la_valid = LA_UID | LA_GID; rc = dt_declare_attr_set(env, child, tla, handle); if (rc != 0) GOTO(stop, rc); @@ -3077,17 +3300,16 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, GOTO(unlock, rc = 1); /* Get the latest parent's owner. */ - rc = dt_attr_get(env, parent, tla, BYPASS_CAPA); + rc = dt_attr_get(env, parent, pla); if (rc != 0) GOTO(unlock, rc); /* Some others chown/chgrp during the LFSCK, needs to do nothing. */ if (unlikely(tla->la_uid != pla->la_uid || tla->la_gid != pla->la_gid)) - GOTO(unlock, rc = 1); - - tla->la_valid = LA_UID | LA_GID; - rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA); + rc = 1; + else + rc = dt_attr_set(env, child, tla, handle); GOTO(unlock, rc); @@ -3098,12 +3320,15 @@ stop: rc = lfsck_layout_trans_stop(env, dev, handle, rc); log: - CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired inconsistent " - "file owner for: parent "DFID", child "DFID", OST-index %u, " - "stripe-index %u, owner %u/%u: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), - PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, llr->llr_lov_idx, - pla->la_uid, pla->la_gid, rc); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired " + "inconsistent file owner for: parent "DFID", child "DFID + ", OST-index %u, stripe-index %u, old owner %u/%u, " + "new owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), + PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), + llr->llr_ost_idx, llr->llr_lov_idx, + cla->la_uid, cla->la_gid, tla->la_uid, tla->la_gid, rc); return rc; } @@ -3112,10 +3337,9 @@ log: * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). 
*/ static int lfsck_layout_check_parent(const struct lu_env *env, struct lfsck_component *com, - struct dt_object *parent, + struct lfsck_assistant_object *lso, const struct lu_fid *pfid, const struct lu_fid *cfid, - const struct lu_attr *pla, const struct lu_attr *cla, struct lfsck_layout_req *llr, struct lu_buf *lov_ea, __u32 idx) @@ -3132,24 +3356,11 @@ static int lfsck_layout_check_parent(const struct lu_env *env, __u16 count; ENTRY; - if (fid_is_zero(pfid)) { - /* client never wrote. */ - if (cla->la_size == 0 && cla->la_blocks == 0) { - if (unlikely(cla->la_uid != pla->la_uid || - cla->la_gid != pla->la_gid)) - RETURN (LLIT_INCONSISTENT_OWNER); - - RETURN(0); - } - - RETURN(LLIT_UNMATCHED_PAIR); - } - if (unlikely(!fid_is_sane(pfid))) RETURN(LLIT_UNMATCHED_PAIR); - if (lu_fid_eq(pfid, lu_object_fid(&parent->do_lu))) { - if (llr->llr_lov_idx == idx) + if (lu_fid_eq(pfid, &lso->lso_fid)) { + if (likely(llr->llr_lov_idx == idx)) RETURN(0); RETURN(LLIT_UNMATCHED_PAIR); @@ -3159,11 +3370,8 @@ static int lfsck_layout_check_parent(const struct lu_env *env, if (IS_ERR(tobj)) RETURN(PTR_ERR(tobj)); - if (dt_object_exists(tobj) == 0 || - lfsck_is_dead_obj(tobj)) - GOTO(out, rc = LLIT_UNMATCHED_PAIR); - - if (!S_ISREG(lfsck_object_type(tobj))) + if (dt_object_exists(tobj) == 0 || lfsck_is_dead_obj(tobj) || + !S_ISREG(lfsck_object_type(tobj))) GOTO(out, rc = LLIT_UNMATCHED_PAIR); /* Load the tobj's layout EA, in spite of it is a local MDT-object or @@ -3241,7 +3449,7 @@ static int lfsck_layout_check_parent(const struct lu_env *env, * for a non-existent xattr to check if this object * has been been removed or not. 
*/ rc = dt_xattr_get(env, tobj, &LU_BUF_NULL, - XATTR_NAME_DUMMY, BYPASS_CAPA); + XATTR_NAME_DUMMY); if (unlikely(rc == -ENOENT || rc >= 0)) { rc = LLIT_UNMATCHED_PAIR; } else if (rc == -ENODATA) { @@ -3273,15 +3481,16 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, { struct lfsck_layout_req *llr = container_of0(lar, struct lfsck_layout_req, llr_lar); + struct lfsck_assistant_object *lso = lar->lar_parent; struct lfsck_layout *lo = com->lc_file_ram; struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid_old *pea = &info->lti_old_pfid; struct lu_fid *pfid = &info->lti_fid; struct lu_buf buf = { NULL }; - struct dt_object *parent; + struct dt_object *parent = NULL; struct dt_object *child = llr->llr_child; - struct lu_attr *pla = &info->lti_la; - struct lu_attr *cla = &info->lti_la2; + struct lu_attr *pla = &lso->lso_attr; + struct lu_attr *cla = &info->lti_la; struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; enum lfsck_layout_inconsistency_type type = LLIT_NONE; @@ -3289,21 +3498,19 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, int rc; ENTRY; - parent = lfsck_object_find_bottom(env, lfsck, &lar->lar_fid); - if (IS_ERR(parent)) - RETURN(PTR_ERR(parent)); - - if (unlikely(lfsck_is_dead_obj(parent))) - GOTO(put_parent, rc = 0); + if (lso->lso_dead) + RETURN(0); - rc = dt_attr_get(env, parent, pla, BYPASS_CAPA); - if (rc != 0) - GOTO(out, rc); + CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ASSISTANT_DIRECT, cfs_fail_val); - rc = dt_attr_get(env, child, cla, BYPASS_CAPA); + rc = dt_attr_get(env, child, cla); if (rc == -ENOENT) { - if (unlikely(lfsck_is_dead_obj(parent))) - GOTO(put_parent, rc = 0); + parent = lfsck_assistant_object_load(env, lfsck, lso); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + + RETURN(rc == -ENOENT ? 
0 : rc); + } type = LLIT_DANGLING; goto repair; @@ -3313,8 +3520,8 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, GOTO(out, rc); lfsck_buf_init(&buf, pea, sizeof(struct filter_fid_old)); - rc = dt_xattr_get(env, child, &buf, XATTR_NAME_FID, BYPASS_CAPA); - if (unlikely(rc >= 0 && rc != sizeof(struct filter_fid_old) && + rc = dt_xattr_get(env, child, &buf, XATTR_NAME_FID); + if (unlikely(rc > 0 && rc != sizeof(struct filter_fid_old) && rc != sizeof(struct filter_fid))) { type = LLIT_UNMATCHED_PAIR; goto repair; @@ -3323,20 +3530,18 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, if (rc < 0 && rc != -ENODATA) GOTO(out, rc); - if (rc == -ENODATA) { - fid_zero(pfid); - } else { - fid_le_to_cpu(pfid, &pea->ff_parent); - /* Currently, the filter_fid::ff_parent::f_ver is not the - * real parent MDT-object's FID::f_ver, instead it is the - * OST-object index in its parent MDT-object's layout EA. */ - idx = pfid->f_stripe_idx; - pfid->f_ver = 0; - } + if (rc == 0 || rc == -ENODATA) + GOTO(check_owner, rc = 0); - rc = lfsck_layout_check_parent(env, com, parent, pfid, + fid_le_to_cpu(pfid, &pea->ff_parent); + /* Currently, the filter_fid::ff_parent::f_ver is not the + * real parent MDT-object's FID::f_ver, instead it is the + * OST-object index in its parent MDT-object's layout EA. */ + idx = pfid->f_stripe_idx; + pfid->f_ver = 0; + rc = lfsck_layout_check_parent(env, com, lso, pfid, lu_object_fid(&child->do_lu), - pla, cla, llr, &buf, idx); + cla, llr, &buf, idx); if (rc > 0) { type = rc; goto repair; @@ -3345,6 +3550,9 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, if (rc < 0) GOTO(out, rc); +check_owner: + /* Someone may has changed the owner after the parent attr pre-loaded. + * It can be handled later inside the lfsck_layout_repair_owner(). 
*/ if (unlikely(cla->la_uid != pla->la_uid || cla->la_gid != pla->la_gid)) { type = LLIT_INCONSISTENT_OWNER; @@ -3352,16 +3560,34 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, } repair: - if (bk->lb_param & LPF_DRYRUN) { - if (type != LLIT_NONE) - GOTO(out, rc = 1); - else - GOTO(out, rc = 0); + if (type == LLIT_NONE) + GOTO(out, rc = 0); + + if (bk->lb_param & LPF_DRYRUN) + GOTO(out, rc = 1); + + if (parent == NULL) { + parent = lfsck_assistant_object_load(env, lfsck, lso); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + + if (rc == -ENOENT) + RETURN(0); + + GOTO(out, rc); + } } switch (type) { case LLIT_DANGLING: - rc = lfsck_layout_repair_dangling(env, com, parent, llr, pla); + if (bk->lb_param & LPF_DELAY_CREATE_OSTOBJ) + rc = lfsck_layout_ins_dangling_rec(env, com, + lfsck_dto2fid(parent), lfsck_dto2fid(child), + llr->llr_lov_idx, llr->llr_ost_idx); + else + rc = __lfsck_layout_repair_dangling(env, com, parent, + llr->llr_child, llr->llr_lov_idx, + llr->llr_ost_idx, true); break; case LLIT_UNMATCHED_PAIR: rc = lfsck_layout_repair_unmatched_pair(env, com, parent, @@ -3372,7 +3598,7 @@ repair: llr, pla, &buf); break; case LLIT_INCONSISTENT_OWNER: - rc = lfsck_layout_repair_owner(env, com, parent, llr, pla); + rc = lfsck_layout_repair_owner(env, com, parent, llr, pla, cla); break; default: rc = 0; @@ -3402,7 +3628,8 @@ out: } else { lfsck_layout_record_failure(env, lfsck, lo); } - } else if (rc > 0) { + } else if (rc > 0 && (type != LLIT_DANGLING || + !(bk->lb_param & LPF_DELAY_CREATE_OSTOBJ))) { LASSERTF(type > LLIT_NONE && type <= LLIT_MAX, "unknown type = %d\n", type); @@ -3415,8 +3642,136 @@ out: } up_write(&com->lc_sem); -put_parent: - lfsck_object_put(env, parent); + if (parent != NULL && !IS_ERR(parent)) + lfsck_object_put(env, parent); + + return rc; +} + +static int +lfsck_layout_double_scan_one_trace_file(const struct lu_env *env, + struct lfsck_component *com, + struct dt_object *obj, bool first) +{ + struct lfsck_instance 
*lfsck = com->lc_lfsck; + struct ptlrpc_thread *thread = &lfsck->li_thread; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct lfsck_layout *lo = com->lc_file_ram; + const struct dt_it_ops *iops = &obj->do_index_ops->dio_it; + struct dt_it *di; + struct dt_key *key; + struct lu_fid *pfid = &lfsck_env_info(env)->lti_fid3; + struct lu_fid *cfid = &lfsck_env_info(env)->lti_fid4; + __u32 ea_off; + __u32 ost_idx; + int rc; + ENTRY; + + di = iops->init(env, obj, 0); + if (IS_ERR(di)) + RETURN(PTR_ERR(di)); + + if (first) + fid_cpu_to_be(pfid, &lo->ll_fid_latest_scanned_phase2); + else + fid_zero(pfid); + rc = iops->get(env, di, (const struct dt_key *)pfid); + if (rc < 0) + GOTO(fini, rc); + + if (first) { + /* The start one either has been processed or does not exist, + * skip it. */ + rc = iops->next(env, di); + if (rc != 0) + GOTO(put, rc); + } + + do { + if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val) && + unlikely(!thread_is_running(thread))) + GOTO(put, rc = 0); + + key = iops->key(env, di); + if (IS_ERR(key)) { + rc = PTR_ERR(key); + if (rc == -ENOENT) + GOTO(put, rc = 1); + + goto checkpoint; + } + + fid_be_to_cpu(pfid, (const struct lu_fid *)key); + ea_off = pfid->f_ver; + pfid->f_ver = 0; + if (!fid_is_sane(pfid)) { + rc = 0; + goto checkpoint; + } + + rc = iops->rec(env, di, (struct dt_rec *)cfid, 0); + if (rc == 0) { + fid_be_to_cpu(cfid, cfid); + ost_idx = cfid->f_ver; + cfid->f_ver = 0; + if (!fid_is_sane(cfid)) { + rc = 0; + goto checkpoint; + } + + rc = lfsck_layout_repair_dangling(env, com, pfid, cfid, + ea_off, ost_idx); + } + +checkpoint: + down_write(&com->lc_sem); + com->lc_new_checked++; + com->lc_new_scanned++; + if (rc >= 0) + lo->ll_fid_latest_scanned_phase2 = *pfid; + + if (rc > 0) + lo->ll_objs_repaired[LLIT_DANGLING - 1]++; + else if (rc < 0) + lo->ll_objs_failed_phase2++; + up_write(&com->lc_sem); + + if (rc < 0 && bk->lb_param & LPF_FAILOUT) + GOTO(put, rc); + + if 
(unlikely(cfs_time_beforeq(com->lc_time_next_checkpoint, + cfs_time_current())) && + com->lc_new_checked != 0) { + down_write(&com->lc_sem); + lo->ll_run_time_phase2 += + cfs_duration_sec(cfs_time_current() + + HALF_SEC - com->lc_time_last_checkpoint); + lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_objs_checked_phase2 += com->lc_new_checked; + com->lc_new_checked = 0; + lfsck_layout_store(env, com); + up_write(&com->lc_sem); + + com->lc_time_last_checkpoint = cfs_time_current(); + com->lc_time_next_checkpoint = + com->lc_time_last_checkpoint + + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + } + + lfsck_control_speed_by_self(com); + if (unlikely(!thread_is_running(thread))) + GOTO(put, rc = 0); + + rc = iops->next(env, di); + } while (rc == 0); + + GOTO(put, rc); + +put: + iops->put(env, di); + +fini: + iops->fini(env, di); return rc; } @@ -3441,7 +3796,7 @@ static int lfsck_layout_assistant_handler_p2(const struct lu_env *env, struct lfsck_tgt_desc, ltd_layout_phase_list); list_del_init(&ltd->ltd_layout_phase_list); - if (bk->lb_param & LPF_ALL_TGT) { + if (bk->lb_param & LPF_OST_ORPHAN) { spin_unlock(&ltds->ltd_lock); rc = lfsck_layout_scan_orphan(env, com, ltd); if (rc != 0 && bk->lb_param & LPF_FAILOUT) @@ -3460,6 +3815,29 @@ static int lfsck_layout_assistant_handler_p2(const struct lu_env *env, rc = 0; spin_unlock(&ltds->ltd_lock); + if (rc == 1 && bk->lb_param & LPF_OST_ORPHAN) { + struct lfsck_layout *lo = com->lc_file_ram; + int i; + + com->lc_new_checked = 0; + com->lc_new_scanned = 0; + com->lc_time_last_checkpoint = cfs_time_current(); + com->lc_time_next_checkpoint = com->lc_time_last_checkpoint + + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); + + i = lfsck_sub_trace_file_fid2idx( + &lo->ll_fid_latest_scanned_phase2); + rc = lfsck_layout_double_scan_one_trace_file(env, com, + com->lc_sub_trace_objs[i].lsto_obj, true); + while (rc > 0 && ++i < LFSCK_STF_COUNT) + rc = lfsck_layout_double_scan_one_trace_file(env, com, + 
com->lc_sub_trace_objs[i].lsto_obj, false); + + CDEBUG(D_LFSCK, "%s: layout LFSCK phase2 scan dangling stop " + "at the No. %d trace file: rc = %d\n", + lfsck_lfsck2name(lfsck), i, rc); + } + CDEBUG(D_LFSCK, "%s: layout LFSCK phase2 scan stop: rc = %d\n", lfsck_lfsck2name(lfsck), rc); @@ -3480,11 +3858,22 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, bool done = false; if (rc != 0) { - /* It is quite probably caused by target crash, - * to make the LFSCK can go ahead, assume that - * the target finished the LFSCK prcoessing. */ - done = true; + /* It is probably caused by network trouble, or target crash, + * it will try several times (depends on the obd_timeout, and + * will not less than 3 times). But to make the LFSCK can go + * ahead, we should not try for ever. After some try but still + * hit failure, it will assume that the target exit the LFSCK + * prcoessing and stop try. */ + if (rc == -ENOTCONN || rc == -ESHUTDOWN) { + int max_try = max_t(int, obd_timeout / 30, 3); + + if (++(llst->llst_failures) > max_try) + done = true; + } else { + done = true; + } } else { + llst->llst_failures = 0; lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY); if (lr->lr_status != LS_SCANNING_PHASE1 && lr->lr_status != LS_SCANNING_PHASE2) @@ -3493,8 +3882,9 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, if (done) { CDEBUG(D_LFSCK, "%s: layout LFSCK slave gets the MDT %x " - "status %d\n", lfsck_lfsck2name(com->lc_lfsck), - llst->llst_index, lr != NULL ? lr->lr_status : rc); + "status %d, failures_try %d\n", lfsck_lfsck2name(com->lc_lfsck), + llst->llst_index, lr != NULL ? 
lr->lr_status : rc, + llst->llst_failures); lfsck_layout_llst_del(llsd, llst); } @@ -3538,6 +3928,7 @@ static int lfsck_layout_async_query(const struct lu_env *env, llsaa->llsaa_com = lfsck_component_get(com); llsaa->llsaa_llst = llst; req->rq_interpret_reply = lfsck_layout_slave_async_interpret; + req->rq_allow_intr = 1; ptlrpc_set_add_req(set, req); RETURN(0); @@ -3566,6 +3957,7 @@ static int lfsck_layout_async_notify(const struct lu_env *env, tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); *tmp = *lr; ptlrpc_request_set_replen(req); + req->rq_allow_intr = 1; ptlrpc_set_add_req(set, req); RETURN(0); @@ -3941,6 +4333,9 @@ static int lfsck_layout_reset(const struct lu_env *env, } rc = lfsck_layout_store(env, com); + if (rc == 0 && com->lc_lfsck->li_master) + rc = lfsck_load_sub_trace_files(env, com, + &dt_lfsck_layout_dangling_features, LFSCK_LAYOUT, true); up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: layout LFSCK reset: rc = %d\n", @@ -3992,8 +4387,8 @@ static int lfsck_layout_master_checkpoint(const struct lu_env *env, up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: layout LFSCK master checkpoint at the pos [" - LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), - lfsck->li_pos_current.lp_oit_cookie, rc); + "%llu], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc); return rc; } @@ -4026,8 +4421,8 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env, up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: layout LFSCK slave checkpoint at the pos [" - LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), - lfsck->li_pos_current.lp_oit_cookie, rc); + "%llu], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc); return rc; } @@ -4135,7 +4530,7 @@ static int lfsck_layout_slave_prep(const struct lu_env *env, } CDEBUG(D_LFSCK, "%s: layout LFSCK slave prep done, start pos [" - LPU64"]\n", lfsck_lfsck2name(lfsck), + "%llu]\n", lfsck_lfsck2name(lfsck), 
com->lc_pos_start.lp_oit_cookie); return rc; @@ -4169,7 +4564,7 @@ static int lfsck_layout_master_prep(const struct lu_env *env, log: CDEBUG(D_LFSCK, "%s: layout LFSCK master prep done, start pos [" - LPU64"]\n", lfsck_lfsck2name(com->lc_lfsck), + "%llu]\n", lfsck_lfsck2name(com->lc_lfsck), com->lc_pos_start.lp_oit_cookie); return 0; @@ -4186,7 +4581,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_layout *lo = com->lc_file_ram; struct lfsck_assistant_data *lad = com->lc_data; - struct lfsck_layout_object *llo = NULL; + struct lfsck_assistant_object *lso = NULL; struct lov_ost_data_v1 *objs; struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs; struct ptlrpc_thread *mthread = &lfsck->li_thread; @@ -4300,27 +4695,36 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, goto next; } - rc = dt_declare_attr_get(env, cobj, BYPASS_CAPA); - if (rc != 0) - goto next; + if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_ASSISTANT_DIRECT)) { + rc = dt_declare_attr_get(env, cobj); + if (rc != 0) + goto next; - rc = dt_declare_xattr_get(env, cobj, &buf, XATTR_NAME_FID, - BYPASS_CAPA); - if (rc != 0) - goto next; + rc = dt_declare_xattr_get(env, cobj, &buf, + XATTR_NAME_FID); + if (rc != 0) + goto next; + } + + if (lso == NULL) { + struct lu_attr *attr = &info->lti_la; + + rc = dt_attr_get(env, parent, attr); + if (rc != 0) + goto next; + + lso = lfsck_assistant_object_init(env, + lfsck_dto2fid(parent), attr, + lfsck->li_pos_current.lp_oit_cookie, false); + if (IS_ERR(lso)) { + rc = PTR_ERR(lso); + lso = NULL; - if (llo == NULL) { - llo = lfsck_layout_object_init(env, parent, - lfsck->li_pos_current.lp_oit_cookie); - if (IS_ERR(llo)) { - rc = PTR_ERR(llo); goto next; } } - llr = lfsck_layout_assistant_req_init(llo, - lfsck_dto2fid(parent), - cobj, index, i); + llr = lfsck_layout_assistant_req_init(lso, cobj, index, i); if (IS_ERR(llr)) { rc = PTR_ERR(llr); goto next; @@ -4364,8 +4768,8 @@ next: 
GOTO(out, rc = 0); out: - if (llo != NULL && !IS_ERR(llo)) - lfsck_layout_object_put(env, llo); + if (lso != NULL) + lfsck_assistant_object_put(env, lso); return rc; } @@ -4472,7 +4876,7 @@ again: } rc = dt_xattr_set(env, obj, &ea_buf, XATTR_NAME_LOV, - LU_XATTR_REPLACE, handle, BYPASS_CAPA); + LU_XATTR_REPLACE, handle); if (rc != 0) GOTO(out, rc); @@ -4562,7 +4966,7 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, rc = lfsck_layout_lastid_load(env, com, lls); if (rc != 0) { CDEBUG(D_LFSCK, "%s: layout LFSCK failed to " - "load LAST_ID for "LPX64": rc = %d\n", + "load LAST_ID for %#llx: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), seq, rc); lo->ll_objs_failed_phase1++; OBD_FREE_PTR(lls); @@ -4589,7 +4993,7 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, rc = lfsck_layout_lastid_reload(env, com, lls); if (unlikely(rc != 0)) { CDEBUG(D_LFSCK, "%s: layout LFSCK failed to " - "reload LAST_ID for "LPX64": rc = %d\n", + "reload LAST_ID for %#llx: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc); @@ -4607,8 +5011,8 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, lo->ll_flags |= LF_CRASHED_LASTID; CDEBUG(D_LFSCK, "%s: layout LFSCK finds crashed " - "LAST_ID file (2) for the sequence "LPX64 - ", old value "LPU64", known value "LPU64"\n", + "LAST_ID file (2) for the sequence %#llx" + ", old value %llu, known value %llu\n", lfsck_lfsck2name(lfsck), lls->lls_seq, lls->lls_lastid, oid); } @@ -4627,6 +5031,7 @@ unlock: static int lfsck_layout_exec_dir(const struct lu_env *env, struct lfsck_component *com, + struct lfsck_assistant_object *lso, struct lu_dirent *ent, __u16 type) { return 0; @@ -4696,13 +5101,13 @@ static int lfsck_layout_slave_post(const struct lu_env *env, int rc; bool done = false; + down_write(&com->lc_sem); rc = lfsck_layout_lastid_store(env, com); if (rc != 0) result = rc; LASSERT(lfsck->li_out_notify != NULL); - down_write(&com->lc_sem); spin_lock(&lfsck->li_lock); if (!init) 
lo->ll_pos_last_checkpoint = @@ -4757,77 +5162,62 @@ static int lfsck_layout_slave_post(const struct lu_env *env, return rc; } -static int lfsck_layout_dump(const struct lu_env *env, - struct lfsck_component *com, struct seq_file *m) +static void lfsck_layout_dump(const struct lu_env *env, + struct lfsck_component *com, struct seq_file *m) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_layout *lo = com->lc_file_ram; - int rc; down_read(&com->lc_sem); seq_printf(m, "name: lfsck_layout\n" - "magic: %#x\n" - "version: %d\n" - "status: %s\n", - lo->ll_magic, - bk->lb_version, - lfsck_status2names(lo->ll_status)); - - rc = lfsck_bits_dump(m, lo->ll_flags, lfsck_flags_names, "flags"); - if (rc < 0) - goto out; + "magic: %#x\n" + "version: %d\n" + "status: %s\n", + lo->ll_magic, + bk->lb_version, + lfsck_status2name(lo->ll_status)); - rc = lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param"); - if (rc < 0) - goto out; + lfsck_bits_dump(m, lo->ll_flags, lfsck_flags_names, "flags"); - rc = lfsck_time_dump(m, lo->ll_time_last_complete, - "time_since_last_completed"); - if (rc < 0) - goto out; + lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param"); - rc = lfsck_time_dump(m, lo->ll_time_latest_start, - "time_since_latest_start"); - if (rc < 0) - goto out; + lfsck_time_dump(m, lo->ll_time_last_complete, "last_completed"); - rc = lfsck_time_dump(m, lo->ll_time_last_checkpoint, - "time_since_last_checkpoint"); - if (rc < 0) - goto out; + lfsck_time_dump(m, lo->ll_time_latest_start, "latest_start"); - seq_printf(m, "latest_start_position: "LPU64"\n" - "last_checkpoint_position: "LPU64"\n" - "first_failure_position: "LPU64"\n", - lo->ll_pos_latest_start, - lo->ll_pos_last_checkpoint, - lo->ll_pos_first_inconsistent); + lfsck_time_dump(m, lo->ll_time_last_checkpoint, "last_checkpoint"); + + seq_printf(m, "latest_start_position: %llu\n" + "last_checkpoint_position: %llu\n" + "first_failure_position: 
%llu\n", + lo->ll_pos_latest_start, + lo->ll_pos_last_checkpoint, + lo->ll_pos_first_inconsistent); seq_printf(m, "success_count: %u\n" - "repaired_dangling: "LPU64"\n" - "repaired_unmatched_pair: "LPU64"\n" - "repaired_multiple_referenced: "LPU64"\n" - "repaired_orphan: "LPU64"\n" - "repaired_inconsistent_owner: "LPU64"\n" - "repaired_others: "LPU64"\n" - "skipped: "LPU64"\n" - "failed_phase1: "LPU64"\n" - "failed_phase2: "LPU64"\n", - lo->ll_success_count, - lo->ll_objs_repaired[LLIT_DANGLING - 1], - lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1], - lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1], - lo->ll_objs_repaired[LLIT_ORPHAN - 1], - lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1], - lo->ll_objs_repaired[LLIT_OTHERS - 1], - lo->ll_objs_skipped, - lo->ll_objs_failed_phase1, - lo->ll_objs_failed_phase2); + "repaired_dangling: %llu\n" + "repaired_unmatched_pair: %llu\n" + "repaired_multiple_referenced: %llu\n" + "repaired_orphan: %llu\n" + "repaired_inconsistent_owner: %llu\n" + "repaired_others: %llu\n" + "skipped: %llu\n" + "failed_phase1: %llu\n" + "failed_phase2: %llu\n", + lo->ll_success_count, + lo->ll_objs_repaired[LLIT_DANGLING - 1], + lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1], + lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1], + lo->ll_objs_repaired[LLIT_ORPHAN - 1], + lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1], + lo->ll_objs_repaired[LLIT_OTHERS - 1], + lo->ll_objs_skipped, + lo->ll_objs_failed_phase1, + lo->ll_objs_failed_phase2); if (lo->ll_status == LS_SCANNING_PHASE1) { __u64 pos; - const struct dt_it_ops *iops; cfs_duration_t duration = cfs_time_current() - lfsck->li_time_last_checkpoint; __u64 checked = lo->ll_objs_checked_phase1 + @@ -4842,37 +5232,40 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(new_checked, duration); if (rtime != 0) do_div(speed, rtime); - seq_printf(m, "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" - 
"average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: N/A\n" - "real-time_speed_phase1: "LPU64" items/sec\n" - "real-time_speed_phase2: N/A\n", - checked, - lo->ll_objs_checked_phase2, - rtime, - lo->ll_run_time_phase2, - speed, - new_checked); - - LASSERT(lfsck->li_di_oit != NULL); - - iops = &lfsck->li_obj_oit->do_index_ops->dio_it; - - /* The low layer otable-based iteration position may NOT - * exactly match the layout-based directory traversal - * cookie. Generally, it is not a serious issue. But the - * caller should NOT make assumption on that. */ - pos = iops->store(env, lfsck->li_di_oit); - if (!lfsck->li_current_oit_processed) - pos--; - seq_printf(m, "current_position: "LPU64"\n", pos); + seq_printf(m, "checked_phase1: %llu\n" + "checked_phase2: %llu\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: N/A\n" + "real-time_speed_phase1: %llu items/sec\n" + "real-time_speed_phase2: N/A\n", + checked, + lo->ll_objs_checked_phase2, + rtime, + lo->ll_run_time_phase2, + speed, + new_checked); + + if (likely(lfsck->li_di_oit)) { + const struct dt_it_ops *iops = + &lfsck->li_obj_oit->do_index_ops->dio_it; + + /* The low layer otable-based iteration position may NOT + * exactly match the layout-based directory traversal + * cookie. Generally, it is not a serious issue. But the + * caller should NOT make assumption on that. 
*/ + pos = iops->store(env, lfsck->li_di_oit); + if (!lfsck->li_current_oit_processed) + pos--; + } else { + pos = lo->ll_pos_last_checkpoint; + } + seq_printf(m, "current_position: %llu\n", pos); } else if (lo->ll_status == LS_SCANNING_PHASE2) { cfs_duration_t duration = cfs_time_current() - - lfsck->li_time_last_checkpoint; + com->lc_time_last_checkpoint; __u64 checked = lo->ll_objs_checked_phase2 + com->lc_new_checked; __u64 speed1 = lo->ll_objs_checked_phase1; @@ -4888,26 +5281,23 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(speed1, lo->ll_run_time_phase1); if (rtime != 0) do_div(speed2, rtime); - rc = seq_printf(m, "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" items/sec\n" - "real-time_speed_phase1: N/A\n" - "real-time_speed_phase2: "LPU64" items/sec\n" - "current_position: "DFID"\n", - lo->ll_objs_checked_phase1, - checked, - lo->ll_run_time_phase1, - rtime, - speed1, - speed2, - new_checked, - PFID(&com->lc_fid_latest_scanned_phase2)); - if (rc <= 0) - goto out; - + seq_printf(m, "checked_phase1: %llu\n" + "checked_phase2: %llu\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: %llu items/sec\n" + "real-time_speed_phase1: N/A\n" + "real-time_speed_phase2: %llu items/sec\n" + "current_position: "DFID"\n", + lo->ll_objs_checked_phase1, + checked, + lo->ll_run_time_phase1, + rtime, + speed1, + speed2, + new_checked, + PFID(&com->lc_fid_latest_scanned_phase2)); } else { __u64 speed1 = lo->ll_objs_checked_phase1; __u64 speed2 = lo->ll_objs_checked_phase2; @@ -4916,12 +5306,12 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(speed1, lo->ll_run_time_phase1); if (lo->ll_run_time_phase2 != 0) do_div(speed2, lo->ll_run_time_phase2); - seq_printf(m, "checked_phase1: "LPU64"\n" - 
"checked_phase2: "LPU64"\n" + seq_printf(m, "checked_phase1: %llu\n" + "checked_phase2: %llu\n" "run_time_phase1: %u seconds\n" "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" objs/sec\n" + "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: %llu objs/sec\n" "real-time_speed_phase1: N/A\n" "real-time_speed_phase2: N/A\n" "current_position: N/A\n", @@ -4932,10 +5322,8 @@ static int lfsck_layout_dump(const struct lu_env *env, speed1, speed2); } -out: - up_read(&com->lc_sem); - return rc; + up_read(&com->lc_sem); } static int lfsck_layout_master_double_scan(const struct lu_env *env, @@ -5165,12 +5553,14 @@ static void lfsck_layout_slave_quit(const struct lu_env *env, LASSERT(llsd != NULL); + down_write(&com->lc_sem); list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list, lls_list) { list_del_init(&lls->lls_list); lfsck_object_put(env, lls->lls_lastid_obj); OBD_FREE_PTR(lls); } + up_write(&com->lc_sem); spin_lock(&llsd->llsd_lock); while (!list_empty(&llsd->llsd_master_list)) { @@ -5224,7 +5614,7 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, else ltds = &lfsck->li_mdt_descs; spin_lock(&ltds->ltd_lock); - ltd = LTD_TGT(ltds, lr->lr_index); + ltd = lfsck_ltd2tgt(ltds, lr->lr_index); if (ltd == NULL) { spin_unlock(&ltds->ltd_lock); @@ -5401,12 +5791,85 @@ static int lfsck_layout_slave_in_notify(const struct lu_env *env, RETURN(0); } +static void lfsck_layout_repaired(struct lfsck_layout *lo, __u64 *count) +{ + int i; + + for (i = 0; i < LLIT_MAX; i++) + *count += lo->ll_objs_repaired[i]; +} + +static int lfsck_layout_query_all(const struct lu_env *env, + struct lfsck_component *com, + __u32 *mdts_count, __u32 *osts_count, + __u64 *repaired) +{ + struct lfsck_layout *lo = com->lc_file_ram; + struct lfsck_tgt_descs *ltds; + struct lfsck_tgt_desc *ltd; + int idx; + int rc; + ENTRY; + + rc = lfsck_query_all(env, com); + if (rc != 0) + RETURN(rc); + + ltds = 
&com->lc_lfsck->li_mdt_descs; + down_read(&ltds->ltd_rw_sem); + cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { + ltd = lfsck_ltd2tgt(ltds, idx); + LASSERT(ltd != NULL); + + mdts_count[ltd->ltd_layout_status]++; + *repaired += ltd->ltd_layout_repaired; + } + up_read(&ltds->ltd_rw_sem); + + ltds = &com->lc_lfsck->li_ost_descs; + down_read(&ltds->ltd_rw_sem); + cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { + ltd = lfsck_ltd2tgt(ltds, idx); + LASSERT(ltd != NULL); + + osts_count[ltd->ltd_layout_status]++; + *repaired += ltd->ltd_layout_repaired; + } + up_read(&ltds->ltd_rw_sem); + + down_read(&com->lc_sem); + mdts_count[lo->ll_status]++; + lfsck_layout_repaired(lo, repaired); + up_read(&com->lc_sem); + + RETURN(0); +} + static int lfsck_layout_query(const struct lu_env *env, - struct lfsck_component *com) + struct lfsck_component *com, + struct lfsck_request *req, + struct lfsck_reply *rep, + struct lfsck_query *que, int idx) { struct lfsck_layout *lo = com->lc_file_ram; + int rc = 0; + + if (que != NULL) { + LASSERT(com->lc_lfsck->li_master); + + rc = lfsck_layout_query_all(env, com, + que->lu_mdts_count[idx], + que->lu_osts_count[idx], + &que->lu_repaired[idx]); + } else { + down_read(&com->lc_sem); + rep->lr_status = lo->ll_status; + if (req->lr_flags & LEF_QUERY_ALL) + lfsck_layout_repaired(lo, &rep->lr_repaired); + up_read(&com->lc_sem); + } - return lo->ll_status; + return rc; } /* with lfsck::li_lock held */ @@ -5489,13 +5952,17 @@ static void lfsck_layout_assistant_fill_pos(const struct lu_env *env, struct lfsck_assistant_data *lad = com->lc_data; struct lfsck_layout_req *llr; + if (((struct lfsck_layout *)(com->lc_file_ram))->ll_status != + LS_SCANNING_PHASE1) + return; + if (list_empty(&lad->lad_req_list)) return; llr = list_entry(lad->lad_req_list.next, struct lfsck_layout_req, llr_lar.lar_list); - pos->lp_oit_cookie = llr->llr_parent->llo_cookie - 1; + pos->lp_oit_cookie = llr->llr_lar.lar_parent->lso_oit_cookie - 1; } struct lfsck_assistant_operations 
lfsck_layout_assistant_ops = { @@ -5513,6 +5980,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) struct lfsck_layout *lo; struct dt_object *root = NULL; struct dt_object *obj; + int i; int rc; ENTRY; @@ -5533,6 +6001,9 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) LFSCK_LAYOUT); if (com->lc_data == NULL) GOTO(out, rc = -ENOMEM); + + for (i = 0; i < LFSCK_STF_COUNT; i++) + mutex_init(&com->lc_sub_trace_objs[i].lsto_mutex); } else { struct lfsck_layout_slave_data *llsd; @@ -5576,6 +6047,10 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) rc = lfsck_layout_reset(env, com, true); else if (rc == -ENOENT) rc = lfsck_layout_init(env, com); + else if (lfsck->li_master) + rc = lfsck_load_sub_trace_files(env, com, + &dt_lfsck_layout_dangling_features, + LFSCK_LAYOUT, false); if (rc != 0) GOTO(out, rc); @@ -5729,8 +6204,7 @@ stop: static int lfsck_orphan_index_lookup(const struct lu_env *env, struct dt_object *dt, struct dt_rec *rec, - const struct dt_key *key, - struct lustre_capa *capa) + const struct dt_key *key) { return -EOPNOTSUPP; } @@ -5749,7 +6223,6 @@ static int lfsck_orphan_index_insert(const struct lu_env *env, const struct dt_rec *rec, const struct dt_key *key, struct thandle *handle, - struct lustre_capa *capa, int ignore_quota) { return -EOPNOTSUPP; @@ -5766,16 +6239,14 @@ static int lfsck_orphan_index_declare_delete(const struct lu_env *env, static int lfsck_orphan_index_delete(const struct lu_env *env, struct dt_object *dt, const struct dt_key *key, - struct thandle *handle, - struct lustre_capa *capa) + struct thandle *handle) { return -EOPNOTSUPP; } static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, struct dt_object *dt, - __u32 attr, - struct lustre_capa *capa) + __u32 attr) { struct dt_device *dev = lu2dt_dev(dt->do_lu.lo_dev); struct lfsck_instance *lfsck; @@ -6012,12 +6483,12 @@ again1: goto again1; } - rc = dt_attr_get(env, obj, la, 
BYPASS_CAPA); + rc = dt_attr_get(env, obj, la); if (rc != 0) GOTO(out, rc); rc = dt_xattr_get(env, obj, lfsck_buf_get(env, pfid, sizeof(*pfid)), - XATTR_NAME_FID, BYPASS_CAPA); + XATTR_NAME_FID); if (rc == -ENODATA) { /* For the pre-created OST-object, update the bitmap to avoid * others LFSCK (second phase) iteration to touch it again. */ @@ -6174,9 +6645,9 @@ static int lfsck_orphan_it_load(const struct lu_env *env, LASSERT(llst != NULL); if (hash != llst->llst_hash) { - CDEBUG(D_LFSCK, "%s: the given hash "LPU64" for orphan " + CDEBUG(D_LFSCK, "%s: the given hash %llu for orphan " "iteration does not match the one when fini " - LPU64", to be reset.\n", + "%llu, to be reset.\n", lfsck_lfsck2name(it->loi_com->lc_lfsck), hash, llst->llst_hash); fid_zero(&llst->llst_fid);