X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_layout.c;h=d1cff126ee584f4b6ce0b4e66b3f051c04ef2252;hp=033f1b71d6be4de0c0d2548b26d828ea21836b98;hb=445da16c2ac0475b1c1077c822800b68cdbb7ce3;hpb=2b294992edce5af7b79d4300ed3aa1ea6a8db850 diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 033f1b7..d1cff12 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2014, Intel Corporation. + * Copyright (c) 2014, 2015, Intel Corporation. */ /* * lustre/lfsck/lfsck_layout.c @@ -53,8 +53,6 @@ #define LFSCK_LAYOUT_MAGIC LFSCK_LAYOUT_MAGIC_V2 -static const char lfsck_layout_name[] = "lfsck_layout"; - struct lfsck_layout_seq { struct list_head lls_list; __u64 lls_seq; @@ -74,6 +72,8 @@ struct lfsck_layout_slave_target { __u64 llst_gen; atomic_t llst_ref; __u32 llst_index; + /* How many times we have failed to get the master status. */ + int llst_failures; }; struct lfsck_layout_slave_data { @@ -90,57 +90,12 @@ struct lfsck_layout_slave_data { unsigned int llsd_rbtree_valid:1; }; -struct lfsck_layout_object { - struct dt_object *llo_obj; - struct lu_attr llo_attr; - atomic_t llo_ref; - __u64 llo_cookie; - __u16 llo_gen; -}; - -struct lfsck_layout_req { - struct lfsck_assistant_req llr_lar; - struct lfsck_layout_object *llr_parent; - struct dt_object *llr_child; - __u32 llr_ost_idx; - __u32 llr_lov_idx; /* offset in LOV EA */ -}; - struct lfsck_layout_slave_async_args { struct obd_export *llsaa_exp; struct lfsck_component *llsaa_com; struct lfsck_layout_slave_target *llsaa_llst; }; -static struct lfsck_layout_object * -lfsck_layout_object_init(const struct lu_env *env, struct dt_object *obj, - __u64 cookie, __u16 gen) -{ - struct lfsck_layout_object *llo; - int rc; - - OBD_ALLOC_PTR(llo); - if (llo == NULL) - return ERR_PTR(-ENOMEM); - - rc = dt_attr_get(env, obj, &llo->llo_attr, BYPASS_CAPA); - if (rc != 0) { - 
OBD_FREE_PTR(llo); - - return ERR_PTR(rc); - } - - lu_object_get(&obj->do_lu); - llo->llo_obj = obj; - llo->llo_cookie = cookie; - /* The gen can be used to check whether some others have changed the - * file layout after LFSCK pre-fetching but before real verification. */ - llo->llo_gen = gen; - atomic_set(&llo->llo_ref, 1); - - return llo; -} - static inline void lfsck_layout_llst_put(struct lfsck_layout_slave_target *llst) { @@ -224,17 +179,8 @@ lfsck_layout_llst_find_and_del(struct lfsck_layout_slave_data *llsd, return NULL; } -static inline void lfsck_layout_object_put(const struct lu_env *env, - struct lfsck_layout_object *llo) -{ - if (atomic_dec_and_test(&llo->llo_ref)) { - lfsck_object_put(env, llo->llo_obj); - OBD_FREE_PTR(llo); - } -} - static struct lfsck_layout_req * -lfsck_layout_assistant_req_init(struct lfsck_layout_object *parent, +lfsck_layout_assistant_req_init(struct lfsck_assistant_object *lso, struct dt_object *child, __u32 ost_idx, __u32 lov_idx) { @@ -245,8 +191,7 @@ lfsck_layout_assistant_req_init(struct lfsck_layout_object *parent, return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&llr->llr_lar.lar_list); - atomic_inc(&parent->llo_ref); - llr->llr_parent = parent; + llr->llr_lar.lar_parent = lfsck_assistant_object_get(lso); llr->llr_child = child; llr->llr_ost_idx = ost_idx; llr->llr_lov_idx = lov_idx; @@ -260,8 +205,8 @@ static void lfsck_layout_assistant_req_fini(const struct lu_env *env, struct lfsck_layout_req *llr = container_of0(lar, struct lfsck_layout_req, llr_lar); - lu_object_put(env, &llr->llr_child->do_lu); - lfsck_layout_object_put(env, llr->llr_parent); + lfsck_object_put(env, llr->llr_child); + lfsck_assistant_object_put(env, lar->lar_parent); OBD_FREE_PTR(llr); } @@ -270,10 +215,13 @@ lfsck_layout_assistant_sync_failures_interpret(const struct lu_env *env, struct ptlrpc_request *req, void *args, int rc) { - struct lfsck_async_interpret_args *laia = args; + if (rc == 0) { + struct lfsck_async_interpret_args *laia = args; + struct 
lfsck_tgt_desc *ltd = laia->laia_ltd; - if (rc == 0) + ltd->ltd_synced_failures = 1; atomic_dec(laia->laia_count); + } return 0; } @@ -332,14 +280,10 @@ static void lfsck_layout_assistant_sync_failures(const struct lu_env *env, down_read(&ltds->ltd_rw_sem); cfs_foreach_bit(lad->lad_bitmap, idx) { - ltd = LTD_TGT(ltds, idx); + ltd = lfsck_ltd2tgt(ltds, idx); LASSERT(ltd != NULL); - spin_lock(&ltds->ltd_lock); - list_del_init(&ltd->ltd_layout_phase_list); - list_del_init(&ltd->ltd_layout_list); - spin_unlock(&ltds->ltd_lock); - + laia->laia_ltd = ltd; rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, lfsck_layout_assistant_sync_failures_interpret, laia, LFSCK_NOTIFY); @@ -382,10 +326,9 @@ static int lfsck_layout_get_lovea(const struct lu_env *env, int rc; again: - rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV, BYPASS_CAPA); + rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV); if (rc == -ERANGE) { - rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV, - BYPASS_CAPA); + rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV); if (rc <= 0) return rc; @@ -454,7 +397,7 @@ static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm) return 0; } -#define LFSCK_RBTREE_BITMAP_SIZE PAGE_CACHE_SIZE +#define LFSCK_RBTREE_BITMAP_SIZE PAGE_SIZE #define LFSCK_RBTREE_BITMAP_WIDTH (LFSCK_RBTREE_BITMAP_SIZE << 3) #define LFSCK_RBTREE_BITMAP_MASK (LFSCK_RBTREE_BITMAP_WIDTH - 1) @@ -615,7 +558,7 @@ static int lfsck_rbtree_setup(const struct lu_env *env, struct dt_object *obj; fid->f_seq = FID_SEQ_LAYOUT_RBTREE; - fid->f_oid = lfsck_dev_idx(dev); + fid->f_oid = lfsck_dev_idx(lfsck); fid->f_ver = 0; obj = dt_locate(env, dev, fid); if (IS_ERR(obj)) @@ -663,7 +606,7 @@ static void lfsck_rbtree_cleanup(const struct lu_env *env, } if (llsd->llsd_rb_obj != NULL) { - lu_object_put(env, &llsd->llsd_rb_obj->do_lu); + lfsck_object_put(env, llsd->llsd_rb_obj); llsd->llsd_rb_obj = NULL; } @@ -820,7 +763,7 @@ static int lfsck_layout_load_bitmap(const struct lu_env *env, struct dt_object *obj = 
com->lc_obj; struct lfsck_assistant_data *lad = com->lc_data; struct lfsck_layout *lo = com->lc_file_ram; - cfs_bitmap_t *bitmap = lad->lad_bitmap; + struct cfs_bitmap *bitmap = lad->lad_bitmap; loff_t pos = com->lc_file_size; ssize_t size; __u32 nbits; @@ -838,7 +781,7 @@ static int lfsck_layout_load_bitmap(const struct lu_env *env, if (nbits > bitmap->size) { __u32 new_bits = bitmap->size; - cfs_bitmap_t *new_bitmap; + struct cfs_bitmap *new_bitmap; while (new_bits < nbits) new_bits <<= 1; @@ -945,8 +888,8 @@ static int lfsck_layout_store(const struct lu_env *env, struct lfsck_layout *lo_ram = com->lc_file_ram; struct lfsck_layout *lo = com->lc_file_disk; struct thandle *th; - struct dt_device *dev = lfsck->li_bottom; - cfs_bitmap_t *bitmap = NULL; + struct dt_device *dev = lfsck_obj2dev(obj); + struct cfs_bitmap *bitmap = NULL; loff_t pos; ssize_t size = com->lc_file_size; __u32 nbits = 0; @@ -1027,10 +970,11 @@ static int lfsck_layout_init(const struct lu_env *env, return rc; } -static int fid_is_for_ostobj(const struct lu_env *env, struct dt_device *dt, +static int fid_is_for_ostobj(const struct lu_env *env, + struct lfsck_instance *lfsck, struct dt_object *obj, const struct lu_fid *fid) { - struct seq_server_site *ss = lu_site2seq(dt->dd_lu_dev.ld_site); + struct seq_server_site *ss = lfsck_dev_site(lfsck); struct lu_seq_range *range = &lfsck_env_info(env)->lti_range; struct lustre_mdt_attrs *lma; int rc; @@ -1046,14 +990,14 @@ static int fid_is_for_ostobj(const struct lu_env *env, struct dt_device *dt, lma = &lfsck_env_info(env)->lti_lma; rc = dt_xattr_get(env, obj, lfsck_buf_get(env, lma, sizeof(*lma)), - XATTR_NAME_LMA, BYPASS_CAPA); + XATTR_NAME_LMA); if (rc == sizeof(*lma)) { lustre_lma_swab(lma); return lma->lma_compat & LMAC_FID_ON_OST ? 
1 : 0; } - rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_FID, BYPASS_CAPA); + rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_FID); return rc > 0; } @@ -1099,7 +1043,7 @@ lfsck_layout_lastid_create(const struct lu_env *env, struct lu_attr *la = &info->lti_la; struct dt_object_format *dof = &info->lti_dof; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; - struct dt_device *dt = lfsck->li_bottom; + struct dt_device *dt = lfsck_obj2dev(obj); struct thandle *th; __u64 lastid = 0; loff_t pos = 0; @@ -1112,6 +1056,7 @@ lfsck_layout_lastid_create(const struct lu_env *env, memset(la, 0, sizeof(*la)); la->la_mode = S_IFREG | S_IRUGO | S_IWUSR; la->la_valid = LA_MODE | LA_UID | LA_GID; + memset(dof, 0, sizeof(*dof)); dof->dof_type = dt_mode_to_dft(S_IFREG); th = dt_trans_create(env, dt); @@ -1150,7 +1095,7 @@ stop: log: CDEBUG(D_LFSCK, "%s: layout LFSCK will create LAST_ID for " - LPX64": rc = %d\n", + "%#llx: rc = %d\n", lfsck_lfsck2name(lfsck), fid_seq(lfsck_dto2fid(obj)), rc); return rc; @@ -1187,8 +1132,8 @@ lfsck_layout_lastid_reload(const struct lu_env *env, lo->ll_flags |= LF_CRASHED_LASTID; CDEBUG(D_LFSCK, "%s: layout LFSCK finds crashed " - "LAST_ID file (1) for the sequence "LPX64 - ", old value "LPU64", known value "LPU64"\n", + "LAST_ID file (1) for the sequence %#llx" + ", old value %llu, known value %llu\n", lfsck_lfsck2name(lfsck), lls->lls_seq, lastid, lls->lls_lastid); } @@ -1221,7 +1166,7 @@ lfsck_layout_lastid_store(const struct lu_env *env, continue; CDEBUG(D_LFSCK, "%s: layout LFSCK will sync the LAST_ID for " - " "LPX64" as "LPU64"\n", + " %#llx as %llu\n", lfsck_lfsck2name(lfsck), lls->lls_seq, lls->lls_lastid); if (bk->lb_param & LPF_DRYRUN) { @@ -1233,7 +1178,7 @@ lfsck_layout_lastid_store(const struct lu_env *env, if (IS_ERR(th)) { rc1 = PTR_ERR(th); CDEBUG(D_LFSCK, "%s: layout LFSCK failed to store " - "the LAST_ID for "LPX64"(1): rc = %d\n", + "the LAST_ID for %#llx(1): rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, 
rc1); continue; @@ -1264,7 +1209,7 @@ stop: if (rc != 0) { rc1 = rc; CDEBUG(D_LFSCK, "%s: layout LFSCK failed to store " - "the LAST_ID for "LPX64"(2): rc = %d\n", + "the LAST_ID for %#llx(2): rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc1); } @@ -1286,7 +1231,7 @@ lfsck_layout_lastid_load(const struct lu_env *env, int rc; ENTRY; - lu_last_id_fid(fid, lls->lls_seq, lfsck_dev_idx(lfsck->li_bottom)); + lu_last_id_fid(fid, lls->lls_seq, lfsck_dev_idx(lfsck)); obj = dt_locate(env, lfsck->li_bottom, fid); if (IS_ERR(obj)) RETURN(PTR_ERR(obj)); @@ -1301,7 +1246,7 @@ lfsck_layout_lastid_load(const struct lu_env *env, lo->ll_flags |= LF_CRASHED_LASTID; CDEBUG(D_LFSCK, "%s: layout LFSCK cannot find the " - "LAST_ID file for sequence "LPX64"\n", + "LAST_ID file for sequence %#llx\n", lfsck_lfsck2name(lfsck), lls->lls_seq); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY4) && @@ -1310,11 +1255,19 @@ lfsck_layout_lastid_load(const struct lu_env *env, cfs_time_seconds(cfs_fail_val), NULL, NULL); - up_write(&com->lc_sem); - l_wait_event(lfsck->li_thread.t_ctl_waitq, - !thread_is_running(&lfsck->li_thread), - &lwi); - down_write(&com->lc_sem); + /* Some others may changed the cfs_fail_val + * as zero after above check, re-check it for + * sure to avoid falling into wait for ever. 
*/ + if (likely(lwi.lwi_timeout > 0)) { + struct ptlrpc_thread *thread = + &lfsck->li_thread; + + up_write(&com->lc_sem); + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread), + &lwi); + down_write(&com->lc_sem); + } } } @@ -1336,7 +1289,7 @@ lfsck_layout_lastid_load(const struct lu_env *env, lo->ll_flags |= LF_CRASHED_LASTID; CDEBUG(D_LFSCK, "%s: layout LFSCK finds invalid " - "LAST_ID file for the sequence "LPX64 + "LAST_ID file for the sequence %#llx" ": rc = %d\n", lfsck_lfsck2name(lfsck), lls->lls_seq, rc); } @@ -1370,7 +1323,7 @@ static void lfsck_layout_record_failure(const struct lu_env *env, lo->ll_pos_first_inconsistent = cookie; CDEBUG(D_LFSCK, "%s: layout LFSCK hit first non-repaired " - "inconsistency at the pos ["LPU64"]\n", + "inconsistency at the pos [%llu]\n", lfsck_lfsck2name(lfsck), lo->ll_pos_first_inconsistent); } @@ -1385,7 +1338,7 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env, down_write(&com->lc_sem); lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); + HALF_SEC - com->lc_time_last_checkpoint); lo->ll_time_last_checkpoint = cfs_time_current_sec(); lo->ll_objs_checked_phase2 += com->lc_new_checked; @@ -1429,14 +1382,16 @@ static int lfsck_layout_trans_stop(const struct lu_env *env, { int rc; + /* XXX: If there is something wrong or it needs to repair nothing, + * then notify the lower to stop the modification. Currently, + * we use th_result for such purpose, that may be replaced by + * some rollback mechanism in the future. */ handle->th_result = result; rc = dt_trans_stop(env, dev, handle); - if (rc > 0) - rc = 0; - else if (rc == 0) - rc = 1; + if (result != 0) + return result > 0 ? 0 : result; - return rc; + return rc == 0 ? 1 : rc; } /** @@ -1463,7 +1418,7 @@ static int lfsck_layout_get_def_stripesize(const struct lu_env *env, /* Get the default stripe size via xattr_get on the backend root. 
*/ rc = dt_xattr_get(env, root, lfsck_buf_get(env, lum, sizeof(*lum)), - XATTR_NAME_LOV, BYPASS_CAPA); + XATTR_NAME_LOV); if (rc > 0) { /* The lum->lmm_stripe_size is LE mode. The *size also * should be LE mode. So it is unnecessary to convert. */ @@ -1497,6 +1452,7 @@ static int lfsck_layout_refill_lovea(const struct lu_env *env, int rc; __u32 magic; __u16 count; + ENTRY; magic = le32_to_cpu(lmm->lmm_magic); count = le16_to_cpu(lmm->lmm_stripe_count); @@ -1526,12 +1482,11 @@ static int lfsck_layout_refill_lovea(const struct lu_env *env, } lfsck_buf_init(&ea_buf, lmm, lov_mds_md_size(count, magic)); - rc = dt_xattr_set(env, parent, &ea_buf, XATTR_NAME_LOV, fl, handle, - BYPASS_CAPA); + rc = dt_xattr_set(env, parent, &ea_buf, XATTR_NAME_LOV, fl, handle); if (rc == 0) rc = 1; - return rc; + RETURN(rc); } /** @@ -1618,6 +1573,46 @@ static int lfsck_layout_extend_lovea(const struct lu_env *env, RETURN(rc); } +static int __lfsck_layout_update_pfid(const struct lu_env *env, + struct dt_object *child, + const struct lu_fid *pfid, __u32 offset) +{ + struct dt_device *dev = lfsck_obj2dev(child); + struct filter_fid *ff = &lfsck_env_info(env)->lti_new_pfid; + struct thandle *handle; + struct lu_buf buf = { NULL }; + int rc; + + ff->ff_parent.f_seq = cpu_to_le64(pfid->f_seq); + ff->ff_parent.f_oid = cpu_to_le32(pfid->f_oid); + /* Currently, the filter_fid::ff_parent::f_ver is not the real parent + * MDT-object's FID::f_ver, instead it is the OST-object index in its + * parent MDT-object's layout EA. 
*/ + ff->ff_parent.f_stripe_idx = cpu_to_le32(offset); + lfsck_buf_init(&buf, ff, sizeof(struct filter_fid)); + + handle = dt_trans_create(env, dev); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + rc = dt_declare_xattr_set(env, child, &buf, XATTR_NAME_FID, 0, handle); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, handle); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_xattr_set(env, child, &buf, XATTR_NAME_FID, 0, handle); + + GOTO(stop, rc); + +stop: + dt_trans_stop(env, dev, handle); + + return rc; +} + /** * \retval +1: repaired * \retval 0: did nothing @@ -1629,11 +1624,7 @@ static int lfsck_layout_update_pfid(const struct lu_env *env, struct lu_fid *cfid, struct dt_device *cdev, __u32 ea_off) { - struct filter_fid *pfid = &lfsck_env_info(env)->lti_new_pfid; struct dt_object *child; - struct thandle *handle; - const struct lu_fid *tfid = lu_object_fid(&parent->do_lu); - struct lu_buf *buf; int rc = 0; ENTRY; @@ -1641,38 +1632,11 @@ static int lfsck_layout_update_pfid(const struct lu_env *env, if (IS_ERR(child)) RETURN(PTR_ERR(child)); - handle = dt_trans_create(env, cdev); - if (IS_ERR(handle)) - GOTO(out, rc = PTR_ERR(handle)); - - pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq); - pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid); - /* Currently, the filter_fid::ff_parent::f_ver is not the real parent - * MDT-object's FID::f_ver, instead it is the OST-object index in its - * parent MDT-object's layout EA. */ - pfid->ff_parent.f_stripe_idx = cpu_to_le32(ea_off); - buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid)); - - rc = dt_declare_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle); - if (rc != 0) - GOTO(stop, rc); - - rc = dt_trans_start(env, cdev, handle); - if (rc != 0) - GOTO(stop, rc); - - rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle, - BYPASS_CAPA); - - GOTO(stop, rc = (rc == 0 ? 
1 : rc)); - -stop: - dt_trans_stop(env, cdev, handle); - -out: - lu_object_put(env, &child->do_lu); + rc = __lfsck_layout_update_pfid(env, child, + lu_object_fid(&parent->do_lu), ea_off); + lfsck_object_put(env, child); - return rc; + RETURN(rc == 0 ? 1 : rc); } /** @@ -1742,20 +1706,20 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct dt_insert_rec *dtrec = &info->lti_dt_rec; char *name = info->lti_key; - struct lu_attr *la = &info->lti_la; + struct lu_attr *la = &info->lti_la2; struct dt_object_format *dof = &info->lti_dof; struct lfsck_instance *lfsck = com->lc_lfsck; struct lu_fid *pfid = &rec->lor_fid; struct lu_fid *tfid = &info->lti_fid3; - struct dt_device *next = lfsck->li_next; + struct dt_device *dev = lfsck->li_bottom; + struct dt_object *lpf = lfsck->li_lpf_obj; struct dt_object *pobj = NULL; struct dt_object *cobj = NULL; struct thandle *th = NULL; - struct lu_buf pbuf = { 0 }; struct lu_buf *ea_buf = &info->lti_big_buf; struct lu_buf lov_buf; - struct lustre_handle lh = { 0 }; - struct linkea_data ldata = { 0 }; + struct lfsck_lock_handle *llh = &info->lti_llh; + struct linkea_data ldata = { NULL }; struct lu_buf linkea_buf; const struct lu_name *pname; int size = 0; @@ -1763,58 +1727,37 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, int rc = 0; ENTRY; - /* Create .lustre/lost+found/MDTxxxx when needed. */ - if (unlikely(lfsck->li_lpf_obj == NULL)) { - rc = lfsck_create_lpf(env, lfsck); - if (rc != 0) - GOTO(log, rc); - } + if (unlikely(lpf == NULL)) + GOTO(log, rc = -ENXIO); - if (fid_is_zero(pfid)) { - struct filter_fid *ff = &info->lti_new_pfid; + /* We use two separated transactions to repair the inconsistency. + * + * 1) create the MDT-object locally. + * 2) update the OST-object's PFID EA if necessary. + * + * If 1) succeed, but 2) failed, then the OST-object's PFID EA will be + * updated when the layout LFSCK run next time. 
+ * + * If 1) failed, but 2) succeed, then such MDT-object will be re-created + * when the layout LFSCK run next time. */ + if (fid_is_zero(pfid)) { rc = lfsck_fid_alloc(env, lfsck, pfid, false); if (rc != 0) - RETURN(rc); + GOTO(log, rc); - ff->ff_parent.f_seq = cpu_to_le64(pfid->f_seq); - ff->ff_parent.f_oid = cpu_to_le32(pfid->f_oid); - /* Currently, the filter_fid::ff_parent::f_ver is not the - * real parent MDT-object's FID::f_ver, instead it is the - * OST-object index in its parent MDT-object's layout EA. */ - ff->ff_parent.f_stripe_idx = cpu_to_le32(ea_off); - lfsck_buf_init(&pbuf, ff, sizeof(struct filter_fid)); cobj = lfsck_object_find_by_dev(env, ltd->ltd_tgt, cfid); if (IS_ERR(cobj)) GOTO(log, rc = PTR_ERR(cobj)); } - pobj = lfsck_object_find_by_dev(env, lfsck->li_bottom, pfid); + pobj = lfsck_object_find_by_dev(env, dev, pfid); if (IS_ERR(pobj)) - GOTO(put, rc = PTR_ERR(pobj)); + GOTO(log, rc = PTR_ERR(pobj)); LASSERT(infix != NULL); LASSERT(type != NULL); - do { - snprintf(name, NAME_MAX, DFID"%s-%s-%d", PFID(pfid), infix, - type, idx++); - rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, - (const struct dt_key *)name, BYPASS_CAPA); - if (rc != 0 && rc != -ENOENT) - GOTO(put, rc); - } while (rc == 0); - - rc = linkea_data_new(&ldata, - &lfsck_env_info(env)->lti_linkea_buf); - if (rc != 0) - GOTO(put, rc); - - pname = lfsck_name_get_const(env, name, strlen(name)); - rc = linkea_add_buf(&ldata, pname, lfsck_dto2fid(lfsck->li_lpf_obj)); - if (rc != 0) - GOTO(put, rc); - memset(la, 0, sizeof(*la)); la->la_uid = rec->lor_uid; la->la_gid = rec->lor_gid; @@ -1823,62 +1766,77 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, memset(dof, 0, sizeof(*dof)); dof->dof_type = dt_mode_to_dft(S_IFREG); + /* Because the dof->dof_reg.striped = 0, the LOD will not create + * the stripe(s). The LFSCK will specify the LOV EA via + * lfsck_layout_extend_lovea(). 
*/ size = lov_mds_md_size(ea_off + 1, LOV_MAGIC_V1); if (ea_buf->lb_len < size) { lu_buf_realloc(ea_buf, size); if (ea_buf->lb_buf == NULL) - GOTO(put, rc = -ENOMEM); + GOTO(log, rc = -ENOMEM); } - /* Hold update lock on the .lustre/lost+found/MDTxxxx/. - * - * XXX: Currently, we do not grab the PDO lock as normal create cases, - * because creating MDT-object for orphan OST-object is rare, we - * do not much care about the performance. It can be improved in - * the future when needed. */ - rc = lfsck_ibits_lock(env, lfsck, lfsck->li_lpf_obj, &lh, - MDS_INODELOCK_UPDATE, LCK_EX); +again: + do { + snprintf(name, NAME_MAX, DFID"%s-%s-%d", PFID(pfid), infix, + type, idx++); + rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, + (const struct dt_key *)name); + if (rc != 0 && rc != -ENOENT) + GOTO(log, rc); + } while (rc == 0); + + rc = lfsck_lock(env, lfsck, lfsck->li_lpf_obj, name, llh, + MDS_INODELOCK_UPDATE, LCK_PW); if (rc != 0) - GOTO(put, rc); + GOTO(log, rc); + + /* Re-check whether the name conflicts with others after taking + * the ldlm lock. */ + rc = dt_lookup(env, lfsck->li_lpf_obj, (struct dt_rec *)tfid, + (const struct dt_key *)name); + if (unlikely(rc == 0)) { + lfsck_unlock(llh); + goto again; + } + + if (rc != -ENOENT) + GOTO(unlock, rc); + + rc = linkea_data_new(&ldata, + &lfsck_env_info(env)->lti_linkea_buf); + if (rc != 0) + GOTO(unlock, rc); + + pname = lfsck_name_get_const(env, name, strlen(name)); + rc = linkea_add_buf(&ldata, pname, lfsck_dto2fid(lfsck->li_lpf_obj)); + if (rc != 0) + GOTO(unlock, rc); - th = dt_trans_create(env, next); + /* The 1st transaction. */ + th = dt_trans_create(env, dev); if (IS_ERR(th)) GOTO(unlock, rc = PTR_ERR(th)); - /* 1a. Update OST-object's parent information remotely. - * - * If other subsequent modifications failed, then next LFSCK scanning - * will process the OST-object as orphan again with known parent FID. 
*/ - if (cobj != NULL) { - rc = dt_declare_xattr_set(env, cobj, &pbuf, XATTR_NAME_FID, - 0, th); - if (rc != 0) - GOTO(stop, rc); - } - - /* 2a. Create the MDT-object locally. */ rc = dt_declare_create(env, pobj, la, NULL, dof, th); if (rc != 0) GOTO(stop, rc); - /* 3a. Add layout EA for the MDT-object. */ lfsck_buf_init(&lov_buf, ea_buf->lb_buf, size); rc = dt_declare_xattr_set(env, pobj, &lov_buf, XATTR_NAME_LOV, - LU_XATTR_CREATE, th); + LU_XATTR_REPLACE, th); if (rc != 0) GOTO(stop, rc); - /* 4a. Insert the MDT-object to .lustre/lost+found/MDTxxxx/ */ dtrec->rec_fid = pfid; dtrec->rec_type = S_IFREG; - rc = dt_declare_insert(env, lfsck->li_lpf_obj, + rc = dt_declare_insert(env, lpf, (const struct dt_rec *)dtrec, (const struct dt_key *)name, th); if (rc != 0) GOTO(stop, rc); - /* 5a. insert linkEA for parent. */ lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf, ldata.ld_leh->leh_len); rc = dt_declare_xattr_set(env, pobj, &linkea_buf, @@ -1886,55 +1844,48 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); - rc = dt_trans_start(env, next, th); + rc = dt_trans_start_local(env, dev, th); if (rc != 0) GOTO(stop, rc); - /* 1b. Update OST-object's parent information remotely. */ - if (cobj != NULL) { - rc = dt_xattr_set(env, cobj, &pbuf, XATTR_NAME_FID, 0, th, - BYPASS_CAPA); - if (rc != 0) - GOTO(stop, rc); - } - dt_write_lock(env, pobj, 0); - /* 2b. Create the MDT-object locally. */ rc = dt_create(env, pobj, la, NULL, dof, th); if (rc == 0) - /* 3b. Add layout EA for the MDT-object. */ rc = lfsck_layout_extend_lovea(env, lfsck, th, pobj, cfid, - &lov_buf, LU_XATTR_CREATE, - ltd->ltd_index, ea_off, false); + &lov_buf, 0, ltd->ltd_index, ea_off, true); dt_write_unlock(env, pobj); if (rc < 0) GOTO(stop, rc); - /* 4b. 
Insert the MDT-object to .lustre/lost+found/MDTxxxx/ */ - rc = dt_insert(env, lfsck->li_lpf_obj, (const struct dt_rec *)dtrec, - (const struct dt_key *)name, th, BYPASS_CAPA, 1); + rc = dt_insert(env, lpf, (const struct dt_rec *)dtrec, + (const struct dt_key *)name, th, 1); if (rc != 0) GOTO(stop, rc); - /* 5b. insert linkEA for parent. */ - rc = dt_xattr_set(env, pobj, &linkea_buf, - XATTR_NAME_LINK, 0, th, BYPASS_CAPA); + rc = dt_xattr_set(env, pobj, &linkea_buf, XATTR_NAME_LINK, 0, th); + if (rc == 0 && cobj != NULL) { + dt_trans_stop(env, dev, th); + th = NULL; + + /* The 2nd transaction. */ + rc = __lfsck_layout_update_pfid(env, cobj, pfid, ea_off); + } GOTO(stop, rc); stop: - dt_trans_stop(env, next, th); + if (th != NULL) + dt_trans_stop(env, dev, th); unlock: - lfsck_ibits_unlock(&lh, LCK_EX); + lfsck_unlock(llh); -put: +log: if (cobj != NULL && !IS_ERR(cobj)) - lu_object_put(env, &cobj->do_lu); + lfsck_object_put(env, cobj); if (pobj != NULL && !IS_ERR(pobj)) - lu_object_put(env, &pobj->do_lu); + lfsck_object_put(env, pobj); -log: if (rc < 0) CDEBUG(D_LFSCK, "%s layout LFSCK assistant failed to " "recreate the lost MDT-object: parent "DFID @@ -2006,7 +1957,7 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, { struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_attr *la = &info->lti_la; - ldlm_policy_data_t *policy = &info->lti_policy; + union ldlm_policy_data *policy = &info->lti_policy; struct ldlm_res_id *resid = &info->lti_resid; struct lfsck_instance *lfsck = com->lc_lfsck; struct dt_device *dev = lfsck->li_bottom; @@ -2031,7 +1982,7 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, } /* Get obj's attr without lock firstly. 
*/ - rc = dt_attr_get(env, obj, la, BYPASS_CAPA); + rc = dt_attr_get(env, obj, la); dt_read_unlock(env, obj); if (rc != 0) GOTO(put, rc); @@ -2054,7 +2005,7 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env, dt_write_lock(env, obj, 0); /* Get obj's attr within lock again. */ - rc = dt_attr_get(env, obj, la, BYPASS_CAPA); + rc = dt_attr_get(env, obj, la); if (rc != 0) GOTO(unlock, rc); @@ -2099,7 +2050,7 @@ unlock: ldlm_lock_decref(&lh, LCK_EX); put: - lu_object_put(env, &obj->do_lu); + lfsck_object_put(env, obj); return rc; } @@ -2131,7 +2082,7 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, struct lu_fid *cfid2 = &info->lti_fid2; struct ost_id *oi = &info->lti_oi; struct lov_mds_md_v1 *lmm = ea_buf->lb_buf; - struct dt_device *dev = com->lc_lfsck->li_bottom; + struct dt_device *dev = lfsck_obj2dev(parent); struct thandle *th = NULL; struct lustre_handle lh = { 0 }; __u32 ost_idx2 = le32_to_cpu(slot->l_ost_idx); @@ -2143,7 +2094,6 @@ static int lfsck_layout_conflict_create(const struct lu_env *env, if (rc != 0) GOTO(out, rc); - /* Hold layout lock on the parent to prevent others to access. 
*/ rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, LCK_EX); @@ -2229,7 +2179,7 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env, struct lu_fid *fid = &info->lti_fid2; struct ost_id *oi = &info->lti_oi; struct lfsck_instance *lfsck = com->lc_lfsck; - struct dt_device *dt = lfsck->li_bottom; + struct dt_device *dt = lfsck_obj2dev(parent); struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct thandle *handle = NULL; size_t lovea_size; @@ -2296,10 +2246,9 @@ again: dt_write_lock(env, parent, 0); locked = true; - rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV, BYPASS_CAPA); + rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV); if (rc == -ERANGE) { - rc = dt_xattr_get(env, parent, &LU_BUF_NULL, XATTR_NAME_LOV, - BYPASS_CAPA); + rc = dt_xattr_get(env, parent, &LU_BUF_NULL, XATTR_NAME_LOV); LASSERT(rc != 0); goto again; } else if (rc == -ENODATA || rc == 0) { @@ -2523,7 +2472,7 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, GOTO(put, rc = -EXDEV); if (dt_object_exists(parent) == 0) { - lu_object_put(env, &parent->do_lu); + lfsck_object_put(env, parent); rc = lfsck_layout_recreate_parent(env, com, ltd, rec, cfid, "", "R", ea_off); GOTO(out, rc); @@ -2539,7 +2488,7 @@ static int lfsck_layout_scan_orphan_one(const struct lu_env *env, put: if (rc <= 0) - lu_object_put(env, &parent->do_lu); + lfsck_object_put(env, parent); else /* The layout EA is changed, need to be reloaded next time. 
*/ lu_object_put_nocache(env, &parent->do_lu); @@ -2602,7 +2551,7 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, GOTO(put, rc); iops = &obj->do_index_ops->dio_it; - di = iops->init(env, obj, 0, BYPASS_CAPA); + di = iops->init(env, obj, 0); if (IS_ERR(di)) GOTO(put, rc = PTR_ERR(di)); @@ -2629,17 +2578,9 @@ static int lfsck_layout_scan_orphan(const struct lu_env *env, struct dt_key *key; struct lu_orphan_rec *rec = &info->lti_rec; - if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) && - cfs_fail_val > 0) { - struct ptlrpc_thread *thread = &lfsck->li_thread; - struct l_wait_info lwi; - - lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), - NULL, NULL); - l_wait_event(thread->t_ctl_waitq, - !thread_is_running(thread), - &lwi); - } + if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val) && + unlikely(!thread_is_running(&lfsck->li_thread))) + break; key = iops->key(env, di); com->lc_fid_latest_scanned_phase2 = *(struct lu_fid *)key; @@ -2662,7 +2603,7 @@ fini: iops->put(env, di); iops->fini(env, di); put: - lu_object_put(env, &obj->do_lu); + lfsck_object_put(env, obj); log: CDEBUG(D_LFSCK, "%s: layout LFSCK assistant finished the orphan " @@ -2683,16 +2624,15 @@ log: * 2) Re-create the missing OST-object with the FID/owner information. 
*/ static int lfsck_layout_repair_dangling(const struct lu_env *env, struct lfsck_component *com, + struct dt_object *parent, struct lfsck_layout_req *llr, - const struct lu_attr *pla) + struct lu_attr *la) { struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid *pfid = &info->lti_new_pfid; - struct dt_allocation_hint *hint = &info->lti_hint; - struct lu_attr *cla = &info->lti_la2; - struct dt_object *parent = llr->llr_parent->llo_obj; + struct dt_object_format *dof = &info->lti_dof; struct dt_object *child = llr->llr_child; - struct dt_device *dev = lfsck_obj2dt_dev(child); + struct dt_device *dev = lfsck_obj2dev(child); const struct lu_fid *tfid = lu_object_fid(&parent->do_lu); struct thandle *handle; struct lu_buf *buf; @@ -2709,25 +2649,21 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, if (!create) GOTO(log, rc = 1); - memset(cla, 0, sizeof(*cla)); - cla->la_uid = pla->la_uid; - cla->la_gid = pla->la_gid; - cla->la_mode = S_IFREG | 0666; - cla->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID | - LA_ATIME | LA_MTIME | LA_CTIME; - rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, LCK_EX); if (rc != 0) GOTO(log, rc); - handle = dt_trans_create(env, dev); - if (IS_ERR(handle)) - GOTO(unlock1, rc = PTR_ERR(handle)); + rc = dt_attr_get(env, parent, la); + if (rc != 0) + GOTO(unlock1, rc); - hint->dah_parent = NULL; - hint->dah_mode = 0; + la->la_mode = S_IFREG | 0666; + la->la_atime = la->la_mtime = la->la_ctime = 0; + la->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID | + LA_ATIME | LA_MTIME | LA_CTIME; + memset(dof, 0, sizeof(*dof)); pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq); pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid); /* Currently, the filter_fid::ff_parent::f_ver is not the real parent @@ -2736,7 +2672,11 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, pfid->ff_parent.f_stripe_idx = cpu_to_le32(llr->llr_lov_idx); buf = lfsck_buf_get(env, pfid, 
sizeof(struct filter_fid)); - rc = dt_declare_create(env, child, cla, hint, NULL, handle); + handle = dt_trans_create(env, dev); + if (IS_ERR(handle)) + GOTO(unlock1, rc = PTR_ERR(handle)); + + rc = dt_declare_create(env, child, la, NULL, dof, handle); if (rc != 0) GOTO(stop, rc); @@ -2745,7 +2685,7 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); - rc = dt_trans_start(env, dev, handle); + rc = dt_trans_start_local(env, dev, handle); if (rc != 0) GOTO(stop, rc); @@ -2753,12 +2693,12 @@ static int lfsck_layout_repair_dangling(const struct lu_env *env, if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock2, rc = 1); - rc = dt_create(env, child, cla, hint, NULL, handle); + rc = dt_create(env, child, la, NULL, dof, handle); if (rc != 0) GOTO(unlock2, rc); rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, LU_XATTR_CREATE, - handle, BYPASS_CAPA); + handle); GOTO(unlock2, rc); @@ -2772,14 +2712,16 @@ unlock1: lfsck_ibits_unlock(&lh, LCK_EX); log: - CDEBUG(D_LFSCK, "%s: layout LFSCK assistant found dangling " - "reference for: parent "DFID", child "DFID", OST-index %u, " - "stripe-index %u, owner %u/%u. %s: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), - PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, - llr->llr_lov_idx, pla->la_uid, pla->la_gid, - create ? "Create the lost OST-object as required" : - "Keep the MDT-object there by default", rc); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant found " + "dangling reference for: parent "DFID", child "DFID + ", OST-index %u, stripe-index %u, owner %u/%u. %s: " + "rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), + PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), + llr->llr_ost_idx, llr->llr_lov_idx, + la->la_uid, la->la_gid, + create ? "Create the lost OST-object as required" : + "Keep the MDT-object there by default", rc); return rc; } @@ -2789,15 +2731,14 @@ log: * given MDT-object as its parent. So update the OST-object filter_fid. 
*/ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, struct lfsck_component *com, + struct dt_object *parent, struct lfsck_layout_req *llr, - const struct lu_attr *pla) + struct lu_attr *la) { struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid *pfid = &info->lti_new_pfid; - struct lu_attr *tla = &info->lti_la3; - struct dt_object *parent = llr->llr_parent->llo_obj; struct dt_object *child = llr->llr_child; - struct dt_device *dev = lfsck_obj2dt_dev(child); + struct dt_device *dev = lfsck_obj2dev(child); const struct lu_fid *tfid = lu_object_fid(&parent->do_lu); struct thandle *handle; struct lu_buf *buf; @@ -2811,10 +2752,6 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, if (rc != 0) GOTO(log, rc); - handle = dt_trans_create(env, dev); - if (IS_ERR(handle)) - GOTO(unlock1, rc = PTR_ERR(handle)); - pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq); pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid); /* Currently, the filter_fid::ff_parent::f_ver is not the real parent @@ -2823,18 +2760,24 @@ static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env, pfid->ff_parent.f_stripe_idx = cpu_to_le32(llr->llr_lov_idx); buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid)); + handle = dt_trans_create(env, dev); + if (IS_ERR(handle)) + GOTO(unlock1, rc = PTR_ERR(handle)); + rc = dt_declare_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle); if (rc != 0) GOTO(stop, rc); - tla->la_valid = LA_UID | LA_GID; - tla->la_uid = pla->la_uid; - tla->la_gid = pla->la_gid; - rc = dt_declare_attr_set(env, child, tla, handle); + rc = dt_attr_get(env, parent, la); + if (rc != 0) + GOTO(stop, rc); + + la->la_valid = LA_UID | LA_GID; + rc = dt_declare_attr_set(env, child, la, handle); if (rc != 0) GOTO(stop, rc); - rc = dt_trans_start(env, dev, handle); + rc = dt_trans_start_local(env, dev, handle); if (rc != 0) GOTO(stop, rc); @@ -2842,18 +2785,17 @@ static int lfsck_layout_repair_unmatched_pair(const struct 
lu_env *env, if (unlikely(lfsck_is_dead_obj(parent))) GOTO(unlock2, rc = 1); - rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle, - BYPASS_CAPA); + rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle); if (rc != 0) GOTO(unlock2, rc); /* Get the latest parent's owner. */ - rc = dt_attr_get(env, parent, tla, BYPASS_CAPA); + rc = dt_attr_get(env, parent, la); if (rc != 0) GOTO(unlock2, rc); - tla->la_valid = LA_UID | LA_GID; - rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA); + la->la_valid = LA_UID | LA_GID; + rc = dt_attr_set(env, child, la, handle); GOTO(unlock2, rc); @@ -2867,12 +2809,16 @@ unlock1: lfsck_ibits_unlock(&lh, LCK_EX); log: - CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired unmatched " - "MDT-OST pair for: parent "DFID", child "DFID", OST-index %u, " - "stripe-index %u, owner %u/%u: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), - PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, llr->llr_lov_idx, - pla->la_uid, pla->la_gid, rc); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired " + "unmatched MDT-OST pair for: parent "DFID + ", child "DFID", OST-index %u, stripe-index %u, " + "owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), + PFID(lfsck_dto2fid(parent)), + PFID(lfsck_dto2fid(child)), + llr->llr_ost_idx, llr->llr_lov_idx, + la->la_uid, la->la_gid, rc); return rc; } @@ -2882,6 +2828,7 @@ log: * new OST-object(s) with new fid(s) for the non-recognized MDT-object(s). 
*/ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, struct lfsck_component *com, + struct dt_object *parent, struct lfsck_layout_req *llr, struct lu_attr *la, struct lu_buf *buf) @@ -2889,119 +2836,173 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env, struct lfsck_thread_info *info = lfsck_env_info(env); struct dt_allocation_hint *hint = &info->lti_hint; struct dt_object_format *dof = &info->lti_dof; - struct dt_device *pdev = com->lc_lfsck->li_next; struct ost_id *oi = &info->lti_oi; - struct dt_object *parent = llr->llr_parent->llo_obj; - struct dt_device *cdev = lfsck_obj2dt_dev(llr->llr_child); + struct lfsck_instance *lfsck = com->lc_lfsck; + struct dt_device *dev; + struct lu_device *d = + &lfsck_obj2dev(llr->llr_child)->dd_lu_dev; + struct lu_object *o; + struct lu_object *n; struct dt_object *child = NULL; - struct lu_device *d = &cdev->dd_lu_dev; - struct lu_object *o = NULL; - struct thandle *handle; + struct thandle *handle = NULL; struct lov_mds_md_v1 *lmm; struct lov_ost_data_v1 *objs; + const struct lu_fid *pfid = lfsck_dto2fid(parent); + struct lu_fid tfid; struct lustre_handle lh = { 0 }; struct lu_buf ea_buf; __u32 magic; + __u32 index; int rc; ENTRY; - rc = lfsck_ibits_lock(env, com->lc_lfsck, parent, &lh, - MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, - LCK_EX); - if (rc != 0) - GOTO(log, rc); - - handle = dt_trans_create(env, pdev); - if (IS_ERR(handle)) - GOTO(unlock1, rc = PTR_ERR(handle)); + /* We use two separated transactions to repair the inconsistency. + * + * 1) create the child (OST-object). + * 2) update the parent LOV EA according to the child's FID. + * + * If 1) succeed, but 2) failed or aborted, then such OST-object will be + * handled as orphan when the layout LFSCK run next time. + * + * If 1) failed, but 2) succeed, then such OST-object will be re-created + * as dangling referened case when the layout LFSCK run next time. */ + /* The 1st transaction. 
*/ o = lu_object_anon(env, d, NULL); if (IS_ERR(o)) - GOTO(stop, rc = PTR_ERR(o)); + GOTO(log, rc = PTR_ERR(o)); - child = container_of(o, struct dt_object, do_lu); - o = lu_object_locate(o->lo_header, d->ld_type); - if (unlikely(o == NULL)) - GOTO(stop, rc = -EINVAL); + n = lu_object_locate(o->lo_header, d->ld_type); + if (unlikely(n == NULL)) { + lu_object_put_nocache(env, o); + + GOTO(log, rc = -EINVAL); + } + + child = container_of(n, struct dt_object, do_lu); + memset(hint, 0, sizeof(*hint)); + rc = dt_attr_get(env, parent, la); + if (rc != 0) + GOTO(log, rc); - child = container_of(o, struct dt_object, do_lu); la->la_valid = LA_UID | LA_GID; - hint->dah_parent = NULL; - hint->dah_mode = 0; - dof->dof_type = DFT_REGULAR; - rc = dt_declare_create(env, child, la, NULL, NULL, handle); + memset(dof, 0, sizeof(*dof)); + + dev = lfsck_obj2dev(child); + handle = dt_trans_create(env, dev); + if (IS_ERR(handle)) + GOTO(log, rc = PTR_ERR(handle)); + + rc = dt_declare_create(env, child, la, hint, dof, handle); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, handle); if (rc != 0) GOTO(stop, rc); + rc = dt_create(env, child, la, hint, dof, handle); + dt_trans_stop(env, dev, handle); + handle = NULL; + if (rc != 0) + GOTO(log, rc); + + rc = lfsck_ibits_lock(env, lfsck, parent, &lh, + MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR, + LCK_EX); + if (rc != 0) + GOTO(log, rc); + + /* The 2nd transaction. */ + + /* XXX: Generally, we should use bottom device (OSD) to update parent + * LOV EA. But because the LOD-object still references the wrong + * OSP-object that should be detached after the parent's LOV EA + * refreshed. Unfortunately, there is no suitable API for that. + * So we have to make the LOD to re-load the OSP-object(s) via + * replacing the LOV EA against the LOD-object. + * + * Once the DNE2 patches have been landed, we can replace the + * LOD device with the OSD device. LU-6230. 
*/ + + dev = lfsck->li_next; + parent = lfsck_object_locate(dev, parent); + if (IS_ERR(parent)) + GOTO(log, rc = PTR_ERR(parent)); + + handle = dt_trans_create(env, dev); + if (IS_ERR(handle)) + GOTO(log, rc = PTR_ERR(handle)); + rc = dt_declare_xattr_set(env, parent, buf, XATTR_NAME_LOV, LU_XATTR_REPLACE, handle); if (rc != 0) GOTO(stop, rc); - rc = dt_trans_start(env, pdev, handle); + rc = dt_trans_start_local(env, dev, handle); if (rc != 0) GOTO(stop, rc); dt_write_lock(env, parent, 0); if (unlikely(lfsck_is_dead_obj(parent))) - GOTO(unlock2, rc = 0); + GOTO(unlock, rc = 0); - rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV, BYPASS_CAPA); + rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV); if (unlikely(rc == 0 || rc == -ENODATA || rc == -ERANGE)) - GOTO(unlock2, rc = 0); + GOTO(unlock, rc = 0); lmm = buf->lb_buf; - /* Someone change layout during the LFSCK, no need to repair then. */ - if (le16_to_cpu(lmm->lmm_layout_gen) != llr->llr_parent->llo_gen) - GOTO(unlock2, rc = 0); - - rc = dt_create(env, child, la, hint, dof, handle); - if (rc != 0) - GOTO(unlock2, rc); - /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has * been verified in lfsck_layout_verify_header() already. If some * new magic introduced in the future, then layout LFSCK needs to * be updated also. */ magic = le32_to_cpu(lmm->lmm_magic); if (magic == LOV_MAGIC_V1) { - objs = &lmm->lmm_objects[0]; + objs = &lmm->lmm_objects[llr->llr_lov_idx]; } else { LASSERT(magic == LOV_MAGIC_V3); - objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0]; + objs = + &((struct lov_mds_md_v3 *)lmm)->lmm_objects[llr->llr_lov_idx]; } - lmm->lmm_layout_gen = cpu_to_le16(llr->llr_parent->llo_gen + 1); + ostid_le_to_cpu(&objs->l_ost_oi, oi); + index = le32_to_cpu(objs->l_ost_idx); + rc = ostid_to_fid(&tfid, oi, index); + /* Someone changed layout during the LFSCK, no need to repair then. 
*/ + if (rc == 0 && !lu_fid_eq(&tfid, lu_object_fid(&llr->llr_child->do_lu))) + GOTO(unlock, rc = 0); + + lmm->lmm_layout_gen = cpu_to_le16(le16_to_cpu(lmm->lmm_layout_gen) + 1); fid_to_ostid(lu_object_fid(&child->do_lu), oi); - ostid_cpu_to_le(oi, &objs[llr->llr_lov_idx].l_ost_oi); - objs[llr->llr_lov_idx].l_ost_gen = cpu_to_le32(0); - objs[llr->llr_lov_idx].l_ost_idx = cpu_to_le32(llr->llr_ost_idx); + ostid_cpu_to_le(oi, &objs->l_ost_oi); + objs->l_ost_gen = cpu_to_le32(0); + objs->l_ost_idx = cpu_to_le32(llr->llr_ost_idx); lfsck_buf_init(&ea_buf, lmm, lov_mds_md_size(le16_to_cpu(lmm->lmm_stripe_count), magic)); rc = dt_xattr_set(env, parent, &ea_buf, XATTR_NAME_LOV, - LU_XATTR_REPLACE, handle, BYPASS_CAPA); + LU_XATTR_REPLACE, handle); - GOTO(unlock2, rc = (rc == 0 ? 1 : rc)); + GOTO(unlock, rc = (rc == 0 ? 1 : rc)); -unlock2: +unlock: dt_write_unlock(env, parent); stop: - if (child != NULL) - lu_object_put(env, &child->do_lu); - - dt_trans_stop(env, pdev, handle); + if (handle != NULL) + dt_trans_stop(env, dev, handle); -unlock1: +log: lfsck_ibits_unlock(&lh, LCK_EX); + if (child != NULL) + lfsck_object_put(env, child); -log: - CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired multiple " - "references for: parent "DFID", OST-index %u, stripe-index %u, " - "owner %u/%u: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), - llr->llr_ost_idx, llr->llr_lov_idx, la->la_uid, la->la_gid, rc); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired " + "multiple references for: parent "DFID", OST-index %u, " + "stripe-index %u, owner %u/%u: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(pfid), llr->llr_ost_idx, + llr->llr_lov_idx, la->la_uid, la->la_gid, rc); return rc; } @@ -3012,30 +3013,31 @@ log: * is partly done. 
*/ static int lfsck_layout_repair_owner(const struct lu_env *env, struct lfsck_component *com, + struct dt_object *parent, struct lfsck_layout_req *llr, - struct lu_attr *pla) + struct lu_attr *pla, + const struct lu_attr *cla) { struct lfsck_thread_info *info = lfsck_env_info(env); - struct lu_attr *tla = &info->lti_la3; - struct dt_object *parent = llr->llr_parent->llo_obj; + struct lu_attr *tla = &info->lti_la2; struct dt_object *child = llr->llr_child; - struct dt_device *dev = lfsck_obj2dt_dev(child); + struct dt_device *dev = lfsck_obj2dev(child); struct thandle *handle; int rc; ENTRY; + tla->la_uid = pla->la_uid; + tla->la_gid = pla->la_gid; + tla->la_valid = LA_UID | LA_GID; handle = dt_trans_create(env, dev); if (IS_ERR(handle)) GOTO(log, rc = PTR_ERR(handle)); - tla->la_uid = pla->la_uid; - tla->la_gid = pla->la_gid; - tla->la_valid = LA_UID | LA_GID; rc = dt_declare_attr_set(env, child, tla, handle); if (rc != 0) GOTO(stop, rc); - rc = dt_trans_start(env, dev, handle); + rc = dt_trans_start_local(env, dev, handle); if (rc != 0) GOTO(stop, rc); @@ -3045,17 +3047,16 @@ static int lfsck_layout_repair_owner(const struct lu_env *env, GOTO(unlock, rc = 1); /* Get the latest parent's owner. */ - rc = dt_attr_get(env, parent, tla, BYPASS_CAPA); + rc = dt_attr_get(env, parent, pla); if (rc != 0) GOTO(unlock, rc); /* Some others chown/chgrp during the LFSCK, needs to do nothing. 
*/ if (unlikely(tla->la_uid != pla->la_uid || tla->la_gid != pla->la_gid)) - GOTO(unlock, rc = 1); - - tla->la_valid = LA_UID | LA_GID; - rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA); + rc = 1; + else + rc = dt_attr_set(env, child, tla, handle); GOTO(unlock, rc); @@ -3066,12 +3067,15 @@ stop: rc = lfsck_layout_trans_stop(env, dev, handle, rc); log: - CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired inconsistent " - "file owner for: parent "DFID", child "DFID", OST-index %u, " - "stripe-index %u, owner %u/%u: rc = %d\n", - lfsck_lfsck2name(com->lc_lfsck), PFID(lfsck_dto2fid(parent)), - PFID(lfsck_dto2fid(child)), llr->llr_ost_idx, llr->llr_lov_idx, - pla->la_uid, pla->la_gid, rc); + if (rc != 0) + CDEBUG(D_LFSCK, "%s: layout LFSCK assistant repaired " + "inconsistent file owner for: parent "DFID", child "DFID + ", OST-index %u, stripe-index %u, old owner %u/%u, " + "new owner %u/%u: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), + PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)), + llr->llr_ost_idx, llr->llr_lov_idx, + cla->la_uid, cla->la_gid, tla->la_uid, tla->la_gid, rc); return rc; } @@ -3080,10 +3084,9 @@ log: * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). */ static int lfsck_layout_check_parent(const struct lu_env *env, struct lfsck_component *com, - struct dt_object *parent, + struct lfsck_assistant_object *lso, const struct lu_fid *pfid, const struct lu_fid *cfid, - const struct lu_attr *pla, const struct lu_attr *cla, struct lfsck_layout_req *llr, struct lu_buf *lov_ea, __u32 idx) @@ -3093,45 +3096,29 @@ static int lfsck_layout_check_parent(const struct lu_env *env, struct dt_object *tobj; struct lov_mds_md_v1 *lmm; struct lov_ost_data_v1 *objs; + struct lustre_handle lh = { 0 }; int rc; int i; __u32 magic; __u16 count; ENTRY; - if (fid_is_zero(pfid)) { - /* client never wrote. 
*/ - if (cla->la_size == 0 && cla->la_blocks == 0) { - if (unlikely(cla->la_uid != pla->la_uid || - cla->la_gid != pla->la_gid)) - RETURN (LLIT_INCONSISTENT_OWNER); - - RETURN(0); - } - - RETURN(LLIT_UNMATCHED_PAIR); - } - if (unlikely(!fid_is_sane(pfid))) RETURN(LLIT_UNMATCHED_PAIR); - if (lu_fid_eq(pfid, lu_object_fid(&parent->do_lu))) { - if (llr->llr_lov_idx == idx) + if (lu_fid_eq(pfid, &lso->lso_fid)) { + if (likely(llr->llr_lov_idx == idx)) RETURN(0); RETURN(LLIT_UNMATCHED_PAIR); } - tobj = lfsck_object_find(env, com->lc_lfsck, pfid); + tobj = lfsck_object_find_bottom(env, com->lc_lfsck, pfid); if (IS_ERR(tobj)) RETURN(PTR_ERR(tobj)); - dt_read_lock(env, tobj, 0); - if (dt_object_exists(tobj) == 0 || - lfsck_is_dead_obj(tobj)) - GOTO(out, rc = LLIT_UNMATCHED_PAIR); - - if (!S_ISREG(lfsck_object_type(tobj))) + if (dt_object_exists(tobj) == 0 || lfsck_is_dead_obj(tobj) || + !S_ISREG(lfsck_object_type(tobj))) GOTO(out, rc = LLIT_UNMATCHED_PAIR); /* Load the tobj's layout EA, in spite of it is a local MDT-object or @@ -3176,16 +3163,60 @@ static int lfsck_layout_check_parent(const struct lu_env *env, } if (lu_fid_eq(cfid, tfid)) { - *lov_ea = *buf; + rc = lfsck_ibits_lock(env, com->lc_lfsck, tobj, &lh, + MDS_INODELOCK_UPDATE | + MDS_INODELOCK_LAYOUT | + MDS_INODELOCK_XATTR, + LCK_EX); + if (rc != 0) + GOTO(out, rc); + + dt_read_lock(env, tobj, 0); + + /* For local MDT-object, re-check existence + * after taken the lock. */ + if (!dt_object_remote(tobj)) { + if (dt_object_exists(tobj) == 0 || + lfsck_is_dead_obj(tobj)) { + rc = LLIT_UNMATCHED_PAIR; + } else { + *lov_ea = *buf; + rc = LLIT_MULTIPLE_REFERENCED; + } - GOTO(out, rc = LLIT_MULTIPLE_REFERENCED); + GOTO(unlock, rc); + } + + /* For migration case, the new MDT-object and old + * MDT-object may reference the same OST-object at + * some migration internal time. + * + * For remote MDT-object, the local MDT may not know + * whether it has been removed or not. 
Try checking + * for a non-existent xattr to check if this object + * has been been removed or not. */ + rc = dt_xattr_get(env, tobj, &LU_BUF_NULL, + XATTR_NAME_DUMMY); + if (unlikely(rc == -ENOENT || rc >= 0)) { + rc = LLIT_UNMATCHED_PAIR; + } else if (rc == -ENODATA) { + *lov_ea = *buf; + rc = LLIT_MULTIPLE_REFERENCED; + } + + GOTO(unlock, rc); } } GOTO(out, rc = LLIT_UNMATCHED_PAIR); +unlock: + if (lustre_handle_is_used(&lh)) { + dt_read_unlock(env, tobj); + lfsck_ibits_unlock(&lh, LCK_EX); + } + out: - dt_read_unlock(env, tobj); lfsck_object_put(env, tobj); return rc; @@ -3197,15 +3228,16 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, { struct lfsck_layout_req *llr = container_of0(lar, struct lfsck_layout_req, llr_lar); + struct lfsck_assistant_object *lso = lar->lar_parent; struct lfsck_layout *lo = com->lc_file_ram; struct lfsck_thread_info *info = lfsck_env_info(env); struct filter_fid_old *pea = &info->lti_old_pfid; struct lu_fid *pfid = &info->lti_fid; - struct lu_buf buf = { 0 }; - struct dt_object *parent = llr->llr_parent->llo_obj; + struct lu_buf buf = { NULL }; + struct dt_object *parent = NULL; struct dt_object *child = llr->llr_child; - struct lu_attr *pla = &info->lti_la; - struct lu_attr *cla = &info->lti_la2; + struct lu_attr *pla = &lso->lso_attr; + struct lu_attr *cla = &info->lti_la; struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; enum lfsck_layout_inconsistency_type type = LLIT_NONE; @@ -3213,17 +3245,19 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, int rc; ENTRY; - if (unlikely(lfsck_is_dead_obj(parent))) + if (lso->lso_dead) RETURN(0); - rc = dt_attr_get(env, parent, pla, BYPASS_CAPA); - if (rc != 0) - GOTO(out, rc); + CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ASSISTANT_DIRECT, cfs_fail_val); - rc = dt_attr_get(env, child, cla, BYPASS_CAPA); + rc = dt_attr_get(env, child, cla); if (rc == -ENOENT) { - if (unlikely(lfsck_is_dead_obj(parent))) - 
RETURN(0); + parent = lfsck_assistant_object_load(env, lfsck, lso); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + + RETURN(rc == -ENOENT ? 0 : rc); + } type = LLIT_DANGLING; goto repair; @@ -3233,8 +3267,8 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, GOTO(out, rc); lfsck_buf_init(&buf, pea, sizeof(struct filter_fid_old)); - rc = dt_xattr_get(env, child, &buf, XATTR_NAME_FID, BYPASS_CAPA); - if (unlikely(rc >= 0 && rc != sizeof(struct filter_fid_old) && + rc = dt_xattr_get(env, child, &buf, XATTR_NAME_FID); + if (unlikely(rc > 0 && rc != sizeof(struct filter_fid_old) && rc != sizeof(struct filter_fid))) { type = LLIT_UNMATCHED_PAIR; goto repair; @@ -3243,20 +3277,18 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, if (rc < 0 && rc != -ENODATA) GOTO(out, rc); - if (rc == -ENODATA) { - fid_zero(pfid); - } else { - fid_le_to_cpu(pfid, &pea->ff_parent); - /* Currently, the filter_fid::ff_parent::f_ver is not the - * real parent MDT-object's FID::f_ver, instead it is the - * OST-object index in its parent MDT-object's layout EA. */ - idx = pfid->f_stripe_idx; - pfid->f_ver = 0; - } + if (rc == 0 || rc == -ENODATA) + GOTO(check_owner, rc = 0); - rc = lfsck_layout_check_parent(env, com, parent, pfid, + fid_le_to_cpu(pfid, &pea->ff_parent); + /* Currently, the filter_fid::ff_parent::f_ver is not the + * real parent MDT-object's FID::f_ver, instead it is the + * OST-object index in its parent MDT-object's layout EA. */ + idx = pfid->f_stripe_idx; + pfid->f_ver = 0; + rc = lfsck_layout_check_parent(env, com, lso, pfid, lu_object_fid(&child->do_lu), - pla, cla, llr, &buf, idx); + cla, llr, &buf, idx); if (rc > 0) { type = rc; goto repair; @@ -3265,6 +3297,9 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, if (rc < 0) GOTO(out, rc); +check_owner: + /* Someone may has changed the owner after the parent attr pre-loaded. + * It can be handled later inside the lfsck_layout_repair_owner(). 
*/ if (unlikely(cla->la_uid != pla->la_uid || cla->la_gid != pla->la_gid)) { type = LLIT_INCONSISTENT_OWNER; @@ -3272,26 +3307,38 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env, } repair: - if (bk->lb_param & LPF_DRYRUN) { - if (type != LLIT_NONE) - GOTO(out, rc = 1); - else - GOTO(out, rc = 0); + if (type == LLIT_NONE) + GOTO(out, rc = 0); + + if (bk->lb_param & LPF_DRYRUN) + GOTO(out, rc = 1); + + if (parent == NULL) { + parent = lfsck_assistant_object_load(env, lfsck, lso); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + + if (rc == -ENOENT) + RETURN(0); + + GOTO(out, rc); + } } switch (type) { case LLIT_DANGLING: - rc = lfsck_layout_repair_dangling(env, com, llr, pla); + rc = lfsck_layout_repair_dangling(env, com, parent, llr, pla); break; case LLIT_UNMATCHED_PAIR: - rc = lfsck_layout_repair_unmatched_pair(env, com, llr, pla); + rc = lfsck_layout_repair_unmatched_pair(env, com, parent, + llr, pla); break; case LLIT_MULTIPLE_REFERENCED: - rc = lfsck_layout_repair_multiple_references(env, com, llr, - pla, &buf); + rc = lfsck_layout_repair_multiple_references(env, com, parent, + llr, pla, &buf); break; case LLIT_INCONSISTENT_OWNER: - rc = lfsck_layout_repair_owner(env, com, llr, pla); + rc = lfsck_layout_repair_owner(env, com, parent, llr, pla, cla); break; default: rc = 0; @@ -3334,6 +3381,9 @@ out: } up_write(&com->lc_sem); + if (parent != NULL && !IS_ERR(parent)) + lfsck_object_put(env, parent); + return rc; } @@ -3396,11 +3446,22 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, bool done = false; if (rc != 0) { - /* It is quite probably caused by target crash, - * to make the LFSCK can go ahead, assume that - * the target finished the LFSCK prcoessing. */ - done = true; + /* It is probably caused by network trouble, or target crash, + * it will try several times (depends on the obd_timeout, and + * will not less than 3 times). But to make the LFSCK can go + * ahead, we should not try for ever. 
After some try but still + * hit failure, it will assume that the target exit the LFSCK + * prcoessing and stop try. */ + if (rc == -ENOTCONN || rc == -ESHUTDOWN) { + int max_try = max_t(int, obd_timeout / 30, 3); + + if (++(llst->llst_failures) > max_try) + done = true; + } else { + done = true; + } } else { + llst->llst_failures = 0; lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY); if (lr->lr_status != LS_SCANNING_PHASE1 && lr->lr_status != LS_SCANNING_PHASE2) @@ -3409,8 +3470,9 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, if (done) { CDEBUG(D_LFSCK, "%s: layout LFSCK slave gets the MDT %x " - "status %d\n", lfsck_lfsck2name(com->lc_lfsck), - llst->llst_index, lr != NULL ? lr->lr_status : rc); + "status %d, failures_try %d\n", lfsck_lfsck2name(com->lc_lfsck), + llst->llst_index, lr != NULL ? lr->lr_status : rc, + llst->llst_failures); lfsck_layout_llst_del(llsd, llst); } @@ -3454,6 +3516,7 @@ static int lfsck_layout_async_query(const struct lu_env *env, llsaa->llsaa_com = lfsck_component_get(com); llsaa->llsaa_llst = llst; req->rq_interpret_reply = lfsck_layout_slave_async_interpret; + req->rq_allow_intr = 1; ptlrpc_set_add_req(set, req); RETURN(0); @@ -3482,6 +3545,7 @@ static int lfsck_layout_async_notify(const struct lu_env *env, tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); *tmp = *lr; ptlrpc_request_set_replen(req); + req->rq_allow_intr = 1; ptlrpc_set_add_req(set, req); RETURN(0); @@ -3506,7 +3570,6 @@ lfsck_layout_slave_query_master(const struct lu_env *env, GOTO(log, rc = -ENOMEM); memset(lr, 0, sizeof(*lr)); - lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_QUERY; lr->lr_active = LFSCK_TYPE_LAYOUT; @@ -3587,7 +3650,7 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, lr->lr_event = event; lr->lr_flags = LEF_FROM_OST; lr->lr_status = result; - lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_index = lfsck_dev_idx(lfsck); lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_flags2 
= lo->ll_flags; llsd->llsd_touch_gen++; @@ -3657,7 +3720,7 @@ static int lfsck_layout_master_check_pairs(const struct lu_env *env, ENTRY; pfid->f_ver = 0; - obj = lfsck_object_find_by_dev(env, com->lc_lfsck->li_bottom, pfid); + obj = lfsck_object_find_bottom(env, com->lc_lfsck, pfid); if (IS_ERR(obj)) RETURN(PTR_ERR(obj)); @@ -3707,7 +3770,7 @@ static int lfsck_layout_master_check_pairs(const struct lu_env *env, unlock: dt_read_unlock(env, obj); - lu_object_put(env, &obj->do_lu); + lfsck_object_put(env, obj); return rc; } @@ -3730,8 +3793,7 @@ static int lfsck_layout_slave_check_pairs(const struct lu_env *env, { struct lfsck_instance *lfsck = com->lc_lfsck; struct obd_device *obd = lfsck->li_obd; - struct seq_server_site *ss = - lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site); + struct seq_server_site *ss = lfsck_dev_site(lfsck); struct obd_export *exp = NULL; struct ptlrpc_request *req = NULL; struct lfsck_request *lr; @@ -3795,48 +3857,27 @@ static int lfsck_layout_slave_repair_pfid(const struct lu_env *env, struct lfsck_component *com, struct lfsck_request *lr) { - struct lfsck_thread_info *info = lfsck_env_info(env); - struct filter_fid *ff = &info->lti_new_pfid; - struct lu_buf *buf; - struct dt_device *dev = com->lc_lfsck->li_bottom; - struct dt_object *obj; - struct thandle *th = NULL; - int rc = 0; + struct dt_object *obj; + int rc = 0; ENTRY; - obj = lfsck_object_find_by_dev(env, dev, &lr->lr_fid); + obj = lfsck_object_find_bottom(env, com->lc_lfsck, &lr->lr_fid); if (IS_ERR(obj)) GOTO(log, rc = PTR_ERR(obj)); - fid_cpu_to_le(&ff->ff_parent, &lr->lr_fid2); - buf = lfsck_buf_get(env, ff, sizeof(*ff)); dt_write_lock(env, obj, 0); if (unlikely(dt_object_exists(obj) == 0 || lfsck_is_dead_obj(obj))) GOTO(unlock, rc = 0); - th = dt_trans_create(env, dev); - if (IS_ERR(th)) - GOTO(unlock, rc = PTR_ERR(th)); - - rc = dt_declare_xattr_set(env, obj, buf, XATTR_NAME_FID, 0, th); - if (rc != 0) - GOTO(stop, rc); + rc = __lfsck_layout_update_pfid(env, obj, 
&lr->lr_fid2, + lr->lr_fid2.f_ver); - rc = dt_trans_start_local(env, dev, th); - if (rc != 0) - GOTO(stop, rc); - - rc = dt_xattr_set(env, obj, buf, XATTR_NAME_FID, 0, th, BYPASS_CAPA); - - GOTO(stop, rc); - -stop: - dt_trans_stop(env, dev, th); + GOTO(unlock, rc); unlock: dt_write_unlock(env, obj); - lu_object_put(env, &obj->do_lu); + lfsck_object_put(env, obj); log: CDEBUG(D_LFSCK, "%s: layout LFSCK slave repaired pfid for "DFID @@ -3931,8 +3972,8 @@ static int lfsck_layout_master_checkpoint(const struct lu_env *env, up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: layout LFSCK master checkpoint at the pos [" - LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), - lfsck->li_pos_current.lp_oit_cookie, rc); + "%llu], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc); return rc; } @@ -3965,8 +4006,8 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env, up_write(&com->lc_sem); CDEBUG(D_LFSCK, "%s: layout LFSCK slave checkpoint at the pos [" - LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck), - lfsck->li_pos_current.lp_oit_cookie, rc); + "%llu], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck), + lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc); return rc; } @@ -4074,7 +4115,7 @@ static int lfsck_layout_slave_prep(const struct lu_env *env, } CDEBUG(D_LFSCK, "%s: layout LFSCK slave prep done, start pos [" - LPU64"]\n", lfsck_lfsck2name(lfsck), + "%llu]\n", lfsck_lfsck2name(lfsck), com->lc_pos_start.lp_oit_cookie); return rc; @@ -4108,7 +4149,7 @@ static int lfsck_layout_master_prep(const struct lu_env *env, log: CDEBUG(D_LFSCK, "%s: layout LFSCK master prep done, start pos [" - LPU64"]\n", lfsck_lfsck2name(com->lc_lfsck), + "%llu]\n", lfsck_lfsck2name(com->lc_lfsck), com->lc_pos_start.lp_oit_cookie); return 0; @@ -4125,7 +4166,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_layout *lo = com->lc_file_ram; struct 
lfsck_assistant_data *lad = com->lc_data; - struct lfsck_layout_object *llo = NULL; + struct lfsck_assistant_object *lso = NULL; struct lov_ost_data_v1 *objs; struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs; struct ptlrpc_thread *mthread = &lfsck->li_thread; @@ -4136,13 +4177,11 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, int i; __u32 magic; __u16 count; - __u16 gen; ENTRY; lfsck_buf_init(&buf, &info->lti_old_pfid, sizeof(struct filter_fid_old)); count = le16_to_cpu(lmm->lmm_stripe_count); - gen = le16_to_cpu(lmm->lmm_layout_gen); /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has * been verified in lfsck_layout_verify_header() already. If some * new magic introduced in the future, then layout LFSCK needs to @@ -4168,7 +4207,6 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, continue; l_wait_event(mthread->t_ctl_waitq, - bk->lb_async_windows == 0 || lad->lad_prefetched < bk->lb_async_windows || !thread_is_running(mthread) || thread_is_stopped(athread), @@ -4242,25 +4280,36 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env, goto next; } - rc = dt_declare_attr_get(env, cobj, BYPASS_CAPA); - if (rc != 0) - goto next; + if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_ASSISTANT_DIRECT)) { + rc = dt_declare_attr_get(env, cobj); + if (rc != 0) + goto next; - rc = dt_declare_xattr_get(env, cobj, &buf, XATTR_NAME_FID, - BYPASS_CAPA); - if (rc != 0) - goto next; + rc = dt_declare_xattr_get(env, cobj, &buf, + XATTR_NAME_FID); + if (rc != 0) + goto next; + } + + if (lso == NULL) { + struct lu_attr *attr = &info->lti_la; + + rc = dt_attr_get(env, parent, attr); + if (rc != 0) + goto next; + + lso = lfsck_assistant_object_init(env, + lfsck_dto2fid(parent), attr, + lfsck->li_pos_current.lp_oit_cookie, false); + if (IS_ERR(lso)) { + rc = PTR_ERR(lso); + lso = NULL; - if (llo == NULL) { - llo = lfsck_layout_object_init(env, parent, - lfsck->li_pos_current.lp_oit_cookie, gen); - if (IS_ERR(llo)) { - rc = PTR_ERR(llo); goto 
next; } } - llr = lfsck_layout_assistant_req_init(llo, cobj, index, i); + llr = lfsck_layout_assistant_req_init(lso, cobj, index, i); if (IS_ERR(llr)) { rc = PTR_ERR(llr); goto next; @@ -4292,7 +4341,7 @@ next: up_write(&com->lc_sem); if (cobj != NULL && !IS_ERR(cobj)) - lu_object_put(env, &cobj->do_lu); + lfsck_object_put(env, cobj); if (likely(tgt != NULL)) lfsck_tgt_put(tgt); @@ -4304,8 +4353,8 @@ next: GOTO(out, rc = 0); out: - if (llo != NULL && !IS_ERR(llo)) - lfsck_layout_object_put(env, llo); + if (lso != NULL) + lfsck_assistant_object_put(env, lso); return rc; } @@ -4331,9 +4380,9 @@ static int lfsck_layout_master_exec_oit(const struct lu_env *env, struct thandle *handle = NULL; struct lu_buf *buf = &info->lti_big_buf; struct lov_mds_md_v1 *lmm = NULL; - struct dt_device *dev = lfsck->li_bottom; + struct dt_device *dev = lfsck_obj2dev(obj); struct lustre_handle lh = { 0 }; - struct lu_buf ea_buf = { 0 }; + struct lu_buf ea_buf = { NULL }; int rc = 0; int size = 0; bool locked = false; @@ -4412,7 +4461,7 @@ again: } rc = dt_xattr_set(env, obj, &ea_buf, XATTR_NAME_LOV, - LU_XATTR_REPLACE, handle, BYPASS_CAPA); + LU_XATTR_REPLACE, handle); if (rc != 0) GOTO(out, rc); @@ -4469,7 +4518,7 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, LASSERT(llsd != NULL); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY5) && - cfs_fail_val == lfsck_dev_idx(lfsck->li_bottom)) { + cfs_fail_val == lfsck_dev_idx(lfsck)) { struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(1), NULL, NULL); struct ptlrpc_thread *thread = &lfsck->li_thread; @@ -4485,7 +4534,7 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, if (fid_is_idif(fid)) seq = 0; else if (!fid_is_norm(fid) || - !fid_is_for_ostobj(env, lfsck->li_next, obj, fid)) + !fid_is_for_ostobj(env, lfsck, obj, fid)) GOTO(unlock, rc = 0); else seq = fid_seq(fid); @@ -4502,7 +4551,7 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, rc = lfsck_layout_lastid_load(env, com, lls); if (rc != 
0) { CDEBUG(D_LFSCK, "%s: layout LFSCK failed to " - "load LAST_ID for "LPX64": rc = %d\n", + "load LAST_ID for %#llx: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), seq, rc); lo->ll_objs_failed_phase1++; OBD_FREE_PTR(lls); @@ -4529,7 +4578,7 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, rc = lfsck_layout_lastid_reload(env, com, lls); if (unlikely(rc != 0)) { CDEBUG(D_LFSCK, "%s: layout LFSCK failed to " - "reload LAST_ID for "LPX64": rc = %d\n", + "reload LAST_ID for %#llx: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lls->lls_seq, rc); @@ -4547,8 +4596,8 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env, lo->ll_flags |= LF_CRASHED_LASTID; CDEBUG(D_LFSCK, "%s: layout LFSCK finds crashed " - "LAST_ID file (2) for the sequence "LPX64 - ", old value "LPU64", known value "LPU64"\n", + "LAST_ID file (2) for the sequence %#llx" + ", old value %llu, known value %llu\n", lfsck_lfsck2name(lfsck), lls->lls_seq, lls->lls_lastid, oid); } @@ -4567,6 +4616,7 @@ unlock: static int lfsck_layout_exec_dir(const struct lu_env *env, struct lfsck_component *com, + struct lfsck_assistant_object *lso, struct lu_dirent *ent, __u16 type) { return 0; @@ -4636,13 +4686,13 @@ static int lfsck_layout_slave_post(const struct lu_env *env, int rc; bool done = false; + down_write(&com->lc_sem); rc = lfsck_layout_lastid_store(env, com); if (rc != 0) result = rc; LASSERT(lfsck->li_out_notify != NULL); - down_write(&com->lc_sem); spin_lock(&lfsck->li_lock); if (!init) lo->ll_pos_last_checkpoint = @@ -4697,73 +4747,59 @@ static int lfsck_layout_slave_post(const struct lu_env *env, return rc; } -static int lfsck_layout_dump(const struct lu_env *env, - struct lfsck_component *com, struct seq_file *m) +static void lfsck_layout_dump(const struct lu_env *env, + struct lfsck_component *com, struct seq_file *m) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_layout *lo = com->lc_file_ram; - int rc; 
down_read(&com->lc_sem); seq_printf(m, "name: lfsck_layout\n" - "magic: %#x\n" - "version: %d\n" - "status: %s\n", - lo->ll_magic, - bk->lb_version, - lfsck_status2names(lo->ll_status)); - - rc = lfsck_bits_dump(m, lo->ll_flags, lfsck_flags_names, "flags"); - if (rc < 0) - goto out; + "magic: %#x\n" + "version: %d\n" + "status: %s\n", + lo->ll_magic, + bk->lb_version, + lfsck_status2name(lo->ll_status)); - rc = lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param"); - if (rc < 0) - goto out; + lfsck_bits_dump(m, lo->ll_flags, lfsck_flags_names, "flags"); - rc = lfsck_time_dump(m, lo->ll_time_last_complete, - "time_since_last_completed"); - if (rc < 0) - goto out; + lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param"); - rc = lfsck_time_dump(m, lo->ll_time_latest_start, - "time_since_latest_start"); - if (rc < 0) - goto out; + lfsck_time_dump(m, lo->ll_time_last_complete, "last_completed"); - rc = lfsck_time_dump(m, lo->ll_time_last_checkpoint, - "time_since_last_checkpoint"); - if (rc < 0) - goto out; + lfsck_time_dump(m, lo->ll_time_latest_start, "latest_start"); + + lfsck_time_dump(m, lo->ll_time_last_checkpoint, "last_checkpoint"); - seq_printf(m, "latest_start_position: "LPU64"\n" - "last_checkpoint_position: "LPU64"\n" - "first_failure_position: "LPU64"\n", - lo->ll_pos_latest_start, - lo->ll_pos_last_checkpoint, - lo->ll_pos_first_inconsistent); + seq_printf(m, "latest_start_position: %llu\n" + "last_checkpoint_position: %llu\n" + "first_failure_position: %llu\n", + lo->ll_pos_latest_start, + lo->ll_pos_last_checkpoint, + lo->ll_pos_first_inconsistent); seq_printf(m, "success_count: %u\n" - "repaired_dangling: "LPU64"\n" - "repaired_unmatched_pair: "LPU64"\n" - "repaired_multiple_referenced: "LPU64"\n" - "repaired_orphan: "LPU64"\n" - "repaired_inconsistent_owner: "LPU64"\n" - "repaired_others: "LPU64"\n" - "skipped: "LPU64"\n" - "failed_phase1: "LPU64"\n" - "failed_phase2: "LPU64"\n", - lo->ll_success_count, - lo->ll_objs_repaired[LLIT_DANGLING 
- 1], - lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1], - lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1], - lo->ll_objs_repaired[LLIT_ORPHAN - 1], - lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1], - lo->ll_objs_repaired[LLIT_OTHERS - 1], - lo->ll_objs_skipped, - lo->ll_objs_failed_phase1, - lo->ll_objs_failed_phase2); + "repaired_dangling: %llu\n" + "repaired_unmatched_pair: %llu\n" + "repaired_multiple_referenced: %llu\n" + "repaired_orphan: %llu\n" + "repaired_inconsistent_owner: %llu\n" + "repaired_others: %llu\n" + "skipped: %llu\n" + "failed_phase1: %llu\n" + "failed_phase2: %llu\n", + lo->ll_success_count, + lo->ll_objs_repaired[LLIT_DANGLING - 1], + lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1], + lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1], + lo->ll_objs_repaired[LLIT_ORPHAN - 1], + lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1], + lo->ll_objs_repaired[LLIT_OTHERS - 1], + lo->ll_objs_skipped, + lo->ll_objs_failed_phase1, + lo->ll_objs_failed_phase2); if (lo->ll_status == LS_SCANNING_PHASE1) { __u64 pos; @@ -4782,20 +4818,20 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(new_checked, duration); if (rtime != 0) do_div(speed, rtime); - seq_printf(m, "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: N/A\n" - "real-time_speed_phase1: "LPU64" items/sec\n" - "real-time_speed_phase2: N/A\n", - checked, - lo->ll_objs_checked_phase2, - rtime, - lo->ll_run_time_phase2, - speed, - new_checked); + seq_printf(m, "checked_phase1: %llu\n" + "checked_phase2: %llu\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: N/A\n" + "real-time_speed_phase1: %llu items/sec\n" + "real-time_speed_phase2: N/A\n", + checked, + lo->ll_objs_checked_phase2, + rtime, + lo->ll_run_time_phase2, + speed, + new_checked); 
LASSERT(lfsck->li_di_oit != NULL); @@ -4808,11 +4844,11 @@ static int lfsck_layout_dump(const struct lu_env *env, pos = iops->store(env, lfsck->li_di_oit); if (!lfsck->li_current_oit_processed) pos--; - seq_printf(m, "current_position: "LPU64"\n", pos); + seq_printf(m, "current_position: %llu\n", pos); } else if (lo->ll_status == LS_SCANNING_PHASE2) { cfs_duration_t duration = cfs_time_current() - - lfsck->li_time_last_checkpoint; + com->lc_time_last_checkpoint; __u64 checked = lo->ll_objs_checked_phase2 + com->lc_new_checked; __u64 speed1 = lo->ll_objs_checked_phase1; @@ -4828,26 +4864,23 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(speed1, lo->ll_run_time_phase1); if (rtime != 0) do_div(speed2, rtime); - rc = seq_printf(m, "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" - "run_time_phase1: %u seconds\n" - "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" items/sec\n" - "real-time_speed_phase1: N/A\n" - "real-time_speed_phase2: "LPU64" items/sec\n" - "current_position: "DFID"\n", - lo->ll_objs_checked_phase1, - checked, - lo->ll_run_time_phase1, - rtime, - speed1, - speed2, - new_checked, - PFID(&com->lc_fid_latest_scanned_phase2)); - if (rc <= 0) - goto out; - + seq_printf(m, "checked_phase1: %llu\n" + "checked_phase2: %llu\n" + "run_time_phase1: %u seconds\n" + "run_time_phase2: %u seconds\n" + "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: %llu items/sec\n" + "real-time_speed_phase1: N/A\n" + "real-time_speed_phase2: %llu items/sec\n" + "current_position: "DFID"\n", + lo->ll_objs_checked_phase1, + checked, + lo->ll_run_time_phase1, + rtime, + speed1, + speed2, + new_checked, + PFID(&com->lc_fid_latest_scanned_phase2)); } else { __u64 speed1 = lo->ll_objs_checked_phase1; __u64 speed2 = lo->ll_objs_checked_phase2; @@ -4856,12 +4889,12 @@ static int lfsck_layout_dump(const struct lu_env *env, do_div(speed1, lo->ll_run_time_phase1); if 
(lo->ll_run_time_phase2 != 0) do_div(speed2, lo->ll_run_time_phase2); - seq_printf(m, "checked_phase1: "LPU64"\n" - "checked_phase2: "LPU64"\n" + seq_printf(m, "checked_phase1: %llu\n" + "checked_phase2: %llu\n" "run_time_phase1: %u seconds\n" "run_time_phase2: %u seconds\n" - "average_speed_phase1: "LPU64" items/sec\n" - "average_speed_phase2: "LPU64" objs/sec\n" + "average_speed_phase1: %llu items/sec\n" + "average_speed_phase2: %llu objs/sec\n" "real-time_speed_phase1: N/A\n" "real-time_speed_phase2: N/A\n" "current_position: N/A\n", @@ -4872,10 +4905,8 @@ static int lfsck_layout_dump(const struct lu_env *env, speed1, speed2); } -out: - up_read(&com->lc_sem); - return rc; + up_read(&com->lc_sem); } static int lfsck_layout_master_double_scan(const struct lu_env *env, @@ -4929,11 +4960,11 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: layout LFSCK slave phase2 scan start\n", lfsck_lfsck2name(lfsck)); + atomic_inc(&lfsck->li_double_scan_count); + if (lo->ll_flags & LF_INCOMPLETE) GOTO(done, rc = 1); - atomic_inc(&lfsck->li_double_scan_count); - com->lc_new_checked = 0; com->lc_new_scanned = 0; com->lc_time_last_checkpoint = cfs_time_current(); @@ -4959,11 +4990,15 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env, rc = l_wait_event(thread->t_ctl_waitq, !thread_is_running(thread) || + lo->ll_flags & LF_INCOMPLETE || list_empty(&llsd->llsd_master_list), &lwi); if (unlikely(!thread_is_running(thread))) GOTO(done, rc = 0); + if (lo->ll_flags & LF_INCOMPLETE) + GOTO(done, rc = 1); + if (rc == -ETIMEDOUT) continue; @@ -5101,12 +5136,14 @@ static void lfsck_layout_slave_quit(const struct lu_env *env, LASSERT(llsd != NULL); + down_write(&com->lc_sem); list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list, lls_list) { list_del_init(&lls->lls_list); lfsck_object_put(env, lls->lls_lastid_obj); OBD_FREE_PTR(lls); } + up_write(&com->lc_sem); spin_lock(&llsd->llsd_lock); while 
(!list_empty(&llsd->llsd_master_list)) { @@ -5115,6 +5152,7 @@ static void lfsck_layout_slave_quit(const struct lu_env *env, list_del_init(&llst->llst_list); spin_unlock(&llsd->llsd_lock); lfsck_layout_llst_put(llst); + spin_lock(&llsd->llsd_lock); } spin_unlock(&llsd->llsd_lock); @@ -5146,7 +5184,7 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: layout LFSCK master handles notify %u " "from %s %x, status %d, flags %x, flags2 %x\n", lfsck_lfsck2name(lfsck), lr->lr_event, - (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", + (lr->lr_flags & LEF_FROM_OST) ? "OST" : "MDT", lr->lr_index, lr->lr_status, lr->lr_flags, lr->lr_flags2); if (lr->lr_event != LE_PHASE1_DONE && @@ -5159,7 +5197,7 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, else ltds = &lfsck->li_mdt_descs; spin_lock(<ds->ltd_lock); - ltd = LTD_TGT(ltds, lr->lr_index); + ltd = lfsck_ltd2tgt(ltds, lr->lr_index); if (ltd == NULL) { spin_unlock(<ds->ltd_lock); @@ -5199,7 +5237,14 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env, break; case LE_PHASE2_DONE: ltd->ltd_layout_done = 1; - list_del_init(<d->ltd_layout_list); + if (!list_empty(<d->ltd_layout_list)) { + list_del_init(<d->ltd_layout_list); + if (lr->lr_flags2 & LF_INCOMPLETE) { + lfsck_lad_set_bitmap(env, com, ltd->ltd_index); + fail = true; + } + } + break; case LE_PEER_EXIT: fail = true; @@ -5289,9 +5334,7 @@ static int lfsck_layout_slave_in_notify(const struct lu_env *env, true); if (llst != NULL) { lfsck_layout_llst_put(llst); - if (list_empty(&llsd->llsd_master_list)) - wake_up_all( - &lfsck->li_thread.t_ctl_waitq); + wake_up_all(&lfsck->li_thread.t_ctl_waitq); } } @@ -5331,12 +5374,85 @@ static int lfsck_layout_slave_in_notify(const struct lu_env *env, RETURN(0); } +static void lfsck_layout_repaired(struct lfsck_layout *lo, __u64 *count) +{ + int i; + + for (i = 0; i < LLIT_MAX; i++) + *count += lo->ll_objs_repaired[i]; +} + +static int lfsck_layout_query_all(const 
struct lu_env *env, + struct lfsck_component *com, + __u32 *mdts_count, __u32 *osts_count, + __u64 *repaired) +{ + struct lfsck_layout *lo = com->lc_file_ram; + struct lfsck_tgt_descs *ltds; + struct lfsck_tgt_desc *ltd; + int idx; + int rc; + ENTRY; + + rc = lfsck_query_all(env, com); + if (rc != 0) + RETURN(rc); + + ltds = &com->lc_lfsck->li_mdt_descs; + down_read(<ds->ltd_rw_sem); + cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { + ltd = lfsck_ltd2tgt(ltds, idx); + LASSERT(ltd != NULL); + + mdts_count[ltd->ltd_layout_status]++; + *repaired += ltd->ltd_layout_repaired; + } + up_read(<ds->ltd_rw_sem); + + ltds = &com->lc_lfsck->li_ost_descs; + down_read(<ds->ltd_rw_sem); + cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { + ltd = lfsck_ltd2tgt(ltds, idx); + LASSERT(ltd != NULL); + + osts_count[ltd->ltd_layout_status]++; + *repaired += ltd->ltd_layout_repaired; + } + up_read(<ds->ltd_rw_sem); + + down_read(&com->lc_sem); + mdts_count[lo->ll_status]++; + lfsck_layout_repaired(lo, repaired); + up_read(&com->lc_sem); + + RETURN(0); +} + static int lfsck_layout_query(const struct lu_env *env, - struct lfsck_component *com) + struct lfsck_component *com, + struct lfsck_request *req, + struct lfsck_reply *rep, + struct lfsck_query *que, int idx) { struct lfsck_layout *lo = com->lc_file_ram; + int rc = 0; + + if (que != NULL) { + LASSERT(com->lc_lfsck->li_master); + + rc = lfsck_layout_query_all(env, com, + que->lu_mdts_count[idx], + que->lu_osts_count[idx], + &que->lu_repaired[idx]); + } else { + down_read(&com->lc_sem); + rep->lr_status = lo->ll_status; + if (req->lr_flags & LEF_QUERY_ALL) + lfsck_layout_repaired(lo, &rep->lr_repaired); + up_read(&com->lc_sem); + } - return lo->ll_status; + return rc; } /* with lfsck::li_lock held */ @@ -5419,13 +5535,17 @@ static void lfsck_layout_assistant_fill_pos(const struct lu_env *env, struct lfsck_assistant_data *lad = com->lc_data; struct lfsck_layout_req *llr; + if (((struct lfsck_layout *)(com->lc_file_ram))->ll_status != + 
LS_SCANNING_PHASE1) + return; + if (list_empty(&lad->lad_req_list)) return; llr = list_entry(lad->lad_req_list.next, struct lfsck_layout_req, llr_lar.lar_list); - pos->lp_oit_cookie = llr->llr_parent->llo_cookie - 1; + pos->lp_oit_cookie = llr->llr_lar.lar_parent->lso_oit_cookie - 1; } struct lfsck_assistant_operations lfsck_layout_assistant_ops = { @@ -5460,7 +5580,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) com->lc_ops = &lfsck_layout_master_ops; com->lc_data = lfsck_assistant_data_init( &lfsck_layout_assistant_ops, - "lfsck_layout"); + LFSCK_LAYOUT); if (com->lc_data == NULL) GOTO(out, rc = -ENOMEM); } else { @@ -5495,7 +5615,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) GOTO(out, rc = -ENOTDIR); obj = local_file_find_or_create(env, lfsck->li_los, root, - lfsck_layout_name, + LFSCK_LAYOUT, S_IFREG | S_IRUGO | S_IWUSR); if (IS_ERR(obj)) GOTO(out, rc = PTR_ERR(obj)); @@ -5556,7 +5676,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) out: if (root != NULL && !IS_ERR(root)) - lu_object_put(env, &root->do_lu); + lfsck_object_put(env, root); if (rc != 0) { lfsck_component_cleanup(env, com); @@ -5594,7 +5714,7 @@ static int lfsck_fid_match_idx(const struct lu_env *env, return 0; } - ss = lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site); + ss = lfsck_dev_site(lfsck); if (unlikely(ss == NULL)) return -ENOTCONN; @@ -5616,11 +5736,11 @@ static int lfsck_fid_match_idx(const struct lu_env *env, } static void lfsck_layout_destroy_orphan(const struct lu_env *env, - struct dt_device *dev, struct dt_object *obj) { - struct thandle *handle; - int rc; + struct dt_device *dev = lfsck_obj2dev(obj); + struct thandle *handle; + int rc; ENTRY; handle = dt_trans_create(env, dev); @@ -5659,8 +5779,7 @@ stop: static int lfsck_orphan_index_lookup(const struct lu_env *env, struct dt_object *dt, struct dt_rec *rec, - const struct dt_key *key, - struct lustre_capa *capa) + const 
struct dt_key *key) { return -EOPNOTSUPP; } @@ -5679,7 +5798,6 @@ static int lfsck_orphan_index_insert(const struct lu_env *env, const struct dt_rec *rec, const struct dt_key *key, struct thandle *handle, - struct lustre_capa *capa, int ignore_quota) { return -EOPNOTSUPP; @@ -5696,16 +5814,14 @@ static int lfsck_orphan_index_declare_delete(const struct lu_env *env, static int lfsck_orphan_index_delete(const struct lu_env *env, struct dt_object *dt, const struct dt_key *key, - struct thandle *handle, - struct lustre_capa *capa) + struct thandle *handle) { return -EOPNOTSUPP; } static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, struct dt_object *dt, - __u32 attr, - struct lustre_capa *capa) + __u32 attr) { struct dt_device *dev = lu2dt_dev(dt->do_lu.lo_dev); struct lfsck_instance *lfsck; @@ -5923,7 +6039,7 @@ again1: } key->f_oid = lrn->lrn_first_oid + pos; - obj = lfsck_object_find(env, lfsck, key); + obj = lfsck_object_find_bottom(env, lfsck, key); if (IS_ERR(obj)) { rc = PTR_ERR(obj); if (rc == -ENOENT) { @@ -5942,12 +6058,12 @@ again1: goto again1; } - rc = dt_attr_get(env, obj, la, BYPASS_CAPA); + rc = dt_attr_get(env, obj, la); if (rc != 0) GOTO(out, rc); rc = dt_xattr_get(env, obj, lfsck_buf_get(env, pfid, sizeof(*pfid)), - XATTR_NAME_FID, BYPASS_CAPA); + XATTR_NAME_FID); if (rc == -ENODATA) { /* For the pre-created OST-object, update the bitmap to avoid * others LFSCK (second phase) iteration to touch it again. */ @@ -5960,9 +6076,7 @@ again1: * OST-object there. Destroy it now! 
*/ if (unlikely(!(la->la_mode & S_ISUID))) { dt_read_unlock(env, obj); - lfsck_layout_destroy_orphan(env, - lfsck->li_bottom, - obj); + lfsck_layout_destroy_orphan(env, obj); lfsck_object_put(env, obj); pos++; goto again1; @@ -6106,9 +6220,9 @@ static int lfsck_orphan_it_load(const struct lu_env *env, LASSERT(llst != NULL); if (hash != llst->llst_hash) { - CDEBUG(D_LFSCK, "%s: the given hash "LPU64" for orphan " + CDEBUG(D_LFSCK, "%s: the given hash %llu for orphan " "iteration does not match the one when fini " - LPU64", to be reset.\n", + "%llu, to be reset.\n", lfsck_lfsck2name(it->loi_com->lc_lfsck), hash, llst->llst_hash); fid_zero(&llst->llst_fid);