X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flfsck%2Flfsck_lib.c;h=9fe3a738727c39eded8f1e84e531eff4a827fb35;hb=5c883ea2748ae9e430a9cd863a9b630b2a74440a;hp=cf6ce4690cee5bbde551c32629254232047324f8;hpb=dbe544883a59d7eecaace16400c575b416c51002;p=fs%2Flustre-release.git diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index cf6ce46..9fe3a73 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -20,7 +20,7 @@ * GPL HEADER END */ /* - * Copyright (c) 2012, 2013, Intel Corporation. + * Copyright (c) 2013, 2017, Intel Corporation. */ /* * lustre/lfsck/lfsck_lib.c @@ -30,7 +30,10 @@ #define DEBUG_SUBSYSTEM S_LFSCK -#include +#include +#include +#include +#include #include #include #include @@ -38,7 +41,6 @@ #include #include #include -#include #include "lfsck_internal.h" @@ -61,26 +63,11 @@ static void lfsck_key_fini(const struct lu_context *ctx, LU_CONTEXT_KEY_DEFINE(lfsck, LCT_MD_THREAD | LCT_DT_THREAD); LU_KEY_INIT_GENERIC(lfsck); -static struct list_head lfsck_instance_list; -static struct list_head lfsck_ost_orphan_list; -static struct list_head lfsck_mdt_orphan_list; +static LIST_HEAD(lfsck_instance_list); +static LIST_HEAD(lfsck_ost_orphan_list); +static LIST_HEAD(lfsck_mdt_orphan_list); static DEFINE_SPINLOCK(lfsck_instance_lock); -static const char *lfsck_status_names[] = { - [LS_INIT] = "init", - [LS_SCANNING_PHASE1] = "scanning-phase1", - [LS_SCANNING_PHASE2] = "scanning-phase2", - [LS_COMPLETED] = "completed", - [LS_FAILED] = "failed", - [LS_STOPPED] = "stopped", - [LS_PAUSED] = "paused", - [LS_CRASHED] = "crashed", - [LS_PARTIAL] = "partial", - [LS_CO_FAILED] = "co-failed", - [LS_CO_STOPPED] = "co-stopped", - [LS_CO_PAUSED] = "co-paused" -}; - const char *lfsck_flags_names[] = { "scanned-once", "inconsistent", @@ -98,6 +85,9 @@ const char *lfsck_param_names[] = { "broadcast", "orphan", "create_ostobj", + "create_mdtobj", + NULL, + "delay_create_ostobj", NULL }; @@ -106,12 +96,16 @@ enum lfsck_verify_lpf_types { LVLT_BY_NAMEENTRY = 1, }; -const char *lfsck_status2names(enum lfsck_status status) +static inline void +lfsck_reset_ltd_status(struct lfsck_tgt_desc *ltd, enum lfsck_type type) { - if (unlikely(status < 0 || status >= LS_MAX)) - return "unknown"; - - return lfsck_status_names[status]; + if (type == LFSCK_TYPE_LAYOUT) { + ltd->ltd_layout_status = LS_MAX; + ltd->ltd_layout_repaired = 0; + } else { + ltd->ltd_namespace_status = LS_MAX; + ltd->ltd_namespace_repaired = 0; + } } static int lfsck_tgt_descs_init(struct lfsck_tgt_descs *ltds) @@ -147,7 +141,7 @@ static void lfsck_tgt_descs_fini(struct lfsck_tgt_descs *ltds) } cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { - ltd = LTD_TGT(ltds, idx); + ltd = lfsck_ltd2tgt(ltds, idx); if (likely(ltd != NULL)) { LASSERT(list_empty(<d->ltd_layout_list)); LASSERT(list_empty(<d->ltd_layout_phase_list)); @@ -156,7 +150,7 @@ static void lfsck_tgt_descs_fini(struct lfsck_tgt_descs *ltds) ltds->ltd_tgtnr--; cfs_bitmap_clear(ltds->ltd_tgts_bitmap, idx); - LTD_TGT(ltds, idx) = NULL; + lfsck_assign_tgt(ltds, NULL, idx); lfsck_tgt_put(ltd); } } @@ -199,8 +193,8 @@ static int __lfsck_add_target(const struct lu_env *env, if (index >= ltds->ltd_tgts_bitmap->size) { __u32 newsize = max((__u32)ltds->ltd_tgts_bitmap->size, (__u32)BITS_PER_LONG); - cfs_bitmap_t *old_bitmap = ltds->ltd_tgts_bitmap; - cfs_bitmap_t *new_bitmap; + struct cfs_bitmap *old_bitmap = ltds->ltd_tgts_bitmap; + struct cfs_bitmap *new_bitmap; while (newsize < index + 1) newsize <<= 1; @@ -228,7 +222,7 @@ static int __lfsck_add_target(const struct lu_env *env, GOTO(unlock, rc = -ENOMEM); } - LTD_TGT(ltds, index) = ltd; + lfsck_assign_tgt(ltds, ltd, index); cfs_bitmap_set(ltds->ltd_tgts_bitmap, index); ltds->ltd_tgtnr++; @@ -360,6 +354,56 @@ int lfsck_fid_alloc(const struct lu_env *env, struct lfsck_instance *lfsck, RETURN(rc); } +static int __lfsck_ibits_lock(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *obj, struct ldlm_res_id *resid, + struct lustre_handle *lh, __u64 bits, + enum ldlm_mode mode) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + union ldlm_policy_data *policy = &info->lti_policy; + __u64 flags = LDLM_FL_ATOMIC_CB; + int rc; + + LASSERT(lfsck->li_namespace != NULL); + + memset(policy, 0, sizeof(*policy)); + policy->l_inodebits.bits = bits; + if (dt_object_remote(obj)) { + struct ldlm_enqueue_info *einfo = &info->lti_einfo; + + memset(einfo, 0, sizeof(*einfo)); + einfo->ei_type = LDLM_IBITS; + einfo->ei_mode = mode; + einfo->ei_cb_bl = ldlm_blocking_ast; + einfo->ei_cb_cp = ldlm_completion_ast; + einfo->ei_res_id = resid; + + rc = dt_object_lock(env, obj, lh, einfo, policy); + /* for regular checks LFSCK doesn't use LDLM locking, + * so the state isn't coherent. here we just took LDLM + * lock for coherency and it's time to invalidate + * previous state */ + if (rc == ELDLM_OK) + dt_invalidate(env, obj); + } else { + rc = ldlm_cli_enqueue_local(env, lfsck->li_namespace, resid, + LDLM_IBITS, policy, mode, + &flags, ldlm_blocking_ast, + ldlm_completion_ast, NULL, NULL, + 0, LVB_T_NONE, NULL, lh); + } + + if (rc == ELDLM_OK) { + rc = 0; + } else { + memset(lh, 0, sizeof(*lh)); + rc = -EIO; + } + + return rc; +} + /** * Request the specified ibits lock for the given object. * @@ -370,7 +414,7 @@ int lfsck_fid_alloc(const struct lu_env *env, struct lfsck_instance *lfsck, * \param[in] lfsck pointer to the lfsck instance * \param[in] obj pointer to the dt_object to be locked * \param[out] lh pointer to the lock handle - * \param[in] ibits the bits for the ldlm lock to be acquired + * \param[in] bits the bits for the ldlm lock to be acquired * \param[in] mode the mode for the ldlm lock to be acquired * * \retval 0 for success @@ -378,31 +422,44 @@ int lfsck_fid_alloc(const struct lu_env *env, struct lfsck_instance *lfsck, */ int lfsck_ibits_lock(const struct lu_env *env, struct lfsck_instance *lfsck, struct dt_object *obj, struct lustre_handle *lh, - __u64 bits, ldlm_mode_t mode) + __u64 bits, enum ldlm_mode mode) { - struct lfsck_thread_info *info = lfsck_env_info(env); - ldlm_policy_data_t *policy = &info->lti_policy; - struct ldlm_res_id *resid = &info->lti_resid; - __u64 flags = LDLM_FL_ATOMIC_CB; - int rc; + struct ldlm_res_id *resid = &lfsck_env_info(env)->lti_resid; - LASSERT(lfsck->li_namespace != NULL); + LASSERT(!lustre_handle_is_used(lh)); - memset(policy, 0, sizeof(*policy)); - policy->l_inodebits.bits = bits; fid_build_reg_res_name(lfsck_dto2fid(obj), resid); - rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_IBITS, - policy, mode, &flags, ldlm_blocking_ast, - ldlm_completion_ast, NULL, NULL, 0, - LVB_T_NONE, NULL, lh); - if (rc == ELDLM_OK) { - rc = 0; - } else { - memset(lh, 0, sizeof(*lh)); - rc = -EIO; - } + return __lfsck_ibits_lock(env, lfsck, obj, resid, lh, bits, mode); +} - return rc; +/** + * Request the remote LOOKUP lock for the given object. + * + * If \a pobj is remote, the LOOKUP lock of \a obj is on the MDT where + * \a pobj is, acquire LOOKUP lock there. + * + * \param[in] env pointer to the thread context + * \param[in] lfsck pointer to the lfsck instance + * \param[in] pobj pointer to parent dt_object + * \param[in] obj pointer to the dt_object to be locked + * \param[out] lh pointer to the lock handle + * \param[in] mode the mode for the ldlm lock to be acquired + * + * \retval 0 for success + * \retval negative error number on failure + */ +int lfsck_remote_lookup_lock(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *pobj, struct dt_object *obj, + struct lustre_handle *lh, enum ldlm_mode mode) +{ + struct ldlm_res_id *resid = &lfsck_env_info(env)->lti_resid; + + LASSERT(!lustre_handle_is_used(lh)); + + fid_build_reg_res_name(lfsck_dto2fid(obj), resid); + return __lfsck_ibits_lock(env, lfsck, pobj, resid, lh, + MDS_INODELOCK_LOOKUP, mode); } /** @@ -414,7 +471,7 @@ int lfsck_ibits_lock(const struct lu_env *env, struct lfsck_instance *lfsck, * \param[in] lh pointer to the lock handle * \param[in] mode the mode for the ldlm lock to be released */ -void lfsck_ibits_unlock(struct lustre_handle *lh, ldlm_mode_t mode) +void lfsck_ibits_unlock(struct lustre_handle *lh, enum ldlm_mode mode) { if (lustre_handle_is_used(lh)) { ldlm_lock_decref(lh, mode); @@ -422,15 +479,105 @@ void lfsck_ibits_unlock(struct lustre_handle *lh, ldlm_mode_t mode) } } +/** + * Request compound ibits locks for the given pairs. + * + * Before the LFSCK modifying on the namespace visible object, it needs to + * acquire related ibits ldlm lock. Usually, we can use lfsck_ibits_lock for + * the lock purpose. But the simple lfsck_ibits_lock for directory-based + * modificationis (such as insert name entry to the directory) may be too + * coarse-grained and not efficient. + * + * The lfsck_lock() will request compound ibits locks on the specified + * pairs: the PDO (Parallel Directory Operations) ibits (UPDATE) + * lock on the directory object, and the regular ibits lock on the name hash. + * + * \param[in] env pointer to the thread context + * \param[in] lfsck pointer to the lfsck instance + * \param[in] obj pointer to the dt_object to be locked + * \param[in] name used for building the PDO lock resource + * \param[out] llh pointer to the lfsck_lock_handle + * \param[in] bits the bits for the ldlm lock to be acquired + * \param[in] mode the mode for the ldlm lock to be acquired + * + * \retval 0 for success + * \retval negative error number on failure + */ +int lfsck_lock(const struct lu_env *env, struct lfsck_instance *lfsck, + struct dt_object *obj, const char *name, + struct lfsck_lock_handle *llh, __u64 bits, enum ldlm_mode mode) +{ + struct ldlm_res_id *resid = &lfsck_env_info(env)->lti_resid; + int rc; + + LASSERT(S_ISDIR(lfsck_object_type(obj))); + LASSERT(name != NULL); + LASSERT(name[0] != 0); + LASSERT(!lustre_handle_is_used(&llh->llh_pdo_lh)); + LASSERT(!lustre_handle_is_used(&llh->llh_reg_lh)); + + switch (mode) { + case LCK_EX: + llh->llh_pdo_mode = LCK_EX; + break; + case LCK_PW: + llh->llh_pdo_mode = LCK_CW; + break; + case LCK_PR: + llh->llh_pdo_mode = LCK_CR; + break; + default: + CDEBUG(D_LFSCK, "%s: unexpected PDO lock mode %u on the obj " + DFID"\n", lfsck_lfsck2name(lfsck), mode, + PFID(lfsck_dto2fid(obj))); + LBUG(); + } + + fid_build_reg_res_name(lfsck_dto2fid(obj), resid); + rc = __lfsck_ibits_lock(env, lfsck, obj, resid, &llh->llh_pdo_lh, + MDS_INODELOCK_UPDATE, llh->llh_pdo_mode); + if (rc != 0) + return rc; + + llh->llh_reg_mode = mode; + resid->name[LUSTRE_RES_ID_HSH_OFF] = ll_full_name_hash(NULL, name, + strlen(name)); + LASSERT(resid->name[LUSTRE_RES_ID_HSH_OFF] != 0); + rc = __lfsck_ibits_lock(env, lfsck, obj, resid, &llh->llh_reg_lh, + bits, llh->llh_reg_mode); + if (rc != 0) + lfsck_ibits_unlock(&llh->llh_pdo_lh, llh->llh_pdo_mode); + + return rc; +} + +/** + * Release the the compound ibits locks. + * + * \param[in] llh pointer to the lfsck_lock_handle to be released + */ +void lfsck_unlock(struct lfsck_lock_handle *llh) +{ + lfsck_ibits_unlock(&llh->llh_reg_lh, llh->llh_reg_mode); + lfsck_ibits_unlock(&llh->llh_pdo_lh, llh->llh_pdo_mode); +} + int lfsck_find_mdt_idx_by_fid(const struct lu_env *env, struct lfsck_instance *lfsck, const struct lu_fid *fid) { - struct seq_server_site *ss = - lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site); + struct seq_server_site *ss = lfsck_dev_site(lfsck); struct lu_seq_range *range = &lfsck_env_info(env)->lti_range; int rc; + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) { + /* "ROOT" is always on the MDT0. */ + if (lu_fid_eq(fid, &lfsck->li_global_root_fid)) + return 0; + + return lfsck_dev_idx(lfsck); + } + fld_range_set_mdt(range); rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), range); if (rc == 0) @@ -439,26 +586,94 @@ int lfsck_find_mdt_idx_by_fid(const struct lu_env *env, return rc; } -static const char dot[] = "."; -static const char dotdot[] = ".."; +const char dot[] = "."; +const char dotdot[] = ".."; static const char dotlustre[] = ".lustre"; static const char lostfound[] = "lost+found"; +/** + * Remove the name entry from the .lustre/lost+found directory. + * + * No need to care about the object referenced by the name entry, + * either the name entry is invalid or redundant, or the referenced + * object has been processed or will be handled by others. + * + * \param[in] env pointer to the thread context + * \param[in] lfsck pointer to the lfsck instance + * \param[in] name the name for the name entry to be removed + * + * \retval 0 for success + * \retval negative error number on failure + */ +static int lfsck_lpf_remove_name_entry(const struct lu_env *env, + struct lfsck_instance *lfsck, + const char *name) +{ + struct dt_object *parent = lfsck->li_lpf_root_obj; + struct dt_device *dev = lfsck_obj2dev(parent); + struct thandle *th; + struct lfsck_lock_handle *llh = &lfsck_env_info(env)->lti_llh; + int rc; + ENTRY; + + rc = lfsck_lock(env, lfsck, parent, name, llh, + MDS_INODELOCK_UPDATE, LCK_PW); + if (rc != 0) + RETURN(rc); + + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + GOTO(unlock, rc = PTR_ERR(th)); + + rc = dt_declare_delete(env, parent, (const struct dt_key *)name, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_declare_ref_del(env, parent, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_delete(env, parent, (const struct dt_key *)name, th); + if (rc != 0) + GOTO(stop, rc); + + dt_write_lock(env, parent, 0); + rc = dt_ref_del(env, parent, th); + dt_write_unlock(env, parent); + + GOTO(stop, rc); + +stop: + dt_trans_stop(env, dev, th); + +unlock: + lfsck_unlock(llh); + + CDEBUG(D_LFSCK, "%s: remove name entry "DFID"/%s: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(parent)), name, rc); + + return rc; +} + static int lfsck_create_lpf_local(const struct lu_env *env, struct lfsck_instance *lfsck, - struct dt_object *parent, struct dt_object *child, struct lu_attr *la, struct dt_object_format *dof, const char *name) { struct dt_insert_rec *rec = &lfsck_env_info(env)->lti_dt_rec; - struct dt_device *dev = lfsck->li_bottom; + struct dt_object *parent = lfsck->li_lpf_root_obj; + struct dt_device *dev = lfsck_obj2dev(child); struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct dt_object *bk_obj = lfsck->li_bookmark_obj; const struct lu_fid *cfid = lfsck_dto2fid(child); struct thandle *th = NULL; - struct linkea_data ldata = { 0 }; + struct linkea_data ldata = { NULL }; struct lu_buf linkea_buf; const struct lu_name *cname; loff_t pos = 0; @@ -466,13 +681,9 @@ static int lfsck_create_lpf_local(const struct lu_env *env, int rc; ENTRY; - rc = linkea_data_new(&ldata, - &lfsck_env_info(env)->lti_linkea_buf2); - if (rc != 0) - RETURN(rc); - cname = lfsck_name_get_const(env, name, strlen(name)); - rc = linkea_add_buf(&ldata, cname, lfsck_dto2fid(parent)); + rc = linkea_links_new(&ldata, &lfsck_env_info(env)->lti_linkea_buf2, + cname, lfsck_dto2fid(parent)); if (rc != 0) RETURN(rc); @@ -485,12 +696,30 @@ static int lfsck_create_lpf_local(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); + if (!dt_try_as_dir(env, child)) + GOTO(stop, rc = -ENOTDIR); + /* 2a. increase child nlink */ rc = dt_declare_ref_add(env, child, th); if (rc != 0) GOTO(stop, rc); - /* 3a. insert linkEA for child */ + /* 3a. insert dot into child dir */ + rec->rec_type = S_IFDIR; + rec->rec_fid = cfid; + rc = dt_declare_insert(env, child, (const struct dt_rec *)rec, + (const struct dt_key *)dot, th); + if (rc != 0) + GOTO(stop, rc); + + /* 4a. insert dotdot into child dir */ + rec->rec_fid = &LU_LPF_FID; + rc = dt_declare_insert(env, child, (const struct dt_rec *)rec, + (const struct dt_key *)dotdot, th); + if (rc != 0) + GOTO(stop, rc); + + /* 5a. insert linkEA for child */ lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf, ldata.ld_leh->leh_len); rc = dt_declare_xattr_set(env, child, &linkea_buf, @@ -498,7 +727,7 @@ static int lfsck_create_lpf_local(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); - /* 4a. insert name into parent dir */ + /* 6a. insert name into parent dir */ rec->rec_type = S_IFDIR; rec->rec_fid = cfid; rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec, @@ -506,12 +735,12 @@ static int lfsck_create_lpf_local(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); - /* 5a. increase parent nlink */ + /* 7a. increase parent nlink */ rc = dt_declare_ref_add(env, parent, th); if (rc != 0) GOTO(stop, rc); - /* 6a. update bookmark */ + /* 8a. update bookmark */ rc = dt_declare_record_write(env, bk_obj, lfsck_buf_get(env, bk, len), 0, th); if (rc != 0) @@ -522,49 +751,46 @@ static int lfsck_create_lpf_local(const struct lu_env *env, GOTO(stop, rc); dt_write_lock(env, child, 0); - /* 1b.1. create child */ + /* 1b. create child */ rc = dt_create(env, child, la, NULL, dof, th); if (rc != 0) GOTO(unlock, rc); - if (unlikely(!dt_try_as_dir(env, child))) - GOTO(unlock, rc = -ENOTDIR); + /* 2b. increase child nlink */ + rc = dt_ref_add(env, child, th); + if (rc != 0) + GOTO(unlock, rc); - /* 1b.2. insert dot into child dir */ + /* 3b. insert dot into child dir */ rec->rec_fid = cfid; rc = dt_insert(env, child, (const struct dt_rec *)rec, - (const struct dt_key *)dot, th, BYPASS_CAPA, 1); + (const struct dt_key *)dot, th); if (rc != 0) GOTO(unlock, rc); - /* 1b.3. insert dotdot into child dir */ + /* 4b. insert dotdot into child dir */ rec->rec_fid = &LU_LPF_FID; rc = dt_insert(env, child, (const struct dt_rec *)rec, - (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1); - if (rc != 0) - GOTO(unlock, rc); - - /* 2b. increase child nlink */ - rc = dt_ref_add(env, child, th); + (const struct dt_key *)dotdot, th); if (rc != 0) GOTO(unlock, rc); - /* 3b. insert linkEA for child. */ + /* 5b. insert linkEA for child. */ rc = dt_xattr_set(env, child, &linkea_buf, - XATTR_NAME_LINK, 0, th, BYPASS_CAPA); + XATTR_NAME_LINK, 0, th); dt_write_unlock(env, child); if (rc != 0) GOTO(stop, rc); - /* 4b. insert name into parent dir */ + /* 6b. insert name into parent dir */ rec->rec_fid = cfid; rc = dt_insert(env, parent, (const struct dt_rec *)rec, - (const struct dt_key *)name, th, BYPASS_CAPA, 1); + (const struct dt_key *)name, th); if (rc != 0) GOTO(stop, rc); dt_write_lock(env, parent, 0); - /* 5b. increase parent nlink */ + /* 7b. increase parent nlink */ rc = dt_ref_add(env, parent, th); dt_write_unlock(env, parent); if (rc != 0) @@ -573,7 +799,7 @@ static int lfsck_create_lpf_local(const struct lu_env *env, bk->lb_lpf_fid = *cfid; lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk); - /* 6b. update bookmark */ + /* 8b. update bookmark */ rc = dt_record_write(env, bk_obj, lfsck_buf_get(env, bk, len), &pos, th); @@ -590,18 +816,18 @@ stop: static int lfsck_create_lpf_remote(const struct lu_env *env, struct lfsck_instance *lfsck, - struct dt_object *parent, struct dt_object *child, struct lu_attr *la, struct dt_object_format *dof, const char *name) { struct dt_insert_rec *rec = &lfsck_env_info(env)->lti_dt_rec; + struct dt_object *parent = lfsck->li_lpf_root_obj; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct dt_object *bk_obj = lfsck->li_bookmark_obj; const struct lu_fid *cfid = lfsck_dto2fid(child); struct thandle *th = NULL; - struct linkea_data ldata = { 0 }; + struct linkea_data ldata = { NULL }; struct lu_buf linkea_buf; const struct lu_name *cname; struct dt_device *dev; @@ -610,13 +836,9 @@ static int lfsck_create_lpf_remote(const struct lu_env *env, int rc; ENTRY; - rc = linkea_data_new(&ldata, - &lfsck_env_info(env)->lti_linkea_buf2); - if (rc != 0) - RETURN(rc); - cname = lfsck_name_get_const(env, name, strlen(name)); - rc = linkea_add_buf(&ldata, cname, lfsck_dto2fid(parent)); + rc = linkea_links_new(&ldata, &lfsck_env_info(env)->lti_linkea_buf2, + cname, lfsck_dto2fid(parent)); if (rc != 0) RETURN(rc); @@ -642,7 +864,7 @@ static int lfsck_create_lpf_remote(const struct lu_env *env, /* Transaction I: locally */ - dev = lfsck->li_bottom; + dev = lfsck_obj2dev(child); th = dt_trans_create(env, dev); if (IS_ERR(th)) RETURN(PTR_ERR(th)); @@ -652,12 +874,30 @@ static int lfsck_create_lpf_remote(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); + if (!dt_try_as_dir(env, child)) + GOTO(stop, rc = -ENOTDIR); + /* 2a. increase child nlink */ rc = dt_declare_ref_add(env, child, th); if (rc != 0) GOTO(stop, rc); - /* 3a. insert linkEA for child */ + /* 3a. insert dot into child dir */ + rec->rec_type = S_IFDIR; + rec->rec_fid = cfid; + rc = dt_declare_insert(env, child, (const struct dt_rec *)rec, + (const struct dt_key *)dot, th); + if (rc != 0) + GOTO(stop, rc); + + /* 4a. insert dotdot into child dir */ + rec->rec_fid = &LU_LPF_FID; + rc = dt_declare_insert(env, child, (const struct dt_rec *)rec, + (const struct dt_key *)dotdot, th); + if (rc != 0) + GOTO(stop, rc); + + /* 5a. insert linkEA for child */ lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf, ldata.ld_leh->leh_len); rc = dt_declare_xattr_set(env, child, &linkea_buf, @@ -665,7 +905,7 @@ static int lfsck_create_lpf_remote(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); - /* 4a. update bookmark */ + /* 6a. update bookmark */ rc = dt_declare_record_write(env, bk_obj, lfsck_buf_get(env, bk, len), 0, th); if (rc != 0) @@ -676,44 +916,41 @@ static int lfsck_create_lpf_remote(const struct lu_env *env, GOTO(stop, rc); dt_write_lock(env, child, 0); - /* 1b.1. create child */ + /* 1b. create child */ rc = dt_create(env, child, la, NULL, dof, th); if (rc != 0) GOTO(unlock, rc); - if (unlikely(!dt_try_as_dir(env, child))) - GOTO(unlock, rc = -ENOTDIR); + /* 2b. increase child nlink */ + rc = dt_ref_add(env, child, th); + if (rc != 0) + GOTO(unlock, rc); - /* 1b.2. insert dot into child dir */ + /* 3b. insert dot into child dir */ rec->rec_type = S_IFDIR; rec->rec_fid = cfid; rc = dt_insert(env, child, (const struct dt_rec *)rec, - (const struct dt_key *)dot, th, BYPASS_CAPA, 1); + (const struct dt_key *)dot, th); if (rc != 0) GOTO(unlock, rc); - /* 1b.3. insert dotdot into child dir */ + /* 4b. insert dotdot into child dir */ rec->rec_fid = &LU_LPF_FID; rc = dt_insert(env, child, (const struct dt_rec *)rec, - (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1); - if (rc != 0) - GOTO(unlock, rc); - - /* 2b. increase child nlink */ - rc = dt_ref_add(env, child, th); + (const struct dt_key *)dotdot, th); if (rc != 0) GOTO(unlock, rc); - /* 3b. insert linkEA for child */ + /* 5b. insert linkEA for child */ rc = dt_xattr_set(env, child, &linkea_buf, - XATTR_NAME_LINK, 0, th, BYPASS_CAPA); + XATTR_NAME_LINK, 0, th); if (rc != 0) GOTO(unlock, rc); bk->lb_lpf_fid = *cfid; lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk); - /* 4b. update bookmark */ + /* 6b. update bookmark */ rc = dt_record_write(env, bk_obj, lfsck_buf_get(env, bk, len), &pos, th); @@ -724,11 +961,12 @@ static int lfsck_create_lpf_remote(const struct lu_env *env, /* Transaction II: remotely */ - dev = lfsck->li_next; + dev = lfsck_obj2dev(parent); th = dt_trans_create(env, dev); if (IS_ERR(th)) RETURN(PTR_ERR(th)); + th->th_sync = 1; /* 5a. insert name into parent dir */ rec->rec_fid = cfid; rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec, @@ -741,13 +979,13 @@ static int lfsck_create_lpf_remote(const struct lu_env *env, if (rc != 0) GOTO(stop, rc); - rc = dt_trans_start(env, dev, th); + rc = dt_trans_start_local(env, dev, th); if (rc != 0) GOTO(stop, rc); /* 5b. insert name into parent dir */ rc = dt_insert(env, parent, (const struct dt_rec *)rec, - (const struct dt_key *)name, th, BYPASS_CAPA, 1); + (const struct dt_key *)name, th); if (rc != 0) GOTO(stop, rc); @@ -763,7 +1001,7 @@ unlock: stop: dt_trans_stop(env, dev, th); - if (rc != 0 && dev == lfsck->li_next) + if (rc != 0 && dev == lfsck_obj2dev(parent)) CDEBUG(D_LFSCK, "%s: partially created the object "DFID "for orphans, but failed to insert the name %s " "to the .lustre/lost+found/. Such inconsistency " @@ -773,58 +1011,45 @@ stop: return rc; } -/* Do NOT create .lustre/lost+found/MDTxxxx when register the lfsck instance, - * because the MDT0 maybe not reaady for sequence allocation yet. We do that - * only when it is required, such as orphan OST-objects repairing. */ -int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) +/** + * Create the MDTxxxx directory under /ROOT/.lustre/lost+found/ + * + * The /ROOT/.lustre/lost+found/MDTxxxx/ directory is used for holding + * orphans and other uncertain inconsistent objects found during the + * LFSCK. Such directory will be created by the LFSCK engine on the + * local MDT before the LFSCK scanning. + * + * \param[in] env pointer to the thread context + * \param[in] lfsck pointer to the lfsck instance + * + * \retval 0 for success + * \retval negative error number on failure + */ +static int lfsck_create_lpf(const struct lu_env *env, + struct lfsck_instance *lfsck) { struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_thread_info *info = lfsck_env_info(env); struct lu_fid *cfid = &info->lti_fid2; struct lu_attr *la = &info->lti_la; struct dt_object_format *dof = &info->lti_dof; - struct dt_object *parent = NULL; + struct dt_object *parent = lfsck->li_lpf_root_obj; struct dt_object *child = NULL; - struct lustre_handle lh = { 0 }; + struct lfsck_lock_handle *llh = &info->lti_llh; char name[8]; - int node = lfsck_dev_idx(lfsck->li_bottom); + int node = lfsck_dev_idx(lfsck); int rc = 0; ENTRY; LASSERT(lfsck->li_master); + LASSERT(parent != NULL); + LASSERT(lfsck->li_lpf_obj == NULL); - sprintf(name, "MDT%04x", node); - if (node == 0) { - parent = lfsck_object_find_by_dev(env, lfsck->li_bottom, - &LU_LPF_FID); - } else { - struct lfsck_tgt_desc *ltd; - - ltd = lfsck_tgt_get(&lfsck->li_mdt_descs, 0); - if (unlikely(ltd == NULL)) - RETURN(-ENXIO); - - parent = lfsck_object_find_by_dev(env, ltd->ltd_tgt, - &LU_LPF_FID); - lfsck_tgt_put(ltd); - } - if (IS_ERR(parent)) - RETURN(PTR_ERR(parent)); - - if (lfsck->li_lpf_obj != NULL) - GOTO(out, rc = 0); - - if (unlikely(!dt_try_as_dir(env, parent))) - GOTO(out, rc = -ENOTDIR); - - rc = lfsck_ibits_lock(env, lfsck, parent, &lh, - MDS_INODELOCK_UPDATE, LCK_EX); + snprintf(name, 8, "MDT%04x", node); + rc = lfsck_lock(env, lfsck, parent, name, llh, + MDS_INODELOCK_UPDATE, LCK_PW); if (rc != 0) - GOTO(out, rc); - - mutex_lock(&lfsck->li_mutex); - if (lfsck->li_lpf_obj != NULL) - GOTO(unlock, rc = 0); + RETURN(rc); if (fid_is_zero(&bk->lb_lpf_fid)) { /* There is corner case that: in former LFSCK scanning we have @@ -832,7 +1057,7 @@ int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) * the lfsck_bookmark::lb_lpf_fid successfully. So need lookup * it from MDT0 firstly. */ rc = dt_lookup(env, parent, (struct dt_rec *)cfid, - (const struct dt_key *)name, BYPASS_CAPA); + (const struct dt_key *)name); if (rc != 0 && rc != -ENOENT) GOTO(unlock, rc); @@ -848,7 +1073,7 @@ int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) *cfid = bk->lb_lpf_fid; } - child = lfsck_object_find_by_dev(env, lfsck->li_bottom, cfid); + child = lfsck_object_find_bottom_new(env, lfsck, cfid); if (IS_ERR(child)) GOTO(unlock, rc = PTR_ERR(child)); @@ -862,7 +1087,7 @@ int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) } memset(la, 0, sizeof(*la)); - la->la_atime = la->la_mtime = la->la_ctime = cfs_time_current_sec(); + la->la_atime = la->la_mtime = la->la_ctime = ktime_get_real_seconds(); la->la_mode = S_IFDIR | S_IRWXU; la->la_valid = LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE | LA_UID | LA_GID; @@ -870,24 +1095,18 @@ int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) dof->dof_type = dt_mode_to_dft(S_IFDIR); if (node == 0) - rc = lfsck_create_lpf_local(env, lfsck, parent, child, la, - dof, name); + rc = lfsck_create_lpf_local(env, lfsck, child, la, dof, name); else - rc = lfsck_create_lpf_remote(env, lfsck, parent, child, la, - dof, name); + rc = lfsck_create_lpf_remote(env, lfsck, child, la, dof, name); if (rc == 0) lfsck->li_lpf_obj = child; GOTO(unlock, rc); unlock: - mutex_unlock(&lfsck->li_mutex); - lfsck_ibits_unlock(&lh, LCK_EX); + lfsck_unlock(llh); if (rc != 0 && child != NULL && !IS_ERR(child)) - lu_object_put(env, &child->do_lu); -out: - if (parent != NULL && !IS_ERR(parent)) - lu_object_put(env, &parent->do_lu); + lfsck_object_put(env, child); return rc; } @@ -901,15 +1120,14 @@ out: * * \param[in] env pointer to the thread context * \param[in] lfsck pointer to the lfsck instance - * \param[in] parent pointer to the lost+found object * * \retval 0 for success * \retval negative error number on failure */ static int lfsck_scan_lpf_bad_entries(const struct lu_env *env, - struct lfsck_instance *lfsck, - struct dt_object *parent) + struct lfsck_instance *lfsck) { + struct dt_object *parent = lfsck->li_lpf_root_obj; struct lu_dirent *ent = (struct lu_dirent *)lfsck_env_info(env)->lti_key; const struct dt_it_ops *iops = &parent->do_index_ops->dio_it; @@ -917,7 +1135,7 @@ static int lfsck_scan_lpf_bad_entries(const struct lu_env *env, int rc; ENTRY; - it = iops->init(env, parent, LUDA_64BITHASH, BYPASS_CAPA); + it = iops->init(env, parent, LUDA_64BITHASH); if (IS_ERR(it)) RETURN(PTR_ERR(it)); @@ -935,13 +1153,8 @@ static int lfsck_scan_lpf_bad_entries(const struct lu_env *env, break; ent->lde_namelen = le16_to_cpu(ent->lde_namelen); - if (ent->lde_name[0] == '.') { - if (ent->lde_namelen == 1) - goto next; - - if (ent->lde_namelen == 2 && ent->lde_name[1] == '.') - goto next; - } + if (name_is_dot_or_dotdot(ent->lde_name, ent->lde_namelen)) + goto next; /* name length must be strlen("MDTxxxx") */ if (ent->lde_namelen != 7) @@ -956,8 +1169,8 @@ static int lfsck_scan_lpf_bad_entries(const struct lu_env *env, if (off != 7) { remove: - rc = lfsck_remove_name_entry(env, lfsck, parent, - ent->lde_name, S_IFDIR); + rc = lfsck_lpf_remove_name_entry(env, lfsck, + ent->lde_name); if (rc != 0) break; } @@ -1024,7 +1237,6 @@ static int lfsck_update_lpf_entry(const struct lu_env *env, * * \param[in] env pointer to the thread context * \param[in] lfsck pointer to the lfsck instance - * \param[in] parent pointer to the lost+found object * \param[in] child pointer to the lost+found sub-directory object * \param[in] name the name for lost+found sub-directory object * \param[out] fid pointer to the buffer to hold the FID of the object @@ -1039,11 +1251,11 @@ static int lfsck_update_lpf_entry(const struct lu_env *env, */ static int lfsck_verify_lpf_pairs(const struct lu_env *env, struct lfsck_instance *lfsck, - struct dt_object *parent, struct dt_object *child, const char *name, struct lu_fid *fid, enum lfsck_verify_lpf_types type) { + struct dt_object *parent = lfsck->li_lpf_root_obj; struct lfsck_thread_info *info = lfsck_env_info(env); char *name2 = info->lti_key; struct lu_fid *fid2 = &info->lti_fid3; @@ -1054,7 +1266,7 @@ static int lfsck_verify_lpf_pairs(const struct lu_env *env, fid_zero(fid); rc = dt_lookup(env, child, (struct dt_rec *)fid, - (const struct dt_key *)dotdot, BYPASS_CAPA); + (const struct dt_key *)dotdot); if (rc != 0) GOTO(linkea, rc); @@ -1070,8 +1282,7 @@ static int lfsck_verify_lpf_pairs(const struct lu_env *env, } cname = lfsck_name_get_const(env, name, strlen(name)); - rc = lfsck_verify_linkea(env, lfsck->li_bottom, child, cname, - &LU_LPF_FID); + rc = lfsck_verify_linkea(env, child, cname, &LU_LPF_FID); if (rc == 0) rc = lfsck_update_lpf_entry(env, lfsck, parent, child, name, type); @@ -1079,18 +1290,18 @@ static int lfsck_verify_lpf_pairs(const struct lu_env *env, GOTO(out_done, rc); } - parent2 = lfsck_object_find_by_dev(env, lfsck->li_next, fid); + parent2 = lfsck_object_find_bottom(env, lfsck, fid); if (IS_ERR(parent2)) GOTO(linkea, parent2); if (!dt_object_exists(parent2)) { - lu_object_put(env, &parent2->do_lu); + lfsck_object_put(env, parent2); GOTO(linkea, parent2 = ERR_PTR(-ENOENT)); } if (!dt_try_as_dir(env, parent2)) { - lu_object_put(env, &parent2->do_lu); + lfsck_object_put(env, parent2); GOTO(linkea, parent2 = ERR_PTR(-ENOTDIR)); } @@ -1141,7 +1352,7 @@ linkea: } rc = dt_lookup(env, parent2, (struct dt_rec *)fid, - (const struct dt_key *)name2, BYPASS_CAPA); + (const struct dt_key *)name2); dt_read_unlock(env, child); lfsck_ibits_unlock(&lh, LCK_PR); if (rc != 0 && rc != -ENOENT) @@ -1168,14 +1379,14 @@ linkea: lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(child)), rc); } else /* if (type == LVLT_BY_NAMEENTRY) */ { /* The name entry is wrong, remove it. */ - rc = lfsck_remove_name_entry(env, lfsck, parent, name, S_IFDIR); + rc = lfsck_lpf_remove_name_entry(env, lfsck, name); } GOTO(out_put, rc); out_put: if (parent2 != NULL && !IS_ERR(parent2)) - lu_object_put(env, &parent2->do_lu); + lfsck_object_put(env, parent2); out_done: return rc; @@ -1202,22 +1413,25 @@ int lfsck_verify_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) struct lu_fid *pfid = &info->lti_fid; struct lu_fid *cfid = &info->lti_fid2; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; - struct dt_object *parent = NULL; + struct dt_object *parent; /* child1's FID is in the bookmark file. */ struct dt_object *child1 = NULL; /* child2's FID is in the name entry MDTxxxx. */ struct dt_object *child2 = NULL; - struct dt_device *dev = lfsck->li_bottom; const struct lu_name *cname; char name[8]; - int node = lfsck_dev_idx(dev); + int node = lfsck_dev_idx(lfsck); int rc = 0; ENTRY; LASSERT(lfsck->li_master); + if (lfsck->li_lpf_root_obj != NULL) + RETURN(0); + if (node == 0) { - parent = lfsck_object_find_by_dev(env, dev, &LU_LPF_FID); + parent = lfsck_object_find_by_dev(env, lfsck->li_bottom, + &LU_LPF_FID); } else { struct lfsck_tgt_desc *ltd; @@ -1235,69 +1449,28 @@ int lfsck_verify_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) LASSERT(dt_object_exists(parent)); - if (unlikely(!dt_try_as_dir(env, parent))) + if (unlikely(!dt_try_as_dir(env, parent))) { + lfsck_object_put(env, parent); + GOTO(put, rc = -ENOTDIR); + } + lfsck->li_lpf_root_obj = parent; if (node == 0) { - rc = lfsck_scan_lpf_bad_entries(env, lfsck, parent); + rc = lfsck_scan_lpf_bad_entries(env, lfsck); if (rc != 0) CDEBUG(D_LFSCK, "%s: scan .lustre/lost+found/ " "for bad sub-directories: rc = %d\n", lfsck_lfsck2name(lfsck), rc); } - if (!fid_is_zero(&bk->lb_lpf_fid)) { - if (unlikely(!fid_is_norm(&bk->lb_lpf_fid))) { - struct lu_fid tfid = bk->lb_lpf_fid; - - /* Invalid FID record in the bookmark file, reset it. */ - fid_zero(&bk->lb_lpf_fid); - rc = lfsck_bookmark_store(env, lfsck); - - CDEBUG(D_LFSCK, "%s: reset invalid LPF fid "DFID - " in the bookmark file: rc = %d\n", - lfsck_lfsck2name(lfsck), PFID(&tfid), rc); - - if (rc != 0) - GOTO(put, rc); - } else { - child1 = lfsck_object_find_by_dev(env, dev, - &bk->lb_lpf_fid); - if (IS_ERR(child1)) - GOTO(put, rc = PTR_ERR(child1)); - - if (unlikely(!dt_object_exists(child1) || - dt_object_remote(child1)) || - !S_ISDIR(lfsck_object_type(child1))) { - /* Invalid FID record in the bookmark file, - * reset it. */ - fid_zero(&bk->lb_lpf_fid); - rc = lfsck_bookmark_store(env, lfsck); - - CDEBUG(D_LFSCK, "%s: reset invalid LPF fid "DFID - " in the bookmark file: rc = %d\n", - lfsck_lfsck2name(lfsck), - PFID(lfsck_dto2fid(child1)), rc); - - if (rc != 0) - GOTO(put, rc); - - lu_object_put(env, &child1->do_lu); - child1 = NULL; - } else if (unlikely(!dt_try_as_dir(env, child1))) { - GOTO(put, rc = -ENOTDIR); - } - } - } - + /* child2 */ snprintf(name, 8, "MDT%04x", node); rc = dt_lookup(env, parent, (struct dt_rec *)cfid, - (const struct dt_key *)name, BYPASS_CAPA); + (const struct dt_key *)name); if (rc == -ENOENT) { - if (!fid_is_zero(&bk->lb_lpf_fid)) - goto check_child1; - - GOTO(put, rc = 0); + rc = 0; + goto find_child1; } if (rc != 0) @@ -1305,71 +1478,126 @@ int lfsck_verify_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) /* Invalid FID in the name entry, remove the name entry. */ if (!fid_is_norm(cfid)) { - rc = lfsck_remove_name_entry(env, lfsck, parent, name, S_IFDIR); + rc = lfsck_lpf_remove_name_entry(env, lfsck, name); if (rc != 0) GOTO(put, rc); - goto check_child1; + goto find_child1; } - child2 = lfsck_object_find_by_dev(env, dev, cfid); + child2 = lfsck_object_find_bottom(env, lfsck, cfid); if (IS_ERR(child2)) GOTO(put, rc = PTR_ERR(child2)); if (unlikely(!dt_object_exists(child2) || dt_object_remote(child2)) || !S_ISDIR(lfsck_object_type(child2))) { - rc = lfsck_remove_name_entry(env, lfsck, parent, name, - S_IFDIR); + rc = lfsck_lpf_remove_name_entry(env, lfsck, name); if (rc != 0) GOTO(put, rc); - goto check_child1; + goto find_child1; } - if (unlikely(!dt_try_as_dir(env, child2))) - GOTO(put, rc = -ENOTDIR); + if (unlikely(!dt_try_as_dir(env, child2))) { + lfsck_object_put(env, child2); + child2 = NULL; + rc = -ENOTDIR; + } - if (child1 == NULL) { - rc = lfsck_verify_lpf_pairs(env, lfsck, parent, child2, name, - pfid, LVLT_BY_NAMEENTRY); - } else if (!lu_fid_eq(cfid, &bk->lb_lpf_fid)) { - rc = lfsck_verify_lpf_pairs(env, lfsck, parent, child1, name, - pfid, LVLT_BY_BOOKMARK); - if (!lu_fid_eq(pfid, &LU_LPF_FID)) - rc = lfsck_verify_lpf_pairs(env, lfsck, parent, child2, - name, pfid, - LVLT_BY_NAMEENTRY); - } else { +find_child1: + if (fid_is_zero(&bk->lb_lpf_fid)) + goto check_child2; + + if (likely(lu_fid_eq(cfid, &bk->lb_lpf_fid))) { if (lfsck->li_lpf_obj == NULL) { lu_object_get(&child2->do_lu); lfsck->li_lpf_obj = child2; } cname = lfsck_name_get_const(env, name, strlen(name)); - rc = lfsck_verify_linkea(env, dev, child2, cname, &LU_LPF_FID); + rc = lfsck_verify_linkea(env, child2, cname, &LU_LPF_FID); + + GOTO(put, rc); } - GOTO(put, rc); + if (unlikely(!fid_is_norm(&bk->lb_lpf_fid))) { + struct lu_fid tfid = bk->lb_lpf_fid; + + /* Invalid FID record in the bookmark file, reset it. */ + fid_zero(&bk->lb_lpf_fid); + rc = lfsck_bookmark_store(env, lfsck); + + CDEBUG(D_LFSCK, "%s: reset invalid LPF fid "DFID + " in the bookmark file: rc = %d\n", + lfsck_lfsck2name(lfsck), PFID(&tfid), rc); + + if (rc != 0) + GOTO(put, rc); -check_child1: - if (child1 != NULL) - rc = lfsck_verify_lpf_pairs(env, lfsck, parent, child1, name, - pfid, LVLT_BY_BOOKMARK); + goto check_child2; + } + + child1 = lfsck_object_find_bottom(env, lfsck, &bk->lb_lpf_fid); + if (IS_ERR(child1)) { + child1 = NULL; + goto check_child2; + } + + if (unlikely(!dt_object_exists(child1) || + dt_object_remote(child1)) || + !S_ISDIR(lfsck_object_type(child1))) { + /* Invalid FID record in the bookmark file, reset it. */ + fid_zero(&bk->lb_lpf_fid); + rc = lfsck_bookmark_store(env, lfsck); + + CDEBUG(D_LFSCK, "%s: reset invalid LPF fid "DFID + " in the bookmark file: rc = %d\n", + lfsck_lfsck2name(lfsck), + PFID(lfsck_dto2fid(child1)), rc); + + if (rc != 0) + GOTO(put, rc); + + lfsck_object_put(env, child1); + child1 = NULL; + goto check_child2; + } + + if (unlikely(!dt_try_as_dir(env, child1))) { + lfsck_object_put(env, child1); + child1 = NULL; + rc = -ENOTDIR; + goto check_child2; + } + + rc = lfsck_verify_lpf_pairs(env, lfsck, child1, name, pfid, + LVLT_BY_BOOKMARK); + if (lu_fid_eq(pfid, &LU_LPF_FID)) + GOTO(put, rc); + +check_child2: + if (child2 != NULL) + rc = lfsck_verify_lpf_pairs(env, lfsck, child2, name, + pfid, LVLT_BY_NAMEENTRY); GOTO(put, rc); put: - if (lfsck->li_lpf_obj != NULL && - unlikely(!dt_try_as_dir(env, lfsck->li_lpf_obj))) - rc = -ENOTDIR; + if (lfsck->li_lpf_obj != NULL) { + if (unlikely(!dt_try_as_dir(env, lfsck->li_lpf_obj))) { + lfsck_object_put(env, lfsck->li_lpf_obj); + lfsck->li_lpf_obj = NULL; + rc = -ENOTDIR; + } + } else if (rc == 0) { + rc = lfsck_create_lpf(env, lfsck); + } if (child2 != NULL && !IS_ERR(child2)) - lu_object_put(env, &child2->do_lu); + lfsck_object_put(env, child2); if (child1 != NULL && !IS_ERR(child1)) - lu_object_put(env, &child1->do_lu); - if (parent != NULL && !IS_ERR(parent)) - lu_object_put(env, &parent->do_lu); + lfsck_object_put(env, child1); return rc; } @@ -1377,12 +1605,11 @@ put: static int lfsck_fid_init(struct lfsck_instance *lfsck) { struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; - struct seq_server_site *ss; + struct seq_server_site *ss = lfsck_dev_site(lfsck); char *prefix; int rc = 0; ENTRY; - ss = lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site); if (unlikely(ss == NULL)) RETURN(-ENXIO); @@ -1428,17 +1655,31 @@ void lfsck_instance_cleanup(const struct lu_env *env, struct ptlrpc_thread *thread = &lfsck->li_thread; struct lfsck_component *com; struct lfsck_component *next; + struct lfsck_lmv_unit *llu; + struct lfsck_lmv_unit *llu_next; + struct lfsck_lmv *llmv; ENTRY; LASSERT(list_empty(&lfsck->li_link)); LASSERT(thread_is_init(thread) || thread_is_stopped(thread)); if (lfsck->li_obj_oit != NULL) { - lu_object_put_nocache(env, &lfsck->li_obj_oit->do_lu); + lfsck_object_put(env, lfsck->li_obj_oit); lfsck->li_obj_oit = NULL; } LASSERT(lfsck->li_obj_dir == NULL); + LASSERT(lfsck->li_lmv == NULL); + + list_for_each_entry_safe(llu, llu_next, &lfsck->li_list_lmv, llu_link) { + llmv = &llu->llu_lmv; + + LASSERTF(atomic_read(&llmv->ll_ref) == 1, + "still in using: %u\n", + atomic_read(&llmv->ll_ref)); + + lfsck_lmv_put(env, llmv); + } list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) { lfsck_component_cleanup(env, com); @@ -1458,16 +1699,26 @@ void lfsck_instance_cleanup(const struct lu_env *env, lfsck_tgt_descs_fini(&lfsck->li_ost_descs); lfsck_tgt_descs_fini(&lfsck->li_mdt_descs); + if (lfsck->li_lfsck_dir != NULL) { + lfsck_object_put(env, lfsck->li_lfsck_dir); + lfsck->li_lfsck_dir = NULL; + } + if (lfsck->li_bookmark_obj != NULL) { - lu_object_put_nocache(env, &lfsck->li_bookmark_obj->do_lu); + lfsck_object_put(env, lfsck->li_bookmark_obj); lfsck->li_bookmark_obj = NULL; } if (lfsck->li_lpf_obj != NULL) { - lu_object_put(env, &lfsck->li_lpf_obj->do_lu); + lfsck_object_put(env, lfsck->li_lpf_obj); lfsck->li_lpf_obj = NULL; } + if (lfsck->li_lpf_root_obj != NULL) { + lfsck_object_put(env, lfsck->li_lpf_root_obj); + lfsck->li_lpf_root_obj = NULL; + } + if (lfsck->li_los != NULL) { local_oid_storage_fini(env, lfsck->li_los); lfsck->li_los = NULL; @@ -1526,8 +1777,8 @@ static inline int lfsck_instance_add(struct lfsck_instance *lfsck) return 0; } -int lfsck_bits_dump(struct seq_file *m, int bits, const char *names[], - const char *prefix) +void lfsck_bits_dump(struct seq_file *m, int bits, const char *names[], + const char *prefix) { int flag; int i; @@ -1549,36 +1800,36 @@ int lfsck_bits_dump(struct seq_file *m, int bits, const char *names[], } if (!newline) - seq_printf(m, "\n"); - return 0; + seq_putc(m, '\n'); } -int lfsck_time_dump(struct seq_file *m, __u64 time, const char *prefix) +void lfsck_time_dump(struct seq_file *m, time64_t time, const char *name) { - if (time != 0) - seq_printf(m, "%s: "LPU64" seconds\n", prefix, - cfs_time_current_sec() - time); - else - seq_printf(m, "%s: N/A\n", prefix); - return 0; + if (time == 0) { + seq_printf(m, "%s_time: N/A\n", name); + seq_printf(m, "time_since_%s: N/A\n", name); + } else { + seq_printf(m, "%s_time: %lld\n", name, time); + seq_printf(m, "time_since_%s: %lld seconds\n", + name, ktime_get_real_seconds() - time); + } } -int lfsck_pos_dump(struct seq_file *m, struct lfsck_position *pos, - const char *prefix) +void lfsck_pos_dump(struct seq_file *m, struct lfsck_position *pos, + const char *prefix) { if (fid_is_zero(&pos->lp_dir_parent)) { - if (pos->lp_oit_cookie == 0) - seq_printf(m, "%s: N/A, N/A, N/A\n", - prefix); - else - seq_printf(m, "%s: "LPU64", N/A, N/A\n", - prefix, pos->lp_oit_cookie); + if (pos->lp_oit_cookie == 0) { + seq_printf(m, "%s: N/A, N/A, N/A\n", prefix); + return; + } + seq_printf(m, "%s: %llu, N/A, N/A\n", + prefix, pos->lp_oit_cookie); } else { - seq_printf(m, "%s: "LPU64", "DFID", "LPX64"\n", + seq_printf(m, "%s: %llu, "DFID", %#llx\n", prefix, pos->lp_oit_cookie, PFID(&pos->lp_dir_parent), pos->lp_dir_cookie); } - return 0; } void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck, @@ -1595,7 +1846,8 @@ void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck, if (!lfsck->li_current_oit_processed && !init) pos->lp_oit_cookie--; - LASSERT(pos->lp_oit_cookie > 0); + if (unlikely(pos->lp_oit_cookie == 0)) + pos->lp_oit_cookie = 1; if (lfsck->li_di_dir != NULL) { struct dt_object *dto = lfsck->li_obj_dir; @@ -1620,12 +1872,12 @@ bool __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit) bool dirty = false; if (limit != LFSCK_SPEED_NO_LIMIT) { - if (limit > HZ) { - lfsck->li_sleep_rate = limit / HZ; + if (limit > cfs_time_seconds(1)) { + lfsck->li_sleep_rate = limit / cfs_time_seconds(1); lfsck->li_sleep_jif = 1; } else { lfsck->li_sleep_rate = 1; - lfsck->li_sleep_jif = HZ / limit; + lfsck->li_sleep_jif = cfs_time_seconds(1) / limit; } } else { lfsck->li_sleep_jif = 0; @@ -1643,16 +1895,12 @@ bool __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit) void lfsck_control_speed(struct lfsck_instance *lfsck) { struct ptlrpc_thread *thread = &lfsck->li_thread; - struct l_wait_info lwi; if (lfsck->li_sleep_jif > 0 && lfsck->li_new_scanned >= lfsck->li_sleep_rate) { - lwi = LWI_TIMEOUT_INTR(lfsck->li_sleep_jif, NULL, - LWI_ON_SIGNAL_NOOP, NULL); - - l_wait_event(thread->t_ctl_waitq, - !thread_is_running(thread), - &lwi); + wait_event_idle_timeout(thread->t_ctl_waitq, + !thread_is_running(thread), + lfsck->li_sleep_jif); lfsck->li_new_scanned = 0; } } @@ -1661,16 +1909,12 @@ void lfsck_control_speed_by_self(struct lfsck_component *com) { struct lfsck_instance *lfsck = com->lc_lfsck; struct ptlrpc_thread *thread = &lfsck->li_thread; - struct l_wait_info lwi; if (lfsck->li_sleep_jif > 0 && com->lc_new_scanned >= lfsck->li_sleep_rate) { - lwi = LWI_TIMEOUT_INTR(lfsck->li_sleep_jif, NULL, - LWI_ON_SIGNAL_NOOP, NULL); - - l_wait_event(thread->t_ctl_waitq, - !thread_is_running(thread), - &lwi); + wait_event_idle_timeout(thread->t_ctl_waitq, + !thread_is_running(thread), + lfsck->li_sleep_jif); com->lc_new_scanned = 0; } } @@ -1741,6 +1985,56 @@ lfsck_assistant_data_init(struct lfsck_assistant_operations *lao, return lad; } +struct lfsck_assistant_object * +lfsck_assistant_object_init(const struct lu_env *env, const struct lu_fid *fid, + const struct lu_attr *attr, __u64 cookie, + bool is_dir) +{ + struct lfsck_assistant_object *lso; + + OBD_ALLOC_PTR(lso); + if (lso == NULL) + return ERR_PTR(-ENOMEM); + + lso->lso_fid = *fid; + if (attr != NULL) + lso->lso_attr = *attr; + + atomic_set(&lso->lso_ref, 1); + lso->lso_oit_cookie = cookie; + if (is_dir) + lso->lso_is_dir = 1; + + return lso; +} + +struct dt_object * +lfsck_assistant_object_load(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct lfsck_assistant_object *lso) +{ + struct dt_object *obj; + + obj = lfsck_object_find_bottom(env, lfsck, &lso->lso_fid); + if (IS_ERR(obj)) + return obj; + + if (unlikely(!dt_object_exists(obj) || lfsck_is_dead_obj(obj))) { + lso->lso_dead = 1; + lfsck_object_put(env, obj); + + return ERR_PTR(-ENOENT); + } + + if (lso->lso_is_dir && unlikely(!dt_try_as_dir(env, obj))) { + lfsck_object_put(env, obj); + + return ERR_PTR(-ENOTDIR); + } + + return obj; +} + /** * Generic LFSCK asynchronous communication interpretor function. * The LFSCK RPC reply for both the event notification and status @@ -1769,6 +2063,11 @@ int lfsck_async_interpret_common(const struct lu_env *env, switch (lr->lr_event) { case LE_START: + if (unlikely(rc == -EINPROGRESS)) { + ltd->ltd_retry_start = 1; + break; + } + if (rc != 0) { CDEBUG(D_LFSCK, "%s: fail to notify %s %x for %s " "start: rc = %d\n", @@ -1868,6 +2167,11 @@ int lfsck_async_interpret_common(const struct lu_env *env, } if (rc != 0) { + if (lr->lr_flags & LEF_QUERY_ALL) { + lfsck_reset_ltd_status(ltd, com->lc_type); + break; + } + spin_lock(<ds->ltd_lock); list_del_init(phase_list); list_del_init(list); @@ -1882,6 +2186,12 @@ int lfsck_async_interpret_common(const struct lu_env *env, CDEBUG(D_LFSCK, "%s: invalid query reply for %s: " "rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, rc); + + if (lr->lr_flags & LEF_QUERY_ALL) { + lfsck_reset_ltd_status(ltd, com->lc_type); + break; + } + spin_lock(<ds->ltd_lock); list_del_init(phase_list); list_del_init(list); @@ -1889,6 +2199,18 @@ int lfsck_async_interpret_common(const struct lu_env *env, break; } + if (lr->lr_flags & LEF_QUERY_ALL) { + if (com->lc_type == LFSCK_TYPE_LAYOUT) { + ltd->ltd_layout_status = reply->lr_status; + ltd->ltd_layout_repaired = reply->lr_repaired; + } else { + ltd->ltd_namespace_status = reply->lr_status; + ltd->ltd_namespace_repaired = + reply->lr_repaired; + } + break; + } + switch (reply->lr_status) { case LS_SCANNING_PHASE1: break; @@ -2027,19 +2349,19 @@ static int lfsck_stop_notify(const struct lu_env *env, spin_unlock(<ds->ltd_lock); memset(lr, 0, sizeof(*lr)); - lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_index = lfsck_dev_idx(lfsck); lr->lr_event = LE_PEER_EXIT; lr->lr_active = type; lr->lr_status = LS_CO_PAUSED; if (ltds == &lfsck->li_ost_descs) lr->lr_flags = LEF_TO_OST; + memset(laia, 0, sizeof(*laia)); laia->laia_com = com; laia->laia_ltds = ltds; atomic_inc(<d->ltd_ref); laia->laia_ltd = ltd; laia->laia_lr = lr; - laia->laia_shared = 0; rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, lfsck_async_interpret_common, @@ -2052,7 +2374,7 @@ static int lfsck_stop_notify(const struct lu_env *env, ltd->ltd_index, lad->lad_name, rc); lfsck_tgt_put(ltd); } else { - rc = ptlrpc_set_wait(set); + rc = ptlrpc_set_wait(env, set); } ptlrpc_set_destroy(set); @@ -2119,16 +2441,82 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp, *tmp = *lr; ptlrpc_request_set_replen(req); - laia = ptlrpc_req_async_args(req); + laia = ptlrpc_req_async_args(laia, req); *laia = *(struct lfsck_async_interpret_args *)args; if (laia->laia_com != NULL) lfsck_component_get(laia->laia_com); req->rq_interpret_reply = interpreter; + req->rq_allow_intr = 1; + req->rq_no_delay = 1; ptlrpc_set_add_req(set, req); return 0; } +int lfsck_query_all(const struct lu_env *env, struct lfsck_component *com) +{ + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lfsck_request *lr = &info->lti_lr; + struct lfsck_async_interpret_args *laia = &info->lti_laia; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_tgt_descs *ltds = &lfsck->li_mdt_descs; + struct lfsck_tgt_desc *ltd; + struct ptlrpc_request_set *set; + int idx; + int rc; + ENTRY; + + memset(lr, 0, sizeof(*lr)); + lr->lr_event = LE_QUERY; + lr->lr_active = com->lc_type; + lr->lr_flags = LEF_QUERY_ALL; + + memset(laia, 0, sizeof(*laia)); + laia->laia_com = com; + laia->laia_lr = lr; + + set = ptlrpc_prep_set(); + if (set == NULL) + RETURN(-ENOMEM); + +again: + laia->laia_ltds = ltds; + down_read(<ds->ltd_rw_sem); + cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { + ltd = lfsck_tgt_get(ltds, idx); + LASSERT(ltd != NULL); + + laia->laia_ltd = ltd; + up_read(<ds->ltd_rw_sem); + rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, + lfsck_async_interpret_common, + laia, LFSCK_QUERY); + if (rc != 0) { + struct lfsck_assistant_data *lad = com->lc_data; + + CDEBUG(D_LFSCK, "%s: Fail to query %s %x for stat %s: " + "rc = %d\n", lfsck_lfsck2name(lfsck), + (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT", + ltd->ltd_index, lad->lad_name, rc); + lfsck_reset_ltd_status(ltd, com->lc_type); + lfsck_tgt_put(ltd); + } + down_read(<ds->ltd_rw_sem); + } + up_read(<ds->ltd_rw_sem); + + if (com->lc_type == LFSCK_TYPE_LAYOUT && !(lr->lr_flags & LEF_TO_OST)) { + ltds = &lfsck->li_ost_descs; + lr->lr_flags |= LEF_TO_OST; + goto again; + } + + rc = ptlrpc_set_wait(env, set); + ptlrpc_set_destroy(set); + + RETURN(rc); +} + int lfsck_start_assistant(const struct lu_env *env, struct lfsck_component *com, struct lfsck_start_param *lsp) { @@ -2143,10 +2531,8 @@ int lfsck_start_assistant(const struct lu_env *env, struct lfsck_component *com, lad->lad_assistant_status = 0; lad->lad_post_result = 0; - lad->lad_to_post = 0; - lad->lad_to_double_scan = 0; - lad->lad_in_double_scan = 0; - lad->lad_exit = 0; + lad->lad_flags = 0; + lad->lad_advance_lock = false; thread_set_flags(athread, 0); lta = lfsck_thread_args_init(lfsck, com, lsp); @@ -2160,13 +2546,14 @@ int lfsck_start_assistant(const struct lu_env *env, struct lfsck_component *com, "rc = %d\n", lfsck_lfsck2name(lfsck), lad->lad_name, rc); lfsck_thread_args_fini(lta); } else { - struct l_wait_info lwi = { 0 }; - - l_wait_event(mthread->t_ctl_waitq, - thread_is_running(athread) || - thread_is_stopped(athread), - &lwi); - if (unlikely(!thread_is_running(athread))) + wait_event_idle(mthread->t_ctl_waitq, + thread_is_running(athread) || + thread_is_stopped(athread) || + !thread_is_starting(mthread)); + if (unlikely(!thread_is_starting(mthread))) + /* stopped by race */ + rc = -ESRCH; + else if (unlikely(!thread_is_running(athread))) rc = lad->lad_assistant_status; else rc = 0; @@ -2181,16 +2568,11 @@ int lfsck_checkpoint_generic(const struct lu_env *env, struct lfsck_assistant_data *lad = com->lc_data; struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; struct ptlrpc_thread *athread = &lad->lad_thread; - struct l_wait_info lwi = { 0 }; - - if (com->lc_new_checked == 0) - return LFSCK_CHECKPOINT_SKIP; - l_wait_event(mthread->t_ctl_waitq, - list_empty(&lad->lad_req_list) || - !thread_is_running(mthread) || - thread_is_stopped(athread), - &lwi); + wait_event_idle(mthread->t_ctl_waitq, + list_empty(&lad->lad_req_list) || + !thread_is_running(mthread) || + thread_is_stopped(athread)); if (!thread_is_running(mthread) || thread_is_stopped(athread)) return LFSCK_CHECKPOINT_SKIP; @@ -2204,21 +2586,25 @@ void lfsck_post_generic(const struct lu_env *env, struct lfsck_assistant_data *lad = com->lc_data; struct ptlrpc_thread *athread = &lad->lad_thread; struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; - struct l_wait_info lwi = { 0 }; lad->lad_post_result = *result; if (*result <= 0) - lad->lad_exit = 1; - lad->lad_to_post = 1; + set_bit(LAD_EXIT, &lad->lad_flags); + set_bit(LAD_TO_POST, &lad->lad_flags); + + CDEBUG(D_LFSCK, "%s: waiting for assistant to do %s post, rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, *result); wake_up_all(&athread->t_ctl_waitq); - l_wait_event(mthread->t_ctl_waitq, - (*result > 0 && list_empty(&lad->lad_req_list)) || - thread_is_stopped(athread), - &lwi); + wait_event_idle(mthread->t_ctl_waitq, + (*result > 0 && list_empty(&lad->lad_req_list)) || + thread_is_stopped(athread)); if (lad->lad_assistant_status < 0) *result = lad->lad_assistant_status; + + CDEBUG(D_LFSCK, "%s: the assistant has done %s post, rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, *result); } int lfsck_double_scan_generic(const struct lu_env *env, @@ -2227,18 +2613,24 @@ int lfsck_double_scan_generic(const struct lu_env *env, struct lfsck_assistant_data *lad = com->lc_data; struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; struct ptlrpc_thread *athread = &lad->lad_thread; - struct l_wait_info lwi = { 0 }; if (status != LS_SCANNING_PHASE2) - lad->lad_exit = 1; + set_bit(LAD_EXIT, &lad->lad_flags); else - lad->lad_to_double_scan = 1; + set_bit(LAD_TO_DOUBLE_SCAN, &lad->lad_flags); + + CDEBUG(D_LFSCK, "%s: waiting for assistant to do %s double_scan, " + "status %d\n", + lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, status); wake_up_all(&athread->t_ctl_waitq); - l_wait_event(mthread->t_ctl_waitq, - lad->lad_in_double_scan || - thread_is_stopped(athread), - &lwi); + wait_event_idle(mthread->t_ctl_waitq, + test_bit(LAD_IN_DOUBLE_SCAN, &lad->lad_flags) || + thread_is_stopped(athread)); + + CDEBUG(D_LFSCK, "%s: the assistant has done %s double_scan, " + "status %d\n", lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, + lad->lad_assistant_status); if (lad->lad_assistant_status < 0) return lad->lad_assistant_status; @@ -2252,19 +2644,115 @@ void lfsck_quit_generic(const struct lu_env *env, struct lfsck_assistant_data *lad = com->lc_data; struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; struct ptlrpc_thread *athread = &lad->lad_thread; - struct l_wait_info lwi = { 0 }; - lad->lad_exit = 1; + set_bit(LAD_EXIT, &lad->lad_flags); wake_up_all(&athread->t_ctl_waitq); - l_wait_event(mthread->t_ctl_waitq, - thread_is_init(athread) || - thread_is_stopped(athread), - &lwi); + wait_event_idle(mthread->t_ctl_waitq, + thread_is_init(athread) || + thread_is_stopped(athread)); } -/* external interfaces */ +int lfsck_load_one_trace_file(const struct lu_env *env, + struct lfsck_component *com, + struct dt_object *parent, + struct dt_object **child, + const struct dt_index_features *ft, + const char *name, bool reset) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct dt_object *obj; + int rc; + ENTRY; + + if (*child != NULL) { + struct dt_it *it; + const struct dt_it_ops *iops; + struct lu_fid *fid = &lfsck_env_info(env)->lti_fid3; -int lfsck_get_speed(struct seq_file *m, struct dt_device *key) + if (!reset) + RETURN(0); + + obj = *child; + rc = obj->do_ops->do_index_try(env, obj, ft); + if (rc) + /* unlink by force */ + goto unlink; + + iops = &obj->do_index_ops->dio_it; + it = iops->init(env, obj, 0); + if (IS_ERR(it)) + /* unlink by force */ + goto unlink; + + fid_zero(fid); + rc = iops->get(env, it, (const struct dt_key *)fid); + if (rc >= 0) { + rc = iops->next(env, it); + iops->put(env, it); + } + iops->fini(env, it); + if (rc > 0) + /* "rc > 0" means the index file is empty. */ + RETURN(0); + +unlink: + /* The old index is not empty, remove it firstly. */ + rc = local_object_unlink(env, lfsck->li_bottom, parent, name); + CDEBUG_LIMIT(rc ? D_ERROR : D_LFSCK, + "%s: unlink lfsck sub trace file %s: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), name, rc); + if (rc) + RETURN(rc); + + if (*child) { + lfsck_object_put(env, *child); + *child = NULL; + } + } else if (reset) { + goto unlink; + } + + obj = local_index_find_or_create(env, lfsck->li_los, parent, name, + S_IFREG | S_IRUGO | S_IWUSR, ft); + if (IS_ERR(obj)) + RETURN(PTR_ERR(obj)); + + rc = obj->do_ops->do_index_try(env, obj, ft); + if (rc) { + lfsck_object_put(env, obj); + CDEBUG(D_LFSCK, "%s: LFSCK fail to load " + "sub trace file %s: rc = %d\n", + lfsck_lfsck2name(com->lc_lfsck), name, rc); + } else { + *child = obj; + } + + RETURN(rc); +} + +int lfsck_load_sub_trace_files(const struct lu_env *env, + struct lfsck_component *com, + const struct dt_index_features *ft, + const char *prefix, bool reset) +{ + char *name = lfsck_env_info(env)->lti_key; + struct lfsck_sub_trace_obj *lsto; + int rc; + int i; + + for (i = 0, rc = 0, lsto = &com->lc_sub_trace_objs[0]; + i < LFSCK_STF_COUNT && rc == 0; i++, lsto++) { + snprintf(name, NAME_MAX, "%s_%02d", prefix, i); + rc = lfsck_load_one_trace_file(env, com, + com->lc_lfsck->li_lfsck_dir, + &lsto->lsto_obj, ft, name, reset); + } + + return rc; +} + +/* external interfaces */ +int lfsck_get_speed(char *buf, struct dt_device *key) { struct lu_env env; struct lfsck_instance *lfsck; @@ -2276,8 +2764,9 @@ int lfsck_get_speed(struct seq_file *m, struct dt_device *key) RETURN(rc); lfsck = lfsck_instance_find(key, true, false); - if (likely(lfsck != NULL)) { - seq_printf(m, "%u\n", lfsck->li_bookmark_ram.lb_speed_limit); + if (lfsck && buf) { + rc = sprintf(buf, "%u\n", + lfsck->li_bookmark_ram.lb_speed_limit); lfsck_instance_put(&env, lfsck); } else { rc = -ENXIO; @@ -2289,7 +2778,7 @@ int lfsck_get_speed(struct seq_file *m, struct dt_device *key) } EXPORT_SYMBOL(lfsck_get_speed); -int lfsck_set_speed(struct dt_device *key, int val) +int lfsck_set_speed(struct dt_device *key, __u32 val) { struct lu_env env; struct lfsck_instance *lfsck; @@ -2317,7 +2806,7 @@ int lfsck_set_speed(struct dt_device *key, int val) } EXPORT_SYMBOL(lfsck_set_speed); -int lfsck_get_windows(struct seq_file *m, struct dt_device *key) +int lfsck_get_windows(char *buf, struct dt_device *key) { struct lu_env env; struct lfsck_instance *lfsck; @@ -2330,7 +2819,8 @@ int lfsck_get_windows(struct seq_file *m, struct dt_device *key) lfsck = lfsck_instance_find(key, true, false); if (likely(lfsck != NULL)) { - seq_printf(m, "%u\n", lfsck->li_bookmark_ram.lb_async_windows); + rc = sprintf(buf, "%u\n", + lfsck->li_bookmark_ram.lb_async_windows); lfsck_instance_put(&env, lfsck); } else { rc = -ENXIO; @@ -2342,7 +2832,7 @@ int lfsck_get_windows(struct seq_file *m, struct dt_device *key) } EXPORT_SYMBOL(lfsck_get_windows); -int lfsck_set_windows(struct dt_device *key, int val) +int lfsck_set_windows(struct dt_device *key, unsigned int val) { struct lu_env env; struct lfsck_instance *lfsck; @@ -2355,12 +2845,10 @@ int lfsck_set_windows(struct dt_device *key, int val) lfsck = lfsck_instance_find(key, true, false); if (likely(lfsck != NULL)) { - if (val > LFSCK_ASYNC_WIN_MAX) { - CWARN("%s: Too large async window size, which " - "may cause memory issues. The valid range " - "is [0 - %u]. If you do not want to restrict " - "the window size for async requests pipeline, " - "just set it as 0.\n", + if (val < 1 || val > LFSCK_ASYNC_WIN_MAX) { + CWARN("%s: invalid async windows size that may " + "cause memory issues. The valid range is " + "[1 - %u].\n", lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX); rc = -EINVAL; } else if (lfsck->li_bookmark_ram.lb_async_windows != val) { @@ -2396,7 +2884,7 @@ int lfsck_dump(struct seq_file *m, struct dt_device *key, enum lfsck_type type) if (likely(lfsck != NULL)) { com = lfsck_component_find(lfsck, type); if (likely(com != NULL)) { - rc = com->lc_ops->lfsck_dump(&env, com, m); + com->lc_ops->lfsck_dump(&env, com, m); lfsck_component_put(&env, com); } else { rc = -ENOTSUPP; @@ -2437,16 +2925,15 @@ static int lfsck_stop_all(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_STOP; - lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_index = lfsck_dev_idx(lfsck); lr->lr_status = stop->ls_status; lr->lr_version = bk->lb_version; lr->lr_active = LFSCK_TYPES_ALL; lr->lr_param = stop->ls_flags; - laia->laia_com = NULL; + memset(laia, 0, sizeof(*laia)); laia->laia_ltds = ltds; laia->laia_lr = lr; - laia->laia_result = 0; laia->laia_shared = 1; down_read(<ds->ltd_rw_sem); @@ -2468,7 +2955,7 @@ static int lfsck_stop_all(const struct lu_env *env, } up_read(<ds->ltd_rw_sem); - rc = ptlrpc_set_wait(set); + rc = ptlrpc_set_wait(env, set); ptlrpc_set_destroy(set); if (rc == 0) @@ -2497,39 +2984,48 @@ static int lfsck_start_all(const struct lu_env *env, struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; __u32 idx; int rc = 0; + bool retry = false; ENTRY; LASSERT(start->ls_flags & LPF_BROADCAST); - set = ptlrpc_prep_set(); - if (unlikely(set == NULL)) - RETURN(-ENOMEM); - memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_START; - lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_index = lfsck_dev_idx(lfsck); lr->lr_speed = bk->lb_speed_limit; lr->lr_version = bk->lb_version; lr->lr_active = start->ls_active; lr->lr_param = start->ls_flags; lr->lr_async_windows = bk->lb_async_windows; lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN | - LSV_ASYNC_WINDOWS; + LSV_ASYNC_WINDOWS | LSV_CREATE_OSTOBJ | + LSV_CREATE_MDTOBJ; - laia->laia_com = NULL; + memset(laia, 0, sizeof(*laia)); laia->laia_ltds = ltds; laia->laia_lr = lr; - laia->laia_result = 0; laia->laia_shared = 1; +again: + set = ptlrpc_prep_set(); + if (unlikely(!set)) + RETURN(-ENOMEM); + down_read(<ds->ltd_rw_sem); cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { ltd = lfsck_tgt_get(ltds, idx); LASSERT(ltd != NULL); + if (retry && !ltd->ltd_retry_start) { + lfsck_tgt_put(ltd); + continue; + } + laia->laia_ltd = ltd; + ltd->ltd_retry_start = 0; ltd->ltd_layout_done = 0; ltd->ltd_namespace_done = 0; + ltd->ltd_synced_failures = 0; rc = lfsck_async_request(env, ltd->ltd_exp, lr, set, lfsck_async_interpret, laia, LFSCK_NOTIFY); @@ -2550,12 +3046,23 @@ static int lfsck_start_all(const struct lu_env *env, RETURN(rc); } - rc = ptlrpc_set_wait(set); + rc = ptlrpc_set_wait(env, set); ptlrpc_set_destroy(set); if (rc == 0) rc = laia->laia_result; + if (unlikely(rc == -EINPROGRESS)) { + retry = true; + schedule_timeout_interruptible(cfs_time_seconds(1)); + set_current_state(TASK_RUNNING); + if (!signal_pending(current) && + thread_is_running(&lfsck->li_thread)) + goto again; + + rc = -EINTR; + } + if (rc != 0) { struct lfsck_stop *stop = &info->lti_stop; @@ -2580,33 +3087,54 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, struct lfsck_bookmark *bk; struct ptlrpc_thread *thread; struct lfsck_component *com; - struct l_wait_info lwi = { 0 }; struct lfsck_thread_args *lta; struct task_struct *task; + struct lfsck_tgt_descs *ltds; + struct lfsck_tgt_desc *ltd; + __u32 idx; int rc = 0; __u16 valid = 0; __u16 flags = 0; __u16 type = 1; ENTRY; + if (key->dd_rdonly) + RETURN(-EROFS); + lfsck = lfsck_instance_find(key, true, false); if (unlikely(lfsck == NULL)) RETURN(-ENXIO); + if (unlikely(lfsck->li_stopping)) + GOTO(put, rc = -ENXIO); + /* System is not ready, try again later. */ - if (unlikely(lfsck->li_namespace == NULL)) - GOTO(put, rc = -EAGAIN); + if (unlikely(lfsck->li_namespace == NULL || + lfsck_dev_site(lfsck)->ss_server_fld == NULL)) + GOTO(put, rc = -EINPROGRESS); /* start == NULL means auto trigger paused LFSCK. */ - if ((start == NULL) && - (list_empty(&lfsck->li_list_scan) || - OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO))) - GOTO(put, rc = 0); + if (!start) { + if (list_empty(&lfsck->li_list_scan) || + OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)) + GOTO(put, rc = 0); + } else if (start->ls_flags & LPF_BROADCAST && !lfsck->li_master) { + CERROR("%s: only allow to specify '-A | -o' via MDS\n", + lfsck_lfsck2name(lfsck)); + + GOTO(put, rc = -EPERM); + } bk = &lfsck->li_bookmark_ram; thread = &lfsck->li_thread; mutex_lock(&lfsck->li_mutex); spin_lock(&lfsck->li_lock); + if (unlikely(thread_is_stopping(thread))) { + /* Someone is stopping the LFSCK. */ + spin_unlock(&lfsck->li_lock); + GOTO(out, rc = -EBUSY); + } + if (!thread_is_init(thread) && !thread_is_stopped(thread)) { rc = -EALREADY; if (unlikely(start == NULL)) { @@ -2653,13 +3181,6 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, if (start == NULL) goto trigger; - if (start->ls_flags & LPF_BROADCAST && !lfsck->li_master) { - CERROR("%s: only allow to specify '-A | -o' via MDS\n", - lfsck_lfsck2name(lfsck)); - - GOTO(out, rc = -EPERM); - } - start->ls_version = bk->lb_version; if (start->ls_active != 0) { @@ -2729,6 +3250,38 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, } } + ltds = &lfsck->li_mdt_descs; + down_read(<ds->ltd_rw_sem); + cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { + ltd = lfsck_ltd2tgt(ltds, idx); + LASSERT(ltd != NULL); + + ltd->ltd_layout_done = 0; + ltd->ltd_namespace_done = 0; + ltd->ltd_synced_failures = 0; + lfsck_reset_ltd_status(ltd, LFSCK_TYPE_NAMESPACE); + lfsck_reset_ltd_status(ltd, LFSCK_TYPE_LAYOUT); + list_del_init(<d->ltd_layout_phase_list); + list_del_init(<d->ltd_layout_list); + list_del_init(<d->ltd_namespace_phase_list); + list_del_init(<d->ltd_namespace_list); + } + up_read(<ds->ltd_rw_sem); + + ltds = &lfsck->li_ost_descs; + down_read(<ds->ltd_rw_sem); + cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) { + ltd = lfsck_ltd2tgt(ltds, idx); + LASSERT(ltd != NULL); + + ltd->ltd_layout_done = 0; + ltd->ltd_synced_failures = 0; + lfsck_reset_ltd_status(ltd, LFSCK_TYPE_LAYOUT); + list_del_init(<d->ltd_layout_phase_list); + list_del_init(<d->ltd_layout_list); + } + up_read(<ds->ltd_rw_sem); + trigger: lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY | LUDA_TYPE; if (bk->lb_param & LPF_DRYRUN) @@ -2750,12 +3303,14 @@ trigger: flags |= DOIF_OUTUSED; lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid; - thread_set_flags(thread, 0); lta = lfsck_thread_args_init(lfsck, NULL, lsp); if (IS_ERR(lta)) GOTO(out, rc = PTR_ERR(lta)); __lfsck_set_speed(lfsck, bk->lb_speed_limit); + spin_lock(&lfsck->li_lock); + thread_set_flags(thread, SVC_STARTING); + spin_unlock(&lfsck->li_lock); task = kthread_run(lfsck_master_engine, lta, "lfsck"); if (IS_ERR(task)) { rc = PTR_ERR(task); @@ -2766,10 +3321,9 @@ trigger: GOTO(out, rc); } - l_wait_event(thread->t_ctl_waitq, - thread_is_running(thread) || - thread_is_stopped(thread), - &lwi); + wait_event_idle(thread->t_ctl_waitq, + thread_is_running(thread) || + thread_is_stopped(thread)); if (start == NULL || !(start->ls_flags & LPF_BROADCAST)) { lfsck->li_start_unplug = 1; wake_up_all(&thread->t_ctl_waitq); @@ -2792,9 +3346,8 @@ trigger: lfsck->li_start_unplug = 1; wake_up_all(&thread->t_ctl_waitq); - l_wait_event(thread->t_ctl_waitq, - thread_is_stopped(thread), - &lwi); + wait_event_idle(thread->t_ctl_waitq, + thread_is_stopped(thread)); } } else { lfsck->li_start_unplug = 1; @@ -2818,7 +3371,6 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key, { struct lfsck_instance *lfsck; struct ptlrpc_thread *thread; - struct l_wait_info lwi = { 0 }; int rc = 0; int rc1 = 0; ENTRY; @@ -2828,27 +3380,26 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key, RETURN(-ENXIO); thread = &lfsck->li_thread; - /* release lfsck::li_mutex to avoid deadlock. */ - if (stop != NULL && stop->ls_flags & LPF_BROADCAST) { - if (!lfsck->li_master) { - CERROR("%s: only allow to specify '-A' via MDS\n", - lfsck_lfsck2name(lfsck)); - - GOTO(out, rc = -EPERM); - } - - rc1 = lfsck_stop_all(env, lfsck, stop); + if (stop && stop->ls_flags & LPF_BROADCAST && !lfsck->li_master) { + CERROR("%s: only allow to specify '-A' via MDS\n", + lfsck_lfsck2name(lfsck)); + GOTO(put, rc = -EPERM); } - mutex_lock(&lfsck->li_mutex); spin_lock(&lfsck->li_lock); - /* no error if LFSCK is already stopped, or was never started */ - if (thread_is_init(thread) || thread_is_stopped(thread)) { - spin_unlock(&lfsck->li_lock); - GOTO(out, rc = 0); - } + /* The target is umounted */ + if (stop && stop->ls_status == LS_PAUSED) + lfsck->li_stopping = 1; + + if (thread_is_init(thread) || thread_is_stopped(thread)) + /* no error if LFSCK stopped already, or not started */ + GOTO(unlock, rc = 0); + + if (thread_is_stopping(thread)) + /* Someone is stopping LFSCK. */ + GOTO(unlock, rc = -EINPROGRESS); - if (stop != NULL) { + if (stop) { lfsck->li_status = stop->ls_status; lfsck->li_flags = stop->ls_flags; } else { @@ -2857,23 +3408,76 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key, } thread_set_flags(thread, SVC_STOPPING); - spin_unlock(&lfsck->li_lock); + + LASSERT(lfsck->li_task != NULL); + cfs_force_sig(SIGINT, lfsck->li_task); + + if (lfsck->li_master) { + struct lfsck_component *com; + struct lfsck_assistant_data *lad; + + list_for_each_entry(com, &lfsck->li_list_scan, lc_link) { + lad = com->lc_data; + spin_lock(&lad->lad_lock); + if (lad->lad_task != NULL) + cfs_force_sig(SIGINT, lad->lad_task); + spin_unlock(&lad->lad_lock); + } + + list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) { + lad = com->lc_data; + spin_lock(&lad->lad_lock); + if (lad->lad_task != NULL) + cfs_force_sig(SIGINT, lad->lad_task); + spin_unlock(&lad->lad_lock); + } + } wake_up_all(&thread->t_ctl_waitq); - l_wait_event(thread->t_ctl_waitq, - thread_is_stopped(thread), - &lwi); + spin_unlock(&lfsck->li_lock); + if (stop && stop->ls_flags & LPF_BROADCAST) + rc1 = lfsck_stop_all(env, lfsck, stop); - GOTO(out, rc = 0); + /* It was me set the status as 'stopping' just now, if it is not + * 'stopping' now, then either stopped, or re-started by race. */ + wait_event_idle(thread->t_ctl_waitq, + !thread_is_stopping(thread)); -out: - mutex_unlock(&lfsck->li_mutex); + GOTO(put, rc = 0); + +unlock: + spin_unlock(&lfsck->li_lock); +put: lfsck_instance_put(env, lfsck); return rc != 0 ? rc : rc1; } EXPORT_SYMBOL(lfsck_stop); +int lfsck_in_notify_local(const struct lu_env *env, struct dt_device *key, + struct lfsck_req_local *lrl, struct thandle *th) +{ + struct lfsck_instance *lfsck; + struct lfsck_component *com; + int rc = -EOPNOTSUPP; + ENTRY; + + lfsck = lfsck_instance_find(key, true, false); + if (unlikely(!lfsck)) + RETURN(-ENXIO); + + com = lfsck_component_find(lfsck, lrl->lrl_active); + if (likely(com && com->lc_ops->lfsck_in_notify_local)) { + rc = com->lc_ops->lfsck_in_notify_local(env, com, lrl, th); + lfsck_component_put(env, com); + } + + lfsck_instance_put(env, lfsck); + + RETURN(rc); +} +EXPORT_SYMBOL(lfsck_in_notify_local); + int lfsck_in_notify(const struct lu_env *env, struct dt_device *key, struct lfsck_request *lr) { @@ -2910,9 +3514,10 @@ int lfsck_in_notify(const struct lu_env *env, struct dt_device *key, } case LE_PHASE1_DONE: case LE_PHASE2_DONE: - case LE_FID_ACCESSED: case LE_PEER_EXIT: case LE_CONDITIONAL_DESTROY: + case LE_SET_LMV_MASTER: + case LE_SET_LMV_SLAVE: case LE_PAIRS_VERIFY: { struct lfsck_instance *lfsck; struct lfsck_component *com; @@ -2922,7 +3527,7 @@ int lfsck_in_notify(const struct lu_env *env, struct dt_device *key, RETURN(-ENXIO); com = lfsck_component_find(lfsck, lr->lr_active); - if (likely(com != NULL)) { + if (likely(com)) { rc = com->lc_ops->lfsck_in_notify(env, com, lr); lfsck_component_put(env, com); } @@ -2939,28 +3544,94 @@ int lfsck_in_notify(const struct lu_env *env, struct dt_device *key, EXPORT_SYMBOL(lfsck_in_notify); int lfsck_query(const struct lu_env *env, struct dt_device *key, - struct lfsck_request *lr) + struct lfsck_request *req, struct lfsck_reply *rep, + struct lfsck_query *que) { struct lfsck_instance *lfsck; struct lfsck_component *com; - int rc; + int i; + int rc = 0; + __u16 type; ENTRY; lfsck = lfsck_instance_find(key, true, false); if (unlikely(lfsck == NULL)) RETURN(-ENXIO); - com = lfsck_component_find(lfsck, lr->lr_active); - if (likely(com != NULL)) { - rc = com->lc_ops->lfsck_query(env, com); - lfsck_component_put(env, com); + if (que != NULL) { + if (que->lu_types == LFSCK_TYPES_ALL) + que->lu_types = + LFSCK_TYPES_SUPPORTED & ~LFSCK_TYPE_SCRUB; + + if (que->lu_types & ~LFSCK_TYPES_SUPPORTED) { + que->lu_types &= ~LFSCK_TYPES_SUPPORTED; + + GOTO(out, rc = -ENOTSUPP); + } + + for (i = 0, type = 1 << i; i < LFSCK_TYPE_BITS; + i++, type = 1 << i) { + if (!(que->lu_types & type)) + continue; + +again: + com = lfsck_component_find(lfsck, type); + if (unlikely(com == NULL)) + GOTO(out, rc = -ENOTSUPP); + + memset(que->lu_mdts_count[i], 0, + sizeof(__u32) * (LS_MAX + 1)); + memset(que->lu_osts_count[i], 0, + sizeof(__u32) * (LS_MAX + 1)); + que->lu_repaired[i] = 0; + rc = com->lc_ops->lfsck_query(env, com, req, rep, + que, i); + lfsck_component_put(env, com); + if (rc < 0) + GOTO(out, rc); + } + + if (!(que->lu_flags & LPF_WAIT)) + GOTO(out, rc); + + for (i = 0, type = 1 << i; i < LFSCK_TYPE_BITS; + i++, type = 1 << i) { + if (!(que->lu_types & type)) + continue; + + if (que->lu_mdts_count[i][LS_SCANNING_PHASE1] != 0 || + que->lu_mdts_count[i][LS_SCANNING_PHASE2] != 0 || + que->lu_osts_count[i][LS_SCANNING_PHASE1] != 0 || + que->lu_osts_count[i][LS_SCANNING_PHASE2] != 0) { + /* If it is required to wait, then sleep + * 3 seconds and try to query again. + */ + unsigned long timeout = + msecs_to_jiffies(3000) + 1; + while (timeout && + !fatal_signal_pending(current)) + timeout = schedule_timeout_killable( + timeout); + if (timeout == 0) + goto again; + } + } } else { - rc = -ENOTSUPP; + com = lfsck_component_find(lfsck, req->lr_active); + if (likely(com != NULL)) { + rc = com->lc_ops->lfsck_query(env, com, req, rep, + que, -1); + lfsck_component_put(env, com); + } else { + rc = -ENOTSUPP; + } } - lfsck_instance_put(env, lfsck); + GOTO(out, rc); - RETURN(rc); +out: + lfsck_instance_put(env, lfsck); + return rc; } EXPORT_SYMBOL(lfsck_query); @@ -3007,6 +3678,7 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key, INIT_LIST_HEAD(&lfsck->li_list_dir); INIT_LIST_HEAD(&lfsck->li_list_double_scan); INIT_LIST_HEAD(&lfsck->li_list_idle); + INIT_LIST_HEAD(&lfsck->li_list_lmv); atomic_set(&lfsck->li_ref, 1); atomic_set(&lfsck->li_double_scan_count, 0); init_waitqueue_head(&lfsck->li_thread.t_ctl_waitq); @@ -3045,13 +3717,13 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key, lfsck->li_local_root_fid = *fid; if (master) { lfsck->li_master = 1; - if (lfsck_dev_idx(key) == 0) { + if (lfsck_dev_idx(lfsck) == 0) { struct lu_fid *pfid = &lfsck_env_info(env)->lti_fid2; const struct lu_name *cname; rc = dt_lookup(env, root, (struct dt_rec *)(&lfsck->li_global_root_fid), - (const struct dt_key *)"ROOT", BYPASS_CAPA); + (const struct dt_key *)"ROOT"); if (rc != 0) GOTO(out, rc); @@ -3059,42 +3731,47 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key, if (IS_ERR(obj)) GOTO(out, rc = PTR_ERR(obj)); + if (unlikely(!dt_try_as_dir(env, obj))) + GOTO(out, rc = -ENOTDIR); + rc = dt_lookup(env, obj, (struct dt_rec *)fid, - (const struct dt_key *)dotlustre, BYPASS_CAPA); + (const struct dt_key *)dotlustre); if (rc != 0) GOTO(out, rc); - lu_object_put(env, &obj->do_lu); + lfsck_object_put(env, obj); obj = dt_locate(env, key, fid); if (IS_ERR(obj)) GOTO(out, rc = PTR_ERR(obj)); cname = lfsck_name_get_const(env, dotlustre, strlen(dotlustre)); - rc = lfsck_verify_linkea(env, key, obj, cname, + rc = lfsck_verify_linkea(env, obj, cname, &lfsck->li_global_root_fid); if (rc != 0) GOTO(out, rc); + if (unlikely(!dt_try_as_dir(env, obj))) + GOTO(out, rc = -ENOTDIR); + *pfid = *fid; rc = dt_lookup(env, obj, (struct dt_rec *)fid, - (const struct dt_key *)lostfound, - BYPASS_CAPA); + (const struct dt_key *)lostfound); if (rc != 0) GOTO(out, rc); - lu_object_put(env, &obj->do_lu); + lfsck_object_put(env, obj); obj = dt_locate(env, key, fid); if (IS_ERR(obj)) GOTO(out, rc = PTR_ERR(obj)); cname = lfsck_name_get_const(env, lostfound, strlen(lostfound)); - rc = lfsck_verify_linkea(env, key, obj, cname, pfid); + rc = lfsck_verify_linkea(env, obj, cname, pfid); if (rc != 0) GOTO(out, rc); - lu_object_put(env, &obj->do_lu); + lfsck_object_put(env, obj); obj = NULL; } } @@ -3106,12 +3783,18 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key, if (IS_ERR(obj)) GOTO(out, rc = PTR_ERR(obj)); - lu_object_get(&obj->do_lu); - lfsck->li_obj_oit = obj; rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features); if (rc != 0) GOTO(out, rc); + lfsck->li_obj_oit = obj; + obj = local_file_find_or_create(env, lfsck->li_los, root, LFSCK_DIR, + S_IFDIR | S_IRUGO | S_IWUSR); + if (IS_ERR(obj)) + GOTO(out, rc = PTR_ERR(obj)); + + lu_object_get(&obj->do_lu); + lfsck->li_lfsck_dir = obj; rc = lfsck_bookmark_setup(env, lfsck); if (rc != 0) GOTO(out, rc); @@ -3137,9 +3820,9 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key, rc = lfsck_add_target_from_orphan(env, lfsck); out: if (obj != NULL && !IS_ERR(obj)) - lu_object_put(env, &obj->do_lu); + lfsck_object_put(env, obj); if (root != NULL && !IS_ERR(root)) - lu_object_put(env, &root->do_lu); + lfsck_object_put(env, root); if (rc != 0) lfsck_instance_cleanup(env, lfsck); return rc; @@ -3246,7 +3929,7 @@ void lfsck_del_target(const struct lu_env *env, struct dt_device *key, if (unlikely(index >= ltds->ltd_tgts_bitmap->size)) goto unlock; - ltd = LTD_TGT(ltds, index); + ltd = lfsck_ltd2tgt(ltds, index); if (unlikely(ltd == NULL)) goto unlock; @@ -3254,7 +3937,7 @@ void lfsck_del_target(const struct lu_env *env, struct dt_device *key, ltds->ltd_tgtnr--; cfs_bitmap_clear(ltds->ltd_tgts_bitmap, index); - LTD_TGT(ltds, index) = NULL; + lfsck_assign_tgt(ltds, NULL, index); unlock: if (ltd == NULL) { @@ -3289,12 +3972,10 @@ static int __init lfsck_init(void) { int rc; - INIT_LIST_HEAD(&lfsck_instance_list); - INIT_LIST_HEAD(&lfsck_ost_orphan_list); - INIT_LIST_HEAD(&lfsck_mdt_orphan_list); lfsck_key_init_generic(&lfsck_thread_key, NULL); rc = lu_context_key_register(&lfsck_thread_key); - if (rc == 0) { + if (!rc) { + tgt_register_lfsck_in_notify_local(lfsck_in_notify_local); tgt_register_lfsck_in_notify(lfsck_in_notify); tgt_register_lfsck_query(lfsck_query); } @@ -3324,8 +4005,10 @@ static void __exit lfsck_exit(void) lu_context_key_degister(&lfsck_thread_key); } -MODULE_AUTHOR("Intel Corporation "); -MODULE_DESCRIPTION("LFSCK"); +MODULE_AUTHOR("OpenSFS, Inc. "); +MODULE_DESCRIPTION("Lustre File System Checker"); +MODULE_VERSION(LUSTRE_VERSION_STRING); MODULE_LICENSE("GPL"); -cfs_module(lfsck, LUSTRE_VERSION_STRING, lfsck_init, lfsck_exit); +module_init(lfsck_init); +module_exit(lfsck_exit);