From: Fan Yong Date: Sun, 19 May 2013 04:26:37 +0000 (+0800) Subject: LU-2915 lfsck: LFSCK 1.5 technical debts (2) X-Git-Tag: 2.4.51~21 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=f5491211ed7251a45e4e6916539cd2a2ab5e6038 LU-2915 lfsck: LFSCK 1.5 technical debts (2) This patch resolves some LFSCK 1.5 technical debts, including: 1) LFSCK does NOT hold spin_lock when call dt iteration API(s). 2) Remove the hack of using dt_it_ops::put() method for events notification to low layer otable-based iteration. Test-Parameters: testlist=sanity-scrub,sanity-lfsck Signed-off-by: Fan Yong Change-Id: Ie16161370ac218860a74b23d853c4c378adce4ec Reviewed-on: http://review.whamcloud.com/6343 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger --- diff --git a/lustre/lfsck/lfsck_engine.c b/lustre/lfsck/lfsck_engine.c index ac7fb1c..c511e62 100644 --- a/lustre/lfsck/lfsck_engine.c +++ b/lustre/lfsck/lfsck_engine.c @@ -42,10 +42,10 @@ #include "lfsck_internal.h" -static void lfsck_unpack_ent(struct lu_dirent *ent) +static void lfsck_unpack_ent(struct lu_dirent *ent, __u64 *cookie) { fid_le_to_cpu(&ent->lde_fid, &ent->lde_fid); - ent->lde_hash = le64_to_cpu(ent->lde_hash); + *cookie = le64_to_cpu(ent->lde_hash); ent->lde_reclen = le16_to_cpu(ent->lde_reclen); ent->lde_namelen = le16_to_cpu(ent->lde_namelen); ent->lde_attrs = le32_to_cpu(ent->lde_attrs); @@ -56,6 +56,33 @@ static void lfsck_unpack_ent(struct lu_dirent *ent) ent->lde_name[ent->lde_namelen] = 0; } +static void lfsck_di_oit_put(const struct lu_env *env, struct lfsck_instance *lfsck) +{ + const struct dt_it_ops *iops; + struct dt_it *di; + + spin_lock(&lfsck->li_lock); + iops = &lfsck->li_obj_oit->do_index_ops->dio_it; + di = lfsck->li_di_oit; + lfsck->li_di_oit = NULL; + spin_unlock(&lfsck->li_lock); + iops->put(env, di); +} + +static void lfsck_di_dir_put(const struct lu_env *env, struct lfsck_instance *lfsck) +{ + const struct dt_it_ops *iops; + struct dt_it *di; + + spin_lock(&lfsck->li_lock); + iops = &lfsck->li_obj_dir->do_index_ops->dio_it; + di = lfsck->li_di_dir; + lfsck->li_di_dir = NULL; + lfsck->li_cookie_dir = 0; + spin_unlock(&lfsck->li_lock); + iops->put(env, di); +} + static void lfsck_close_dir(const struct lu_env *env, struct lfsck_instance *lfsck) { @@ -63,11 +90,7 @@ static void lfsck_close_dir(const struct lu_env *env, const struct dt_it_ops *dir_iops = &dir_obj->do_index_ops->dio_it; struct dt_it *dir_di = lfsck->li_di_dir; - spin_lock(&lfsck->li_lock); - lfsck->li_di_dir = NULL; - spin_unlock(&lfsck->li_lock); - - dir_iops->put(env, dir_di); + lfsck_di_dir_put(env, lfsck); dir_iops->fini(env, dir_di); lfsck->li_obj_dir = NULL; lfsck_object_put(env, dir_obj); @@ -104,6 +127,7 @@ static int lfsck_master_dir_engine(const struct lu_env *env, lfsck->li_new_scanned++; rc = iops->rec(env, di, (struct dt_rec *)ent, lfsck->li_args_dir); + lfsck_unpack_ent(ent, &lfsck->li_cookie_dir); if (rc != 0) { lfsck_fail(env, lfsck, true); if (bk->lb_param & LPF_FAILOUT) @@ -112,7 +136,6 @@ static int lfsck_master_dir_engine(const struct lu_env *env, goto checkpoint; } - lfsck_unpack_ent(ent); if (ent->lde_attrs & LUDA_IGNORE) goto checkpoint; @@ -320,19 +343,13 @@ int lfsck_master_engine(void *args) PFID(&lfsck->li_pos_current.lp_dir_parent), cfs_curproc_pid(), rc); - if (lfsck->li_paused && cfs_list_empty(&lfsck->li_list_scan)) - oit_iops->put(&env, oit_di); - if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH)) rc = lfsck_post(&env, lfsck, rc); if (lfsck->li_di_dir != NULL) lfsck_close_dir(&env, lfsck); fini_oit: - spin_lock(&lfsck->li_lock); - lfsck->li_di_oit = NULL; - spin_unlock(&lfsck->li_lock); - + lfsck_di_oit_put(&env, lfsck); oit_iops->fini(&env, oit_di); if (rc == 1) { if (!cfs_list_empty(&lfsck->li_list_double_scan)) diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index 0a0e5e6..e0efdc1 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -80,7 +80,7 @@ enum lfsck_status { }; enum lfsck_flags { - /* Finish to the cycle scanning. */ + /* Finish the first cycle scanning. */ LF_SCANNED_ONCE = 0x00000001ULL, /* There is some namespace inconsistency. */ @@ -312,6 +312,9 @@ struct lfsck_instance { /* It for directory traversal */ struct dt_it *li_di_dir; + /* namespace-based directory traversal position. */ + __u64 li_cookie_dir; + /* Arguments for low layer otable-based iteration. */ __u32 li_args_oit; diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 83e9c54..1a62adf 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -340,9 +340,7 @@ void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck, { const struct dt_it_ops *iops = &lfsck->li_obj_oit->do_index_ops->dio_it; - spin_lock(&lfsck->li_lock); if (unlikely(lfsck->li_di_oit == NULL)) { - spin_unlock(&lfsck->li_lock); memset(pos, 0, sizeof(*pos)); return; } @@ -369,7 +367,6 @@ void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck, fid_zero(&pos->lp_dir_parent); pos->lp_dir_cookie = 0; } - spin_unlock(&lfsck->li_lock); } static void __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit) @@ -633,6 +630,7 @@ int lfsck_prep(const struct lu_env *env, struct lfsck_instance *lfsck) } lfsck->li_obj_dir = lfsck_object_get(obj); + lfsck->li_cookie_dir = iops->store(env, di); spin_lock(&lfsck->li_lock); lfsck->li_di_dir = di; spin_unlock(&lfsck->li_lock); @@ -707,6 +705,7 @@ int lfsck_exec_oit(const struct lu_env *env, struct lfsck_instance *lfsck, } lfsck->li_obj_dir = lfsck_object_get(obj); + lfsck->li_cookie_dir = iops->store(env, di); spin_lock(&lfsck->li_lock); lfsck->li_di_dir = di; spin_unlock(&lfsck->li_lock); @@ -1060,11 +1059,6 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key, bool pause) if (pause) lfsck->li_paused = 1; thread_set_flags(thread, SVC_STOPPING); - /* The LFSCK thread may be sleeping on low layer wait queue, - * wake it up. */ - if (likely(lfsck->li_di_oit != NULL)) - lfsck->li_obj_oit->do_index_ops->dio_it.put(env, - lfsck->li_di_oit); spin_unlock(&lfsck->li_lock); cfs_waitq_broadcast(&thread->t_ctl_waitq); diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index 1ebb714..9d2950c 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -633,11 +633,11 @@ static int lfsck_namespace_reset(const struct lu_env *env, if (IS_ERR(dto)) GOTO(out, rc = PTR_ERR(dto)); + com->lc_obj = dto; rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features); if (rc != 0) GOTO(out, rc); - com->lc_obj = dto; rc = lfsck_namespace_store(env, com, true); GOTO(out, rc); @@ -1105,6 +1105,7 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, if (ns->ln_status == LS_SCANNING_PHASE1) { struct lfsck_position pos; + const struct dt_it_ops *iops; cfs_duration_t duration = cfs_time_current() - lfsck->li_time_last_checkpoint; __u64 checked = ns->ln_items_checked + com->lc_new_checked; @@ -1155,7 +1156,34 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, buf += rc; len -= rc; - lfsck_pos_fill(env, lfsck, &pos, false); + + LASSERT(lfsck->li_di_oit != NULL); + + iops = &lfsck->li_obj_oit->do_index_ops->dio_it; + + /* The low layer otable-based iteration position may NOT + * exactly match the namespace-based directory traversal + * cookie. Generally, it is not a serious issue. But the + * caller should NOT make assumption on that. */ + pos.lp_oit_cookie = iops->store(env, lfsck->li_di_oit); + if (!lfsck->li_current_oit_processed) + pos.lp_oit_cookie--; + + spin_lock(&lfsck->li_lock); + if (lfsck->li_di_dir != NULL) { + pos.lp_dir_cookie = lfsck->li_cookie_dir; + if (pos.lp_dir_cookie >= MDS_DIR_END_OFF) { + fid_zero(&pos.lp_dir_parent); + pos.lp_dir_cookie = 0; + } else { + pos.lp_dir_parent = + *lu_object_fid(&lfsck->li_obj_dir->do_lu); + } + } else { + fid_zero(&pos.lp_dir_parent); + pos.lp_dir_cookie = 0; + } + spin_unlock(&lfsck->li_lock); rc = lfsck_pos_dump(&buf, &len, &pos, "current_position"); if (rc <= 0) goto out; diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index f72dc05..9943631 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -5044,10 +5044,10 @@ static inline int osd_it_ea_rec(const struct lu_env *env, if (unlikely(ino == osd_sb(dev)->s_root->d_inode->i_ino)) { attr |= LUDA_IGNORE; rc = 0; - goto pack; + } else { + rc = osd_dirent_check_repair(env, obj, it, fid, id, + &attr); } - - rc = osd_dirent_check_repair(env, obj, it, fid, id, &attr); } else { attr &= ~LU_DIRENT_ATTRS_MASK; if (!fid_is_sane(fid)) { @@ -5060,15 +5060,15 @@ static inline int osd_it_ea_rec(const struct lu_env *env, } } - if (rc < 0) - RETURN(rc); - -pack: + /* Pack the entry anyway, at least the offset is right. */ osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off, it->oie_dirent->oied_name, it->oie_dirent->oied_namelen, it->oie_dirent->oied_type, attr); + if (rc < 0) + RETURN(rc); + if (osd_remote_fid(env, dev, fid)) RETURN(0); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 5aec25b..2062ccb 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -177,7 +177,7 @@ struct osd_mdobj_map { #define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \ ldiskfs_add_entry(handle, child, cinode, hlock) -#define OSD_OTABLE_IT_CACHE_SIZE 128 +#define OSD_OTABLE_IT_CACHE_SIZE 64 #define OSD_OTABLE_IT_CACHE_MASK (~(OSD_OTABLE_IT_CACHE_SIZE - 1)) struct osd_inconsistent_item { @@ -209,7 +209,6 @@ struct osd_otable_cache { struct osd_otable_it { struct osd_device *ooi_dev; - pid_t ooi_pid; struct osd_otable_cache ooi_cache; /* The following bits can be updated/checked w/o lock protection. @@ -221,9 +220,7 @@ struct osd_otable_it { * filled into cache. */ ooi_user_ready:1, /* The user out of OSD is * ready to iterate. */ - ooi_waiting:1, /* it::next is waiting. */ - ooi_stopping:1; /* Someone is stopping - * the iteration. */ + ooi_waiting:1; /* it::next is waiting. */ }; extern const int osd_dto_credits_noquota[]; diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 53dd70b..475fbca 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -973,18 +973,10 @@ static int osd_inode_iteration(struct osd_thread_info *info, brelse(param.bitmap); RETURN(rc); } - - if (preload && dev->od_otable_it->ooi_stopping) { - brelse(param.bitmap); - RETURN(0); - } } next_group: brelse(param.bitmap); - - if (preload && dev->od_otable_it->ooi_stopping) - RETURN(0); } if (*pos > limit) @@ -1822,7 +1814,6 @@ static struct dt_it *osd_otable_it_init(const struct lu_env *env, dev->od_otable_it = it; it->ooi_dev = dev; - it->ooi_pid = cfs_curproc_pid(); it->ooi_cache.ooc_consumer_idx = -1; if (flags & DOIF_OUTUSED) it->ooi_used_outside = 1; @@ -1874,30 +1865,8 @@ static int osd_otable_it_get(const struct lu_env *env, return 0; } -/** - * It is hack here: - * - * Sometimes the otable-based iteration driver (LFSCK) may be blocked in OSD - * layer when someone wants to stop/pause the iteration. Under such case, we - * need some mechanism to notify the event and wakeup the blocker. - */ static void osd_otable_it_put(const struct lu_env *env, struct dt_it *di) { - struct osd_otable_it *it = (struct osd_otable_it *)di; - struct osd_device *dev = it->ooi_dev; - - /* od_otable_mutex: prevent curcurrent init/fini */ - mutex_lock(&dev->od_otable_mutex); - if (it->ooi_pid == cfs_curproc_pid()) { - dev->od_scrub.os_paused = 1; - } else { - struct ptlrpc_thread *thread = &dev->od_scrub.os_thread; - - it->ooi_stopping = 1; - if (it->ooi_waiting) - cfs_waitq_broadcast(&thread->t_ctl_waitq); - } - mutex_unlock(&dev->od_otable_mutex); } static inline int @@ -1905,7 +1874,7 @@ osd_otable_it_wakeup(struct osd_scrub *scrub, struct osd_otable_it *it) { spin_lock(&scrub->os_lock); if (it->ooi_cache.ooc_pos_preload < scrub->os_pos_current || - scrub->os_waiting || it->ooi_stopping || + scrub->os_waiting || !thread_is_running(&scrub->os_thread)) it->ooi_waiting = 0; else @@ -1961,9 +1930,6 @@ again: if (!thread_is_running(thread) && !it->ooi_used_outside) RETURN(1); - if (it->ooi_stopping) - RETURN(0); - rc = osd_otable_it_preload(env, it); if (rc >= 0) goto again; diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 9d12686..e8e13fa 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -498,7 +498,7 @@ test_8() { error "(4) Expect 'inconsistent', but got '$FLAGS'" #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_val=1 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 $START_SCRUB || error "(5) Fail to start OI scrub!" @@ -639,7 +639,7 @@ test_10a() { do_facet $SINGLEMDS \ $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_val=1 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || error "(6) File diff failed unexpected!" @@ -684,6 +684,7 @@ test_10a() { } run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)" +# test_10b is obsolete, it will be coverded by related sanity-lfsck tests. test_10b() { scrub_prep 0 mds_backup_restore || error "(1) Fail to backup/restore!" @@ -742,7 +743,7 @@ test_10b() { FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(14) Expect empty flags, but got '$FLAGS'" } -run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)" +#run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)" test_11() { echo "stopall"