From 64af83190180a8f3e90aea270e2baaecb1ecdb93 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Mon, 25 Feb 2013 22:19:26 +0800 Subject: [PATCH] LU-3039 lfsck: misc patch for LFSCK 1.5 debts (1) 1) Handle backup and restore case: add FID-in-dirent by re-inserting the name entry with proper ldiskfs PDO lock handling. 2) Fix some deadlock cases between LFSCK engine thread and OI scrub thread: one may start waiting without waking up the other. 3) Add lfsck performance tests for the cases: lfsck with load, lfsck during create, backup/restore, simulate upgrade from 1.8. 4) Other cleanup. Test-Parameters: testlist=sanity-scrub,sanity-lfsck,lfsck-performance Signed-off-by: Fan Yong Change-Id: Ib539291c604d807475cacfdd56d910e9e86d6ac7 Reviewed-on: http://review.whamcloud.com/5764 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Reviewed-by: Niu Yawei Reviewed-by: Mike Pershin Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lustre_fid.h | 7 +- lustre/mdd/mdd_compat.c | 2 +- lustre/mdd/mdd_dir.c | 8 +- lustre/mdd/mdd_lfsck.c | 13 +- lustre/mdt/mdt_handler.c | 2 +- lustre/mdt/mdt_reint.c | 3 +- lustre/osd-ldiskfs/osd_handler.c | 96 +++++++---- lustre/osd-ldiskfs/osd_internal.h | 4 + lustre/osd-ldiskfs/osd_scrub.c | 78 ++++++--- lustre/tests/Makefile.am | 2 +- lustre/tests/lfsck-performance.sh | 355 ++++++++++++++++++++++++++++++++++++++ lustre/tests/sanity-lfsck.sh | 6 +- lustre/tests/scrub-performance.sh | 10 +- lustre/tests/test-framework.sh | 8 +- 14 files changed, 513 insertions(+), 81 deletions(-) create mode 100644 lustre/tests/lfsck-performance.sh diff --git a/lustre/include/lustre_fid.h b/lustre/include/lustre_fid.h index d662c3f..c52da03 100644 --- a/lustre/include/lustre_fid.h +++ b/lustre/include/lustre_fid.h @@ -290,7 +290,7 @@ static inline int fid_is_quota(const struct lu_fid *fid) fid_seq(fid) == FID_SEQ_QUOTA_GLB; } -static inline int fid_is_client_mdt_visible(const struct lu_fid *fid) +static inline int fid_is_namespace_visible(const struct lu_fid 
*fid) { const __u64 seq = fid_seq(fid); @@ -301,11 +301,6 @@ static inline int fid_is_client_mdt_visible(const struct lu_fid *fid) fid_is_root(fid) || fid_is_dot_lustre(fid); } -static inline int fid_is_client_visible(const struct lu_fid *fid) -{ - return fid_is_client_mdt_visible(fid) || fid_is_idif(fid); -} - static inline int fid_seq_in_fldb(__u64 seq) { return fid_seq_is_igif(seq) || fid_seq_is_norm(seq) || diff --git a/lustre/mdd/mdd_compat.c b/lustre/mdd/mdd_compat.c index fed84e3..64e3ced 100644 --- a/lustre/mdd/mdd_compat.c +++ b/lustre/mdd/mdd_compat.c @@ -359,7 +359,7 @@ int mdd_compat_fixes(const struct lu_env *env, struct mdd_device *mdd) LUSTRE_OSD_ZFS_NAME) != 0) { CERROR("%s: "DFID" is used on ldiskfs?!\n", mdd2obd_dev(mdd)->obd_name, PFID(&mdd->mdd_root_fid)); - RETURN(-ENOTSUPP); + GOTO(out, rc = -ENOTSUPP); } LCONSOLE_INFO("%s: FID of /ROOT has been changed. " diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index b352b6c..46c1160 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -2007,7 +2007,11 @@ static int mdd_acl_init(const struct lu_env *env, struct mdd_object *pobj, } else if (rc == -ENODATA || rc == -EOPNOTSUPP) { /* If there are no default ACL, fix mode by mask */ struct lu_ucred *uc = lu_ucred(env); - la->la_mode &= ~uc->uc_umask; + + /* The create triggered by MDT internal events, such as + * LFSCK reset, will not contain valid "uc". */ + if (unlikely(uc != NULL)) + la->la_mode &= ~uc->uc_umask; rc = 0; } @@ -2270,7 +2274,7 @@ cleanup: mdd_pdo_write_unlock(env, mdd_pobj, dlh); out_trans: - if (rc == 0 && fid_is_client_mdt_visible(mdo2fid(son))) + if (rc == 0 && fid_is_namespace_visible(mdo2fid(son))) rc = mdd_changelog_ns_store(env, mdd, S_ISDIR(attr->la_mode) ? CL_MKDIR : S_ISREG(attr->la_mode) ? 
CL_CREATE : diff --git a/lustre/mdd/mdd_lfsck.c b/lustre/mdd/mdd_lfsck.c index f012418..dd8979e 100644 --- a/lustre/mdd/mdd_lfsck.c +++ b/lustre/mdd/mdd_lfsck.c @@ -261,13 +261,12 @@ static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck, LASSERT(pos->lp_dir_cookie != MDS_DIR_DUMMY_START); - if (pos->lp_dir_cookie == MDS_DIR_END_OFF) - LASSERT(dir_processed); - - /* For the dir which just to be processed, - * lp_dir_cookie will become MDS_DIR_DUMMY_START, - * which can be correctly handled by mdd_lfsck_prep. */ - if (!dir_processed) + if (pos->lp_dir_cookie >= MDS_DIR_END_OFF) + pos->lp_dir_cookie = MDS_DIR_END_OFF; + else if (!dir_processed) + /* For the dir which just to be processed, + * lp_dir_cookie will become MDS_DIR_DUMMY_START, + * which can be correctly handled by mdd_lfsck_prep. */ pos->lp_dir_cookie--; } else { fid_zero(&pos->lp_dir_parent); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 0856f78..7610285 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5688,7 +5688,7 @@ static int mdt_fid2path(struct mdt_thread_info *info, if (!fid_is_sane(&fp->gf_fid)) RETURN(-EINVAL); - if (!fid_is_client_mdt_visible(&fp->gf_fid)) { + if (!fid_is_namespace_visible(&fp->gf_fid)) { CWARN("%s: "DFID" is invalid, sequence should be " ">= "LPX64"\n", obd->obd_name, PFID(&fp->gf_fid), (__u64)FID_SEQ_NORMAL); diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 18bea87..17935a9 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -502,7 +502,8 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, if (info->mti_dlm_req) ldlm_request_cancel(req, info->mti_dlm_req, 0); - if (fid_is_obf(rr->rr_fid1) || fid_is_dot_lustre(rr->rr_fid1)) + if ((fid_is_obf(rr->rr_fid1) || fid_is_dot_lustre(rr->rr_fid1)) && + !OBD_FAIL_CHECK(OBD_FAIL_OSD_FID_MAPPING)) RETURN(-EPERM); repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); diff --git a/lustre/osd-ldiskfs/osd_handler.c 
b/lustre/osd-ldiskfs/osd_handler.c index 040ff74..2aa7636 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -2224,9 +2224,6 @@ int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode, if (OBD_FAIL_CHECK(OBD_FAIL_FID_INLMA)) return 0; - if (OBD_FAIL_CHECK(OBD_FAIL_FID_IGIF) && fid_is_client_visible(fid)) - return 0; - lustre_lma_init(lma, fid, flags); lustre_lma_swab(lma); @@ -2249,7 +2246,8 @@ int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode, void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param, const struct dt_rec *fid) { - if (!fid_is_client_mdt_visible((const struct lu_fid *)fid)) { + if (!fid_is_namespace_visible((const struct lu_fid *)fid) || + OBD_FAIL_CHECK(OBD_FAIL_FID_IGIF)) { param->edp_magic = 0; return; } @@ -2415,6 +2413,10 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, osd_trans_declare_rb(env, th, OSD_OT_REF_ADD); result = __osd_object_create(info, obj, attr, hint, dof, th); + + if (OBD_FAIL_CHECK(OBD_FAIL_FID_IGIF) && !fid_is_internal(fid)) + return result; + if ((result == 0) && (fid_is_last_id(fid) || !fid_is_on_ost(info, osd_dt_dev(th->th_dev), fid))) @@ -4560,6 +4562,19 @@ osd_dirent_has_space(__u16 reclen, __u16 namelen, unsigned blocksize) return 0; } +static inline int +osd_dot_dotdot_has_space(struct ldiskfs_dir_entry_2 *de, int dot_dotdot) +{ + LASSERTF(dot_dotdot == 1 || dot_dotdot == 2, + "dot_dotdot = %d\n", dot_dotdot); + + if (LDISKFS_DIR_REC_LEN(de) >= + __LDISKFS_DIR_REC_LEN(dot_dotdot + 1 + sizeof(struct osd_fid_pack))) + return 1; + else + return 0; +} + static int osd_dirent_reinsert(const struct lu_env *env, handle_t *jh, struct inode *dir, struct inode *inode, @@ -4653,18 +4668,15 @@ osd_dirent_check_repair(const struct lu_env *env, struct osd_object *obj, struct inode *inode; int credits; int rc; + int dot_dotdot = 0; bool dirty = false; - bool is_dotdot = false; ENTRY; if (ent->oied_name[0] == '.') { - /* 
Skip dot entry, even if it has stale FID-in-dirent, because - * we do not use such FID-in-dirent anymore, it is harmless. */ if (ent->oied_namelen == 1) - RETURN(0); - - if (ent->oied_namelen == 2 && ent->oied_name[1] == '.') - is_dotdot = true; + dot_dotdot = 1; + else if (ent->oied_namelen == 2 && ent->oied_name[1] == '.') + dot_dotdot = 2; } dentry = osd_child_dentry_get(env, obj, ent->oied_name, @@ -4697,26 +4709,36 @@ again: ent->oied_name, rc); RETURN(rc); } - } - if (obj->oo_hl_head != NULL) { - hlock = osd_oti_get(env)->oti_hlock; - ldiskfs_htree_lock(hlock, obj->oo_hl_head, dir, - LDISKFS_HLOCK_DEL); + if (obj->oo_hl_head != NULL) { + hlock = osd_oti_get(env)->oti_hlock; + /* "0" means exclusive lock for the whole directory. + * We need to prevent others access such name entry + * during the delete + insert. Neither HLOCK_ADD nor + * HLOCK_DEL cannot guarantee the atomicity. */ + ldiskfs_htree_lock(hlock, obj->oo_hl_head, dir, 0); + } else { + down_write(&obj->oo_ext_idx_sem); + } } else { - down_write(&obj->oo_ext_idx_sem); + if (obj->oo_hl_head != NULL) { + hlock = osd_oti_get(env)->oti_hlock; + ldiskfs_htree_lock(hlock, obj->oo_hl_head, dir, + LDISKFS_HLOCK_LOOKUP); + } else { + down_read(&obj->oo_ext_idx_sem); + } } bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock); - /* For dotdot entry, if there is not enough space to hold FID-in-dirent, - * just keep it there. It only happens when the device upgraded from 1.8 - * or restored from MDT file-level backup. For the whole directory, only - * dotdot entry has no FID-in-dirent and needs to get FID from LMA when - * readdir, it will not affect the performance much. */ + /* For dot/dotdot entry, if there is not enough space to hold the + * FID-in-dirent, just keep them there. It only happens when the + * device upgraded from 1.8 or restored from MDT file-level backup. 
+ * For the whole directory, only dot/dotdot entry have no FID-in-dirent + * and needs to get FID from LMA when readdir, it will not affect the + * performance much. */ if ((bh == NULL) || (le32_to_cpu(de->inode) != ent->oied_ino) || - (is_dotdot && !osd_dirent_has_space(de->rec_len, - ent->oied_namelen, - sb->s_blocksize))) { + (dot_dotdot != 0 && !osd_dot_dotdot_has_space(de, dot_dotdot))) { *attr |= LUDA_IGNORE; GOTO(out_journal, rc = 0); } @@ -4752,7 +4774,7 @@ again: if (hlock != NULL) ldiskfs_htree_unlock(hlock); else - up_write(&obj->oo_ext_idx_sem); + up_read(&obj->oo_ext_idx_sem); dev->od_dirent_journal = 1; goto again; } @@ -4766,6 +4788,7 @@ again: } else { /* Do not repair under dryrun mode. */ if (*attr & LUDA_VERIFY_DRYRUN) { + *fid = lma->lma_self_fid; *attr |= LUDA_REPAIR; GOTO(out_inode, rc = 0); } @@ -4776,7 +4799,7 @@ again: if (hlock != NULL) ldiskfs_htree_unlock(hlock); else - up_write(&obj->oo_ext_idx_sem); + up_read(&obj->oo_ext_idx_sem); dev->od_dirent_journal = 1; goto again; } @@ -4792,10 +4815,13 @@ again: } else if (rc == -ENODATA) { /* Do not repair under dryrun mode. 
*/ if (*attr & LUDA_VERIFY_DRYRUN) { - if (fid_is_sane(fid)) + if (fid_is_sane(fid)) { *attr |= LUDA_REPAIR; - else + } else { + lu_igif_build(fid, inode->i_ino, + inode->i_generation); *attr |= LUDA_UPGRADE; + } GOTO(out_inode, rc = 0); } @@ -4805,7 +4831,7 @@ again: if (hlock != NULL) ldiskfs_htree_unlock(hlock); else - up_write(&obj->oo_ext_idx_sem); + up_read(&obj->oo_ext_idx_sem); dev->od_dirent_journal = 1; goto again; } @@ -4835,10 +4861,14 @@ out_inode: out_journal: brelse(bh); - if (hlock != NULL) + if (hlock != NULL) { ldiskfs_htree_unlock(hlock); - else - up_write(&obj->oo_ext_idx_sem); + } else { + if (dev->od_dirent_journal) + up_write(&obj->oo_ext_idx_sem); + else + up_read(&obj->oo_ext_idx_sem); + } if (jh != NULL) ldiskfs_journal_stop(jh); if (rc >= 0 && !dirty) diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 75a2a8a..b2c13b2 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -1009,5 +1009,9 @@ static inline loff_t ldiskfs_get_htree_eof(struct file *filp) return LDISKFS_HTREE_EOF_64BIT; } +static inline int fid_is_internal(const struct lu_fid *fid) +{ + return (!fid_is_namespace_visible(fid) && !fid_is_idif(fid)); +} #endif /* __KERNEL__ */ #endif /* _OSD_INTERNAL_H */ diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 1e27918..75b0b8a 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -455,11 +455,14 @@ iget: ops = DTO_INDEX_INSERT; idx = osd_oi_fid2idx(dev, fid); if (val == SCRUB_NEXT_NOLMA) { + sf->sf_flags |= SF_UPGRADE; + scrub->os_full_speed = 1; rc = osd_ea_fid_set(info, inode, fid, 0); if (rc != 0) GOTO(out, rc); } else { sf->sf_flags |= SF_RECREATED | SF_INCONSISTENT; + scrub->os_full_speed = 1; if (unlikely(!ldiskfs_test_bit(idx, sf->sf_oi_bitmap))) ldiskfs_set_bit(idx, sf->sf_oi_bitmap); } @@ -467,6 +470,7 @@ iget: GOTO(out, rc = 0); } else { sf->sf_flags |= SF_INCONSISTENT; + 
scrub->os_full_speed = 1; } rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops); @@ -645,12 +649,11 @@ static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev, rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); if (rc == 0) { - if (!scrub) { - if (!fid_is_client_visible(&lma->lma_self_fid)) - rc = SCRUB_NEXT_CONTINUE; - else - *fid = lma->lma_self_fid; - } + if (fid_is_llog(&lma->lma_self_fid) || + (!scrub && fid_is_internal(&lma->lma_self_fid))) + rc = SCRUB_NEXT_CONTINUE; + else + *fid = lma->lma_self_fid; } else if (rc == -ENODATA) { lu_igif_build(fid, inode->i_ino, inode->i_generation); if (scrub) @@ -749,6 +752,21 @@ static int osd_preload_next(struct osd_thread_info *info, return rc; } +static inline int +osd_scrub_wakeup(struct osd_scrub *scrub, struct osd_otable_it *it) +{ + spin_lock(&scrub->os_lock); + if (osd_scrub_has_window(scrub, &it->ooi_cache) || + !cfs_list_empty(&scrub->os_inconsistent_items) || + it->ooi_waiting || !thread_is_running(&scrub->os_thread)) + scrub->os_waiting = 0; + else + scrub->os_waiting = 1; + spin_unlock(&scrub->os_lock); + + return !scrub->os_waiting; +} + static int osd_scrub_exec(struct osd_thread_info *info, struct osd_device *dev, struct osd_iit_param *param, struct osd_idmap_cache *oic, int *noslot, int rc) @@ -792,28 +810,27 @@ static int osd_scrub_exec(struct osd_thread_info *info, struct osd_device *dev, next: scrub->os_pos_current = param->gbase + ++(param->offset); + +wait: if (it != NULL && it->ooi_waiting && ooc->ooc_pos_preload < scrub->os_pos_current) { + spin_lock(&scrub->os_lock); it->ooi_waiting = 0; cfs_waitq_broadcast(&thread->t_ctl_waitq); + spin_unlock(&scrub->os_lock); } if (scrub->os_full_speed || rc == SCRUB_NEXT_CONTINUE) return 0; -wait: if (osd_scrub_has_window(scrub, ooc)) { *noslot = 0; return 0; } - scrub->os_waiting = 1; l_wait_event(thread->t_ctl_waitq, - osd_scrub_has_window(scrub, ooc) || - !cfs_list_empty(&scrub->os_inconsistent_items) || - 
!thread_is_running(thread), + osd_scrub_wakeup(scrub, it), &lwi); - scrub->os_waiting = 0; if (osd_scrub_has_window(scrub, ooc)) *noslot = 0; @@ -1802,6 +1819,21 @@ static void osd_otable_it_put(const struct lu_env *env, struct dt_it *di) mutex_unlock(&dev->od_otable_mutex); } +static inline int +osd_otable_it_wakeup(struct osd_scrub *scrub, struct osd_otable_it *it) +{ + spin_lock(&scrub->os_lock); + if (it->ooi_cache.ooc_pos_preload < scrub->os_pos_current || + scrub->os_waiting || it->ooi_stopping || + !thread_is_running(&scrub->os_thread)) + it->ooi_waiting = 0; + else + it->ooi_waiting = 1; + spin_unlock(&scrub->os_lock); + + return !it->ooi_waiting; +} + static int osd_otable_it_next(const struct lu_env *env, struct dt_it *di) { struct osd_otable_it *it = (struct osd_otable_it *)di; @@ -1833,13 +1865,17 @@ again: RETURN(1); } - it->ooi_waiting = 1; - l_wait_event(thread->t_ctl_waitq, - ooc->ooc_pos_preload < scrub->os_pos_current || - !thread_is_running(thread) || - it->ooi_stopping, - &lwi); - it->ooi_waiting = 0; + if (scrub->os_waiting && osd_scrub_has_window(scrub, ooc)) { + spin_lock(&scrub->os_lock); + scrub->os_waiting = 0; + cfs_waitq_broadcast(&scrub->os_thread.t_ctl_waitq); + spin_unlock(&scrub->os_lock); + } + + if (it->ooi_cache.ooc_pos_preload >= scrub->os_pos_current) + l_wait_event(thread->t_ctl_waitq, + osd_otable_it_wakeup(scrub, it), + &lwi); if (!thread_is_running(thread) && !it->ooi_used_outside) RETURN(1); @@ -1892,8 +1928,10 @@ static __u64 osd_otable_it_store(const struct lu_env *env, if (it->ooi_user_ready) hash = ooc->ooc_pos_preload; - else + else if (likely(ooc->ooc_consumer_idx != -1)) hash = ooc->ooc_cache[ooc->ooc_consumer_idx].oic_lid.oii_ino; + else + hash = 0; return hash; } diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 98ce34d..11a22c2 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -32,7 +32,7 @@ noinst_SCRIPTS += sgpdd-survey.sh maloo_upload.sh auster setup-nfs.sh 
noinst_SCRIPTS += mds-survey.sh parallel-scale-nfs.sh large-lun.sh noinst_SCRIPTS += parallel-scale-nfsv3.sh parallel-scale-nfsv4.sh noinst_SCRIPTS += posix.sh sanity-scrub.sh scrub-performance.sh ha.sh -noinst_SCRIPTS += sanity-quota-old.sh sanity-lfsck.sh +noinst_SCRIPTS += sanity-quota-old.sh sanity-lfsck.sh lfsck-performance.sh noinst_SCRIPTS += resolveip noinst_SCRIPTS += sanity-hsm.sh nobase_noinst_SCRIPTS = cfg/local.sh diff --git a/lustre/tests/lfsck-performance.sh b/lustre/tests/lfsck-performance.sh new file mode 100644 index 0000000..ec67507 --- /dev/null +++ b/lustre/tests/lfsck-performance.sh @@ -0,0 +1,355 @@ +#!/bin/bash + +set -e + +ONLY=${ONLY:-"$*"} +ALWAYS_EXCEPT="$LFSCK_PERFORMANCE_EXCEPT" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_logging + +[ $(facet_fstype $SINGLEMDS) != ldiskfs ] && + skip "lfsck performance only for ldiskfs" && exit 0 + +require_dsh_mds || exit 0 + +[ "$SLOW" = "no" ] && + skip "skip lfsck performance test under non-SLOW mode" && exit 0 + +NTHREADS=${NTHREADS:-0} +UNIT=${UNIT:-1048576} +MINCOUNT=${MINCOUNT:-8192} +MAXCOUNT=${MAXCOUNT:-32768} +MINCOUNT_REPAIR=${MINCOUNT_REPAIR:-8192} +MAXCOUNT_REPAIR=${MAXCOUNT_REPAIR:-32768} +BASE_COUNT=${BASE_COUNT:-1048576} +FACTOR=${FACTOR:-2} +INCFACTOR=${INCFACTOR:-25} #percent + +RCMD="do_facet ${SINGLEMDS}" +RLCTL="${RCMD} ${LCTL}" +MDT_DEV="${FSNAME}-MDT0000" +MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/}) +START_NAMESPACE="${RLCTL} lfsck_start -M ${MDT_DEV} -t namespace" +STOP_LFSCK="${RLCTL} lfsck_stop -M ${MDT_DEV}" +SHOW_NAMESPACE="${RLCTL} get_param -n mdd.${MDT_DEV}.lfsck_namespace" +MNTOPTS_NOSCRUB="-o user_xattr,noscrub" +remote_mds && ECHOCMD=${RCMD} || ECHOCMD="eval" + +if [ ${NTHREADS} -eq 0 ]; then + CPUCORE=$(${RCMD} cat /proc/cpuinfo | grep 
"processor.*:" | wc -l) + NTHREADS=$((CPUCORE * 2)) +fi + +lfsck_attach() { + ${ECHOCMD} "${LCTL} <<-EOF + attach echo_client lfsck-MDT0000 lfsck-MDT0000_UUID + setup ${MDT_DEV} mdd + EOF" +} + +lfsck_detach() { + ${ECHOCMD} "${LCTL} <<-EOF + device lfsck-MDT0000 + cleanup + detach + EOF" +} + +lfsck_create() { + local echodev=$(${RLCTL} dl | grep echo_client|awk '{print $1}') + local j + + ${ECHOCMD} "${LCTL} <<-EOF + cfg_device ${echodev} + test_mkdir ${tdir} + EOF" + + for ((j=1; j<${threads}; j++)); do + ${ECHOCMD} "${LCTL} <<-EOF + cfg_device ${echodev} + test_mkdir ${tdir}${j} + EOF" + done + + ${ECHOCMD} "${LCTL} <<-EOF + cfg_device ${echodev} + --threads ${threads} 0 ${echodev} test_create \ + -d ${tdir} -D ${threads} -b ${lbase} -c 0 -n ${usize} + EOF" +} + +lfsck_cleanup() { + do_rpc_nodes $(facet_active_host $SINGLEMDS) unload_modules + formatall +} + +lfsck_create_nfiles() { + local total=$1 + local lbase=$2 + local threads=$3 + local linkea=$4 + local ldir="/test-${lbase}" + local cycle=0 + local count=${UNIT} + + while true; do + [ ${count} -eq 0 -o ${count} -gt ${total} ] && count=${total} + local usize=$((count / NTHREADS)) + [ ${usize} -eq 0 ] && break + local tdir=${ldir}-${cycle}- + + echo "[cycle: ${cycle}] [threads: ${threads}]"\ + "[files: ${count}] [basedir: ${tdir}]" + start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB || + error "Fail to start MDS!" + #define OBD_FAIL_FID_IGIF 0x1504 + [ ! -z $linkea ] && ${RLCTL} set_param fail_loc=0x1504 + + lfsck_attach + lfsck_create + lfsck_detach + + [ ! -z $linkea ] && ${RLCTL} set_param fail_loc=0x0 + stop ${SINGLEMDS} || error "Fail to stop MDS!" 
+ + total=$((total - usize * NTHREADS)) + [ ${total} -eq 0 ] && break + lbase=$((lbase + usize)) + cycle=$((cycle + 1)) + done +} + +build_test_filter + +test_0() { + local BCOUNT=0 + local i + + stopall + do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local + reformat_external_journal + add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) --backfstype \ + ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || + error "Fail to reformat the MDS!" + + for ((i=$MINCOUNT; i<=$MAXCOUNT; i=$((i * FACTOR)))); do + local nfiles=$((i - BCOUNT)) + + echo "+++ start to create for ${i} files set at: $(date) +++" + lfsck_create_nfiles ${nfiles} ${BCOUNT} ${NTHREADS} || + error "Fail to create files!" + echo "+++ end to create for ${i} files set at: $(date) +++" + + BCOUNT=${i} + start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null || + error "Fail to start MDS!" + + echo "start lfsck_namespace for ${i} files set at: $(date)" + $START_NAMESPACE || error "Fail to start lfsck_namespace!" + + while true; do + local STATUS=$($SHOW_NAMESPACE | + awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] && break + sleep 3 # check status every 3 seconds + done + + echo "end lfsck_namespace for ${i} files set at: $(date)" + SPEED=$($SHOW_NAMESPACE | + awk '/^average_speed_phase1/ { print $2 }') + echo "lfsck_namespace speed is ${SPEED}/sec" + stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!" + done +} +run_test 0 "lfsck performance test (routine case) without load" + +test_1() { + local BCOUNT=0 + local i + + stopall + do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local + reformat_external_journal + add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) --backfstype \ + ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || + error "Fail to reformat the MDS!" 
+ + for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do + local nfiles=$((i - BCOUNT)) + + echo "+++ start to create for ${i} files set at: $(date) +++" + lfsck_create_nfiles ${nfiles} ${BCOUNT} ${NTHREADS} || + error "Fail to create files!" + echo "+++ end to create for ${i} files set at: $(date) +++" + + BCOUNT=${i} + local stime=$(date +%s) + echo "backup/restore ${i} files start at: $(date)" + mds_backup_restore || error "Fail to backup/restore!" + echo "backup/restore ${i} files end at: $(date)" + local etime=$(date +%s) + local delta=$((etime - stime)) + [ $delta -gt 0 ] || delta=1 + echo "backup/restore ${i} files used ${delta} seconds" + echo "backup/restore speed is $((i / delta))/sec" + + start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null || + error "Fail to start MDS!" + + echo "start lfsck_namespace for ${i} files set at: $(date)" + $START_NAMESPACE || error "Fail to start lfsck_namespace!" + + while true; do + local STATUS=$($SHOW_NAMESPACE | + awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] && break + sleep 3 # check status every 3 seconds + done + + echo "end lfsck_namespace for ${i} files set at: $(date)" + local SPEED=$($SHOW_NAMESPACE | + awk '/^average_speed_phase1/ { print $2 }') + echo "lfsck_namespace speed is ${SPEED}/sec" + stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!" + done +} +run_test 1 "lfsck performance test (backup/restore) without load" + +test_2() { + local i + + for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do + stopall + do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local + reformat_external_journal + add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) \ + --backfstype ldiskfs --reformat ${MDT_DEVNAME} \ + $(mdsvdevname 1) > /dev/null || + error "Fail to reformat the MDS!" + + echo "+++ start to create for ${i} files set at: $(date) +++" + lfsck_create_nfiles ${i} 0 ${NTHREADS} 1 || + error "Fail to create files!" 
+ echo "+++ end to create for ${i} files set at: $(date) +++" + + start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null || + error "Fail to start MDS!" + + echo "start lfsck_namespace for ${i} files set at: $(date)" + $START_NAMESPACE || error "Fail to start lfsck_namespace!" + + while true; do + local STATUS=$($SHOW_NAMESPACE | + awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] && break + sleep 3 # check status every 3 seconds + done + + echo "end lfsck_namespace for ${i} files set at: $(date)" + local SPEED=$($SHOW_NAMESPACE | + awk '/^average_speed_phase1/ { print $2 }') + echo "lfsck_namespace speed is ${SPEED}/sec" + stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!" + done +} +run_test 2 "lfsck performance test (simulate upgrade from 1.8) without load" + +test_3() { + [ $MDSSIZE -lt 4000000 ] && + skip "MDT device is too small, expect at last 4GB" && exit 0 + + [ $BASE_COUNT -lt 1048576 ] && BASE_COUNT=1048576 + [ $INCFACTOR -gt 25 ] && INCFACTOR=25 + + local inc_count=$((BASE_COUNT * INCFACTOR / 100)) + local BCOUNT=0 + local i + + stopall + do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local + reformat_external_journal + add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) --backfstype \ + ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || + error "Fail to reformat the MDS!" + + for ((i=$inc_count; i<=$BASE_COUNT; i=$((i + inc_count)))); do + local nfiles=$((i - BCOUNT)) + + echo "+++ start to create for ${i} files set at: $(date) +++" + lfsck_create_nfiles ${nfiles} ${BCOUNT} ${NTHREADS} || + error "Fail to create files!" + echo "+++ end to create for ${i} files set at: $(date) +++" + BCOUNT=${i} + done + + start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null || + error "Fail to start MDS!" + + echo "start lfsck_namespace for ${BASE_COUNT} files set at: $(date)" + $START_NAMESPACE || error "Fail to start lfsck_namespace!" 
+ + while true; do + local STATUS=$($SHOW_NAMESPACE | + awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] && break + sleep 3 # check status every 3 seconds + done + + echo "end lfsck_namespace for ${BASE_COUNT} files set at: $(date)" + local FULL_SPEED=$($SHOW_NAMESPACE | + awk '/^average_speed_phase1/ { print $2 }') + echo "lfsck_namespace full_speed is ${FULL_SPEED}/sec" + stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!" + local inc_speed=$((FULL_SPEED * INCFACTOR / 100)) + local j + + for ((j=$inc_speed; j<$FULL_SPEED; j=$((j + inc_speed)))); do + start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null || + error "Fail to start MDS!" + + $STOP_LFSCK > /dev/null 2>&1 + echo "start lfsck_namespace with speed ${j} at: $(date)" + $START_NAMESPACE --reset -s ${j} || + error "Fail to start lfsck_namespace with speed ${j}!" + # lfsck_namespace will be paused when MDS stop, + # and will be restarted automatically when mount up again. + stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!" + + local nfiles=$(((i - BCOUNT) / 2)) + + echo "+++ start to create for ${i} files set at: $(date) +++" + lfsck_create_nfiles ${nfiles} ${BCOUNT} ${NTHREADS} || + error "Fail to create files!" + echo "+++ end to create for ${i} files set at: $(date) +++" + BCOUNT=${i} + i=$((i + inc_count)) + done + + start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null || + error "Fail to start MDS!" + + $STOP_LFSCK /dev/null 2>&1 + echo "start lfsck_namespace with full speed at: $(date)" + $START_NAMESPACE --reset -s 0 || + error "Fail to start lfsck_namespace with full speed!" + stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!" + + local nfiles=$(((i - BCOUNT) / 2)) + + echo "+++ start to create for ${i} files set at: $(date) +++" + lfsck_create_nfiles ${nfiles} ${BCOUNT} ${NTHREADS} || + error "Fail to create files!" 
+ echo "+++ end to create for ${i} files set at: $(date) +++" +} +run_test 3 "lfsck performance test (routine case) without load" + +# cleanup the system at last +lfsck_cleanup +complete $SECONDS +exit_status diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 4d39cae..80655d9 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -52,14 +52,14 @@ lfsck_prep() { echo "formatall" formatall > /dev/null + echo "setupall" + setupall > /dev/null + if [ ! -z $igif ]; then #define OBD_FAIL_FID_IGIF 0x1504 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504 fi - echo "setupall" - setupall > /dev/null - echo "preparing... ${nfiles} * ${ndirs} files will be created." mkdir -p $DIR/$tdir cp $LUSTRE/tests/*.sh $DIR/$tdir/ diff --git a/lustre/tests/scrub-performance.sh b/lustre/tests/scrub-performance.sh index 0c029e7..fe9f05b 100644 --- a/lustre/tests/scrub-performance.sh +++ b/lustre/tests/scrub-performance.sh @@ -18,9 +18,11 @@ init_logging [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] && skip "Need MDS version at least 2.2.90" && exit 0 require_dsh_mds || exit 0 +[ "$SLOW" = "no" ] && skip "skip scrub performance test under non-SLOW mode" + NTHREADS=${NTHREADS:-0} -UNIT=${UNIT:-0} +UNIT=${UNIT:-1048576} BACKUP=${BACKUP:-0} MINCOUNT=${MINCOUNT:-8192} MAXCOUNT=${MAXCOUNT:-32768} @@ -35,14 +37,14 @@ remote_mds && ECHOCMD=${RCMD} || ECHOCMD="eval" if [ ${NTHREADS} -eq 0 ]; then CPUCORE=$(${RCMD} cat /proc/cpuinfo | grep "processor.*:" | wc -l) - NTHREADS=$((CPUCORE * 3)) + NTHREADS=$((CPUCORE * 2)) fi stopall do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local reformat_external_journal -add $SINGLEMDS $(mkfs_opts $SINGLEMDS) --backfstype ldiskfs --reformat \ - $MDT_DEVNAME > /dev/null || exit 2 +add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) --backfstype ldiskfs \ + --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || exit 2 scrub_attach() { ${ECHOCMD} "${LCTL} <<-EOF diff --git 
a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 4e39c29..09d6298 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -2883,6 +2883,9 @@ mkfs_opts() { if [ $fstype == ldiskfs ]; then fs_mkfs_opts+=${MDSJOURNALSIZE:+" -J size=$MDSJOURNALSIZE"} + if [ ! -z $EJOURNAL ]; then + fs_mkfs_opts+=${MDSJOURNALSIZE:+" device=$EJOURNAL"} + fi fs_mkfs_opts+=${MDSISIZE:+" -i $MDSISIZE"} fi fi @@ -6008,8 +6011,9 @@ mds_backup_restore() { reformat_external_journal || return 5 # step 8: reformat dev echo "reformat new device" - add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS}) --backfstype ldiskfs \ - --reformat $devname > /dev/null || return 6 + add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${devname}) --backfstype \ + ldiskfs --reformat ${devname} $(mdsvdevname 1) > /dev/null || + exit 6 # step 9: mount dev ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 7 # step 10: restore metadata -- 1.8.3.1