From d01d4c697a3c4423587159d58da6e455a5a3551f Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Fri, 29 Jun 2012 19:34:50 +0800 Subject: [PATCH] LU-957 scrub: Proc interfaces and tests for OI scrub 1) Control/trace OI scrub running. 2) Verify whether the OI scrub basic functions works or not. 3) Test OI scrub performance. For autotest: Test-Parameters: testlist=sanity-scrub,scrub-performance Signed-off-by: Fan Yong Change-Id: I5be3d1a521f5f7875f56e9455ff2010016e6a344 Reviewed-on: http://review.whamcloud.com/3168 Reviewed-by: Yu Jian Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo --- lustre/include/obd_support.h | 9 +- lustre/osd-ldiskfs/osd_handler.c | 24 ++- lustre/osd-ldiskfs/osd_internal.h | 1 + lustre/osd-ldiskfs/osd_lproc.c | 49 ++++- lustre/osd-ldiskfs/osd_oi.c | 16 +- lustre/osd-ldiskfs/osd_scrub.c | 236 ++++++++++++++++++++- lustre/tests/Makefile.am | 2 +- lustre/tests/sanity-scrub.sh | 407 ++++++++++++++++++++++++++++++++++++ lustre/tests/scrub-performance.sh | 176 ++++++++++++++++ lustre/tests/test-framework.sh | 90 ++++++++ lustre/tests/test-groups/regression | 1 + 11 files changed, 987 insertions(+), 24 deletions(-) create mode 100644 lustre/tests/sanity-scrub.sh create mode 100644 lustre/tests/scrub-performance.sh diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 3ebcf91..5cf7fbc 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -241,6 +241,11 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_MDS_GET_INFO_NET 0x186 #define OBD_FAIL_MDS_DQACQ_NET 0x187 +/* OI scrub */ +#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 +#define OBD_FAIL_OSD_SCRUB_CRASH 0x191 +#define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 #define OBD_FAIL_OST_DISCONNECT_NET 0x202 @@ -631,10 +636,10 @@ do { \ #ifdef __KERNEL__ /* Allocations above this size are considered too big and could not be done - * atomically. 
+ * atomically. * * Be very careful when changing this value, especially when decreasing it, - * since vmalloc in Linux doesn't perform well on multi-cores system, calling + * since vmalloc in Linux doesn't perform well on multi-cores system, calling * vmalloc in critical path would hurt peformance badly. See LU-66. */ #define OBD_ALLOC_BIG (4 * CFS_PAGE_SIZE) diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index eced8db..0a258ec 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -300,8 +300,8 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, iput(inode); inode = ERR_PTR(-ESTALE); } else if (is_bad_inode(inode)) { - CWARN("%s: bad inode: ino = %u\n", - dev->od_dt_dev.dd_lu_dev.ld_obd->obd_name, id->oii_ino); + CWARN("%.16s: bad inode: ino = %u\n", + LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, id->oii_ino); iput(inode); inode = ERR_PTR(-ENOENT); } else { @@ -462,9 +462,10 @@ trigger: result = -EINPROGRESS; } else if (!scrub->os_no_scrub) { result = osd_scrub_start(dev); - LCONSOLE_ERROR("Trigger OI scrub by RPC for " - DFID", rc = %d\n", - PFID(fid), result); + LCONSOLE_ERROR("%.16s: trigger OI scrub by RPC " + "for "DFID", rc = %d [1]\n", + LDISKFS_SB(osd_sb(dev))->s_es->\ + s_volume_name,PFID(fid), result); if (result == 0 || result == -EALREADY) result = -EINPROGRESS; else @@ -763,9 +764,10 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d, GOTO(out, rc); if (!osd_param_is_sane(dev, th)) { - CWARN("%s: too many transaction credits (%d > %d)\n", - d->dd_lu_dev.ld_obd->obd_name, oh->ot_credits, - osd_journal(dev)->j_max_transaction_buffers); + CWARN("%.16s: too many transaction credits (%d > %d)\n", + LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, + oh->ot_credits, + osd_journal(dev)->j_max_transaction_buffers); /* XXX Limit the credits to 'max_transaction_buffers', and * let the underlying filesystem to catch the error if * we really need so many 
credits. @@ -3323,8 +3325,10 @@ again: CDEBUG(D_LFSCK, "Trigger OI scrub by RPC for "DFID"\n", PFID(fid)); rc = osd_scrub_start(dev); - CDEBUG(D_LFSCK, "Trigger OI scrub by RPC for "DFID", rc = %d\n", - PFID(fid), rc); + LCONSOLE_ERROR("%.16s: trigger OI scrub by RPC for "DFID + ", rc = %d [2]\n", + LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, + PFID(fid), rc); if (rc == 0) goto again; } diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index b757b53..59b88f3 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -657,6 +657,7 @@ int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic, int insert); int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid, struct osd_inode_id *id); +int osd_scrub_dump(struct osd_device *dev, char *buf, int len); /* osd_quota_fmt.c */ int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj, diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index d7d56bc..e95e038 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -481,6 +481,50 @@ static int lprocfs_osd_wr_pdo(struct file *file, const char *buffer, } #endif +static int lprocfs_osd_rd_auto_scrub(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct osd_device *dev = data; + + LASSERT(dev != NULL); + if (unlikely(dev->od_mount == NULL)) + return -EINPROGRESS; + + *eof = 1; + return snprintf(page, count, "%d\n", !dev->od_scrub.os_no_scrub); +} + +static int lprocfs_osd_wr_auto_scrub(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct osd_device *dev = data; + int val, rc; + + LASSERT(dev != NULL); + if (unlikely(dev->od_mount == NULL)) + return -EINPROGRESS; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + dev->od_scrub.os_no_scrub = !val; + return count; +} + +static int lprocfs_osd_rd_oi_scrub(char *page, char **start, off_t 
off, + int count, int *eof, void *data) +{ + struct osd_device *dev = data; + + LASSERT(dev != NULL); + if (unlikely(dev->od_mount == NULL)) + return -EINPROGRESS; + + *eof = 1; + return osd_scrub_dump(dev, page, count); +} + struct lprocfs_vars lprocfs_osd_obd_vars[] = { { "blocksize", lprocfs_osd_rd_blksize, 0, 0 }, { "kbytestotal", lprocfs_osd_rd_kbytestotal, 0, 0 }, @@ -493,7 +537,10 @@ struct lprocfs_vars lprocfs_osd_obd_vars[] = { #ifdef HAVE_LDISKFS_PDO { "pdo", lprocfs_osd_rd_pdo, lprocfs_osd_wr_pdo, 0 }, #endif - { 0 } + { "auto_scrub", lprocfs_osd_rd_auto_scrub, + lprocfs_osd_wr_auto_scrub, 0 }, + { "oi_scrub", lprocfs_osd_rd_oi_scrub, 0, 0 }, + { 0 } }; struct lprocfs_vars lprocfs_osd_module_vars[] = { diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index dfa42ab..b4d3ee4 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -287,7 +287,6 @@ static int osd_oi_table_open(struct osd_thread_info *info, struct osd_device *osd, struct osd_oi **oi_table, unsigned oi_count, bool create) { - struct dt_device *dev = &osd->od_dt_dev; struct scrub_file *sf = &osd->od_scrub.os_file; int count = 0; int rc = 0; @@ -322,11 +321,12 @@ osd_oi_table_open(struct osd_thread_info *info, struct osd_device *osd, continue; } - CERROR("%s: can't open %s: rc = %d\n", - dev->dd_lu_dev.ld_obd->obd_name, name, rc); + CERROR("%.16s: can't open %s: rc = %d\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, name, rc); if (oi_count > 0) - CERROR("%s: expect to open total %d OI files.\n", - dev->dd_lu_dev.ld_obd->obd_name, oi_count); + CERROR("%.16s: expect to open total %d OI files.\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, + oi_count); break; } @@ -340,7 +340,6 @@ osd_oi_table_open(struct osd_thread_info *info, struct osd_device *osd, int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd) { - struct dt_device *dev = &osd->od_dt_dev; struct osd_scrub *scrub = &osd->od_scrub; struct scrub_file *sf = &scrub->os_file; 
struct osd_oi **oi; @@ -373,8 +372,9 @@ int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd) if (rc == 0) { /* found single OI from old filesystem */ GOTO(out, rc = 1); } else if (rc != -ENOENT) { - CERROR("%s: can't open %s: rc = %d\n", - dev->dd_lu_dev.ld_obd->obd_name, OSD_OI_NAME_BASE, rc); + CERROR("%.16s: can't open %s: rc = %d\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, + OSD_OI_NAME_BASE, rc); GOTO(out, rc); } diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 45c8233..2f385d3 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -298,14 +298,14 @@ osd_scrub_error(struct osd_device *dev, struct osd_inode_id *lid, int rc) } static int -osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev, +osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev, struct osd_idmap_cache *oic) { struct osd_scrub *scrub = &dev->od_scrub; struct scrub_file *sf = &scrub->os_file; struct osd_inode_id *lid2 = &info->oti_id; struct lu_fid *oi_fid = &info->oti_fid; - struct osd_inode_id *oi_id = &info->oti_id; + struct osd_inode_id *oi_id = &info->oti_id2; handle_t *jh = NULL; struct osd_inconsistent_item *oii = NULL; struct inode *inode = NULL; @@ -495,6 +495,8 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result) #define SCRUB_NEXT_CONTINUE 2 /* skip current object and process next bit */ #define SCRUB_NEXT_EXIT 3 /* exit all the loops */ #define SCRUB_NEXT_WAIT 4 /* wait for free cache slot */ +#define SCRUB_NEXT_CRASH 5 /* simulate system crash during OI scrub */ +#define SCRUB_NEXT_FATAL 6 /* simulate failure during OI scrub */ struct osd_iit_param { struct super_block *sb; @@ -571,6 +573,26 @@ static int osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev, struct inode *inode; int rc; + if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) { + struct l_wait_info lwi; + + lwi = 
LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL); + l_wait_event(thread->t_ctl_waitq, + !cfs_list_empty(&scrub->os_inconsistent_items) || + !thread_is_running(thread), + &lwi); + } + + if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) { + cfs_spin_lock(&scrub->os_lock); + thread_set_flags(thread, SVC_STOPPING); + cfs_spin_unlock(&scrub->os_lock); + return SCRUB_NEXT_CRASH; + } + + if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_FATAL)) + return SCRUB_NEXT_FATAL; + if (unlikely(!thread_is_running(thread))) return SCRUB_NEXT_EXIT; @@ -723,6 +745,7 @@ static int osd_preload_exec(struct osd_thread_info *info, } #define SCRUB_IT_ALL 1 +#define SCRUB_IT_CRASH 2 static int osd_inode_iteration(struct osd_thread_info *info, struct osd_device *dev, __u32 max, int preload) @@ -779,6 +802,12 @@ static int osd_inode_iteration(struct osd_thread_info *info, case SCRUB_NEXT_EXIT: brelse(param.bitmap); RETURN(0); + case SCRUB_NEXT_CRASH: + brelse(param.bitmap); + RETURN(SCRUB_IT_CRASH); + case SCRUB_NEXT_FATAL: + brelse(param.bitmap); + RETURN(-EINVAL); } rc = exec(info, dev, ¶m, oic, &noslot, rc); @@ -841,6 +870,8 @@ static int osd_scrub_main(void *args) scrub->os_start_flags, scrub->os_pos_current); rc = osd_inode_iteration(osd_oti_get(&env), dev, ~0U, 0); + if (unlikely(rc == SCRUB_IT_CRASH)) + GOTO(out, rc = -EINVAL); GOTO(post, rc); post: @@ -1363,3 +1394,204 @@ int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid, RETURN(-ENOENT); } + +static const char *scrub_status_names[] = { + "init", + "scanning", + "completed", + "failed", + "paused", + "crashed", + NULL +}; + +static const char *scrub_flags_names[] = { + "recreated", + "inconsistent", + "auto", + NULL +}; + +static const char *scrub_param_names[] = { + "failout", + NULL +}; + +static int scrub_bits_dump(char **buf, int *len, int bits, const char *names[], + const char *prefix) +{ + int save = *len; + int flag; + int rc; + int i; + + rc = snprintf(*buf, *len, "%s:%c", prefix, bits != 0 ? 
' ' : '\n'); + if (rc <= 0) + return -ENOSPC; + + *buf += rc; + *len -= rc; + for (i = 0, flag = 1; bits != 0; i++, flag = 1 << i) { + if (flag & bits) { + bits &= ~flag; + rc = snprintf(*buf, *len, "%s%c", names[i], + bits != 0 ? ',' : '\n'); + if (rc <= 0) + return -ENOSPC; + + *buf += rc; + *len -= rc; + } + } + return save - *len; +} + +static int scrub_time_dump(char **buf, int *len, __u64 time, const char *prefix) +{ + int rc; + + if (time != 0) + rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix, + cfs_time_current_sec() - time); + else + rc = snprintf(*buf, *len, "%s: N/A\n", prefix); + if (rc <= 0) + return -ENOSPC; + + *buf += rc; + *len -= rc; + return rc; +} + +static int scrub_pos_dump(char **buf, int *len, __u64 pos, const char *prefix) +{ + int rc; + + if (pos != 0) + rc = snprintf(*buf, *len, "%s: "LPU64"\n", prefix, pos); + else + rc = snprintf(*buf, *len, "%s: N/A\n", prefix); + if (rc <= 0) + return -ENOSPC; + + *buf += rc; + *len -= rc; + return rc; +} + +int osd_scrub_dump(struct osd_device *dev, char *buf, int len) +{ + struct osd_scrub *scrub = &dev->od_scrub; + struct scrub_file *sf = &scrub->os_file; + __u64 checked; + __u64 speed; + int save = len; + int ret = -ENOSPC; + int rc; + + cfs_down_read(&scrub->os_rwsem); + rc = snprintf(buf, len, + "name: OI scrub\n" + "magic: 0x%x\n" + "oi_files: %d\n" + "status: %s\n", + sf->sf_magic, (int)sf->sf_oi_count, + scrub_status_names[sf->sf_status]); + if (rc <= 0) + goto out; + + buf += rc; + len -= rc; + rc = scrub_bits_dump(&buf, &len, sf->sf_flags, scrub_flags_names, + "flags"); + if (rc < 0) + goto out; + + rc = scrub_bits_dump(&buf, &len, sf->sf_param, scrub_param_names, + "param"); + if (rc < 0) + goto out; + + rc = scrub_time_dump(&buf, &len, sf->sf_time_last_complete, + "time_since_last_completed"); + if (rc < 0) + goto out; + + rc = scrub_time_dump(&buf, &len, sf->sf_time_latest_start, + "time_since_latest_start"); + if (rc < 0) + goto out; + + rc = scrub_time_dump(&buf, &len, 
sf->sf_time_last_checkpoint, + "time_since_last_checkpoint"); + if (rc < 0) + goto out; + + rc = scrub_pos_dump(&buf, &len, sf->sf_pos_latest_start, + "latest_start_position"); + if (rc < 0) + goto out; + + rc = scrub_pos_dump(&buf, &len, sf->sf_pos_last_checkpoint, + "last_checkpoint_position"); + if (rc < 0) + goto out; + + rc = scrub_pos_dump(&buf, &len, sf->sf_pos_first_inconsistent, + "first_failure_position"); + if (rc < 0) + goto out; + + checked = sf->sf_items_checked + scrub->os_new_checked; + rc = snprintf(buf, len, + "checked: "LPU64"\n" + "updated: "LPU64"\n" + "failed: "LPU64"\n" + "prior_updated: "LPU64"\n" + "success_count: %u\n", + checked, sf->sf_items_updated, sf->sf_items_failed, + sf->sf_items_updated_prior, sf->sf_success_count); + if (rc <= 0) + goto out; + + buf += rc; + len -= rc; + speed = checked; + if (thread_is_running(&scrub->os_thread)) { + cfs_duration_t duration = cfs_time_current() - + scrub->os_time_last_checkpoint; + __u64 new_checked = scrub->os_new_checked * CFS_HZ; + __u32 rtime = sf->sf_run_time + + cfs_duration_sec(duration + HALF_SEC); + + if (duration != 0) + do_div(new_checked, duration); + if (rtime != 0) + do_div(speed, rtime); + rc = snprintf(buf, len, + "run_time: %u seconds\n" + "average_speed: "LPU64" objects/sec\n" + "real-time_speed: "LPU64" objects/sec\n" + "current_position: %u\n", + rtime, speed, new_checked, scrub->os_pos_current); + } else { + if (sf->sf_run_time != 0) + do_div(speed, sf->sf_run_time); + rc = snprintf(buf, len, + "run_time: %u seconds\n" + "average_speed: "LPU64" objects/sec\n" + "real-time_speed: N/A\n" + "current_position: N/A\n", + sf->sf_run_time, speed); + } + if (rc <= 0) + goto out; + + buf += rc; + len -= rc; + ret = save - len; + +out: + cfs_up_read(&scrub->os_rwsem); + return ret; +} diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index f2c14db..f8a8765 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -28,7 +28,7 @@ noinst_SCRIPTS += 
lnet-selftest.sh obdfilter-survey.sh mmp.sh mmp_mark.sh noinst_SCRIPTS += sgpdd-survey.sh maloo_upload.sh auster setup-nfs.sh noinst_SCRIPTS += mds-survey.sh parallel-scale-nfs.sh noinst_SCRIPTS += parallel-scale-nfsv3.sh parallel-scale-nfsv4.sh -noinst_SCRIPTS += posix.sh +noinst_SCRIPTS += posix.sh sanity-scrub.sh scrub-performance.sh nobase_noinst_SCRIPTS = cfg/local.sh nobase_noinst_SCRIPTS += test-groups/regression test-groups/regression-mpi nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh new file mode 100644 index 0000000..4d1c114 --- /dev/null +++ b/lustre/tests/sanity-scrub.sh @@ -0,0 +1,407 @@ +#!/bin/bash +# +# Run select tests by setting ONLY, or as arguments to the script. +# Skip specific tests by setting EXCEPT. +# + +set -e + +ONLY=${ONLY:-"$*"} +ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_logging + +[ "${MDSFSTYPE:-$FSTYPE}" != "ldiskfs" ] && + skip "test OI scrub only for ldiskfs" && exit 0 +require_dsh_mds || exit 0 + +SAVED_MDSSIZE=${MDSSIZE} +SAVED_OSTSIZE=${OSTSIZE} +# use small MDS + OST size to speed formatting time +# do not use too small MDSSIZE/OSTSIZE, which affect the default journal size +MDSSIZE=100000 +OSTSIZE=100000 + +check_and_setup_lustre +build_test_filter + +MDT_DEV="${FSNAME}-MDT0000" +MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/}) +SHOW_SCRUB="do_facet $SINGLEMDS \ + $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub" + +scrub_prep() { + local nfiles=$1 + + echo "formatall" + formatall > /dev/null + echo "setupall" + setupall > /dev/null + + echo "preparing... ${nfiles} files will be created." 
+ mkdir -p $DIR/$tdir + cp $LUSTRE/tests/*.sh $DIR/$tdir/ + [[ $nfiles -gt 0 ]] && { createmany -o $DIR/$tdir/$tfile $nfiles || + error "createmany failed"; } + + echo "prepared." + cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!" + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "Fail to stop MDS!" +} + +test_0() { + scrub_prep 0 + echo "start $SINGLEMDS without disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + error "(1) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "init" ] || + error "(2) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'" + + mount_client $MOUNT || error "(4) Fail to start client!" + + diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || + error "(5) File diff failed unexpected!" +} +run_test 0 "Do not auto trigger OI scrub for non-backup/restore case" + +test_1a() { + scrub_prep 0 + mds_remove_ois || error "(1) Fail to remove/recreate!" + + echo "start $SINGLEMDS without disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + error "(2) Fail to start MDS!" + + sleep 3 + local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "completed" ] || + error "(3) Expect 'completed', but got '$STATUS'" + + mount_client $MOUNT || error "(4) Fail to start client!" + + diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || + error "(5) File diff failed unexpected!" +} +run_test 1a "Trigger OI scrub when MDT mounts for OI files remove/recreate case" + +test_2() { + scrub_prep 0 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS without disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + error "(2) Fail to start MDS!" 
+ + sleep 3 + local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "completed" ] || + error "(3) Expect 'completed', but got '$STATUS'" + + mount_client $MOUNT || error "(4) Fail to start client!" + + diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || + error "(5) File diff failed unexpected!" +} +run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case" + +test_3() { + scrub_prep 0 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + echo "stopall" + stopall > /dev/null +} +run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" + +test_4() { + scrub_prep 0 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + mount_client $MOUNT || error "(5) Fail to start client!" + + do_facet $SINGLEMDS \ + $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 + diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || + error "(6) File diff failed unexpected!" 
+ + sleep 3 + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "completed" ] || + error "(7) Expect 'completed', but got '$STATUS'" +} +run_test 4 "Trigger OI scrub automatically if inconsistent OI mapping was found" + +test_5() { + scrub_prep 1500 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + mount_client $MOUNT || error "(5) Fail to start client!" + + do_facet $SINGLEMDS \ + $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 +#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || + error "(6) File diff failed unexpected!" + + umount_client $MOUNT || error "(7) Fail to stop client!" + + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "scanning" ] || + error "(8) Expect 'scanning', but got '$STATUS'" + +#define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80000191 + sleep 4 + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(9) Fail to stop MDS!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + error "(10) Fail to start MDS!" + + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "crashed" ] || + error "(11) Expect 'crashed', but got '$STATUS'" + + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(12) Fail to stop MDS!" 
+ +#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + echo "start $SINGLEMDS without disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + error "(13) Fail to start MDS!" + + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "scanning" ] || + error "(14) Expect 'scanning', but got '$STATUS'" + +#define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80000192 + sleep 4 + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "failed" ] || + error "(15) Expect 'failed', but got '$STATUS'" + + mount_client $MOUNT || error "(16) Fail to start client!" + +#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + stat $DIR/$tdir/${tfile}1000 || + error "(17) Fail to stat $DIR/$tdir/${tfile}1000!" + + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "scanning" ] || + error "(18) Expect 'scanning', but got '$STATUS'" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + sleep 5 + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "completed" ] || + error "(19) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + [ -z "$FLAGS" ] || error "(20) Expect empty flags, but got '$FLAGS'" +} +run_test 5 "OI scrub state machine" + +test_6() { + scrub_prep 1000 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + error "(2) Fail to start MDS!" 
+ + local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + mount_client $MOUNT || error "(5) Fail to start client!" + + do_facet $SINGLEMDS \ + $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 +#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || + error "(6) File diff failed unexpected!" + + # Fail the OI scrub to guarantee there is at least one checkpoint +#define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80000192 + sleep 4 + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "failed" ] || + error "(7) Expect 'failed', but got '$STATUS'" + +#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + stat $DIR/$tdir/${tfile}800 || + error "(8) Fail to stat $DIR/$tdir/${tfile}800!" + + umount_client $MOUNT || error "(9) Fail to stop client!" + + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "scanning" ] || + error "(10) Expect 'scanning', but got '$STATUS'" + +#define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80000191 + sleep 4 + local POSITION0=$($SHOW_SCRUB | sed -n '11'p | awk '{print $2}') + POSITION0=$((POSITION0 + 1)) + + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(11) Fail to stop MDS!" 
+ +#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + echo "start $SINGLEMDS without disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + error "(12) Fail to start MDS!" + + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "scanning" ] || + error "(13) Expect 'scanning', but got '$STATUS'" + + local POSITION1=$($SHOW_SCRUB | sed -n '10'p |awk '{print $2}') + [ $POSITION0 -eq $POSITION1 ] || + error "(14) Expect position: $POSITION0, but got $POSITION1" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + sleep 5 + STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "completed" ] || + error "(15) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + [ -z "$FLAGS" ] || error "(16) Expect empty flags, but got '$FLAGS'" +} +run_test 6 "OI scrub resumes from last checkpoint" + +test_7() { + scrub_prep 500 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + mount_client $MOUNT || error "(5) Fail to start client!" + + do_facet $SINGLEMDS \ + $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 +#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || + error "(6) File diff failed unexpected!" 
+
+	stat $DIR/$tdir/${tfile}300 ||
+		error "(7) Fail to stat $DIR/$tdir/${tfile}300!"
+
+	STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}')
+	[ "$STATUS" == "scanning" ] ||
+		error "(8) Expect 'scanning', but got '$STATUS'"
+
+	FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}')
+	[ "$FLAGS" == "inconsistent,auto" ] ||
+		error "(9) Expect 'inconsistent,auto', but got '$FLAGS'"
+
+	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+	do_facet $SINGLEMDS $LCTL set_param fail_val=0
+	sleep 5
+	STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}')
+	[ "$STATUS" == "completed" ] ||
+		error "(10) Expect 'completed', but got '$STATUS'"
+
+	FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}')
+	[ -z "$FLAGS" ] || error "(11) Expect empty flags, but got '$FLAGS'"
+}
+run_test 7 "System is available during OI scrub scanning"
+
+# restore the ${facet}_MKFS_OPTS variables
+for facet in MGS MDS OST; do
+    opts=SAVED_${facet}_MKFS_OPTS
+    if [[ -n ${!opts} ]]; then
+        eval ${facet}_MKFS_OPTS=\"${!opts}\"
+    fi
+done
+
+# restore MDS/OST size
+MDSSIZE=${SAVED_MDSSIZE}
+OSTSIZE=${SAVED_OSTSIZE}
+
+# cleanup the system at last
+formatall
+
+complete $(basename $0) $SECONDS
+exit_status
diff --git a/lustre/tests/scrub-performance.sh b/lustre/tests/scrub-performance.sh
new file mode 100644
index 0000000..79f166d
--- /dev/null
+++ b/lustre/tests/scrub-performance.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+
+set -e
+
+ONLY=${ONLY:-"$*"}
+ALWAYS_EXCEPT="$SCRUB_PERFORMANCE_EXCEPT"
+[ "$SLOW" = "no" ] && EXCEPT_SLOW=""
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+
+LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
+
+[ "${MDSFSTYPE:-$FSTYPE}" != "ldiskfs" ] &&
+	skip "OI scrub performance only for ldiskfs" && exit 0
+require_dsh_mds || exit 0
+
+NTHREADS=${NTHREADS:-0}
+UNIT=${UNIT:-0}
+BACKUP=${BACKUP:-0}
+MINCOUNT=${MINCOUNT:-8192}
+MAXCOUNT=${MAXCOUNT:-32768}
+FACTOR=${FACTOR:-2}
+
+RCMD="do_facet ${SINGLEMDS}"
+RLCTL="${RCMD} ${LCTL}"
+MDT_DEV="${FSNAME}-MDT0000"
+MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
+SHOW_SCRUB="${RLCTL} get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
+remote_mds && ECHOCMD=${RCMD} || ECHOCMD="eval"
+
+if [ ${NTHREADS} -eq 0 ]; then
+	CPUCORE=$(${RCMD} cat /proc/cpuinfo | grep "processor.*:" | wc -l)
+	NTHREADS=$((CPUCORE * 3))
+fi
+
+stopall
+do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local
+reformat_external_journal
+add ${SINGLEMDS} $(mkfs_opts mds) --backfstype ldiskfs --reformat \
+	$MDT_DEVNAME > /dev/null || exit 2
+
+# Attach an echo_client device named scrub-MDT0000 (through ${ECHOCMD})
+# and set it up on ${MDT_DEV}.
+scrub_attach() {
+	${ECHOCMD} "${LCTL} <<-EOF
+		attach echo_client scrub-MDT0000 scrub-MDT0000_UUID
+		setup ${MDT_DEV} mdd
+	EOF"
+}
+
+# Clean up and detach the scrub-MDT0000 echo_client device.
+scrub_detach() {
+	${ECHOCMD} "${LCTL} <<-EOF
+		device scrub-MDT0000
+		cleanup
+		detach
+	EOF"
+}
+
+# Issue the lctl test_mkdir/test_create commands on the echo_client device.
+# Reads tdir, threads, lbase and usize from the caller's scope; they are
+# set as locals by scrub_create_nfiles.
+scrub_create() {
+	local echodev=$(${RLCTL} dl | grep echo_client|awk '{print $1}')
+	local j
+
+	${ECHOCMD} "${LCTL} <<-EOF
+		cfg_device ${echodev}
+		test_mkdir ${tdir}
+	EOF"
+
+	for ((j=1; j<${threads}; j++)); do
+		${ECHOCMD} "${LCTL} <<-EOF
+			cfg_device ${echodev}
+			test_mkdir ${tdir}${j}
+		EOF"
+	done
+
+	${ECHOCMD} "${LCTL} <<-EOF
+		cfg_device ${echodev}
+		--threads ${threads} 0 ${echodev} test_create \
+		-d ${tdir} -D ${threads} -b ${lbase} -c 0 -n ${usize}
+	EOF"
+}
+
+# Unload the modules on the active MDS host, then run formatall.
+scrub_cleanup() {
+	do_rpc_nodes $(facet_active_host $SINGLEMDS) unload_modules
+	formatall
+}
+
+# Create files on the MDT in cycles, restarting the MDS for every cycle.
+# A cycle covers at most ${UNIT} files, or all remaining ones if UNIT is 0.
+# usage: scrub_create_nfiles <total> <lbase> <threads>
+#   total   - number of files to create
+#   lbase   - start value for test_create "-b"; grows by usize per cycle
+#   threads - value for lctl "--threads"/"-D"; usize is count / threads
+scrub_create_nfiles() {
+	local total=$1
+	local lbase=$2
+	local threads=$3
+	local ldir="/test-${lbase}"
+	local cycle=0
+	local count=${UNIT}
+
+	while true; do
+		[ ${count} -eq 0 -o ${count} -gt ${total} ] && count=${total}
+		local usize=$((count / threads))
+		[ ${usize} -eq 0 ] && break
+		local tdir=${ldir}-${cycle}-
+
+		echo "[cycle: ${cycle}] [threads: ${threads}]"\
+		     "[files: ${count}] [basedir: ${tdir}]"
+		start ${SINGLEMDS} $MDT_DEVNAME $MDS_MOUNT_OPTS ||
+			error "Fail to start MDS!"
+		scrub_attach
+		scrub_create
+		scrub_detach
+		stop ${SINGLEMDS} || error "Fail to stop MDS!"
+
+		total=$((total - usize * threads))
+		[ ${total} -eq 0 ] && break
+		lbase=$((lbase + usize))
+		cycle=$((cycle + 1))
+	done
+}
+
+build_test_filter
+
+# For every file-set size i from MINCOUNT to MAXCOUNT (multiplied by FACTOR
+# each round): create the missing files, run mds_backup_restore if BACKUP is
+# non-zero or mds_remove_ois otherwise, restart the MDS, wait until oi_scrub
+# reports "completed", then print the values taken from lines 18 and 19 of
+# its output as rebuild time and speed.
+test_0() {
+	local BASECOUNT=0
+	local i
+
+	for ((i=$MINCOUNT; i<=$MAXCOUNT; i=$((i * FACTOR)))); do
+		local nfiles=$((i - BASECOUNT))
+		local stime=$(date +%s)
+
+		echo "+++ start to create for ${i} files set at: $(date) +++"
+		scrub_create_nfiles ${nfiles} ${BASECOUNT} ${NTHREADS} ||
+			error "Fail to create files!"
+		echo "+++ end to create for ${i} files set at: $(date) +++"
+		local etime=$(date +%s)
+		local delta=$((etime - stime))
+		[ $delta -gt 0 ] || delta=1
+		echo "create ${nfiles} files used ${delta} seconds"
+		echo "create speed is $((nfiles / delta))/sec"
+
+		BASECOUNT=${i}
+		if [ ${BACKUP} -ne 0 ]; then
+			stime=$(date +%s)
+			echo "backup/restore ${i} files start at: $(date)"
+			mds_backup_restore || error "Fail to backup/restore!"
+			echo "backup/restore ${i} files end at: $(date)"
+			etime=$(date +%s)
+			delta=$((etime - stime))
+			[ $delta -gt 0 ] || delta=1
+			echo "backup/restore ${i} files used ${delta} seconds"
+			echo "backup/restore speed is $((i / delta))/sec"
+		else
+			mds_remove_ois || error "Fail to remove/recreate!"
+		fi
+
+		echo "--- start to rebuild OI for $i files set at: $(date) ---"
+		start ${SINGLEMDS} $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null ||
+			error "Fail to start MDS!"
+
+		# NOTE(review): there is no timeout; this loops forever if
+		# the status never becomes "completed".
+		while true; do
+			local STATUS=$($SHOW_SCRUB|sed -n '4'p|awk '{print $2}')
+			[ "$STATUS" == "completed" ] && break
+			sleep 3 # check status every 3 seconds
+		done
+
+		echo "--- end to rebuild OI for ${i} files set at: $(date) ---"
+		local RTIME=$($SHOW_SCRUB | sed -n '18'p | awk '{print $2}')
+		echo "rebuild OI for ${i} files used ${RTIME} seconds"
+		local SPEED=$($SHOW_SCRUB | sed -n '19'p | awk '{print $2}')
+		echo "rebuild speed is ${SPEED}/sec"
+		stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!"
+	done
+}
+run_test 0 "OI scrub performance test"
+
+# cleanup the system at last
+scrub_cleanup
+complete $(basename $0) $SECONDS
+exit_status
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 146746c..c4ce175 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -5,6 +5,7 @@ trap 'print_summary && touch $TF_FAIL && \
 set -e
 #set -x
 
+export EJOURNAL=${EJOURNAL:-""}
 export REFORMAT=${REFORMAT:-""}
 export WRITECONF=${WRITECONF:-""}
 export VERBOSE=${VERBOSE:-false}
@@ -396,6 +397,7 @@ load_modules_local() {
     load_module osc/osc
     load_module lov/lov
     load_module mgc/mgc
+    load_module obdecho/obdecho
     if ! client_only; then
         SYMLIST=/proc/kallsyms
         grep -q crc16 $SYMLIST || { modprobe crc16 2>/dev/null || true; }
@@ -5227,3 +5229,101 @@ generate_string() {
 
     echo "$(head -c $size < /dev/zero | tr '\0' y)"
 }
+
+# Run mke2fs -O journal_dev on ${EJOURNAL} via ${SINGLEMDS}.
+# Does nothing if EJOURNAL is empty; returns 1 if mke2fs fails.
+reformat_external_journal() {
+	if [ -n "${EJOURNAL}" ]; then
+		local rcmd="do_facet ${SINGLEMDS}"
+
+		echo "reformat external journal on ${SINGLEMDS}:${EJOURNAL}"
+		${rcmd} mke2fs -O journal_dev ${EJOURNAL} || return 1
+	fi
+}
+
+# MDT file-level backup/restore
+# Saves the EAs (getfattr) and files (tar) of the ${SINGLEMDS} device,
+# reformats the device, then restores both and removes the recovery logs.
+# Returns 1-10 identifying the failed step (see the "|| return" codes).
+mds_backup_restore() {
+	local devname=$(mdsdevname ${SINGLEMDS//mds/})
+	local mntpt=$(facet_mntpt brpt)
+	local rcmd="do_facet ${SINGLEMDS}"
+	local metaea=${TMP}/backup_restore.ea
+	local metadata=${TMP}/backup_restore.tgz
+
+	echo "file-level backup/restore on ${SINGLEMDS}:${devname}"
+
+	# step 1: build mount point
+	${rcmd} mkdir -p $mntpt
+	# step 2: cleanup old backup
+	${rcmd} rm -f $metaea $metadata
+	# step 3: mount dev
+	${rcmd} mount -t ldiskfs $MDS_MOUNT_OPTS $devname $mntpt || return 1
+	# step 4: backup metaea
+	echo "backup EA"
+	${rcmd} "cd $mntpt && getfattr -R -d -m '.*' -P . > $metaea && cd -" ||
+		return 2
+	# step 5: backup metadata
+	echo "backup data"
+	${rcmd} tar zcf $metadata -C $mntpt/ . > /dev/null 2>&1 || return 3
+	# step 6: umount
+	${rcmd} umount -d $mntpt || return 4
+	# step 7: reformat external journal if needed
+	reformat_external_journal || return 5
+	# step 8: reformat dev
+	echo "reformat new device"
+	add ${SINGLEMDS} $(mkfs_opts mds) --backfstype ldiskfs --reformat \
+		$devname > /dev/null || return 6
+	# step 9: mount dev
+	${rcmd} mount -t ldiskfs $MDS_MOUNT_OPTS $devname $mntpt || return 7
+	# step 10: restore metadata
+	echo "restore data"
+	${rcmd} tar zxfp $metadata -C $mntpt > /dev/null 2>&1 || return 8
+	# step 11: restore metaea
+	echo "restore EA"
+	${rcmd} "cd $mntpt && setfattr --restore=$metaea && cd - " || return 9
+	# step 12: remove recovery logs
+	echo "remove recovery logs"
+	${rcmd} rm -fv $mntpt/OBJECTS/* $mntpt/CATALOGS
+	# step 13: umount dev
+	${rcmd} umount -d $mntpt || return 10
+	# step 14: cleanup tmp backup
+	${rcmd} rm -f $metaea $metadata
+}
+
+# remove OI files
+# usage: mds_remove_ois [idx]
+#   no idx  - remove all OI files (oi.16*)
+#   idx < 2 - remove oi.16.<idx> only
+#   others  - remove oi.16.<idx>, oi.16.<idx * idx>, ... while below 64
+# Returns 1 if mounting the device fails, 2 if the umount fails.
+mds_remove_ois() {
+	local devname=$(mdsdevname ${SINGLEMDS//mds/})
+	local mntpt=$(facet_mntpt brpt)
+	local rcmd="do_facet ${SINGLEMDS}"
+	local idx=$1
+
+	echo "remove OI files: idx=${idx}"
+
+	# step 1: build mount point
+	${rcmd} mkdir -p $mntpt
+	# step 2: mount dev
+	${rcmd} mount -t ldiskfs $MDS_MOUNT_OPTS $devname $mntpt || return 1
+	if [ -z "$idx" ]; then
+		# step 3: remove all OI files
+		${rcmd} rm -fv $mntpt/oi.16*
+	elif [ "$idx" -lt 2 ]; then
+		${rcmd} rm -fv $mntpt/oi.16.${idx}
+	else
+		local i
+
+		# others, rm oi.16.[idx, idx * idx, idx ** ...]
+		for ((i=${idx}; i<64; i=$((i * idx)))); do
+			${rcmd} rm -fv $mntpt/oi.16.${i}
+		done
+	fi
+	# step 4: umount
+	${rcmd} umount -d $mntpt || return 2
+	# OI files will be recreated when mounted as lustre next time.
+}
diff --git a/lustre/tests/test-groups/regression b/lustre/tests/test-groups/regression
index 53d9235..bf2c5fe 100644
--- a/lustre/tests/test-groups/regression
+++ b/lustre/tests/test-groups/regression
@@ -21,3 +21,4 @@ lnet-selftest
 mmp
 obdfilter-survey
 sgpdd-survey
+sanity-scrub
-- 
1.8.3.1