From: Fan Yong Date: Wed, 4 Jul 2012 11:54:00 +0000 (+0800) Subject: LU-957 lfsck: user space tools for LFSCK/scrub X-Git-Tag: 2.2.60~2 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=7cecfcffae9737f929a2cbc8067e093a4f85c3ba LU-957 lfsck: user space tools for LFSCK/scrub Control LFSCK/scrub by lctl commands: 1) lfsck_start: start LFSCK/scrub with specified parameters. 2) lfsck_stop: stop LFSCK/scrub on the specified MDT device. 3) The LFSCK/scrub status can be obtained through some special lproc interface. For example: check OI scrub status by: lctl get_param -n osd-ldiskfs.*.oi_scrub Signed-off-by: Fan Yong Change-Id: I5828c18453c92162fa0dc211324b69d15ecd9fbc Reviewed-on: http://review.whamcloud.com/3170 Tested-by: Maloo Tested-by: Hudson Reviewed-by: Andreas Dilger Reviewed-by: Yu Jian Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre/lustre_lfsck_user.h b/lustre/include/lustre/lustre_lfsck_user.h index cfc2b50..a435e04 100644 --- a/lustre/include/lustre/lustre_lfsck_user.h +++ b/lustre/include/lustre/lustre_lfsck_user.h @@ -44,15 +44,6 @@ enum lfsck_param_flags { LPF_DRYRUN = 0x0004, }; -enum lfsck_method { - /* Object table based iteration, depends on backend filesystem. - * For ldiskfs, it is inode table based iteration. */ - LM_OTABLE = 1, - - /* Namespace based scanning. NOT support yet. */ - LM_NAMESPACE = 2, -}; - enum lfsck_type { /* For MDT-OST consistency check/repair. */ LT_LAYOUT = 0x0001, @@ -61,7 +52,7 @@ enum lfsck_type { LT_DNE = 0x0002, }; -#define LFSCK_VERSION_V1 10 +#define LFSCK_VERSION_V1 1 #define LFSCK_TYPES_ALL ((__u16)(~0)) #define LFSCK_TYPES_DEF ((__u16)0) @@ -71,9 +62,8 @@ enum lfsck_type { enum lfsck_start_valid { LSV_SPEED_LIMIT = 0x00000001, - LSV_METHOD = 0x00000002, - LSV_ERROR_HANDLE = 0x00000004, - LSV_DRYRUN = 0x00000008, + LSV_ERROR_HANDLE = 0x00000002, + LSV_DRYRUN = 0x00000004, }; /* Arguments for starting lfsck. 
*/ @@ -81,6 +71,9 @@ struct lfsck_start { /* Which arguments are valid, see 'enum lfsck_start_valid'. */ __u32 ls_valid; + /* How many items can be scanned at most per second. */ + __u32 ls_speed_limit; + /* For compatibility between user space tools and kernel service. */ __u16 ls_version; @@ -90,11 +83,8 @@ struct lfsck_start { /* Flags for the LFSCK, see 'enum lfsck_param_flags'. */ __u16 ls_flags; - /* Object iteration method, see 'enum lfsck_method'. */ - __u16 ls_method; - - /* How many items can be scanned at most per second. */ - __u32 ls_speed_limit; + /* For 64-bits aligned. */ + __u16 ls_padding; }; #endif /* _LUSTRE_LFSCK_USER_H */ diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 7b19c51..83c9bf3 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -564,6 +564,10 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_ECHO_MD _IOR('f', 221, struct obd_ioctl_data) #define OBD_IOC_ECHO_ALLOC_SEQ _IOWR('f', 222, struct obd_ioctl_data) + +#define OBD_IOC_START_LFSCK _IOWR('f', 230, OBD_IOC_DATA_TYPE) +#define OBD_IOC_STOP_LFSCK _IOW('f', 231, OBD_IOC_DATA_TYPE) + /* XXX _IOWR('f', 250, long) has been defined in * libcfs/include/libcfs/libcfs_private.h for debug, don't use it */ diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index 0241d9e..154225b 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -1551,6 +1551,20 @@ static int mdd_iocontrol(const struct lu_env *env, struct md_device *m, *mntopts = mdd->mdd_dt_conf.ddp_mntopts; RETURN(0); } + case OBD_IOC_START_LFSCK: { + struct lfsck_start *start = karg; + struct md_lfsck *lfsck = &mdd->mdd_lfsck; + + /* Return the kernel service version. */ + /* XXX: version can be used for compatibility in the future. 
*/ + start->ls_version = lfsck->ml_version; + rc = mdd_lfsck_start(env, lfsck, start); + RETURN(rc); + } + case OBD_IOC_STOP_LFSCK: { + rc = mdd_lfsck_stop(env, &mdd->mdd_lfsck); + RETURN(rc); + } } /* Below ioctls use obd_ioctl_data */ diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 192404f..28e8758 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -118,6 +118,7 @@ struct md_lfsck { /* Sleep N jiffies for each schedule. */ __u32 ml_sleep_jif; __u16 ml_version; + unsigned int ml_paused:1; /* The lfsck is paused. */ }; struct mdd_device { diff --git a/lustre/mdd/mdd_lfsck.c b/lustre/mdd/mdd_lfsck.c index c0c6400..fc2c469 100644 --- a/lustre/mdd/mdd_lfsck.c +++ b/lustre/mdd/mdd_lfsck.c @@ -166,8 +166,29 @@ static int mdd_lfsck_main(void *args) GOTO(out, rc); out: - CDEBUG(D_LFSCK, "LFSCK: iteration stop: pos = %s, rc = %d\n", - (char *)iops->key(&env, di), rc); + if (lfsck->ml_paused) { + /* XXX: It is a hack here: if the lfsck is still running when MDS + * umounts, it should be restarted automatically after MDS + * remounts up. + * + * To support that, we need to record the lfsck status in + * the lfsck on-disk bookmark file. But now, there is no + * lfsck component under the lfsck framework. To avoid + * introducing unnecessary bookmark incompatibility issues, + * we write nothing to the lfsck bookmark file now. + * + * Instead, we will reuse dt_it_ops::put() method to notify + * low layer iterator to process such case. + * + * It is just a temporary solution, and will be replaced when + * some lfsck component is introduced in the future.
*/ + iops->put(&env, di); + CDEBUG(D_LFSCK, "LFSCK: iteration paused: pos = %s, rc = %d\n", + (char *)iops->key(&env, di), rc); + } else { + CDEBUG(D_LFSCK, "LFSCK: iteration stop: pos = %s, rc = %d\n", + (char *)iops->key(&env, di), rc); + } iops->fini(&env, di); fini_env: @@ -203,11 +224,6 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck, if (start->ls_valid & LSV_SPEED_LIMIT) mdd_lfsck_set_speed(lfsck, start->ls_speed_limit); - if (start->ls_valid & LSV_METHOD && start->ls_method != LM_OTABLE) { - cfs_mutex_unlock(&lfsck->ml_mutex); - RETURN(-EOPNOTSUPP); - } - if (start->ls_valid & LSV_ERROR_HANDLE) { valid |= DOIV_ERROR_HANDLE; if (start->ls_flags & LPF_FAILOUT) @@ -279,6 +295,7 @@ int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd) struct dt_object *obj; int rc; + memset(lfsck, 0, sizeof(*lfsck)); lfsck->ml_version = LFSCK_VERSION_V1; cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq); cfs_mutex_init(&lfsck->ml_mutex); @@ -311,6 +328,7 @@ void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd) struct md_lfsck *lfsck = &mdd->mdd_lfsck; if (lfsck->ml_it_obj != NULL) { + lfsck->ml_paused = 1; mdd_lfsck_stop(env, lfsck); lu_object_put(env, &lfsck->ml_it_obj->do_lu); lfsck->ml_it_obj = NULL; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index a39ce6e..7ab077e 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5746,6 +5746,21 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_CHANGELOG_CLEAR: rc = mdt_ioc_child(&env, mdt, cmd, len, karg); break; + case OBD_IOC_START_LFSCK: + case OBD_IOC_STOP_LFSCK: { + struct md_device *next = mdt->mdt_child; + struct obd_ioctl_data *data = karg; + + if (unlikely(data == NULL)) { + rc = -EINVAL; + break; + } + + rc = next->md_ops->mdo_iocontrol(&env, next, cmd, + data->ioc_inllen1, + data->ioc_inlbuf1); + break; + } case OBD_IOC_GET_OBJ_VERSION: { struct mdt_thread_info *mti; mti =
lu_context_key_get(&env.le_ctx, &mdt_thread_key); diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 2f385d3..e24e54a 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -475,7 +475,10 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result) sf->sf_time_last_complete = sf->sf_time_last_checkpoint; sf->sf_success_count++; } else if (result == 0) { - sf->sf_status = SS_PAUSED; + if (scrub->os_paused) + sf->sf_status = SS_PAUSED; + else + sf->sf_status = SS_STOPPED; } else { sf->sf_status = SS_FAILED; } @@ -979,6 +982,7 @@ static void osd_scrub_stop(struct osd_device *dev) { /* od_otable_mutex: prevent curcurrent start/stop */ cfs_mutex_lock(&dev->od_otable_mutex); + dev->od_scrub.os_paused = 1; do_osd_scrub_stop(&dev->od_scrub); cfs_mutex_unlock(&dev->od_otable_mutex); } @@ -1002,6 +1006,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) int rc = 0; ENTRY; + memset(scrub, 0, sizeof(*scrub)); OBD_SET_CTXT_MAGIC(ctxt); ctxt->pwdmnt = dev->od_mnt; ctxt->pwd = dev->od_mnt->mnt_root; @@ -1077,7 +1082,8 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) } if (rc == 0 && !scrub->os_no_scrub && - ((sf->sf_status == SS_CRASHED && + ((sf->sf_status == SS_PAUSED) || + (sf->sf_status == SS_CRASHED && sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_AUTO)) || (sf->sf_status == SS_INIT && sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT)))) @@ -1173,6 +1179,19 @@ static void osd_otable_it_fini(const struct lu_env *env, struct dt_it *di) } /** + * XXX: Temporary used to notify otable iteration to be paused. 
+ */ +static void osd_otable_it_put(const struct lu_env *env, struct dt_it *di) +{ + struct osd_device *dev = ((struct osd_otable_it *)di)->ooi_dev; + + /* od_otable_mutex: prevent concurrent init/fini */ + cfs_mutex_lock(&dev->od_otable_mutex); + dev->od_scrub.os_paused = 1; + cfs_mutex_unlock(&dev->od_otable_mutex); +} + +/** * Set the OSD layer iteration start position as the specified key. * * The LFSCK out of OSD layer does not know the detail of the key, so if there @@ -1331,6 +1350,7 @@ const struct dt_index_operations osd_otable_ops = { .dio_it = { .init = osd_otable_it_init, .fini = osd_otable_it_fini, + .put = osd_otable_it_put, .get = osd_otable_it_get, .next = osd_otable_it_next, .key = osd_otable_it_key, @@ -1400,6 +1420,7 @@ static const char *scrub_status_names[] = { "scanning", "completed", "failed", + "stopped", "paused", "crashed", NULL diff --git a/lustre/osd-ldiskfs/osd_scrub.h b/lustre/osd-ldiskfs/osd_scrub.h index 5c9df6d..353c590 100644 --- a/lustre/osd-ldiskfs/osd_scrub.h +++ b/lustre/osd-ldiskfs/osd_scrub.h @@ -55,10 +55,13 @@ enum scrub_status { SS_FAILED = 3, /* The scrub is stopped manually, the OI files may be inconsistent. */ - SS_PAUSED = 4, + SS_STOPPED = 4, + + /* The scrub is paused automatically at umount. */ + SS_PAUSED = 5, /* The scrub crashed during the scanning, should be restarted. */ - SS_CRASHED = 5, + SS_CRASHED = 6, }; enum scrub_flags { @@ -189,7 +192,8 @@ struct osd_scrub { * found by RPC prior */ os_waiting:1, /* Waiting for scan window. */ os_full_speed:1, /* run w/o speed limit */ - os_no_scrub:1; /* NOT auto trigger OI scrub*/ + os_no_scrub:1, /* NOT auto trigger OI scrub*/ + os_paused:1; /* The scrub is paused.
*/ }; #endif /* _OSD_SCRUB_H */ diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 4d1c114..aa32eb7 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -33,8 +33,12 @@ build_test_filter MDT_DEV="${FSNAME}-MDT0000" MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/}) +START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV}" +STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}" SHOW_SCRUB="do_facet $SINGLEMDS \ $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub" +MOUNT_OPTS_SCRUB="-o user_xattr" +MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub" scrub_prep() { local nfiles=$1 @@ -59,14 +63,14 @@ scrub_prep() { test_0() { scrub_prep 0 echo "start $SINGLEMDS without disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(1) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "init" ] || error "(2) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'" mount_client $MOUNT || error "(4) Fail to start client!" @@ -81,11 +85,11 @@ test_1a() { mds_remove_ois || error "(1) Fail to remove/recreate!" echo "start $SINGLEMDS without disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(2) Fail to start MDS!" 
sleep 3 - local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "completed" ] || error "(3) Expect 'completed', but got '$STATUS'" @@ -96,16 +100,51 @@ test_1a() { } run_test 1a "Trigger OI scrub when MDT mounts for OI files remove/recreate case" +test_1b() { + local index + + # OI files to be removed: + # idx 0: oi.16.0 + # idx 1: oi.16.1 + # idx 2: oi.16.{2,4,8,16,32} + # idx 3: oi.16.{3,9,27} + # idx 5: oi.16.{5,25} + # idx 7: oi.16.{7,49} + for index in 0 1 2 3 5 7; do + scrub_prep 0 + mds_remove_ois ${index} || error "(1) Fail to remove/recreate!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > \ + /dev/null || error "(2) Fail to start MDS!" + + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "recreated" ] || + error "(3) Expect 'recreated', but got '$FLAGS'" + + $START_SCRUB || error "(4) Fail to start OI scrub!" + sleep 3 + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(5) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || + error "(6) Expect empty flags, but got '$FLAGS'" + done +} +run_test 1b "Auto detect kinds of OI file(s) removed/recreated cases" + test_2() { scrub_prep 0 mds_backup_restore || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS without disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(2) Fail to start MDS!" sleep 3 - local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "completed" ] || error "(3) Expect 'completed', but got '$STATUS'" @@ -121,14 +160,14 @@ test_3() { mds_backup_restore || error "(1) Fail to backup/restore!"
echo "start $SINGLEMDS with disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "init" ] || error "(3) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent" ] || error "(4) Expect 'inconsistent', but got '$FLAGS'" echo "stopall" @@ -141,14 +180,14 @@ test_4() { mds_backup_restore || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS with disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "init" ] || error "(3) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent" ] || error "(4) Expect 'inconsistent', but got '$FLAGS'" @@ -160,7 +199,7 @@ test_4() { error "(6) File diff failed unexpected!" sleep 3 - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "completed" ] || error "(7) Expect 'completed', but got '$STATUS'" } @@ -171,14 +210,14 @@ test_5() { mds_backup_restore || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS with disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" 
- local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "init" ] || error "(3) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent" ] || error "(4) Expect 'inconsistent', but got '$FLAGS'" @@ -186,7 +225,7 @@ test_5() { do_facet $SINGLEMDS \ $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 -#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_facet $SINGLEMDS $LCTL set_param fail_val=3 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || @@ -194,65 +233,65 @@ test_5() { umount_client $MOUNT || error "(7) Fail to stop client!" - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning" ] || error "(8) Expect 'scanning', but got '$STATUS'" -#define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80000191 sleep 4 echo "stop $SINGLEMDS" stop $SINGLEMDS > /dev/null || error "(9) Fail to stop MDS!" echo "start $SINGLEMDS with disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(10) Fail to start MDS!" - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "crashed" ] || error "(11) Expect 'crashed', but got '$STATUS'" echo "stop $SINGLEMDS" stop $SINGLEMDS > /dev/null || error "(12) Fail to stop MDS!" 
-#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_facet $SINGLEMDS $LCTL set_param fail_val=3 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 echo "start $SINGLEMDS without disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(13) Fail to start MDS!" - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning" ] || error "(14) Expect 'scanning', but got '$STATUS'" -#define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80000192 sleep 4 - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "failed" ] || error "(15) Expect 'failed', but got '$STATUS'" mount_client $MOUNT || error "(16) Fail to start client!" -#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_facet $SINGLEMDS $LCTL set_param fail_val=3 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 stat $DIR/$tdir/${tfile}1000 || error "(17) Fail to stat $DIR/$tdir/${tfile}1000!" 
- STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning" ] || error "(18) Expect 'scanning', but got '$STATUS'" do_facet $SINGLEMDS $LCTL set_param fail_loc=0 do_facet $SINGLEMDS $LCTL set_param fail_val=0 sleep 5 - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "completed" ] || error "(19) Expect 'completed', but got '$STATUS'" - FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(20) Expect empty flags, but got '$FLAGS'" } run_test 5 "OI scrub state machine" @@ -262,14 +301,14 @@ test_6() { mds_backup_restore || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS with disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "init" ] || error "(3) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent" ] || error "(4) Expect 'inconsistent', but got '$FLAGS'" @@ -277,64 +316,68 @@ test_6() { do_facet $SINGLEMDS \ $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 -#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_facet $SINGLEMDS $LCTL set_param fail_val=3 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || error "(6) File diff failed unexpected!" 
- # Fail the OI scrub to guarantee there is at least on checkpoint -#define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + sleep 8 + # Fail the OI scrub to guarantee there is at least one checkpoint + #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80000192 sleep 4 - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "failed" ] || error "(7) Expect 'failed', but got '$STATUS'" -#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_facet $SINGLEMDS $LCTL set_param fail_val=3 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + # stat will re-trigger OI scrub stat $DIR/$tdir/${tfile}800 || error "(8) Fail to stat $DIR/$tdir/${tfile}800!" umount_client $MOUNT || error "(9) Fail to stop client!" - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning" ] || error "(10) Expect 'scanning', but got '$STATUS'" -#define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80000191 sleep 4 - local POSITION0=$($SHOW_SCRUB | sed -n '11'p | awk '{print $2}') + local POSITION0=$($SHOW_SCRUB | \ + awk '/^last_checkpoint_position/ {print $2}') POSITION0=$((POSITION0 + 1)) echo "stop $SINGLEMDS" stop $SINGLEMDS > /dev/null || error "(11) Fail to stop MDS!" -#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_facet $SINGLEMDS $LCTL set_param fail_val=3 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 echo "start $SINGLEMDS without disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(12) Fail to start MDS!" 
- STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning" ] || error "(13) Expect 'scanning', but got '$STATUS'" - local POSITION1=$($SHOW_SCRUB | sed -n '10'p |awk '{print $2}') + local POSITION1=$($SHOW_SCRUB | \ + awk '/^latest_start_position/ {print $2}') [ $POSITION0 -eq $POSITION1 ] || error "(14) Expect position: $POSITION0, but got $POSITION1" do_facet $SINGLEMDS $LCTL set_param fail_loc=0 do_facet $SINGLEMDS $LCTL set_param fail_val=0 sleep 5 - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "completed" ] || error "(15) Expect 'completed', but got '$STATUS'" - FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(16) Expect empty flags, but got '$FLAGS'" } run_test 6 "OI scrub resumes from last checkpoint" @@ -344,14 +387,14 @@ test_7() { mds_backup_restore || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS with disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MDS_MOUNT_OPTS,noscrub > /dev/null || + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" 
- local STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "init" ] || error "(3) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent" ] || error "(4) Expect 'inconsistent', but got '$FLAGS'" @@ -359,7 +402,7 @@ test_7() { do_facet $SINGLEMDS \ $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 -#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_facet $SINGLEMDS $LCTL set_param fail_val=3 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || @@ -368,26 +411,265 @@ test_7() { stat $DIR/$tdir/${tfile}300 || error "(7) Fail to stat $DIR/$tdir/${tfile}300!" - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning" ] || error "(8) Expect 'scanning', but got '$STATUS'" - FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent,auto" ] || error "(9) Expect 'inconsistent,auto', but got '$FLAGS'" do_facet $SINGLEMDS $LCTL set_param fail_loc=0 do_facet $SINGLEMDS $LCTL set_param fail_val=0 sleep 5 - STATUS=$($SHOW_SCRUB | sed -n '4'p | awk '{print $2}') + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') [ "$STATUS" == "completed" ] || error "(10) Expect 'completed', but got '$STATUS'" - FLAGS=$($SHOW_SCRUB | sed -n '5'p | awk '{print $2}') + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(11) Expect empty flags, but got '$FLAGS'" } run_test 7 "System is available during OI scrub scanning" +test_8() { + scrub_prep 0 + mds_backup_restore || error "(1) Fail to backup/restore!" 
+ + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + $START_SCRUB || error "(5) Fail to start OI scrub!" + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning" ] || + error "(6) Expect 'scanning', but got '$STATUS'" + + $STOP_SCRUB || error "(7) Fail to stop OI scrub!" + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "stopped" ] || + error "(8) Expect 'stopped', but got '$STATUS'" + + $START_SCRUB || error "(9) Fail to start OI scrub!" + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning" ] || + error "(10) Expect 'scanning', but got '$STATUS'" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + sleep 5 + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(11) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || error "(12) Expect empty flags, but got '$FLAGS'" +} +run_test 8 "Control OI scrub manually" + +test_9() { + scrub_prep 8000 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(2) Fail to start MDS!" 
+ + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + # OI scrub should run with full speed under inconsistent case + $START_SCRUB -s 100 || error "(5) Fail to start OI scrub!" + + sleep 10 + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(6) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || error "(7) Expect empty flags, but got '$FLAGS'" + + # OI scrub should run with limited speed under non-inconsistent case + $START_SCRUB -s 100 -r || error "(8) Fail to start OI scrub!" + + sleep 10 + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning" ] || + error "(9) Expect 'scanning', but got '$STATUS'" + + # Do NOT ignore that there are 1024 pre-fetched items. + # So the max speed may be (1024 + 100 * 10) / 10. + # And there may be timing error, so the max speed may be even larger.
+ local SPEED=$($SHOW_SCRUB | awk '/^average_speed/ { print $2 }') + [ $SPEED -gt 220 ] && + error "(10) Unexpected speed $SPEED, should not more than 220" + + # adjust speed limit + do_facet $SINGLEMDS \ + $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 300 + sleep 10 + + SPEED=$($SHOW_SCRUB | awk '/^average_speed/ { print $2 }') + [ $SPEED -lt 220 ] && + error "(11) Unexpected speed $SPEED, should not less than 220" + + # (1024 + 100 * 10 + 300 * 10) / 20 + [ $SPEED -gt 270 ] && + error "(12) Unexpected speed $SPEED, should not more than 270" + + do_facet $SINGLEMDS \ + $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0 + sleep 6 + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(13) Expect 'completed', but got '$STATUS'" +} +run_test 9 "OI scrub speed control" + +test_10a() { + scrub_prep 0 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + mount_client $MOUNT || error "(5) Fail to start client!" + + do_facet $SINGLEMDS \ + $LCTL set_param -n osd-ldiskfs.${MDT_DEV}.auto_scrub 1 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + diff -q $LUSTRE/tests/test-framework.sh $DIR/$tdir/test-framework.sh || + error "(6) File diff failed unexpected!" + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning" ] || + error "(7) Expect 'scanning', but got '$STATUS'" + + umount_client $MOUNT || error "(8) Fail to stop client!" 
+ + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(9) Fail to stop MDS!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(10) Fail to start MDS!" + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "paused" ] || + error "(11) Expect 'paused', but got '$STATUS'" + + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(12) Fail to stop MDS!" + + echo "start $SINGLEMDS without disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(13) Fail to start MDS!" + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning" ] || + error "(14) Expect 'scanning', but got '$STATUS'" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + sleep 5 + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(15) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || error "(16) Expect empty flags, but got '$FLAGS'" +} +run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)" + +test_10b() { + scrub_prep 0 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x190 + + $START_SCRUB || error "(5) Fail to start OI scrub!" 
+ + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning" ] || + error "(6) Expect 'scanning', but got '$STATUS'" + + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(7) Fail to stop MDS!" + + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(8) Fail to start MDS!" + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "paused" ] || + error "(9) Expect 'paused', but got '$STATUS'" + + echo "stop $SINGLEMDS" + stop $SINGLEMDS > /dev/null || error "(10) Fail to stop MDS!" + + echo "start $SINGLEMDS without disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(11) Fail to start MDS!" + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning" ] || + error "(12) Expect 'scanning', but got '$STATUS'" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + sleep 5 + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(13) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || error "(14) Expect empty flags, but got '$FLAGS'" +} +run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)" + # restore the ${facet}_MKFS_OPTS variables for facet in MGS MDS OST; do opts=SAVED_${facet}_MKFS_OPTS diff --git a/lustre/tests/scrub-performance.sh b/lustre/tests/scrub-performance.sh index 79f166d..8c9b675 100644 --- a/lustre/tests/scrub-performance.sh +++ b/lustre/tests/scrub-performance.sh @@ -39,7 +39,7 @@ fi stopall do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local reformat_external_journal -add ${SINGLEMDS} $(mkfs_opts mds) --backfstype ldiskfs --reformat \ +add $SINGLEMDS $(mkfs_opts $SINGLEMDS) --backfstype ldiskfs --reformat \ $MDT_DEVNAME > /dev/null || exit 2 scrub_attach() { @@ -155,15 +155,16 
@@ test_0() { error "Fail to start MDS!" while true; do - local STATUS=$($SHOW_SCRUB|sed -n '4'p|awk '{print $2}') + local STATUS=$($SHOW_SCRUB | \ + awk '/^status/ { print $2 }') [ "$STATUS" == "completed" ] && break sleep 3 # check status every 3 seconds done echo "--- end to rebuild OI for ${i} files set at: $(date) ---" - local RTIME=$($SHOW_SCRUB | sed -n '18'p | awk '{print $2}') + local RTIME=$($SHOW_SCRUB | awk '/^run_time/ { print $2 }') echo "rebuild OI for ${i} files used ${RTIME} seconds" - local SPEED=$($SHOW_SCRUB | sed -n '19'p | awk '{print $2}') + local SPEED=$($SHOW_SCRUB | awk '/^average_speed/ { print $2 }') echo "rebuild speed is ${SPEED}/sec" stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!" done diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index c4ce175..a1fe7ef 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -5246,6 +5246,11 @@ mds_backup_restore() { local rcmd="do_facet ${SINGLEMDS}" local metaea=${TMP}/backup_restore.ea local metadata=${TMP}/backup_restore.tgz + local opts=${MDS_MOUNT_OPTS} + + if ! ${rcmd} test -b ${devname}; then + opts=$(csa_add "$opts" -o loop) + fi echo "file-level backup/restore on ${SINGLEMDS}:${devname}" @@ -5254,7 +5259,7 @@ mds_backup_restore() { # step 2: cleanup old backup ${rcmd} rm -f $metaea $metadata # step 3: mount dev - ${rcmd} mount -t ldiskfs $MDS_MOUNT_OPTS $devname $mntpt || return 1 + ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 1 # step 4: backup metaea echo "backup EA" ${rcmd} "cd $mntpt && getfattr -R -d -m '.*' -P . 
> $metaea && cd -" || @@ -5268,10 +5273,10 @@ mds_backup_restore() { reformat_external_journal || return 5 # step 8: reformat dev echo "reformat new device" - add ${SINGLEMDS} $(mkfs_opts mds) --backfstype ldiskfs --reformat \ - $devname > /dev/null || return 6 + add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS}) --backfstype ldiskfs \ + --reformat $devname > /dev/null || return 6 # step 9: mount dev - ${rcmd} mount -t ldiskfs $MDS_MOUNT_OPTS $devname $mntpt || return 7 + ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 7 # step 10: restore metadata echo "restore data" ${rcmd} tar zxfp $metadata -C $mntpt > /dev/null 2>&1 || return 8 @@ -5293,13 +5298,18 @@ mds_remove_ois() { local mntpt=$(facet_mntpt brpt) local rcmd="do_facet ${SINGLEMDS}" local idx=$1 + local opts=${MDS_MOUNT_OPTS} + + if ! ${rcmd} test -b ${devname}; then + opts=$(csa_add "$opts" -o loop) + fi echo "remove OI files: idx=${idx}" # step 1: build mount point ${rcmd} mkdir -p $mntpt # step 2: mount dev - ${rcmd} mount -t ldiskfs $MDS_MOUNT_OPTS $devname $mntpt || return 1 + ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 1 if [ -z $idx ]; then # step 3: remove all OI files ${rcmd} rm -fv $mntpt/oi.16* diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 350acd1..c44012b 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -33,7 +33,7 @@ endif # UTILS lib_LIBRARIES = liblustreapi.a libiam.a -lctl_SOURCES = obd.c lustre_cfg.c lctl.c obdctl.h +lctl_SOURCES = lustre_lfsck.c obd.c lustre_cfg.c lctl.c obdctl.h lctl_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL) $(PTHREAD_LIBS) lctl_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 78e8f4a..08b2f95 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -344,6 +344,17 @@ command_t cmdlist[] = { "get the version of an object on servers\n" "usage: getobjversion "}, + /* LFSCK commands */ + {"==== LFSCK ====", jt_noop, 0, "LFSCK"}, + 
{"lfsck_start", jt_lfsck_start, 0, "start LFSCK\n" + "usage: lfsck_start <-M | --device MDT_device>\n" + " [-e | --error error_handle] [-h | --help]\n" + " [-n | --dryrun switch] [-r | --reset]\n" + " [-s | --speed speed_limit]\n" + " [-t | --type lfsck_type[,lfsck_type...]]"}, + {"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n" + "usage: lfsck_stop <-M | --device MDT_device> [-h | --help]"}, + {"==== obsolete (DANGEROUS) ====", jt_noop, 0, "obsolete (DANGEROUS)"}, /* some test scripts still use these */ {"cfg_device", jt_obd_device, 0, diff --git a/lustre/utils/lustre_lfsck.c b/lustre/utils/lustre_lfsck.c new file mode 100644 index 0000000..2626b3f --- /dev/null +++ b/lustre/utils/lustre_lfsck.c @@ -0,0 +1,317 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. A copy is + * included in the COPYING file that accompanied this code. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011 Whamcloud, Inc. + */ +/* + * lustre/utils/lustre_lfsck.c + * + * Lustre user-space tools for LFSCK. 
+ *
+ * Author: Fan Yong
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "obdctl.h"
+
+#include
+#include
+#include
+#include
+
+/* Long options accepted by "lctl lfsck_start". */
+static const struct option long_opt_start[] = {
+	{"device",	required_argument, 0, 'M'},
+	{"error",	required_argument, 0, 'e'},
+	{"help",	no_argument,	   0, 'h'},
+	{"dryrun",	required_argument, 0, 'n'},
+	{"reset",	no_argument,	   0, 'r'},
+	{"speed",	required_argument, 0, 's'},
+	{"type",	required_argument, 0, 't'},
+	{0,		0,		   0,  0 }
+};
+
+/* Long options accepted by "lctl lfsck_stop". */
+static const struct option long_opt_stop[] = {
+	{"device",	required_argument, 0, 'M'},
+	{"help",	no_argument,	   0, 'h'},
+	{0,		0,		   0,  0 }
+};
+
+/* Maps a type name given to '-t' to its 'enum lfsck_type' bit. */
+struct lfsck_types_names {
+	const char	*name;
+	__u16		 type;
+};
+
+/*
+ * Known LFSCK types.  The table is terminated by an entry whose name is
+ * NULL; every walker must stop at that entry instead of counting entries.
+ */
+static const struct lfsck_types_names lfsck_types_names[] = {
+	{ "layout",	LT_LAYOUT },
+	{ "DNE",	LT_DNE },
+	{ NULL,		0 }
+};
+
+/* Print the help text of "lfsck_start" to stderr. */
+static void usage_start(void)
+{
+	fprintf(stderr, "Start LFSCK.\n"
+		"SYNOPSIS:\n"
+		"lfsck_start <-M | --device MDT_device>\n"
+		" [-e | --error error_handle] [-h | --help]\n"
+		" [-n | --dryrun switch] [-r | --reset]\n"
+		" [-s | --speed speed_limit]\n"
+		" [-t | --type lfsck_type[,lfsck_type...]]\n"
+		"OPTIONS:\n"
+		"-M: The MDT device to start LFSCK on.\n"
+		"-e: Error handle, 'continue'(default) or 'abort'.\n"
+		"-h: Help information.\n"
+		"-n: Check without modification. 'off'(default) or 'on'.\n"
+		"-r: Reset scanning start position to the device beginning.\n"
+		"-s: How many items can be scanned at most per second. "
+		"'%d' means no limit (default).\n"
+		"-t: The LFSCK type(s) to be started.\n",
+		LFSCK_SPEED_NO_LIMIT);
+}
+
+/* Print the help text of "lfsck_stop" to stderr. */
+static void usage_stop(void)
+{
+	fprintf(stderr, "Stop LFSCK.\n"
+		"SYNOPSIS:\n"
+		"lfsck_stop <-M | --device MDT_device> [-h | --help]\n"
+		"OPTIONS:\n"
+		"-M: The MDT device to stop LFSCK on.\n"
+		"-h: Help information.\n");
+}
+
+/*
+ * lctl "lfsck_start": translate the command line into a 'struct lfsck_start'
+ * and send it to the MDT device named by -M with OBD_IOC_START_LFSCK.
+ *
+ * Returns 0 on success (or after printing the help text), -EINVAL for a
+ * missing/invalid option, the non-zero result of obd_ioctl_pack(), or the
+ * negative result of l_ioctl().
+ */
+int jt_lfsck_start(int argc, char **argv)
+{
+	struct obd_ioctl_data data;
+	char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
+	char device[MAX_OBD_NAME];
+	struct lfsck_start start;
+	/* Only options that are handled below and shown in the usage. */
+	char *optstring = "M:e:hn:rs:t:";
+	int opt, index, rc, i;
+
+	memset(&data, 0, sizeof(data));
+	memset(&start, 0, sizeof(start));
+	start.ls_version = LFSCK_VERSION_V1;
+	start.ls_active = LFSCK_TYPES_DEF;
+
+	/*
+	 * getopt keeps its scan position in static state.  Reset it so that
+	 * a second command run from the same lctl process parses its own
+	 * argv from the beginning.
+	 */
+	optind = 0;
+	while ((opt = getopt_long(argc, argv, optstring, long_opt_start,
+				  &index)) != EOF) {
+		switch (opt) {
+		case 'M':
+			/* Length includes the terminating NUL. */
+			data.ioc_inllen4 = strlen(optarg) + 1;
+			if (data.ioc_inllen4 > MAX_OBD_NAME) {
+				fprintf(stderr, "MDT device name is too long. "
+					"Valid length should be less than %d\n",
+					MAX_OBD_NAME);
+				return -EINVAL;
+			}
+
+			data.ioc_inlbuf4 = optarg;
+			data.ioc_dev = OBD_DEV_BY_DEVNAME;
+			break;
+		case 'e':
+			if (strcmp(optarg, "abort") == 0) {
+				start.ls_flags |= LPF_FAILOUT;
+			} else if (strcmp(optarg, "continue") != 0) {
+				fprintf(stderr, "Invalid error handler: %s. "
+					"The valid value should be: 'continue'"
+					"(default) or 'abort'.\n", optarg);
+				return -EINVAL;
+			}
+			start.ls_valid |= LSV_ERROR_HANDLE;
+			break;
+		case 'h':
+			usage_start();
+			return 0;
+		case 'n':
+			if (strcmp(optarg, "on") == 0) {
+				start.ls_flags |= LPF_DRYRUN;
+			} else if (strcmp(optarg, "off") != 0) {
+				fprintf(stderr, "Invalid dryrun switch: %s. "
+					"The valid value should be: 'off'"
+					"(default) or 'on'\n", optarg);
+				return -EINVAL;
+			}
+			start.ls_valid |= LSV_DRYRUN;
+			break;
+		case 'r':
+			start.ls_flags |= LPF_RESET;
+			break;
+		case 's': {
+			char *end;
+			long limit;
+
+			/*
+			 * Reject anything that is not a whole decimal number
+			 * fitting in an int, instead of silently turning it
+			 * into 0 as atoi() did.
+			 */
+			errno = 0;
+			limit = strtol(optarg, &end, 10);
+			if (errno != 0 || end == optarg || *end != '\0' ||
+			    limit != (long)(int)limit) {
+				fprintf(stderr, "Invalid speed limit: %s.\n",
+					optarg);
+				return -EINVAL;
+			}
+			start.ls_speed_limit = (__u32)limit;
+			start.ls_valid |= LSV_SPEED_LIMIT;
+			break;
+		}
+		case 't': {
+			char *str = optarg, *p, c;
+
+			/* Explicit -t replaces the default type set. */
+			start.ls_active = 0;
+			while (*str) {
+				/* Skip separators between type names. */
+				while (*str == ' ' || *str == ',')
+					str++;
+
+				if (*str == 0)
+					break;
+
+				p = str;
+				while (*p != 0 && *p != ' ' && *p != ',')
+					p++;
+
+				/* Terminate the token in place for strcmp(),
+				 * the original character is restored below. */
+				c = *p;
+				*p = 0;
+				/* Stop at the NULL-named terminator; it must
+				 * never be handed to strcmp(). */
+				for (i = 0; lfsck_types_names[i].name != NULL;
+				     i++) {
+					if (strcmp(str,
+						   lfsck_types_names[i].name)
+					    == 0) {
+						start.ls_active |=
+						    lfsck_types_names[i].type;
+						break;
+					}
+				}
+				*p = c;
+				str = p;
+
+				/* Reached the terminator: unknown name. */
+				if (lfsck_types_names[i].name == NULL) {
+					fprintf(stderr, "Invalid LFSCK type.\n"
+						"The valid value should be "
+						"'layout' or 'DNE'.\n");
+					return -EINVAL;
+				}
+			}
+			if (start.ls_active == 0) {
+				fprintf(stderr, "Miss LFSCK type(s).\n"
+					"The valid value should be "
+					"'layout' or 'DNE'.\n");
+				return -EINVAL;
+			}
+			break;
+		}
+		default:
+			fprintf(stderr, "Invalid option, '-h' for help.\n");
+			return -EINVAL;
+		}
+	}
+
+	if (data.ioc_inlbuf4 == NULL) {
+		fprintf(stderr,
+			"Must specify MDT device to start LFSCK.\n");
+		return -EINVAL;
+	}
+
+	/* Work on a private, zero-padded copy of the device name. */
+	memset(device, 0, MAX_OBD_NAME);
+	memcpy(device, data.ioc_inlbuf4, data.ioc_inllen4);
+	data.ioc_inlbuf4 = device;
+	data.ioc_inlbuf1 = (char *)&start;
+	data.ioc_inllen1 = sizeof(start);
+	memset(buf, 0, sizeof(rawbuf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+	if (rc) {
+		fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
+		return rc;
+	}
+
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_START_LFSCK, buf);
+	if (rc < 0) {
+		perror("Fail to start LFSCK");
+		return rc;
+	}
+
+	/*
+	 * NOTE(review): the types reported below are read from 'start' after
+	 * the unpack, so obd_ioctl_unpack() is presumably expected to copy
+	 * the reply back through ioc_inlbuf1 -- confirm.
+	 */
+	obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
+	printf("Started LFSCK on the MDT device %s:", device);
+	if (start.ls_active == 0) {
+		printf(" noop");
+	} else {
+		for (i = 0; lfsck_types_names[i].name != NULL; i++) {
+			if (start.ls_active & lfsck_types_names[i].type) {
+				printf(" %s", lfsck_types_names[i].name);
+				start.ls_active &= ~lfsck_types_names[i].type;
+			}
+		}
+		/* Bits left over have no name in the table. */
+		if (start.ls_active != 0)
+			printf(" unknown(0x%x)", start.ls_active);
+	}
+	printf("\n");
+	return 0;
+}
+
+/*
+ * lctl "lfsck_stop": send OBD_IOC_STOP_LFSCK to the MDT device named by -M.
+ *
+ * Returns 0 on success (or after printing the help text), -EINVAL for a
+ * missing/invalid option, the non-zero result of obd_ioctl_pack(), or the
+ * negative result of l_ioctl().
+ */
+int jt_lfsck_stop(int argc, char **argv)
+{
+	struct obd_ioctl_data data;
+	char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
+	char device[MAX_OBD_NAME];
+	char *optstring = "M:h";
+	int opt, index, rc;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Restart option scanning; getopt state outlives a single command. */
+	optind = 0;
+	while ((opt = getopt_long(argc, argv, optstring, long_opt_stop,
+				  &index)) != EOF) {
+		switch (opt) {
+		case 'M':
+			/* Length includes the terminating NUL. */
+			data.ioc_inllen4 = strlen(optarg) + 1;
+			if (data.ioc_inllen4 > MAX_OBD_NAME) {
+				fprintf(stderr, "MDT device name is too long. "
+					"Valid length should be less than %d\n",
+					MAX_OBD_NAME);
+				return -EINVAL;
+			}
+
+			data.ioc_inlbuf4 = optarg;
+			data.ioc_dev = OBD_DEV_BY_DEVNAME;
+			break;
+		case 'h':
+			usage_stop();
+			return 0;
+		default:
+			fprintf(stderr, "Invalid option, '-h' for help.\n");
+			return -EINVAL;
+		}
+	}
+
+	if (data.ioc_inlbuf4 == NULL) {
+		fprintf(stderr,
+			"Must specify MDT device to stop LFSCK.\n");
+		return -EINVAL;
+	}
+
+	/* Work on a private, zero-padded copy of the device name. */
+	memset(device, 0, MAX_OBD_NAME);
+	memcpy(device, data.ioc_inlbuf4, data.ioc_inllen4);
+	data.ioc_inlbuf4 = device;
+	memset(buf, 0, sizeof(rawbuf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+	if (rc) {
+		fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
+		return rc;
+	}
+
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_STOP_LFSCK, buf);
+	if (rc < 0) {
+		perror("Fail to stop LFSCK");
+		return rc;
+	}
+
+	printf("Stopped LFSCK on the MDT device %s.\n", device);
+	return 0;
+}
diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index f4c9029..f8c3053 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -108,8 +108,6 @@ const int thread = 0; const int nthreads = 1; #endif -#define MAX_IOC_BUFLEN 8192 - static int cur_device = -1; struct lov_oinfo lov_oinfos[LOV_MAX_STRIPE_COUNT]; diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h index 3de5d1a..e186f75 100644 ---
a/lustre/utils/obdctl.h +++ b/lustre/utils/obdctl.h @@ -46,6 +46,8 @@ #include #include +#define MAX_IOC_BUFLEN 8192 + /* obd.c */ int do_disconnect(char *func, int verbose); int obd_initialize(int argc, char **argv); @@ -133,4 +135,8 @@ int jt_pool_cmd(int argc, char **argv); int jt_changelog_register(int argc, char **argv); int jt_changelog_deregister(int argc, char **argv); +/* lustre_lfsck.c */ +int jt_lfsck_start(int argc, char **argv); +int jt_lfsck_stop(int argc, char **argv); + #endif