From cc922789d90f9495dc8009eb082392ad41151a3d Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Wed, 26 Jun 2013 22:54:31 +0800 Subject: [PATCH] LU-3335 scrub: control OI scrub on OST from user space Not all OI inconsistencies can be detected automatically, for example the case of a lost /O entry. So the administrator should be able to trigger the OI scrub on the OST manually. Test-Parameters: testlist=sanity-scrub Signed-off-by: Fan Yong Change-Id: I9b23787b2fb14c8c93642462f9ebac948a181b70 Reviewed-on: http://review.whamcloud.com/6698 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/lfsck/lfsck_lib.c | 3 +-- lustre/ofd/ofd_dev.c | 32 +++++++++++++++++++++++----- lustre/ofd/ofd_obd.c | 19 +++++++++++++++++ lustre/osd-ldiskfs/osd_compat.c | 35 ++++++++++++++++-------------- lustre/tests/sanity-scrub.sh | 38 +++++++++++++++++++++++++++++++++ lustre/utils/lctl.c | 4 ++-- lustre/utils/lustre_lfsck.c | 47 +++++++++++++++++++++++++---------------- 8 files changed, 136 insertions(+), 43 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index b9ec568..c0f5aca 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -265,6 +265,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_OSD_FID_MAPPING 0x193 #define OBD_FAIL_OSD_LMA_INCOMPAT 0x194 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195 +#define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index fc612fc..4fefa7d 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -1013,8 +1013,7 @@ trigger: lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid; thread_set_flags(thread, 0); - if (lfsck->li_master) - rc = PTR_ERR(kthread_run(lfsck_master_engine, lfsck, "lfsck")); + rc = 
PTR_ERR(kthread_run(lfsck_master_engine, lfsck, "lfsck")); if (IS_ERR_VALUE(rc)) { CERROR("%s: cannot start LFSCK thread, rc = %ld\n", lfsck_lfsck2name(lfsck), rc); diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index dfe363b..ab9b24c 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "ofd_internal.h" @@ -335,11 +336,12 @@ extern int ost_handle(struct ptlrpc_request *req); static int ofd_prepare(const struct lu_env *env, struct lu_device *pdev, struct lu_device *dev) { - struct ofd_thread_info *info; - struct ofd_device *ofd = ofd_dev(dev); - struct obd_device *obd = ofd_obd(ofd); - struct lu_device *next = &ofd->ofd_osd->dd_lu_dev; - int rc; + struct ofd_thread_info *info; + struct ofd_device *ofd = ofd_dev(dev); + struct obd_device *obd = ofd_obd(ofd); + struct lu_device *next = &ofd->ofd_osd->dd_lu_dev; + struct lfsck_start_param lsp; + int rc; ENTRY; @@ -355,6 +357,24 @@ static int ofd_prepare(const struct lu_env *env, struct lu_device *pdev, /* initialize lower device */ rc = next->ld_ops->ldo_prepare(env, dev, next); + if (rc != 0) + RETURN(rc); + + rc = lfsck_register(env, ofd->ofd_osd, &ofd->ofd_dt_dev, false); + if (rc != 0) { + CERROR("%s: failed to initialize lfsck: rc = %d\n", + obd->obd_name, rc); + RETURN(rc); + } + + lsp.lsp_start = NULL; + lsp.lsp_namespace = ofd->ofd_namespace; + rc = lfsck_start(env, ofd->ofd_osd, &lsp); + if (rc != 0) { + CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n", + obd->obd_name, rc); + rc = 0; + } target_recovery_init(&ofd->ofd_lut, ost_handle); LASSERT(obd->obd_no_conn); @@ -756,6 +776,8 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m) struct obd_device *obd = ofd_obd(m); struct lu_device *d = &m->ofd_dt_dev.dd_lu_dev; + lfsck_stop(env, m->ofd_osd, true); + lfsck_degister(env, m->ofd_osd); target_recovery_fini(obd); obd_exports_barrier(obd); obd_zombie_barrier(); diff --git a/lustre/ofd/ofd_obd.c 
b/lustre/ofd/ofd_obd.c index 6ff96ec..429023b 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -46,6 +46,7 @@ #include "ofd_internal.h" #include #include +#include static int ofd_export_stats_init(struct ofd_device *ofd, struct obd_export *exp, void *client_nid) @@ -1545,6 +1546,24 @@ int ofd_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (rc == 0) rc = dt_ro(&env, ofd->ofd_osd); break; + case OBD_IOC_START_LFSCK: { + struct obd_ioctl_data *data = karg; + struct lfsck_start_param lsp; + + if (unlikely(data == NULL)) { + rc = -EINVAL; + break; + } + + lsp.lsp_start = (struct lfsck_start *)(data->ioc_inlbuf1); + lsp.lsp_namespace = ofd->ofd_namespace; + rc = lfsck_start(&env, ofd->ofd_osd, &lsp); + break; + } + case OBD_IOC_STOP_LFSCK: { + rc = lfsck_stop(&env, ofd->ofd_osd, false); + break; + } case OBD_IOC_GET_OBJ_VERSION: rc = ofd_ioc_get_obj_version(&env, ofd, karg); break; diff --git a/lustre/osd-ldiskfs/osd_compat.c b/lustre/osd-ldiskfs/osd_compat.c index 2403aa1..f8d65bf 100644 --- a/lustre/osd-ldiskfs/osd_compat.c +++ b/lustre/osd-ldiskfs/osd_compat.c @@ -659,28 +659,31 @@ int osd_obj_add_entry(struct osd_thread_info *info, const struct osd_inode_id *id, struct thandle *th) { - struct osd_thandle *oh; - struct dentry *child; - struct inode *inode; - int rc; + struct osd_thandle *oh; + struct dentry *child; + struct inode *inode; + int rc; - ENTRY; + ENTRY; - oh = container_of(th, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle != NULL); - LASSERT(oh->ot_handle->h_transaction != NULL); + if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_NO_ENTRY)) + RETURN(0); - inode = &info->oti_inode; - inode->i_sb = osd_sb(osd); + oh = container_of(th, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle != NULL); + LASSERT(oh->ot_handle->h_transaction != NULL); + + inode = &info->oti_inode; + inode->i_sb = osd_sb(osd); osd_id_to_inode(inode, id); inode->i_mode = S_IFREG; /* for type in ldiskfs dir entry */ - child = &info->oti_child_dentry; - 
child->d_name.hash = 0; - child->d_name.name = name; - child->d_name.len = strlen(name); - child->d_parent = dir; - child->d_inode = inode; + child = &info->oti_child_dentry; + child->d_name.hash = 0; + child->d_name.name = name; + child->d_name.len = strlen(name); + child->d_parent = dir; + child->d_inode = inode; if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY)) inode->i_ino++; diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 4dbf1b1..f8bc0d6 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -45,6 +45,7 @@ MDT_DEV="${FSNAME}-MDT0000" OST_DEV="${FSNAME}-OST0000" MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/}) START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV}" +START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV}" STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}" SHOW_SCRUB="do_facet $SINGLEMDS \ $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub" @@ -838,6 +839,43 @@ test_12() { } run_test 12 "OI scrub can rebuild invalid /O entries" +test_13() { + echo "stopall" + stopall > /dev/null + echo "formatall" + formatall > /dev/null + echo "setupall" + setupall > /dev/null + + mkdir -p $DIR/$tdir + $SETSTRIPE -c 1 -i 0 $DIR/$tdir + + #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 + do_facet ost1 $LCTL set_param fail_loc=0x196 + createmany -o $DIR/$tdir/f 1000 + do_facet ost1 $LCTL set_param fail_loc=0 + + echo "stopall" + stopall > /dev/null + echo "setupall" + setupall > /dev/null + + local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(1) Expect 'init', but got '$STATUS'" + + ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + + $START_SCRUB_ON_OST || error "(3) Fail to start OI scrub on OST!" 
+ sleep 3 + local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(4) Expect 'completed', but got '$STATUS'" + + ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed" +} +run_test 13 "OI scrub can rebuild missed /O entries" + # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 00ca97b..74f9ce1 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -351,13 +351,13 @@ command_t cmdlist[] = { /* LFSCK commands */ {"==== LFSCK ====", jt_noop, 0, "LFSCK"}, {"lfsck_start", jt_lfsck_start, 0, "start LFSCK\n" - "usage: lfsck_start <-M | --device MDT_device>\n" + "usage: lfsck_start <-M | --device [MDT,OST]_device>\n" " [-e | --error error_handle] [-h | --help]\n" " [-n | --dryrun switch] [-r | --reset]\n" " [-s | --speed speed_limit]\n" " [-t | --type lfsck_type[,lfsck_type...]]"}, {"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n" - "usage: lfsck_stop <-M | --device MDT_device> [-h | --help]"}, + "usage: lfsck_stop <-M | --device [MDT,OST]_device> [-h | --help]"}, {"==== obsolete (DANGEROUS) ====", jt_noop, 0, "obsolete (DANGEROUS)"}, /* some test scripts still use these */ diff --git a/lustre/utils/lustre_lfsck.c b/lustre/utils/lustre_lfsck.c index 8e2572e..20ea2be 100644 --- a/lustre/utils/lustre_lfsck.c +++ b/lustre/utils/lustre_lfsck.c @@ -70,7 +70,6 @@ struct lfsck_type_name { static struct lfsck_type_name lfsck_types_names[] = { { "layout", 6, LT_LAYOUT }, - { "DNE", 3, LT_DNE }, { "namespace", 9, LT_NAMESPACE}, { 0, 0, 0 } }; @@ -92,13 +91,13 @@ static void usage_start(void) { fprintf(stderr, "Start LFSCK.\n" "SYNOPSIS:\n" - "lfsck_start <-M | --device MDT_device>\n" + "lfsck_start <-M | --device [MDT,OST]_device>\n" " [-e | --error error_handle] [-h | --help]\n" " [-n | --dryrun switch] [-r | --reset]\n" " [-s | --speed speed_limit]\n" " [-t | --type lfsck_type[,lfsck_type...]]\n" "OPTIONS:\n" - "-M: The MDT 
device to start LFSCK on.\n" + "-M: The device to start LFSCK/scrub on.\n" "-e: Error handle, 'continue'(default) or 'abort'.\n" "-h: Help information.\n" "-n: Check without modification. 'off'(default) or 'on'.\n" @@ -113,9 +112,9 @@ static void usage_stop(void) { fprintf(stderr, "Stop LFSCK.\n" "SYNOPSIS:\n" - "lfsck_stop <-M | --device MDT_device> [-h | --help]\n" + "lfsck_stop <-M | --device [MDT,OST]_device> [-h | --help]\n" "OPTIONS:\n" - "-M: The MDT device to stop LFSCK on.\n" + "-M: The device to stop LFSCK/scrub on.\n" "-h: Help information.\n"); } @@ -124,7 +123,7 @@ static int lfsck_pack_dev(struct obd_ioctl_data *data, char *device, char *arg) int len = strlen(arg) + 1; if (len > MAX_OBD_NAME) { - fprintf(stderr, "MDT device name is too long. " + fprintf(stderr, "device name is too long. " "Valid length should be less than %d\n", MAX_OBD_NAME); return -EINVAL; } @@ -216,8 +215,8 @@ int jt_lfsck_start(int argc, char **argv) if (type == 0) { fprintf(stderr, "Invalid type (%s).\n" "The valid value should be " - "'layout', 'DNE' or " - "'namespace'.\n", str); + "'layout' or 'namespace'.\n", + str); *p = c; return -EINVAL; } @@ -230,7 +229,7 @@ int jt_lfsck_start(int argc, char **argv) if (start.ls_active == 0) { fprintf(stderr, "Miss LFSCK type(s).\n" "The valid value should be " - "'layout', 'DNE' or 'namespace'.\n"); + "'layout' or 'namespace'.\n"); return -EINVAL; } break; @@ -242,9 +241,15 @@ int jt_lfsck_start(int argc, char **argv) } if (data.ioc_inlbuf4 == NULL) { - fprintf(stderr, - "Must sepcify MDT device to start LFSCK.\n"); - return -EINVAL; + if (lcfg_get_devname() != NULL) { + rc = lfsck_pack_dev(&data, device, lcfg_get_devname()); + if (rc != 0) + return rc; + } else { + fprintf(stderr, + "Must specify device to start LFSCK.\n"); + return -EINVAL; + } } data.ioc_inlbuf1 = (char *)&start; @@ -264,9 +269,9 @@ int jt_lfsck_start(int argc, char **argv) obd_ioctl_unpack(&data, buf, sizeof(rawbuf)); if (start.ls_active == 0) { - printf("Started 
LFSCK on the MDT device %s", device); + printf("Started LFSCK on the device %s", device); } else { - printf("Started LFSCK on the MDT device %s:", device); + printf("Started LFSCK on the device %s:", device); i = 0; while (lfsck_types_names[i].name != NULL) { if (start.ls_active & lfsck_types_names[i].type) { @@ -314,9 +319,15 @@ int jt_lfsck_stop(int argc, char **argv) } if (data.ioc_inlbuf4 == NULL) { - fprintf(stderr, - "Must sepcify MDT device to stop LFSCK.\n"); - return -EINVAL; + if (lcfg_get_devname() != NULL) { + rc = lfsck_pack_dev(&data, device, lcfg_get_devname()); + if (rc != 0) + return rc; + } else { + fprintf(stderr, + "Must specify device to stop LFSCK.\n"); + return -EINVAL; + } } memset(buf, 0, sizeof(rawbuf)); @@ -332,6 +343,6 @@ int jt_lfsck_stop(int argc, char **argv) return rc; } - printf("Stopped LFSCK on the MDT device %s.\n", device); + printf("Stopped LFSCK on the device %s.\n", device); return 0; } -- 1.8.3.1