Whamcloud - gitweb
LU-3335 scrub: control OI scrub on OST from user space 98/6698/12
author Fan Yong <fan.yong@intel.com>
Wed, 26 Jun 2013 14:54:31 +0000 (22:54 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 26 Jul 2013 05:43:17 +0000 (05:43 +0000)
Not all OI inconsistencies can be detected automatically, for example
when an /O entry is lost. So the administrator should be able to
trigger the OI scrub on the OST manually.

Test-Parameters: testlist=sanity-scrub
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I9b23787b2fb14c8c93642462f9ebac948a181b70
Reviewed-on: http://review.whamcloud.com/6698
Tested-by: Hudson
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/lfsck/lfsck_lib.c
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_obd.c
lustre/osd-ldiskfs/osd_compat.c
lustre/tests/sanity-scrub.sh
lustre/utils/lctl.c
lustre/utils/lustre_lfsck.c

index b9ec568..c0f5aca 100644 (file)
@@ -265,6 +265,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_OSD_FID_MAPPING                       0x193
 #define OBD_FAIL_OSD_LMA_INCOMPAT                      0x194
 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY              0x195
+#define OBD_FAIL_OSD_COMPAT_NO_ENTRY                   0x196
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
index fc612fc..4fefa7d 100644 (file)
@@ -1013,8 +1013,7 @@ trigger:
 
        lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
        thread_set_flags(thread, 0);
-       if (lfsck->li_master)
-               rc = PTR_ERR(kthread_run(lfsck_master_engine, lfsck, "lfsck"));
+       rc = PTR_ERR(kthread_run(lfsck_master_engine, lfsck, "lfsck"));
        if (IS_ERR_VALUE(rc)) {
                CERROR("%s: cannot start LFSCK thread, rc = %ld\n",
                       lfsck_lfsck2name(lfsck), rc);
index dfe363b..ab9b24c 100644 (file)
@@ -45,6 +45,7 @@
 #include <obd_class.h>
 #include <lustre_param.h>
 #include <lustre_fid.h>
+#include <lustre_lfsck.h>
 
 #include "ofd_internal.h"
 
@@ -335,11 +336,12 @@ extern int ost_handle(struct ptlrpc_request *req);
 static int ofd_prepare(const struct lu_env *env, struct lu_device *pdev,
                       struct lu_device *dev)
 {
-       struct ofd_thread_info *info;
-       struct ofd_device       *ofd = ofd_dev(dev);
-       struct obd_device       *obd = ofd_obd(ofd);
-       struct lu_device        *next = &ofd->ofd_osd->dd_lu_dev;
-       int                      rc;
+       struct ofd_thread_info          *info;
+       struct ofd_device               *ofd = ofd_dev(dev);
+       struct obd_device               *obd = ofd_obd(ofd);
+       struct lu_device                *next = &ofd->ofd_osd->dd_lu_dev;
+       struct lfsck_start_param         lsp;
+       int                              rc;
 
        ENTRY;
 
@@ -355,6 +357,24 @@ static int ofd_prepare(const struct lu_env *env, struct lu_device *pdev,
 
        /* initialize lower device */
        rc = next->ld_ops->ldo_prepare(env, dev, next);
+       if (rc != 0)
+               RETURN(rc);
+
+       rc = lfsck_register(env, ofd->ofd_osd, &ofd->ofd_dt_dev, false);
+       if (rc != 0) {
+               CERROR("%s: failed to initialize lfsck: rc = %d\n",
+                      obd->obd_name, rc);
+               RETURN(rc);
+       }
+
+       lsp.lsp_start = NULL;
+       lsp.lsp_namespace = ofd->ofd_namespace;
+       rc = lfsck_start(env, ofd->ofd_osd, &lsp);
+       if (rc != 0) {
+               CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n",
+                     obd->obd_name, rc);
+               rc = 0;
+       }
 
        target_recovery_init(&ofd->ofd_lut, ost_handle);
        LASSERT(obd->obd_no_conn);
@@ -756,6 +776,8 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m)
        struct obd_device *obd = ofd_obd(m);
        struct lu_device  *d = &m->ofd_dt_dev.dd_lu_dev;
 
+       lfsck_stop(env, m->ofd_osd, true);
+       lfsck_degister(env, m->ofd_osd);
        target_recovery_fini(obd);
        obd_exports_barrier(obd);
        obd_zombie_barrier();
index 6ff96ec..429023b 100644 (file)
@@ -46,6 +46,7 @@
 #include "ofd_internal.h"
 #include <obd_cksum.h>
 #include <lustre_quota.h>
+#include <lustre_lfsck.h>
 
 static int ofd_export_stats_init(struct ofd_device *ofd,
                                 struct obd_export *exp, void *client_nid)
@@ -1545,6 +1546,24 @@ int ofd_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                if (rc == 0)
                        rc = dt_ro(&env, ofd->ofd_osd);
                break;
+       case OBD_IOC_START_LFSCK: {
+               struct obd_ioctl_data *data = karg;
+               struct lfsck_start_param lsp;
+
+               if (unlikely(data == NULL)) {
+                       rc = -EINVAL;
+                       break;
+               }
+
+               lsp.lsp_start = (struct lfsck_start *)(data->ioc_inlbuf1);
+               lsp.lsp_namespace = ofd->ofd_namespace;
+               rc = lfsck_start(&env, ofd->ofd_osd, &lsp);
+               break;
+       }
+       case OBD_IOC_STOP_LFSCK: {
+               rc = lfsck_stop(&env, ofd->ofd_osd, false);
+               break;
+       }
        case OBD_IOC_GET_OBJ_VERSION:
                rc = ofd_ioc_get_obj_version(&env, ofd, karg);
                break;
index 2403aa1..f8d65bf 100644 (file)
@@ -659,28 +659,31 @@ int osd_obj_add_entry(struct osd_thread_info *info,
                      const struct osd_inode_id *id,
                      struct thandle *th)
 {
-        struct osd_thandle *oh;
-        struct dentry *child;
-        struct inode *inode;
-        int rc;
+       struct osd_thandle *oh;
+       struct dentry *child;
+       struct inode *inode;
+       int rc;
 
-        ENTRY;
+       ENTRY;
 
-        oh = container_of(th, struct osd_thandle, ot_super);
-        LASSERT(oh->ot_handle != NULL);
-        LASSERT(oh->ot_handle->h_transaction != NULL);
+       if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_NO_ENTRY))
+               RETURN(0);
 
-        inode = &info->oti_inode;
-        inode->i_sb = osd_sb(osd);
+       oh = container_of(th, struct osd_thandle, ot_super);
+       LASSERT(oh->ot_handle != NULL);
+       LASSERT(oh->ot_handle->h_transaction != NULL);
+
+       inode = &info->oti_inode;
+       inode->i_sb = osd_sb(osd);
        osd_id_to_inode(inode, id);
        inode->i_mode = S_IFREG; /* for type in ldiskfs dir entry */
 
-        child = &info->oti_child_dentry;
-        child->d_name.hash = 0;
-        child->d_name.name = name;
-        child->d_name.len = strlen(name);
-        child->d_parent = dir;
-        child->d_inode = inode;
+       child = &info->oti_child_dentry;
+       child->d_name.hash = 0;
+       child->d_name.name = name;
+       child->d_name.len = strlen(name);
+       child->d_parent = dir;
+       child->d_inode = inode;
 
        if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
                inode->i_ino++;
index 4dbf1b1..f8bc0d6 100644 (file)
@@ -45,6 +45,7 @@ MDT_DEV="${FSNAME}-MDT0000"
 OST_DEV="${FSNAME}-OST0000"
 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV}"
+START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV}"
 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
 SHOW_SCRUB="do_facet $SINGLEMDS \
                $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
@@ -838,6 +839,43 @@ test_12() {
 }
 run_test 12 "OI scrub can rebuild invalid /O entries"
 
+test_13() {
+       echo "stopall"
+       stopall > /dev/null
+       echo "formatall"
+       formatall > /dev/null
+       echo "setupall"
+       setupall > /dev/null
+
+       mkdir -p $DIR/$tdir
+       $SETSTRIPE -c 1 -i 0 $DIR/$tdir
+
+       #define OBD_FAIL_OSD_COMPAT_NO_ENTRY            0x196
+       do_facet ost1 $LCTL set_param fail_loc=0x196
+       createmany -o $DIR/$tdir/f 1000
+       do_facet ost1 $LCTL set_param fail_loc=0
+
+       echo "stopall"
+       stopall > /dev/null
+       echo "setupall"
+       setupall > /dev/null
+
+       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "init" ] ||
+               error "(1) Expect 'init', but got '$STATUS'"
+
+       ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail"
+
+       $START_SCRUB_ON_OST || error "(3) Fail to start OI scrub on OST!"
+       sleep 3
+       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(4) Expect 'completed', but got '$STATUS'"
+
+       ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed"
+}
+run_test 13 "OI scrub can rebuild missed /O entries"
+
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}
index 00ca97b..74f9ce1 100644 (file)
@@ -351,13 +351,13 @@ command_t cmdlist[] = {
        /* LFSCK commands */
        {"==== LFSCK ====", jt_noop, 0, "LFSCK"},
        {"lfsck_start", jt_lfsck_start, 0, "start LFSCK\n"
-        "usage: lfsck_start <-M | --device MDT_device>\n"
+        "usage: lfsck_start <-M | --device [MDT,OST]_device>\n"
         "                   [-e | --error error_handle] [-h | --help]\n"
         "                   [-n | --dryrun switch] [-r | --reset]\n"
         "                   [-s | --speed speed_limit]\n"
         "                   [-t | --type lfsck_type[,lfsck_type...]]"},
        {"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n"
-        "usage: lfsck_stop <-M | --device MDT_device> [-h | --help]"},
+        "usage: lfsck_stop <-M | --device [MDT,OST]_device> [-h | --help]"},
 
         {"==== obsolete (DANGEROUS) ====", jt_noop, 0, "obsolete (DANGEROUS)"},
         /* some test scripts still use these */
index 8e2572e..20ea2be 100644 (file)
@@ -70,7 +70,6 @@ struct lfsck_type_name {
 
 static struct lfsck_type_name lfsck_types_names[] = {
        { "layout",     6,      LT_LAYOUT },
-       { "DNE",        3,      LT_DNE },
        { "namespace",  9,      LT_NAMESPACE},
        { 0,            0,      0 }
 };
@@ -92,13 +91,13 @@ static void usage_start(void)
 {
        fprintf(stderr, "Start LFSCK.\n"
                "SYNOPSIS:\n"
-               "lfsck_start <-M | --device MDT_device>\n"
+               "lfsck_start <-M | --device [MDT,OST]_device>\n"
                "            [-e | --error error_handle] [-h | --help]\n"
                "            [-n | --dryrun switch] [-r | --reset]\n"
                "            [-s | --speed speed_limit]\n"
                "            [-t | --type lfsck_type[,lfsck_type...]]\n"
                "OPTIONS:\n"
-               "-M: The MDT device to start LFSCK on.\n"
+               "-M: The device to start LFSCK/scrub on.\n"
                "-e: Error handle, 'continue'(default) or 'abort'.\n"
                "-h: Help information.\n"
                "-n: Check without modification. 'off'(default) or 'on'.\n"
@@ -113,9 +112,9 @@ static void usage_stop(void)
 {
        fprintf(stderr, "Stop LFSCK.\n"
                "SYNOPSIS:\n"
-               "lfsck_stop <-M | --device MDT_device> [-h | --help]\n"
+               "lfsck_stop <-M | --device [MDT,OST]_device> [-h | --help]\n"
                "OPTIONS:\n"
-               "-M: The MDT device to stop LFSCK on.\n"
+               "-M: The device to stop LFSCK/scrub on.\n"
                "-h: Help information.\n");
 }
 
@@ -124,7 +123,7 @@ static int lfsck_pack_dev(struct obd_ioctl_data *data, char *device, char *arg)
        int len = strlen(arg) + 1;
 
        if (len > MAX_OBD_NAME) {
-               fprintf(stderr, "MDT device name is too long. "
+               fprintf(stderr, "device name is too long. "
                        "Valid length should be less than %d\n", MAX_OBD_NAME);
                return -EINVAL;
        }
@@ -216,8 +215,8 @@ int jt_lfsck_start(int argc, char **argv)
                                if (type == 0) {
                                        fprintf(stderr, "Invalid type (%s).\n"
                                                "The valid value should be "
-                                               "'layout', 'DNE' or "
-                                               "'namespace'.\n", str);
+                                               "'layout' or 'namespace'.\n",
+                                               str);
                                        *p = c;
                                        return -EINVAL;
                                }
@@ -230,7 +229,7 @@ int jt_lfsck_start(int argc, char **argv)
                        if (start.ls_active == 0) {
                                fprintf(stderr, "Miss LFSCK type(s).\n"
                                        "The valid value should be "
-                                       "'layout', 'DNE' or 'namespace'.\n");
+                                       "'layout' or 'namespace'.\n");
                                return -EINVAL;
                        }
                        break;
@@ -242,9 +241,15 @@ int jt_lfsck_start(int argc, char **argv)
        }
 
        if (data.ioc_inlbuf4 == NULL) {
-               fprintf(stderr,
-                       "Must sepcify MDT device to start LFSCK.\n");
-               return -EINVAL;
+               if (lcfg_get_devname() != NULL) {
+                       rc = lfsck_pack_dev(&data, device, lcfg_get_devname());
+                       if (rc != 0)
+                               return rc;
+               } else {
+                       fprintf(stderr,
+                               "Must specify device to start LFSCK.\n");
+                       return -EINVAL;
+               }
        }
 
        data.ioc_inlbuf1 = (char *)&start;
@@ -264,9 +269,9 @@ int jt_lfsck_start(int argc, char **argv)
 
        obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
        if (start.ls_active == 0) {
-               printf("Started LFSCK on the MDT device %s", device);
+               printf("Started LFSCK on the device %s", device);
        } else {
-               printf("Started LFSCK on the MDT device %s:", device);
+               printf("Started LFSCK on the device %s:", device);
                i = 0;
                while (lfsck_types_names[i].name != NULL) {
                        if (start.ls_active & lfsck_types_names[i].type) {
@@ -314,9 +319,15 @@ int jt_lfsck_stop(int argc, char **argv)
        }
 
        if (data.ioc_inlbuf4 == NULL) {
-               fprintf(stderr,
-                       "Must sepcify MDT device to stop LFSCK.\n");
-               return -EINVAL;
+               if (lcfg_get_devname() != NULL) {
+                       rc = lfsck_pack_dev(&data, device, lcfg_get_devname());
+                       if (rc != 0)
+                               return rc;
+               } else {
+                       fprintf(stderr,
+                               "Must sepcify device to stop LFSCK.\n");
+                       return -EINVAL;
+               }
        }
 
        memset(buf, 0, sizeof(rawbuf));
@@ -332,6 +343,6 @@ int jt_lfsck_stop(int argc, char **argv)
                return rc;
        }
 
-       printf("Stopped LFSCK on the MDT device %s.\n", device);
+       printf("Stopped LFSCK on the device %s.\n", device);
        return 0;
 }