Whamcloud - gitweb
LU-7349 lfsck: skip auto resume lfsck when mount 72/16972/6
authorFan Yong <fan.yong@intel.com>
Mon, 12 Oct 2015 18:00:19 +0000 (02:00 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 5 Jan 2016 00:49:07 +0000 (00:49 +0000)
Usually, if the server is unmounted or crashes while LFSCK is
running, then when the server is mounted again, LFSCK will
resume from the latest checkpoint automatically.

But if the server corruption is caused by LFSCK itself, automatically
resuming LFSCK at mount time may cause the server to crash again.
To avoid this, introduce a new server-side mount option
"skip_lfsck", which prevents an unfinished LFSCK from being resumed
automatically at mount time.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I773dcde9dd239abe1e82127004ffef28ff1f1ae3
Reviewed-on: http://review.whamcloud.com/16972
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: wangdi <di.wang@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/doc/mount.lustre.8
lustre/include/lustre_disk.h
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/obdclass/obd_mount.c
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_internal.h
lustre/ofd/ofd_obd.c
lustre/tests/sanity-lfsck.sh
lustre/utils/mount_lustre.c

index c68a45d..210c0c8 100644 (file)
@@ -114,6 +114,12 @@ Only start the MGC (and MGS, if co-located) for a target service, and not the ac
 .BI nomgs
 Start a MDT with a co-located MGS without starting the MGS.
 .TP
 .BI nomgs
 Start a MDT with a co-located MGS without starting the MGS.
 .TP
+.BI noscrub
+Do not trigger OI scrub automatically when an inconsistency is detected; OI scrub only runs if it is started explicitly.
+.TP
+.BI skip_lfsck
+Do not automatically resume a previously paused or crashed LFSCK at mount time.
+.TP
 .BI exclude= ostlist
 Start a client or MDT with a (colon-separated) list of known inactive OSTs.
 .TP
 .BI exclude= ostlist
 Start a client or MDT with a (colon-separated) list of known inactive OSTs.
 .TP
index 4054435..2130784 100644 (file)
@@ -237,6 +237,7 @@ struct lustre_mount_data {
 
 #define LMD_FLG_SERVER         0x0001  /* Mounting a server */
 #define LMD_FLG_CLIENT         0x0002  /* Mounting a client */
 
 #define LMD_FLG_SERVER         0x0001  /* Mounting a server */
 #define LMD_FLG_CLIENT         0x0002  /* Mounting a client */
+#define LMD_FLG_SKIP_LFSCK     0x0004  /* NOT auto resume LFSCK when mount */
 #define LMD_FLG_ABORT_RECOV    0x0008  /* Abort recovery */
 #define LMD_FLG_NOSVC          0x0010  /* Only start MGS/MGC for servers,
                                           no other services */
 #define LMD_FLG_ABORT_RECOV    0x0008  /* Abort recovery */
 #define LMD_FLG_NOSVC          0x0010  /* Only start MGS/MGC for servers,
                                           no other services */
index 216cb4d..d462496 100644 (file)
@@ -4358,6 +4358,9 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
                 LASSERT(num);
                 node_id = simple_strtol(num, NULL, 10);
                obd->u.obt.obt_magic = OBT_MAGIC;
                 LASSERT(num);
                 node_id = simple_strtol(num, NULL, 10);
                obd->u.obt.obt_magic = OBT_MAGIC;
+               if (lsi->lsi_lmd != NULL &&
+                   lsi->lsi_lmd->lmd_flags & LMD_FLG_SKIP_LFSCK)
+                       m->mdt_skip_lfsck = 1;
        }
 
        m->mdt_squash.rsi_uid = 0;
        }
 
        m->mdt_squash.rsi_uid = 0;
@@ -5676,18 +5679,21 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 static int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt)
 {
        struct lu_device *ld = md2lu_dev(mdt->mdt_child);
 static int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt)
 {
        struct lu_device *ld = md2lu_dev(mdt->mdt_child);
-       struct lfsck_start_param lsp;
        int rc;
        ENTRY;
 
        int rc;
        ENTRY;
 
-       lsp.lsp_start = NULL;
-       lsp.lsp_index_valid = 0;
-       rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child,
-                                                  OBD_IOC_START_LFSCK,
-                                                  0, &lsp);
-       if (rc != 0 && rc != -EALREADY)
-               CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n",
-                     mdt_obd_name(mdt), rc);
+       if (!mdt->mdt_skip_lfsck) {
+               struct lfsck_start_param lsp;
+
+               lsp.lsp_start = NULL;
+               lsp.lsp_index_valid = 0;
+               rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child,
+                                                          OBD_IOC_START_LFSCK,
+                                                          0, &lsp);
+               if (rc != 0 && rc != -EALREADY)
+                       CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n",
+                             mdt_obd_name(mdt), rc);
+       }
 
        rc = ld->ld_ops->ldo_recovery_complete(env, ld);
        RETURN(rc);
 
        rc = ld->ld_ops->ldo_recovery_complete(env, ld);
        RETURN(rc);
index 2e163b9..178868f 100644 (file)
@@ -195,7 +195,8 @@ struct mdt_device {
 
        unsigned int               mdt_capa_conf:1,
                                   /* Enable remote dir on non-MDT0 */
 
        unsigned int               mdt_capa_conf:1,
                                   /* Enable remote dir on non-MDT0 */
-                                  mdt_enable_remote_dir:1;
+                                  mdt_enable_remote_dir:1,
+                                  mdt_skip_lfsck:1;
 
        gid_t                      mdt_enable_remote_dir_gid;
        /* statfs optimization: we cache a bit  */
 
        gid_t                      mdt_enable_remote_dir_gid;
        /* statfs optimization: we cache a bit  */
index c8b0338..153986a 100644 (file)
@@ -1194,6 +1194,9 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                } else if (strncmp(s1, "noscrub", 7) == 0) {
                        lmd->lmd_flags |= LMD_FLG_NOSCRUB;
                        clear++;
                } else if (strncmp(s1, "noscrub", 7) == 0) {
                        lmd->lmd_flags |= LMD_FLG_NOSCRUB;
                        clear++;
+               } else if (strncmp(s1, "skip_lfsck", 10) == 0) {
+                       lmd->lmd_flags |= LMD_FLG_SKIP_LFSCK;
+                       clear++;
                } else if (strncmp(s1, PARAM_MGSNODE,
                                   sizeof(PARAM_MGSNODE) - 1) == 0) {
                        s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
                } else if (strncmp(s1, PARAM_MGSNODE,
                                   sizeof(PARAM_MGSNODE) - 1) == 0) {
                        s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
index 57b3bdd..bb7ec05 100644 (file)
@@ -167,6 +167,7 @@ static int ofd_stack_init(const struct lu_env *env,
        struct lu_device        *d;
        struct ofd_thread_info  *info = ofd_info(env);
        struct lustre_mount_info *lmi;
        struct lu_device        *d;
        struct ofd_thread_info  *info = ofd_info(env);
        struct lustre_mount_info *lmi;
+       struct lustre_mount_data *lmd;
        int                      rc;
        char                    *osdname;
 
        int                      rc;
        char                    *osdname;
 
@@ -178,6 +179,10 @@ static int ofd_stack_init(const struct lu_env *env,
                RETURN(-ENODEV);
        }
 
                RETURN(-ENODEV);
        }
 
+       lmd = s2lsi(lmi->lmi_sb)->lsi_lmd;
+       if (lmd != NULL && lmd->lmd_flags & LMD_FLG_SKIP_LFSCK)
+               m->ofd_skip_lfsck = 1;
+
        /* find bottom osd */
        OBD_ALLOC(osdname, MTI_NAME_MAXLEN);
        if (osdname == NULL)
        /* find bottom osd */
        OBD_ALLOC(osdname, MTI_NAME_MAXLEN);
        if (osdname == NULL)
index 95ffe17..ad868be 100644 (file)
@@ -183,7 +183,8 @@ struct ofd_device {
                                 /* Protected by ofd_lastid_rwsem. */
                                 ofd_lastid_rebuilding:1,
                                 ofd_record_fid_accessed:1,
                                 /* Protected by ofd_lastid_rwsem. */
                                 ofd_lastid_rebuilding:1,
                                 ofd_record_fid_accessed:1,
-                                ofd_lfsck_verify_pfid:1;
+                                ofd_lfsck_verify_pfid:1,
+                                ofd_skip_lfsck:1;
        struct seq_server_site   ofd_seq_site;
        /* the limit of SOFT_SYNC RPCs that will trigger a soft sync */
        unsigned int             ofd_soft_sync_limit;
        struct seq_server_site   ofd_seq_site;
        /* the limit of SOFT_SYNC RPCs that will trigger a soft sync */
        unsigned int             ofd_soft_sync_limit;
index 2d73ae5..a7277ec 100644 (file)
@@ -535,17 +535,20 @@ static int ofd_destroy_export(struct obd_export *exp)
 int ofd_postrecov(const struct lu_env *env, struct ofd_device *ofd)
 {
        struct lu_device *ldev = &ofd->ofd_dt_dev.dd_lu_dev;
 int ofd_postrecov(const struct lu_env *env, struct ofd_device *ofd)
 {
        struct lu_device *ldev = &ofd->ofd_dt_dev.dd_lu_dev;
-       struct lfsck_start_param lsp;
        int rc;
 
        CDEBUG(D_HA, "%s: recovery is over\n", ofd_name(ofd));
 
        int rc;
 
        CDEBUG(D_HA, "%s: recovery is over\n", ofd_name(ofd));
 
-       lsp.lsp_start = NULL;
-       lsp.lsp_index_valid = 0;
-       rc = lfsck_start(env, ofd->ofd_osd, &lsp);
-       if (rc != 0 && rc != -EALREADY)
-               CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n",
-                     ofd_name(ofd), rc);
+       if (!ofd->ofd_skip_lfsck) {
+               struct lfsck_start_param lsp;
+
+               lsp.lsp_start = NULL;
+               lsp.lsp_index_valid = 0;
+               rc = lfsck_start(env, ofd->ofd_osd, &lsp);
+               if (rc != 0 && rc != -EALREADY)
+                       CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n",
+                             ofd_name(ofd), rc);
+       }
 
        return ldev->ld_ops->ldo_recovery_complete(env, ldev);
 }
 
        return ldev->ld_ops->ldo_recovery_complete(env, ldev);
 }
index e3f0c74..10a892e 100644 (file)
@@ -80,6 +80,7 @@ SHOW_LAYOUT_ON_OST="do_facet ost1 \
                $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
 MOUNT_OPTS_SCRUB="-o user_xattr"
 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
                $LCTL get_param -n obdfilter.${OST_DEV}.lfsck_layout"
 MOUNT_OPTS_SCRUB="-o user_xattr"
 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
+MOUNT_OPTS_SKIP_LFSCK="-o user_xattr,skip_lfsck"
 
 lfsck_prep() {
        local ndirs=$1
 
 lfsck_prep() {
        local ndirs=$1
@@ -940,6 +941,30 @@ test_8()
        [ "$STATUS" == "paused" ] ||
                error "(20) Expect 'paused', but got '$STATUS'"
 
        [ "$STATUS" == "paused" ] ||
                error "(20) Expect 'paused', but got '$STATUS'"
 
+       echo "stop $SINGLEMDS"
+       stop $SINGLEMDS > /dev/null || error "(20.1) Fail to stop MDS!"
+
+       echo "start $SINGLEMDS without resume LFSCK"
+       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SKIP_LFSCK > /dev/null ||
+               error "(20.2) Fail to start MDS!"
+
+       timer=0
+       while [ $timer -lt $timeout ]; do
+               STATUS=$(do_facet $SINGLEMDS "$LCTL get_param -n \
+                       mdt.${MDT_DEV}.recovery_status |
+                       awk '/^status/ { print \\\$2 }'")
+               [ "$STATUS" != "RECOVERING" ] && break;
+               sleep 1
+               timer=$((timer + 1))
+       done
+
+       [ $timer != $timeout ] ||
+               error "(20.3) recovery timeout"
+
+       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "paused" ] ||
+               error "(20.4) Expect 'paused', but got '$STATUS'"
+
        #define OBD_FAIL_LFSCK_DELAY3           0x1602
        do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
 
        #define OBD_FAIL_LFSCK_DELAY3           0x1602
        do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
 
index 88fcaa7..167aa61 100644 (file)
@@ -101,6 +101,8 @@ void usage(FILE *out)
                "\t\tabort_recov: abort server recovery handling\n"
                "\t\tnosvc: only start MGC/MGS obds\n"
                "\t\tnomgs: only start target obds, using existing MGS\n"
                "\t\tabort_recov: abort server recovery handling\n"
                "\t\tnosvc: only start MGC/MGS obds\n"
                "\t\tnomgs: only start target obds, using existing MGS\n"
+               "\t\tnoscrub: NOT auto start OI scrub unless start explicitly\n"
+               "\t\tskip_lfsck: NOT auto resume the paused/crashed LFSCK\n"
                "\t\texclude=<ostname>[:<ostname>] : colon-separated list of "
                "inactive OSTs (e.g. lustre-OST0001)\n"
                "\t\tretry=<num>: number of times mount is retried by client\n"
                "\t\texclude=<ostname>[:<ostname>] : colon-separated list of "
                "inactive OSTs (e.g. lustre-OST0001)\n"
                "\t\tretry=<num>: number of times mount is retried by client\n"