Whamcloud - gitweb
LU-3935 scrub: support dryrun mode OI scrub 67/7867/5
authorFan Yong <fan.yong@intel.com>
Tue, 22 Oct 2013 22:52:40 +0000 (06:52 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 26 Nov 2013 13:28:49 +0000 (13:28 +0000)
The administrator can detect whether there are any inconsistent
OI mappings, without repairing them, via the command:

lctl lfsck_start --dryrun on -M <$device>

Under default mode, dryrun is "off". When the device is mounted,
the initial OI scrub will detect whether there is an upgrade case,
a file-level backup/restore case, or an OI file(s) recreated case.
If some inconsistency is found, it will trigger a non-dryrun mode OI
scrub automatically. If the administrator does NOT want such an
automatic non-dryrun OI scrub, to avoid unexpected repairs, he
(or she) can specify "-o noscrub" when mounting the device.

Test-Parameters: testlist=sanity-scrub
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Ibab79c543dbb5b69789b688cacca10737a710f83
Reviewed-on: http://review.whamcloud.com/7867
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/dt_object.h
lustre/mdd/mdd_lfsck.c
lustre/osd-ldiskfs/osd_scrub.c
lustre/osd-ldiskfs/osd_scrub.h
lustre/tests/sanity-scrub.sh

index 284184d..0db86c5 100644 (file)
@@ -622,6 +622,7 @@ struct dt_index_operations {
 
 enum dt_otable_it_valid {
        DOIV_ERROR_HANDLE       = 0x0001,
+       DOIV_DRYRUN             = 0x0002,
 };
 
 enum dt_otable_it_flags {
@@ -633,6 +634,9 @@ enum dt_otable_it_flags {
 
        /* There is up layer component uses the iteration. */
        DOIF_OUTUSED    = 0x0004,
+
+       /* Check only without repairing. */
+       DOIF_DRYRUN     = 0x0008,
 };
 
 /* otable based iteration needs to use the common DT interation APIs.
index b65561c..0db8364 100644 (file)
@@ -2718,6 +2718,10 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
        }
 
        if (start->ls_valid & LSV_DRYRUN) {
+               valid |= DOIV_DRYRUN;
+               if (start->ls_flags & LPF_DRYRUN)
+                       flags |= DOIF_DRYRUN;
+
                if ((start->ls_flags & LPF_DRYRUN) &&
                    !(bk->lb_param & LPF_DRYRUN)) {
                        bk->lb_param |= LPF_DRYRUN;
@@ -2790,8 +2794,11 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
 
 trigger:
        lfsck->ml_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
-       if (bk->lb_param & LPF_DRYRUN)
+       if (bk->lb_param & LPF_DRYRUN) {
                lfsck->ml_args_dir |= LUDA_VERIFY_DRYRUN;
+               valid |= DOIV_DRYRUN;
+               flags |= DOIF_DRYRUN;
+       }
 
        if (bk->lb_param & LPF_FAILOUT) {
                valid |= DOIV_ERROR_HANDLE;
index 34a2db1..0dadb8d 100644 (file)
@@ -89,7 +89,8 @@ static inline int osd_scrub_has_window(struct osd_scrub *scrub,
 static int osd_scrub_refresh_mapping(struct osd_thread_info *info,
                                     struct osd_device *dev,
                                     const struct lu_fid *fid,
-                                    const struct osd_inode_id *id, int ops)
+                                    const struct osd_inode_id *id,
+                                    int ops, bool force)
 {
        struct lu_fid         *oi_fid = &info->oti_fid2;
        struct osd_inode_id   *oi_id  = &info->oti_id2;
@@ -99,6 +100,9 @@ static int osd_scrub_refresh_mapping(struct osd_thread_info *info,
        int                    rc;
        ENTRY;
 
+       if (dev->od_scrub.os_file.sf_param & SP_DRYRUN && !force)
+               RETURN(0);
+
        fid_cpu_to_be(oi_fid, fid);
        if (id != NULL)
                osd_id_pack(oi_id, id);
@@ -272,7 +276,6 @@ void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags)
        memcpy(sf->sf_uuid, uuid, 16);
        sf->sf_status = SS_INIT;
        sf->sf_flags |= flags;
-       sf->sf_param = 0;
        sf->sf_run_time = 0;
        sf->sf_time_latest_start = 0;
        sf->sf_time_last_checkpoint = 0;
@@ -355,63 +358,6 @@ int osd_scrub_file_store(struct osd_scrub *scrub)
        return rc;
 }
 
-/* OI scrub APIs */
-
-static int osd_scrub_prep(struct osd_device *dev)
-{
-       struct osd_scrub     *scrub  = &dev->od_scrub;
-       struct ptlrpc_thread *thread = &scrub->os_thread;
-       struct scrub_file    *sf     = &scrub->os_file;
-       __u32                 flags  = scrub->os_start_flags;
-       int                   rc;
-       ENTRY;
-
-       down_write(&scrub->os_rwsem);
-       if (flags & SS_SET_FAILOUT)
-               sf->sf_param |= SP_FAILOUT;
-
-       if (flags & SS_CLEAR_FAILOUT)
-               sf->sf_param &= ~SP_FAILOUT;
-
-       if (flags & SS_RESET)
-               osd_scrub_file_reset(scrub,
-                       LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0);
-
-       if (flags & SS_AUTO) {
-               scrub->os_full_speed = 1;
-               sf->sf_flags |= SF_AUTO;
-       } else {
-               scrub->os_full_speed = 0;
-       }
-
-       if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE))
-               scrub->os_full_speed = 1;
-
-       scrub->os_in_prior = 0;
-       scrub->os_waiting = 0;
-       scrub->os_paused = 0;
-       scrub->os_new_checked = 0;
-       if (sf->sf_pos_last_checkpoint != 0)
-               sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1;
-       else
-               sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1;
-
-       scrub->os_pos_current = sf->sf_pos_latest_start;
-       sf->sf_status = SS_SCANNING;
-       sf->sf_time_latest_start = cfs_time_current_sec();
-       sf->sf_time_last_checkpoint = sf->sf_time_latest_start;
-       rc = osd_scrub_file_store(scrub);
-       if (rc == 0) {
-               spin_lock(&scrub->os_lock);
-               thread_set_flags(thread, SVC_RUNNING);
-               spin_unlock(&scrub->os_lock);
-               cfs_waitq_broadcast(&thread->t_ctl_waitq);
-       }
-       up_write(&scrub->os_rwsem);
-
-       RETURN(rc);
-}
-
 static int
 osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
                       struct osd_idmap_cache *oic, int val)
@@ -481,9 +427,11 @@ iget:
                if (val == SCRUB_NEXT_NOLMA) {
                        sf->sf_flags |= SF_UPGRADE;
                        scrub->os_full_speed = 1;
-                       rc = osd_ea_fid_set(info, inode, fid, 0);
-                       if (rc != 0)
-                               GOTO(out, rc);
+                       if (!(sf->sf_param & SP_DRYRUN)) {
+                               rc = osd_ea_fid_set(info, inode, fid, 0);
+                               if (rc != 0)
+                                       GOTO(out, rc);
+                       }
 
                        if (!(sf->sf_flags & SF_INCONSISTENT))
                                dev->od_igif_inoi = 0;
@@ -514,7 +462,7 @@ iget:
                dev->od_igif_inoi = 1;
        }
 
-       rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops);
+       rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops, false);
        if (rc == 0) {
                if (scrub->os_in_prior)
                        sf->sf_items_updated_prior++;
@@ -539,7 +487,7 @@ out:
                 * if happend, then remove the new added OI mapping. */
                if (unlikely(inode->i_nlink == 0))
                        osd_scrub_refresh_mapping(info, dev, fid, lid,
-                                                 DTO_INDEX_DELETE);
+                                                 DTO_INDEX_DELETE, false);
                iput(inode);
        }
        up_write(&scrub->os_rwsem);
@@ -555,6 +503,78 @@ out:
        RETURN(sf->sf_param & SP_FAILOUT ? rc : 0);
 }
 
+/* OI scrub APIs */
+
+static int osd_scrub_prep(struct osd_device *dev)
+{
+       struct osd_scrub     *scrub  = &dev->od_scrub;
+       struct ptlrpc_thread *thread = &scrub->os_thread;
+       struct scrub_file    *sf     = &scrub->os_file;
+       __u32                 flags  = scrub->os_start_flags;
+       int                   rc;
+       bool                  drop_dryrun = false;
+       ENTRY;
+       /* Apply the start flags to the scrub file and mark it SS_SCANNING. */
+       down_write(&scrub->os_rwsem);
+       if (flags & SS_SET_FAILOUT)
+               sf->sf_param |= SP_FAILOUT;
+
+       if (flags & SS_CLEAR_FAILOUT)
+               sf->sf_param &= ~SP_FAILOUT;
+
+       if (flags & SS_SET_DRYRUN)
+               sf->sf_param |= SP_DRYRUN;
+       /* Dryrun on->off: restart from the first recorded inconsistency. */
+       if (flags & SS_CLEAR_DRYRUN && sf->sf_param & SP_DRYRUN) {
+               sf->sf_param &= ~SP_DRYRUN;
+               drop_dryrun = true;
+       }
+
+       if (flags & SS_RESET)
+               osd_scrub_file_reset(scrub,
+                       LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0);
+
+       if (flags & SS_AUTO) {
+               scrub->os_full_speed = 1;
+               sf->sf_flags |= SF_AUTO;
+               /* An auto-triggered OI scrub always repairs: never dryrun. */
+               sf->sf_param &= ~SP_DRYRUN;
+       } else {
+               scrub->os_full_speed = 0;
+       }
+
+       if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE))
+               scrub->os_full_speed = 1;
+
+       scrub->os_in_prior = 0;
+       spin_lock(&scrub->os_lock);
+       scrub->os_waiting = 0;
+       scrub->os_paused = 0;
+       spin_unlock(&scrub->os_lock);
+       scrub->os_new_checked = 0;
+       if (drop_dryrun && sf->sf_pos_first_inconsistent != 0)
+               sf->sf_pos_latest_start = sf->sf_pos_first_inconsistent;
+       else if (sf->sf_pos_last_checkpoint != 0)
+               sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1;
+       else
+               sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1;
+
+       scrub->os_pos_current = sf->sf_pos_latest_start;
+       sf->sf_status = SS_SCANNING;
+       sf->sf_time_latest_start = cfs_time_current_sec();
+       sf->sf_time_last_checkpoint = sf->sf_time_latest_start;
+       rc = osd_scrub_file_store(scrub);
+       if (rc == 0) {
+               spin_lock(&scrub->os_lock);
+               thread_set_flags(thread, SVC_RUNNING);
+               spin_unlock(&scrub->os_lock);
+               wake_up_all(&thread->t_ctl_waitq);
+       }
+       up_write(&scrub->os_rwsem);
+
+       RETURN(rc);
+}
+
 static int osd_scrub_checkpoint(struct osd_scrub *scrub)
 {
        struct scrub_file *sf = &scrub->os_file;
@@ -599,9 +619,11 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result)
 
                dev->od_igif_inoi = 1;
                sf->sf_status = SS_COMPLETED;
-               memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE);
-               sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
-                                 SF_UPGRADE | SF_AUTO);
+               if (!(sf->sf_param & SP_DRYRUN)) {
+                       memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE);
+                       sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
+                                         SF_UPGRADE | SF_AUTO);
+               }
                sf->sf_time_last_complete = sf->sf_time_last_checkpoint;
                sf->sf_success_count++;
        } else if (result == 0) {
@@ -1227,6 +1249,9 @@ osd_ios_lookup_one_len(const char *name, struct dentry *parent, int namelen)
 {
        struct dentry *dentry;
 
+       CDEBUG(D_LFSCK, "init lookup one: parent = %.*s, name = %.*s\n",
+              parent->d_name.len, parent->d_name.name, namelen, name);
+
        dentry = ll_lookup_one_len(name, parent, namelen);
        if (!IS_ERR(dentry) && dentry->d_inode == NULL) {
                dput(dentry);
@@ -1267,17 +1292,19 @@ osd_ios_new_item(struct osd_device *dev, struct dentry *dentry,
                 scandir_t scandir, filldir_t filldir)
 {
        struct osd_ios_item *item;
+       ENTRY;
 
        OBD_ALLOC_PTR(item);
        if (item == NULL)
-               return -ENOMEM;
+               RETURN(-ENOMEM);
 
        CFS_INIT_LIST_HEAD(&item->oii_list);
        item->oii_dentry = dget(dentry);
        item->oii_scandir = scandir;
        item->oii_filldir = filldir;
        cfs_list_add_tail(&item->oii_list, &dev->od_ios_list);
-       return 0;
+
+       RETURN(0);
 }
 
 /**
@@ -1300,6 +1327,8 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev,
        int                      rc;
        ENTRY;
 
+       CDEBUG(D_LFSCK, "init scan one: ino = %ld\n", inode->i_ino);
+
        rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
        if (rc != 0 && rc != -ENODATA)
                RETURN(rc);
@@ -1323,7 +1352,7 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev,
                        RETURN(rc);
 
                rc = osd_scrub_refresh_mapping(info, dev, &tfid, id,
-                                              DTO_INDEX_INSERT);
+                                              DTO_INDEX_INSERT, true);
                RETURN(rc);
        }
 
@@ -1339,7 +1368,7 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev,
                        RETURN(rc);
        }
 
-       rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, DTO_INDEX_UPDATE);
+       rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, DTO_INDEX_UPDATE, true);
 
        RETURN(rc);
 }
@@ -1539,6 +1568,9 @@ static int osd_initial_OI_scrub(struct osd_thread_info *info,
        int                      rc;
        ENTRY;
 
+       /* Lookup IGIF in OI by force for initial OI scrub. */
+       dev->od_igif_inoi = 1;
+
        while (1) {
                rc = scandir(info, dev, dentry, filldir);
                if (item != NULL) {
@@ -1591,7 +1623,7 @@ static int osd_initial_OI_scrub(struct osd_thread_info *info,
                        dput(child);
                else if (PTR_ERR(child) == -ENOENT)
                        osd_scrub_refresh_mapping(info, dev, &map->olm_fid,
-                                                 NULL, DTO_INDEX_DELETE);
+                                                 NULL, DTO_INDEX_DELETE, true);
                map++;
        }
 
@@ -1864,6 +1896,13 @@ static struct dt_it *osd_otable_it_init(const struct lu_env *env,
                        start |= SS_CLEAR_FAILOUT;
        }
 
+       if (valid & DOIV_DRYRUN) {
+               if (flags & DOIF_DRYRUN)
+                       start |= SS_SET_DRYRUN;
+               else
+                       start |= SS_CLEAR_DRYRUN;
+       }
+
        rc = do_osd_scrub_start(dev, start);
        if (rc < 0 && rc != -EALREADY) {
                dev->od_otable_it = NULL;
@@ -2177,6 +2216,7 @@ static const char *scrub_flags_names[] = {
 
 static const char *scrub_param_names[] = {
        "failout",
+       "dryrun",
        NULL
 };
 
index 60a21a0..a192fef 100644 (file)
@@ -81,6 +81,9 @@ enum scrub_flags {
 enum scrub_param {
        /* Exit when fail. */
        SP_FAILOUT      = 0x0001,
+
+       /* Check only without repairing. */
+       SP_DRYRUN       = 0x0002,
 };
 
 enum scrub_start {
@@ -95,6 +98,12 @@ enum scrub_start {
 
        /* Trigger scrub automatically. */
        SS_AUTO                 = 0x00000008,
+
+       /* Set dryrun flag. */
+       SS_SET_DRYRUN           = 0x00000010,
+
+       /* Clear dryrun flag. */
+       SS_CLEAR_DRYRUN         = 0x00000020,
 };
 
 struct scrub_file {
index 5f49006..c5dc713 100644 (file)
@@ -39,6 +39,9 @@ check_and_setup_lustre
 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
 
+[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
+
 build_test_filter
 
 MDT_DEV="${FSNAME}-MDT0000"
@@ -801,6 +804,100 @@ test_11() {
 }
 run_test 11 "OI scrub skips the new created objects only once"
 
+test_15() {
+       scrub_prep 20
+       mds_backup_restore || error "(1) Fail to backup/restore!"
+
+       echo "starting MDTs with OI scrub disabled"
+       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
+               error "(2) Fail to start MDS!"
+
+       local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "init" ] ||
+               error "(3) Expect 'init', but got '$STATUS'"
+
+       local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
+       [ "$FLAGS" == "inconsistent" ] ||
+               error "(4) Expect 'inconsistent', but got '$FLAGS'"
+
+       # dryrun pass: flags must stay "inconsistent" and param must show "dryrun"
+       $START_SCRUB -n on || error "(5) Fail to start OI scrub!"
+       sleep 3
+
+       STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(6) Expect 'completed', but got '$STATUS'"
+
+       FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
+       [ "$FLAGS" == "inconsistent" ] ||
+               error "(7) Expect 'inconsistent', but got '$FLAGS'"
+
+       local PARAMS=$($SHOW_SCRUB | awk '/^param/ { print $2 }')
+       [ "$PARAMS" == "dryrun" ] ||
+               error "(8) Expect 'dryrun', but got '$PARAMS'"
+
+       local REPAIRED=$($SHOW_SCRUB | awk '/^updated/ { print $2 }')
+       [ $REPAIRED -lt 20 ] &&
+               error "(9) Expect at least 20 updated, but got '$REPAIRED'"
+
+       # second dryrun pass: expects the same results as the first one
+       $START_SCRUB -n on || error "(10) Fail to start OI scrub!"
+       sleep 3
+
+       STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(11) Expect 'completed', but got '$STATUS'"
+
+       FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
+       [ "$FLAGS" == "inconsistent" ] ||
+               error "(12) Expect 'inconsistent', but got '$FLAGS'"
+
+       PARAMS=$($SHOW_SCRUB | awk '/^param/ { print $2 }')
+       [ "$PARAMS" == "dryrun" ] ||
+               error "(13) Expect 'dryrun', but got '$PARAMS'"
+
+       REPAIRED=$($SHOW_SCRUB | awk '/^updated/ { print $2 }')
+       [ $REPAIRED -lt 20 ] &&
+               error "(14) Expect at least 20 updated, but got '$REPAIRED'"
+
+       # normal (dryrun off) pass: flags and param must be empty afterwards
+       $START_SCRUB -n off || error "(15) Fail to start OI scrub!"
+       sleep 3
+
+       STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(16) Expect 'completed', but got '$STATUS'"
+
+       FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
+       [ -z "$FLAGS" ] || error "(17) Expect empty flags, but got '$FLAGS'"
+
+       PARAMS=$($SHOW_SCRUB | awk '/^param/ { print $2 }')
+       [ -z "$PARAMS" ] || error "(18) Expect empty param, but got '$PARAMS'"
+
+       REPAIRED=$($SHOW_SCRUB | awk '/^updated/ { print $2 }')
+       [ $REPAIRED -lt 20 ] &&
+               error "(19) Expect at least 20 updated, but got '$REPAIRED'"
+
+       # second normal pass: expects 0 updated
+       $START_SCRUB -n off || error "(20) Fail to start OI scrub!"
+       sleep 3
+
+       STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(21) Expect 'completed', but got '$STATUS'"
+
+       FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
+       [ -z "$FLAGS" ] || error "(22) Expect empty flags, but got '$FLAGS'"
+
+       PARAMS=$($SHOW_SCRUB | awk '/^param/ { print $2 }')
+       [ -z "$PARAMS" ] || error "(23) Expect empty param, but got '$PARAMS'"
+
+       REPAIRED=$($SHOW_SCRUB | awk '/^updated/ { print $2 }')
+       [ $REPAIRED -eq 0 ] ||
+               error "(24) Expect 0 updated, but got '$REPAIRED'"
+}
+run_test 15 "Dryrun mode OI scrub"
+
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}