From e0061edae5f15f2382c05cdd645c787f1dc45220 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Wed, 23 Oct 2013 06:52:40 +0800 Subject: [PATCH] LU-3935 scrub: support dryrun mode OI scrub The administrator can detect whether there are any inconsistent OI mappings, without repairing them, via the command: lctl lfsck_start --dryrun on -M <$device> By default, dryrun is "off". When the device is mounted, the initial OI scrub will detect whether there is an upgrade case, a file-level backup/restore case, or an OI file(s) recreated case. If some inconsistency is found, it will trigger non-dryrun mode OI scrub automatically. If the administrator does NOT want such an automatic non-dryrun OI scrub, in order to avoid unexpected repairing, he (or she) can specify "-o noscrub" when mounting the device. Test-Parameters: testlist=sanity-scrub Signed-off-by: Fan Yong Change-Id: Ibab79c543dbb5b69789b688cacca10737a710f83 Reviewed-on: http://review.whamcloud.com/7867 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/dt_object.h | 4 + lustre/mdd/mdd_lfsck.c | 9 +- lustre/osd-ldiskfs/osd_scrub.c | 184 +++++++++++++++++++++++++---------------- lustre/osd-ldiskfs/osd_scrub.h | 9 ++ lustre/tests/sanity-scrub.sh | 97 ++++++++++++++++++++++ 5 files changed, 230 insertions(+), 73 deletions(-) diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 284184d..0db86c5 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -622,6 +622,7 @@ struct dt_index_operations { enum dt_otable_it_valid { DOIV_ERROR_HANDLE = 0x0001, + DOIV_DRYRUN = 0x0002, }; enum dt_otable_it_flags { @@ -633,6 +634,9 @@ enum dt_otable_it_flags { /* There is up layer component uses the iteration. */ DOIF_OUTUSED = 0x0004, + + /* Check only without repairing. */ + DOIF_DRYRUN = 0x0008, }; /* otable based iteration needs to use the common DT interation APIs. 
diff --git a/lustre/mdd/mdd_lfsck.c b/lustre/mdd/mdd_lfsck.c index b65561c..0db8364 100644 --- a/lustre/mdd/mdd_lfsck.c +++ b/lustre/mdd/mdd_lfsck.c @@ -2718,6 +2718,10 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck, } if (start->ls_valid & LSV_DRYRUN) { + valid |= DOIV_DRYRUN; + if (start->ls_flags & LPF_DRYRUN) + flags |= DOIF_DRYRUN; + if ((start->ls_flags & LPF_DRYRUN) && !(bk->lb_param & LPF_DRYRUN)) { bk->lb_param |= LPF_DRYRUN; @@ -2790,8 +2794,11 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck, trigger: lfsck->ml_args_dir = LUDA_64BITHASH | LUDA_VERIFY; - if (bk->lb_param & LPF_DRYRUN) + if (bk->lb_param & LPF_DRYRUN) { lfsck->ml_args_dir |= LUDA_VERIFY_DRYRUN; + valid |= DOIV_DRYRUN; + flags |= DOIF_DRYRUN; + } if (bk->lb_param & LPF_FAILOUT) { valid |= DOIV_ERROR_HANDLE; diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 34a2db1..0dadb8d 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -89,7 +89,8 @@ static inline int osd_scrub_has_window(struct osd_scrub *scrub, static int osd_scrub_refresh_mapping(struct osd_thread_info *info, struct osd_device *dev, const struct lu_fid *fid, - const struct osd_inode_id *id, int ops) + const struct osd_inode_id *id, + int ops, bool force) { struct lu_fid *oi_fid = &info->oti_fid2; struct osd_inode_id *oi_id = &info->oti_id2; @@ -99,6 +100,9 @@ static int osd_scrub_refresh_mapping(struct osd_thread_info *info, int rc; ENTRY; + if (dev->od_scrub.os_file.sf_param & SP_DRYRUN && !force) + RETURN(0); + fid_cpu_to_be(oi_fid, fid); if (id != NULL) osd_id_pack(oi_id, id); @@ -272,7 +276,6 @@ void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags) memcpy(sf->sf_uuid, uuid, 16); sf->sf_status = SS_INIT; sf->sf_flags |= flags; - sf->sf_param = 0; sf->sf_run_time = 0; sf->sf_time_latest_start = 0; sf->sf_time_last_checkpoint = 0; @@ -355,63 +358,6 @@ int osd_scrub_file_store(struct osd_scrub 
*scrub) return rc; } -/* OI scrub APIs */ - -static int osd_scrub_prep(struct osd_device *dev) -{ - struct osd_scrub *scrub = &dev->od_scrub; - struct ptlrpc_thread *thread = &scrub->os_thread; - struct scrub_file *sf = &scrub->os_file; - __u32 flags = scrub->os_start_flags; - int rc; - ENTRY; - - down_write(&scrub->os_rwsem); - if (flags & SS_SET_FAILOUT) - sf->sf_param |= SP_FAILOUT; - - if (flags & SS_CLEAR_FAILOUT) - sf->sf_param &= ~SP_FAILOUT; - - if (flags & SS_RESET) - osd_scrub_file_reset(scrub, - LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0); - - if (flags & SS_AUTO) { - scrub->os_full_speed = 1; - sf->sf_flags |= SF_AUTO; - } else { - scrub->os_full_speed = 0; - } - - if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)) - scrub->os_full_speed = 1; - - scrub->os_in_prior = 0; - scrub->os_waiting = 0; - scrub->os_paused = 0; - scrub->os_new_checked = 0; - if (sf->sf_pos_last_checkpoint != 0) - sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1; - else - sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1; - - scrub->os_pos_current = sf->sf_pos_latest_start; - sf->sf_status = SS_SCANNING; - sf->sf_time_latest_start = cfs_time_current_sec(); - sf->sf_time_last_checkpoint = sf->sf_time_latest_start; - rc = osd_scrub_file_store(scrub); - if (rc == 0) { - spin_lock(&scrub->os_lock); - thread_set_flags(thread, SVC_RUNNING); - spin_unlock(&scrub->os_lock); - cfs_waitq_broadcast(&thread->t_ctl_waitq); - } - up_write(&scrub->os_rwsem); - - RETURN(rc); -} - static int osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev, struct osd_idmap_cache *oic, int val) @@ -481,9 +427,11 @@ iget: if (val == SCRUB_NEXT_NOLMA) { sf->sf_flags |= SF_UPGRADE; scrub->os_full_speed = 1; - rc = osd_ea_fid_set(info, inode, fid, 0); - if (rc != 0) - GOTO(out, rc); + if (!(sf->sf_param & SP_DRYRUN)) { + rc = osd_ea_fid_set(info, inode, fid, 0); + if (rc != 0) + GOTO(out, rc); + } if (!(sf->sf_flags & SF_INCONSISTENT)) dev->od_igif_inoi = 0; 
@@ -514,7 +462,7 @@ iget: dev->od_igif_inoi = 1; } - rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops); + rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops, false); if (rc == 0) { if (scrub->os_in_prior) sf->sf_items_updated_prior++; @@ -539,7 +487,7 @@ out: * if happend, then remove the new added OI mapping. */ if (unlikely(inode->i_nlink == 0)) osd_scrub_refresh_mapping(info, dev, fid, lid, - DTO_INDEX_DELETE); + DTO_INDEX_DELETE, false); iput(inode); } up_write(&scrub->os_rwsem); @@ -555,6 +503,78 @@ out: RETURN(sf->sf_param & SP_FAILOUT ? rc : 0); } +/* OI scrub APIs */ + +static int osd_scrub_prep(struct osd_device *dev) +{ + struct osd_scrub *scrub = &dev->od_scrub; + struct ptlrpc_thread *thread = &scrub->os_thread; + struct scrub_file *sf = &scrub->os_file; + __u32 flags = scrub->os_start_flags; + int rc; + bool drop_dryrun = false; + ENTRY; + + down_write(&scrub->os_rwsem); + if (flags & SS_SET_FAILOUT) + sf->sf_param |= SP_FAILOUT; + + if (flags & SS_CLEAR_FAILOUT) + sf->sf_param &= ~SP_FAILOUT; + + if (flags & SS_SET_DRYRUN) + sf->sf_param |= SP_DRYRUN; + + if (flags & SS_CLEAR_DRYRUN && sf->sf_param & SP_DRYRUN) { + sf->sf_param &= ~SP_DRYRUN; + drop_dryrun = true; + } + + if (flags & SS_RESET) + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0); + + if (flags & SS_AUTO) { + scrub->os_full_speed = 1; + sf->sf_flags |= SF_AUTO; + /* For the case of OI scrub auto triggered, NOT dryrun. 
*/ + sf->sf_param &= ~SP_DRYRUN; + } else { + scrub->os_full_speed = 0; + } + + if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)) + scrub->os_full_speed = 1; + + scrub->os_in_prior = 0; + spin_lock(&scrub->os_lock); + scrub->os_waiting = 0; + scrub->os_paused = 0; + spin_unlock(&scrub->os_lock); + scrub->os_new_checked = 0; + if (drop_dryrun && sf->sf_pos_first_inconsistent != 0) + sf->sf_pos_latest_start = sf->sf_pos_first_inconsistent; + else if (sf->sf_pos_last_checkpoint != 0) + sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1; + else + sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1; + + scrub->os_pos_current = sf->sf_pos_latest_start; + sf->sf_status = SS_SCANNING; + sf->sf_time_latest_start = cfs_time_current_sec(); + sf->sf_time_last_checkpoint = sf->sf_time_latest_start; + rc = osd_scrub_file_store(scrub); + if (rc == 0) { + spin_lock(&scrub->os_lock); + thread_set_flags(thread, SVC_RUNNING); + spin_unlock(&scrub->os_lock); + wake_up_all(&thread->t_ctl_waitq); + } + up_write(&scrub->os_rwsem); + + RETURN(rc); +} + static int osd_scrub_checkpoint(struct osd_scrub *scrub) { struct scrub_file *sf = &scrub->os_file; @@ -599,9 +619,11 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result) dev->od_igif_inoi = 1; sf->sf_status = SS_COMPLETED; - memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE); - sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT | - SF_UPGRADE | SF_AUTO); + if (!(sf->sf_param & SP_DRYRUN)) { + memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE); + sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT | + SF_UPGRADE | SF_AUTO); + } sf->sf_time_last_complete = sf->sf_time_last_checkpoint; sf->sf_success_count++; } else if (result == 0) { @@ -1227,6 +1249,9 @@ osd_ios_lookup_one_len(const char *name, struct dentry *parent, int namelen) { struct dentry *dentry; + CDEBUG(D_LFSCK, "init lookup one: parent = %.*s, name = %.*s\n", + parent->d_name.len, parent->d_name.name, namelen, name); + dentry = 
ll_lookup_one_len(name, parent, namelen); if (!IS_ERR(dentry) && dentry->d_inode == NULL) { dput(dentry); @@ -1267,17 +1292,19 @@ osd_ios_new_item(struct osd_device *dev, struct dentry *dentry, scandir_t scandir, filldir_t filldir) { struct osd_ios_item *item; + ENTRY; OBD_ALLOC_PTR(item); if (item == NULL) - return -ENOMEM; + RETURN(-ENOMEM); CFS_INIT_LIST_HEAD(&item->oii_list); item->oii_dentry = dget(dentry); item->oii_scandir = scandir; item->oii_filldir = filldir; cfs_list_add_tail(&item->oii_list, &dev->od_ios_list); - return 0; + + RETURN(0); } /** @@ -1300,6 +1327,8 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev, int rc; ENTRY; + CDEBUG(D_LFSCK, "init scan one: ino = %ld\n", inode->i_ino); + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); if (rc != 0 && rc != -ENODATA) RETURN(rc); @@ -1323,7 +1352,7 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev, RETURN(rc); rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, - DTO_INDEX_INSERT); + DTO_INDEX_INSERT, true); RETURN(rc); } @@ -1339,7 +1368,7 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev, RETURN(rc); } - rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, DTO_INDEX_UPDATE); + rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, DTO_INDEX_UPDATE, true); RETURN(rc); } @@ -1539,6 +1568,9 @@ static int osd_initial_OI_scrub(struct osd_thread_info *info, int rc; ENTRY; + /* Lookup IGIF in OI by force for initial OI scrub. 
*/ + dev->od_igif_inoi = 1; + while (1) { rc = scandir(info, dev, dentry, filldir); if (item != NULL) { @@ -1591,7 +1623,7 @@ static int osd_initial_OI_scrub(struct osd_thread_info *info, dput(child); else if (PTR_ERR(child) == -ENOENT) osd_scrub_refresh_mapping(info, dev, &map->olm_fid, - NULL, DTO_INDEX_DELETE); + NULL, DTO_INDEX_DELETE, true); map++; } @@ -1864,6 +1896,13 @@ static struct dt_it *osd_otable_it_init(const struct lu_env *env, start |= SS_CLEAR_FAILOUT; } + if (valid & DOIV_DRYRUN) { + if (flags & DOIF_DRYRUN) + start |= SS_SET_DRYRUN; + else + start |= SS_CLEAR_DRYRUN; + } + rc = do_osd_scrub_start(dev, start); if (rc < 0 && rc != -EALREADY) { dev->od_otable_it = NULL; @@ -2177,6 +2216,7 @@ static const char *scrub_flags_names[] = { static const char *scrub_param_names[] = { "failout", + "dryrun", NULL }; diff --git a/lustre/osd-ldiskfs/osd_scrub.h b/lustre/osd-ldiskfs/osd_scrub.h index 60a21a0..a192fef 100644 --- a/lustre/osd-ldiskfs/osd_scrub.h +++ b/lustre/osd-ldiskfs/osd_scrub.h @@ -81,6 +81,9 @@ enum scrub_flags { enum scrub_param { /* Exit when fail. */ SP_FAILOUT = 0x0001, + + /* Check only without repairing. */ + SP_DRYRUN = 0x0002, }; enum scrub_start { @@ -95,6 +98,12 @@ enum scrub_start { /* Trigger scrub automatically. */ SS_AUTO = 0x00000008, + + /* Set dryrun flag. */ + SS_SET_DRYRUN = 0x00000010, + + /* Clear dryrun flag. 
*/ + SS_CLEAR_DRYRUN = 0x00000020, }; struct scrub_file { diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 5f49006..c5dc713 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -39,6 +39,9 @@ check_and_setup_lustre [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a" +[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] && + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15" + build_test_filter MDT_DEV="${FSNAME}-MDT0000" @@ -801,6 +804,100 @@ test_11() { } run_test 11 "OI scrub skips the new created objects only once" +test_15() { + scrub_prep 20 + mds_backup_restore || error "(1) Fail to backup/restore!" + + echo "starting MDTs with OI scrub disabled" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "inconsistent" ] || + error "(4) Expect 'inconsistent', but got '$FLAGS'" + + # run under dryrun mode + $START_SCRUB -n on || error "(5) Fail to start OI scrub!" + sleep 3 + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(6) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "inconsistent" ] || + error "(7) Expect 'inconsistent', but got '$FLAGS'" + + local PARAMS=$($SHOW_SCRUB | awk '/^param/ { print $2 }') + [ "$PARAMS" == "dryrun" ] || + error "(8) Expect 'dryrun', but got '$PARAMS'" + + local REPAIRED=$($SHOW_SCRUB | awk '/^updated/ { print $2 }') + [ $REPAIRED -lt 20 ] && + error "(9) Expect at least 20 updated, but got '$REPAIRED'" + + # run under dryrun mode again + $START_SCRUB -n on || error "(10) Fail to start OI scrub!" 
+ sleep 3 + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(11) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "inconsistent" ] || + error "(12) Expect 'inconsistent', but got '$FLAGS'" + + PARAMS=$($SHOW_SCRUB | awk '/^param/ { print $2 }') + [ "$PARAMS" == "dryrun" ] || + error "(13) Expect 'dryrun', but got '$PARAMS'" + + REPAIRED=$($SHOW_SCRUB | awk '/^updated/ { print $2 }') + [ $REPAIRED -lt 20 ] && + error "(14) Expect at least 20 updated, but got '$REPAIRED'" + + # run under normal mode + $START_SCRUB -n off || error "(15) Fail to start OI scrub!" + sleep 3 + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(16) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || error "(17) Expect empty flags, but got '$FLAGS'" + + PARAMS=$($SHOW_SCRUB | awk '/^param/ { print $2 }') + [ -z "$PARAMS" ] || error "(18) Expect empty param, but got '$PARAMS'" + + REPAIRED=$($SHOW_SCRUB | awk '/^updated/ { print $2 }') + [ $REPAIRED -lt 20 ] && + error "(19) Expect at least 20 updated, but got '$REPAIRED'" + + # run under normal mode again + $START_SCRUB -n off || error "(20) Fail to start OI scrub!" 
+ sleep 3 + + STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(21) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || error "(22) Expect empty flags, but got '$FLAGS'" + + PARAMS=$($SHOW_SCRUB | awk '/^param/ { print $2 }') + [ -z "$PARAMS" ] || error "(23) Expect empty param, but got '$PARAMS'" + + REPAIRED=$($SHOW_SCRUB | awk '/^updated/ { print $2 }') + [ $REPAIRED -eq 0 ] || + error "(24) Expect 0 updated, but got '$REPAIRED'" +} +run_test 15 "Dryrun mode OI scrub" + # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} -- 1.8.3.1