From: Fan Yong Date: Sun, 25 Aug 2013 14:11:25 +0000 (+0800) Subject: LU-3935 scrub: support dryrun mode OI scrub X-Git-Tag: 2.4.93~9 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=bfbc2ea49b14a56791911557e01f966acb58b8da LU-3935 scrub: support dryrun mode OI scrub The administrator can detect whether there are any inconsistent OI mappings, without repairing them, via the command: lctl lfsck_start --dryrun on -M <$device> In the default mode, dryrun is "off". When the device is mounted, the initial OI scrub will detect whether this is an upgrade case, a file-level backup/restore case, or an OI file(s) recreated case. If it finds some inconsistency, it will trigger a non-dryrun mode OI scrub automatically. If the administrator does NOT want such an automatic non-dryrun OI scrub, in order to avoid unexpected repairs, he (or she) can specify "-o noscrub" when mounting the device. Test-Parameters: testlist=sanity-scrub Signed-off-by: Fan Yong Change-Id: If00433aa40b43f0be284215009a2b01898fbad35 Reviewed-on: http://review.whamcloud.com/7720 Tested-by: Hudson Reviewed-by: Andreas Dilger Reviewed-by: wangdi Tested-by: Maloo --- diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index e538256..7e6032d 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -622,6 +622,7 @@ struct dt_index_operations { enum dt_otable_it_valid { DOIV_ERROR_HANDLE = 0x0001, + DOIV_DRYRUN = 0x0002, }; enum dt_otable_it_flags { @@ -633,6 +634,9 @@ enum dt_otable_it_flags { /* There is up layer component uses the iteration. */ DOIF_OUTUSED = 0x0004, + + /* Check only without repairing. */ + DOIF_DRYRUN = 0x0008, }; /* otable based iteration needs to use the common DT interation APIs. 
diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index dbd9485..0da2614 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -922,6 +922,10 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, } if (start->ls_valid & LSV_DRYRUN) { + valid |= DOIV_DRYRUN; + if (start->ls_flags & LPF_DRYRUN) + flags |= DOIF_DRYRUN; + if ((start->ls_flags & LPF_DRYRUN) && !(bk->lb_param & LPF_DRYRUN)) { bk->lb_param |= LPF_DRYRUN; @@ -994,8 +998,11 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, trigger: lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY; - if (bk->lb_param & LPF_DRYRUN) + if (bk->lb_param & LPF_DRYRUN) { lfsck->li_args_dir |= LUDA_VERIFY_DRYRUN; + valid |= DOIV_DRYRUN; + flags |= DOIF_DRYRUN; + } if (bk->lb_param & LPF_FAILOUT) { valid |= DOIV_ERROR_HANDLE; diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index a93baed..5eb8cee 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -1022,10 +1022,16 @@ out: if (!(bk->lb_param & LPF_FAILOUT)) rc = 0; } else { - if (repaired) + if (repaired) { ns->ln_items_repaired++; - else + if (bk->lb_param & LPF_DRYRUN && + lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) + lfsck_pos_fill(env, lfsck, + &ns->ln_pos_first_inconsistent, + false); + } else { com->lc_journal = 0; + } rc = 0; } up_write(&com->lc_sem); diff --git a/lustre/osd-ldiskfs/osd_compat.c b/lustre/osd-ldiskfs/osd_compat.c index 2dbd4e4..c6ac60cb 100644 --- a/lustre/osd-ldiskfs/osd_compat.c +++ b/lustre/osd-ldiskfs/osd_compat.c @@ -1120,15 +1120,28 @@ int osd_obj_map_recover(struct osd_thread_info *info, * * So keep it there before we have suitable solution. */ brelse(bh); - mutex_unlock(&dir->i_mutex); - mutex_unlock(&src_parent->i_mutex); rc = -EEXIST; /* If the src object has never been modified, then remove it. 
*/ if (inode->i_size == 0 && inode->i_mode & S_ISUID && - inode->i_mode & S_ISGID) - rc = vfs_unlink(src_parent, src_child); + inode->i_mode & S_ISGID) { + bh = osd_ldiskfs_find_entry(src_parent, src_child, &de, + NULL); + if (unlikely(bh == NULL)) { + mutex_unlock(&src_parent->i_mutex); + ldiskfs_journal_stop(jh); + RETURN(0); + } + + rc = ldiskfs_delete_entry(jh, src_parent, de, bh); + brelse(bh); + if (rc == 0) { + drop_nlink(inode); + ll_dirty_inode(inode, I_DIRTY_DATASYNC); + } + } + mutex_unlock(&src_parent->i_mutex); ldiskfs_journal_stop(jh); RETURN(rc); } diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 06363da..b7b1b2d 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -91,7 +91,8 @@ static int osd_scrub_refresh_mapping(struct osd_thread_info *info, struct osd_device *dev, const struct lu_fid *fid, const struct osd_inode_id *id, - int ops, enum oi_check_flags flags) + int ops, bool force, + enum oi_check_flags flags) { const struct lu_env *env = info->oti_env; struct thandle *th; @@ -99,6 +100,9 @@ static int osd_scrub_refresh_mapping(struct osd_thread_info *info, int rc; ENTRY; + if (dev->od_scrub.os_file.sf_param & SP_DRYRUN && !force) + RETURN(0); + th = dt_trans_create(env, &dev->od_dt_dev); if (IS_ERR(th)) RETURN(PTR_ERR(th)); @@ -279,7 +283,6 @@ void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags) memcpy(sf->sf_uuid, uuid, 16); sf->sf_status = SS_INIT; sf->sf_flags |= flags; - sf->sf_param = 0; sf->sf_run_time = 0; sf->sf_time_latest_start = 0; sf->sf_time_last_checkpoint = 0; @@ -362,65 +365,6 @@ int osd_scrub_file_store(struct osd_scrub *scrub) return rc; } -/* OI scrub APIs */ - -static int osd_scrub_prep(struct osd_device *dev) -{ - struct osd_scrub *scrub = &dev->od_scrub; - struct ptlrpc_thread *thread = &scrub->os_thread; - struct scrub_file *sf = &scrub->os_file; - __u32 flags = scrub->os_start_flags; - int rc; - ENTRY; - - down_write(&scrub->os_rwsem); 
- if (flags & SS_SET_FAILOUT) - sf->sf_param |= SP_FAILOUT; - - if (flags & SS_CLEAR_FAILOUT) - sf->sf_param &= ~SP_FAILOUT; - - if (flags & SS_RESET) - osd_scrub_file_reset(scrub, - LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0); - - if (flags & SS_AUTO) { - scrub->os_full_speed = 1; - sf->sf_flags |= SF_AUTO; - } else { - scrub->os_full_speed = 0; - } - - if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)) - scrub->os_full_speed = 1; - - scrub->os_in_prior = 0; - spin_lock(&scrub->os_lock); - scrub->os_waiting = 0; - scrub->os_paused = 0; - spin_unlock(&scrub->os_lock); - scrub->os_new_checked = 0; - if (sf->sf_pos_last_checkpoint != 0) - sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1; - else - sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1; - - scrub->os_pos_current = sf->sf_pos_latest_start; - sf->sf_status = SS_SCANNING; - sf->sf_time_latest_start = cfs_time_current_sec(); - sf->sf_time_last_checkpoint = sf->sf_time_latest_start; - rc = osd_scrub_file_store(scrub); - if (rc == 0) { - spin_lock(&scrub->os_lock); - thread_set_flags(thread, SVC_RUNNING); - spin_unlock(&scrub->os_lock); - wake_up_all(&thread->t_ctl_waitq); - } - up_write(&scrub->os_rwsem); - - RETURN(rc); -} - static int osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev, struct inode *inode, const struct lu_fid *fid) @@ -434,6 +378,9 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev, bool reset = true; ENTRY; + if (dev->od_scrub.os_file.sf_param & SP_DRYRUN) + RETURN(0); + /* We want the LMA to fit into the 256-byte OST inode, so operate * as following: * 1) read old XATTR_NAME_FID and save the parent FID; @@ -541,8 +488,6 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev, sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID; dev->od_check_ff = 1; rc = osd_scrub_convert_ff(info, dev, inode, fid); - rc = osd_ea_fid_set(info, inode, fid, - LMAC_FID_ON_OST, 0); if (rc != 0) GOTO(out, rc); @@ 
-581,9 +526,11 @@ iget: switch (val) { case SCRUB_NEXT_NOLMA: sf->sf_flags |= SF_UPGRADE; - rc = osd_ea_fid_set(info, inode, fid, 0, 0); - if (rc != 0) - GOTO(out, rc); + if (!(sf->sf_param & SP_DRYRUN)) { + rc = osd_ea_fid_set(info, inode, fid, 0, 0); + if (rc != 0) + GOTO(out, rc); + } if (!(sf->sf_flags & SF_INCONSISTENT)) dev->od_igif_inoi = 0; @@ -622,7 +569,7 @@ iget: dev->od_igif_inoi = 1; } - rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops, + rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops, false, (val == SCRUB_NEXT_OSTOBJ || val == SCRUB_NEXT_OSTOBJ_OLD) ? OI_KNOWN_ON_OST : 0); if (rc == 0) { @@ -652,7 +599,7 @@ out: if (ops == DTO_INDEX_INSERT && inode != NULL && !IS_ERR(inode) && unlikely(inode->i_nlink == 0)) osd_scrub_refresh_mapping(info, dev, fid, lid, - DTO_INDEX_DELETE, + DTO_INDEX_DELETE, false, (val == SCRUB_NEXT_OSTOBJ || val == SCRUB_NEXT_OSTOBJ_OLD) ? OI_KNOWN_ON_OST : 0); @@ -672,6 +619,78 @@ out: RETURN(sf->sf_param & SP_FAILOUT ? rc : 0); } +/* OI scrub APIs */ + +static int osd_scrub_prep(struct osd_device *dev) +{ + struct osd_scrub *scrub = &dev->od_scrub; + struct ptlrpc_thread *thread = &scrub->os_thread; + struct scrub_file *sf = &scrub->os_file; + __u32 flags = scrub->os_start_flags; + int rc; + bool drop_dryrun = false; + ENTRY; + + down_write(&scrub->os_rwsem); + if (flags & SS_SET_FAILOUT) + sf->sf_param |= SP_FAILOUT; + + if (flags & SS_CLEAR_FAILOUT) + sf->sf_param &= ~SP_FAILOUT; + + if (flags & SS_SET_DRYRUN) + sf->sf_param |= SP_DRYRUN; + + if (flags & SS_CLEAR_DRYRUN && sf->sf_param & SP_DRYRUN) { + sf->sf_param &= ~SP_DRYRUN; + drop_dryrun = true; + } + + if (flags & SS_RESET) + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0); + + if (flags & SS_AUTO) { + scrub->os_full_speed = 1; + sf->sf_flags |= SF_AUTO; + /* An auto-triggered OI scrub must repair, so it is never dryrun. 
*/ + sf->sf_param &= ~SP_DRYRUN; + } else { + scrub->os_full_speed = 0; + } + + if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)) + scrub->os_full_speed = 1; + + scrub->os_in_prior = 0; + spin_lock(&scrub->os_lock); + scrub->os_waiting = 0; + scrub->os_paused = 0; + spin_unlock(&scrub->os_lock); + scrub->os_new_checked = 0; + if (drop_dryrun && sf->sf_pos_first_inconsistent != 0) + sf->sf_pos_latest_start = sf->sf_pos_first_inconsistent; + else if (sf->sf_pos_last_checkpoint != 0) + sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1; + else + sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1; + + scrub->os_pos_current = sf->sf_pos_latest_start; + sf->sf_status = SS_SCANNING; + sf->sf_time_latest_start = cfs_time_current_sec(); + sf->sf_time_last_checkpoint = sf->sf_time_latest_start; + rc = osd_scrub_file_store(scrub); + if (rc == 0) { + spin_lock(&scrub->os_lock); + thread_set_flags(thread, SVC_RUNNING); + spin_unlock(&scrub->os_lock); + wake_up_all(&thread->t_ctl_waitq); + } + up_write(&scrub->os_rwsem); + + RETURN(rc); +} + static int osd_scrub_checkpoint(struct osd_scrub *scrub) { struct scrub_file *sf = &scrub->os_file; @@ -717,9 +736,11 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result) dev->od_igif_inoi = 1; dev->od_check_ff = 0; sf->sf_status = SS_COMPLETED; - memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE); - sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT | - SF_UPGRADE | SF_AUTO); + if (!(sf->sf_param & SP_DRYRUN)) { + memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE); + sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT | + SF_UPGRADE | SF_AUTO); + } sf->sf_time_last_complete = sf->sf_time_last_checkpoint; sf->sf_success_count++; } else if (result == 0) { @@ -789,7 +810,10 @@ static int osd_scrub_check_local_fldb(struct osd_thread_info *info, * a small local FLDB according to the . If the given FID * is in the local FLDB, then it is FID-on-OST; otherwise it's * quite possible for FID-on-MDT. 
*/ - return 0; + if (dev->od_is_ost) + return SCRUB_NEXT_OSTOBJ_OLD; + else + return 0; } static int osd_scrub_get_fid(struct osd_thread_info *info, @@ -1439,6 +1463,9 @@ osd_ios_lookup_one_len(const char *name, struct dentry *parent, int namelen) { struct dentry *dentry; + CDEBUG(D_LFSCK, "init lookup one: parent = %.*s, name = %.*s\n", + parent->d_name.len, parent->d_name.name, namelen, name); + dentry = ll_lookup_one_len(name, parent, namelen); if (!IS_ERR(dentry) && dentry->d_inode == NULL) { dput(dentry); @@ -1453,17 +1480,19 @@ osd_ios_new_item(struct osd_device *dev, struct dentry *dentry, scandir_t scandir, filldir_t filldir) { struct osd_ios_item *item; + ENTRY; OBD_ALLOC_PTR(item); if (item == NULL) - return -ENOMEM; + RETURN(-ENOMEM); CFS_INIT_LIST_HEAD(&item->oii_list); item->oii_dentry = dget(dentry); item->oii_scandir = scandir; item->oii_filldir = filldir; cfs_list_add_tail(&item->oii_list, &dev->od_ios_list); - return 0; + + RETURN(0); } /** @@ -1486,6 +1515,8 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev, int rc; ENTRY; + CDEBUG(D_LFSCK, "init scan one: ino = %ld\n", inode->i_ino); + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); if (rc != 0 && rc != -ENODATA) RETURN(rc); @@ -1512,7 +1543,7 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev, RETURN(rc); rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, - DTO_INDEX_INSERT, 0); + DTO_INDEX_INSERT, true, 0); if (rc > 0) rc = 0; @@ -1532,7 +1563,7 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev, } rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, - DTO_INDEX_UPDATE, 0); + DTO_INDEX_UPDATE, true, 0); if (rc > 0) rc = 0; @@ -1828,6 +1859,9 @@ static int osd_initial_OI_scrub(struct osd_thread_info *info, int rc; ENTRY; + /* Lookup IGIF in OI by force for initial OI scrub. 
*/ + dev->od_igif_inoi = 1; + while (1) { rc = scandir(info, dev, dentry, filldir); if (item != NULL) { @@ -1880,7 +1914,8 @@ static int osd_initial_OI_scrub(struct osd_thread_info *info, dput(child); else if (PTR_ERR(child) == -ENOENT) osd_scrub_refresh_mapping(info, dev, &map->olm_fid, - NULL, DTO_INDEX_DELETE, 0); + NULL, DTO_INDEX_DELETE, + true, 0); map++; } @@ -2099,7 +2134,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) !(sf->sf_internal_flags & SIF_NO_HANDLE_OLD_FID || sf->sf_success_count > 0)) { dev->od_igif_inoi = 0; - dev->od_check_ff = 1; + dev->od_check_ff = dev->od_is_ost; } else { dev->od_igif_inoi = 1; dev->od_check_ff = 0; @@ -2192,6 +2227,13 @@ static struct dt_it *osd_otable_it_init(const struct lu_env *env, start |= SS_CLEAR_FAILOUT; } + if (valid & DOIV_DRYRUN) { + if (flags & DOIF_DRYRUN) + start |= SS_SET_DRYRUN; + else + start |= SS_CLEAR_DRYRUN; + } + rc = do_osd_scrub_start(dev, start); if (rc < 0 && rc != -EALREADY) { dev->od_otable_it = NULL; @@ -2480,6 +2522,7 @@ static const char *scrub_flags_names[] = { static const char *scrub_param_names[] = { "failout", + "dryrun", NULL }; diff --git a/lustre/osd-ldiskfs/osd_scrub.h b/lustre/osd-ldiskfs/osd_scrub.h index 03d90ab..3c32dcb 100644 --- a/lustre/osd-ldiskfs/osd_scrub.h +++ b/lustre/osd-ldiskfs/osd_scrub.h @@ -81,6 +81,9 @@ enum scrub_flags { enum scrub_param { /* Exit when fail. */ SP_FAILOUT = 0x0001, + + /* Check only without repairing. */ + SP_DRYRUN = 0x0002, }; enum scrub_start { @@ -95,6 +98,12 @@ enum scrub_start { /* Trigger scrub automatically. */ SS_AUTO = 0x00000008, + + /* Set dryrun flag. */ + SS_SET_DRYRUN = 0x00000010, + + /* Clear dryrun flag. */ + SS_CLEAR_DRYRUN = 0x00000020, }; /* The flags here are only used inside OSD, NOT be visible by dump(). 
*/ diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 4db0e25..c12ab5c 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -178,6 +178,46 @@ scrub_check_flags() { done } +scrub_check_params() { + local error_id=$1 + local expected=$2 + local actual + local n + + for n in $(seq $MDSCOUNT); do + actual=$(do_facet mds$n $LCTL get_param -n \ + osd-ldiskfs.$(facet_svc mds$n).oi_scrub | + awk '/^param/ { print $2 }') + if [ "$actual" != "$expected" ]; then + error "($error_id) Expected '$expected' on mds$n, but" \ + "got '$actual'" + fi + done +} + +scrub_check_repaired() { + local error_id=$1 + local expected=$2 + local actual + local n + + for n in $(seq $MDSCOUNT); do + actual=$(do_facet mds$n $LCTL get_param -n \ + osd-ldiskfs.$(facet_svc mds$n).oi_scrub | + awk '/^updated/ { print $2 }') + + if [ $expected -eq 0 -a $actual -ne 0 ]; then + error "($error_id) Expected no repaired on mds$n, but" \ + "got '$actual'" + fi + + if [ $expected -ne 0 -a $actual -lt $expected ]; then + error "($error_id) Expected '$expected' on mds$n, but" \ + "got '$actual'" + fi + done +} + scrub_check_data() { local error_id=$1 local n @@ -958,6 +998,48 @@ test_14() { } run_test 14 "OI scrub can repair objects under lost+found" +test_15() { + scrub_prep 20 + scrub_backup_restore 1 + echo "starting MDTs with OI scrub disabled" + scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" + scrub_check_status 3 init + scrub_check_flags 4 inconsistent + + # run under dryrun mode + scrub_start 5 -n on + sleep 3 + scrub_check_status 6 completed + scrub_check_flags 7 inconsistent + scrub_check_params 8 dryrun + scrub_check_repaired 9 20 + + # run under dryrun mode again + scrub_start 10 -n on + sleep 3 + scrub_check_status 11 completed + scrub_check_flags 12 inconsistent + scrub_check_params 13 dryrun + scrub_check_repaired 14 20 + + # run under normal mode + scrub_start 15 -n off + sleep 3 + scrub_check_status 16 completed + scrub_check_flags 17 "" + 
scrub_check_params 18 "" + scrub_check_repaired 19 20 + + # run under normal mode again + scrub_start 20 -n off + sleep 3 + scrub_check_status 21 completed + scrub_check_flags 22 "" + scrub_check_params 23 "" + scrub_check_repaired 24 0 +} +run_test 15 "Dryrun mode OI scrub" + # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE}