From 6b0fa766a4444cf655e965aba067a07143101966 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sun, 28 Sep 2014 07:10:34 +0800 Subject: [PATCH] LU-6012 scrub: NOT miss to auto detect inconsistent OI mapping When full scrub is triggered automatically, its flags should be set as SF_INCONSISTENT. For lookup case, we should check whether current OI mapping is consistent or not, even if the current OI scrub flags is NOT SF_INCONSISTENT. Signed-off-by: Fan Yong Change-Id: I99ea077ae79fcdfedd7bb16c2a664714e0ea5ea3 Reviewed-on: http://review.whamcloud.com/13020 Tested-by: Jenkins Tested-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Andreas Dilger --- lustre/osd-ldiskfs/osd_handler.c | 35 +++++--------------- lustre/osd-ldiskfs/osd_internal.h | 4 +-- lustre/osd-ldiskfs/osd_scrub.c | 70 +++++++++++++++++++++++++-------------- lustre/tests/sanity-scrub.sh | 7 +++- 4 files changed, 62 insertions(+), 54 deletions(-) diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index d958695..e26fb73 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -4132,6 +4132,9 @@ osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev, if (!fid_is_norm(fid) && !fid_is_igif(fid)) RETURN_EXIT; + if (scrub->os_pos_current > id->oii_ino) + RETURN_EXIT; + again: rc = osd_oi_lookup(oti, dev, fid, id, OI_CHECK_FLD); if (rc != 0 && rc != -ENOENT) @@ -4191,16 +4194,14 @@ static int osd_fail_fid_lookup(struct osd_thread_info *oti, return rc; } -int osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd, - struct osd_inode_id *id, const struct lu_fid *fid) +void osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd, + struct osd_inode_id *id, const struct lu_fid *fid) { CDEBUG(D_INODE, "add "DFID" %u:%u to info %p\n", PFID(fid), id->oii_ino, id->oii_gen, info); info->oti_cache.oic_lid = *id; info->oti_cache.oic_fid = *fid; info->oti_cache.oic_dev = osd; - - return 0; } /** 
@@ -4321,8 +4322,6 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, struct osd_inode_id *id = &oti->oti_id; struct osd_idmap_cache *oic = &oti->oti_cache; struct osd_device *dev = osd_obj2dev(obj); - struct osd_scrub *scrub = &dev->od_scrub; - struct scrub_file *sf = &scrub->os_file; ino = le32_to_cpu(de->inode); if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP)) { @@ -4355,16 +4354,8 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, if (osd_remote_fid(env, dev, fid)) GOTO(out, rc = 0); - rc = osd_add_oi_cache(osd_oti_get(env), osd_obj2dev(obj), id, - fid); - if (rc != 0) - GOTO(out, rc); - if ((scrub->os_pos_current <= ino) && - ((sf->sf_flags & SF_INCONSISTENT) || - (sf->sf_flags & SF_UPGRADE && fid_is_igif(fid)) || - ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), - sf->sf_oi_bitmap))) - osd_consistency_check(oti, dev, oic); + osd_add_oi_cache(osd_oti_get(env), osd_obj2dev(obj), id, fid); + osd_consistency_check(oti, dev, oic); } else { rc = -ENOENT; } @@ -5586,11 +5577,8 @@ static inline int osd_it_ea_rec(const struct lu_env *env, struct osd_it_ea *it = (struct osd_it_ea *)di; struct osd_object *obj = it->oie_obj; struct osd_device *dev = osd_obj2dev(obj); - struct osd_scrub *scrub = &dev->od_scrub; - struct scrub_file *sf = &scrub->os_file; struct osd_thread_info *oti = osd_oti_get(env); struct osd_inode_id *id = &oti->oti_id; - struct osd_idmap_cache *oic = &oti->oti_cache; struct lu_fid *fid = &it->oie_dirent->oied_fid; struct lu_dirent *lde = (struct lu_dirent *)dtrec; __u32 ino = it->oie_dirent->oied_ino; @@ -5644,14 +5632,7 @@ static inline int osd_it_ea_rec(const struct lu_env *env, RETURN(0); if (likely(!(attr & LUDA_IGNORE) && rc == 0)) - rc = osd_add_oi_cache(oti, dev, id, fid); - - if (!(attr & LUDA_VERIFY) && - (scrub->os_pos_current <= ino) && - ((sf->sf_flags & SF_INCONSISTENT) || - (sf->sf_flags & SF_UPGRADE && fid_is_igif(fid)) || - ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap))) - 
osd_consistency_check(oti, dev, oic); + osd_add_oi_cache(oti, dev, id, fid); RETURN(rc > 0 ? 0 : rc); } diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 65ae8c4..ed3571f 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -662,8 +662,8 @@ int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode, const struct lu_fid *fid, __u32 compat, __u32 incompat); int osd_get_lma(struct osd_thread_info *info, struct inode *inode, struct dentry *dentry, struct lustre_mdt_attrs *lma); -int osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd, - struct osd_inode_id *id, const struct lu_fid *fid); +void osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd, + struct osd_inode_id *id, const struct lu_fid *fid); int osd_get_idif(struct osd_thread_info *info, struct inode *inode, struct dentry *dentry, struct lu_fid *fid); diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index bc7bf68..77e5639 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -271,11 +271,14 @@ void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags) { struct scrub_file *sf = &scrub->os_file; - CDEBUG(D_LFSCK, "%.16s: reset OI scrub file, flags = "LPX64"\n", - osd_scrub2name(scrub), flags); + CDEBUG(D_LFSCK, "%.16s: reset OI scrub file, old flags = " + LPX64", add flags = "LPX64"\n", + osd_scrub2name(scrub), sf->sf_flags, flags); + memcpy(sf->sf_uuid, uuid, 16); sf->sf_status = SS_INIT; sf->sf_flags |= flags; + sf->sf_flags &= ~SF_AUTO; sf->sf_run_time = 0; sf->sf_time_latest_start = 0; sf->sf_time_last_checkpoint = 0; @@ -633,6 +636,9 @@ static int osd_scrub_prep(struct osd_device *dev) bool drop_dryrun = false; ENTRY; + CDEBUG(D_LFSCK, "%.16s: OI scrub prep, flags = 0x%x\n", + osd_scrub2name(scrub), flags); + down_write(&scrub->os_rwsem); if (flags & SS_SET_FAILOUT) sf->sf_param |= SP_FAILOUT; @@ -686,6 +692,7 @@ 
static int osd_scrub_prep(struct osd_device *dev) sf->sf_status = SS_SCANNING; sf->sf_time_latest_start = cfs_time_current_sec(); sf->sf_time_last_checkpoint = sf->sf_time_latest_start; + sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1; rc = osd_scrub_file_store(scrub); if (rc == 0) { spin_lock(&scrub->os_lock); @@ -726,6 +733,9 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result) struct scrub_file *sf = &scrub->os_file; ENTRY; + CDEBUG(D_LFSCK, "%.16s: OI scrub post, result = %d\n", + osd_scrub2name(scrub), result); + down_write(&scrub->os_rwsem); spin_lock(&scrub->os_lock); thread_set_flags(&scrub->os_thread, SVC_STOPPING); @@ -1142,7 +1152,8 @@ static int osd_preload_exec(struct osd_thread_info *info, #define SCRUB_IT_ALL 1 #define SCRUB_IT_CRASH 2 -static void osd_scrub_join(struct osd_device *dev, __u32 flags) +static void osd_scrub_join(struct osd_device *dev, __u32 flags, + bool inconsistent) { struct osd_scrub *scrub = &dev->od_scrub; struct ptlrpc_thread *thread = &scrub->os_thread; @@ -1164,12 +1175,19 @@ static void osd_scrub_join(struct osd_device *dev, __u32 flags) else if (flags & SS_CLEAR_DRYRUN) sf->sf_param &= ~SP_DRYRUN; - if (flags & SS_RESET) + if (flags & SS_RESET) { osd_scrub_file_reset(scrub, - LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0); + LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, + inconsistent ? 
SF_INCONSISTENT : 0); + sf->sf_status = SS_SCANNING; + } + + if (flags & SS_AUTO_FULL) { + sf->sf_flags |= SF_AUTO; + scrub->os_full_speed = 1; + } - if (flags & SS_AUTO_FULL || - sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)) + if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)) scrub->os_full_speed = 1; else scrub->os_full_speed = 0; @@ -1183,6 +1201,7 @@ static void osd_scrub_join(struct osd_device *dev, __u32 flags) scrub->os_pos_current = sf->sf_pos_latest_start; sf->sf_time_latest_start = cfs_time_current_sec(); sf->sf_time_last_checkpoint = sf->sf_time_latest_start; + sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1; rc = osd_scrub_file_store(scrub); if (rc != 0) CDEBUG(D_LFSCK, "%.16s: fail to store scrub file when join " @@ -1235,7 +1254,7 @@ static int osd_inode_iteration(struct osd_thread_info *info, RETURN(-EINVAL); case SCRUB_NEXT_WAIT: { struct kstatfs *ksfs = &info->oti_ksfs; - __u64 used; + __u64 saved_flags; if (dev->od_full_scrub_ratio == OFSR_NEVER || unlikely(sf->sf_items_updated_prior == 0)) @@ -1243,21 +1262,24 @@ static int osd_inode_iteration(struct osd_thread_info *info, if (dev->od_full_scrub_ratio == OFSR_DIRECTLY || scrub->os_full_scrub) { - osd_scrub_join(dev, SS_AUTO_FULL | SS_RESET); + osd_scrub_join(dev, SS_AUTO_FULL | SS_RESET, + true); goto full; } rc = param.sb->s_op->statfs(param.sb->s_root, ksfs); - if (rc != 0) - goto wait; - - used = ksfs->f_files - ksfs->f_ffree; - do_div(used, sf->sf_items_updated_prior); - /* If we hit too much inconsistent OI mappings during - * the partial scan, then scan the device completely. */ - if (used < dev->od_full_scrub_ratio) { - osd_scrub_join(dev, SS_AUTO_FULL | SS_RESET); - goto full; + if (rc == 0) { + __u64 used = ksfs->f_files - ksfs->f_ffree; + + do_div(used, sf->sf_items_updated_prior); + /* If we hit too much inconsistent OI + * mappings during the partial scan, + * then scan the device completely. 
*/ + if (used < dev->od_full_scrub_ratio) { + osd_scrub_join(dev, + SS_AUTO_FULL | SS_RESET, true); + goto full; + } } wait: @@ -1265,15 +1287,18 @@ wait: cfs_fail_val > 0) continue; - sf->sf_status = SS_COMPLETED; + saved_flags = sf->sf_flags; sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE | SF_AUTO); + sf->sf_status = SS_COMPLETED; l_wait_event(thread->t_ctl_waitq, !thread_is_running(thread) || !scrub->os_partial_scan || scrub->os_in_join || !list_empty(&scrub->os_inconsistent_items), &lwi); + sf->sf_flags = saved_flags; + sf->sf_status = SS_SCANNING; if (unlikely(!thread_is_running(thread))) RETURN(0); @@ -1286,8 +1311,6 @@ wait: default: LASSERTF(rc == 0, "rc = %d\n", rc); - sf->sf_status = SS_SCANNING; - sf->sf_flags |= SF_AUTO; osd_scrub_exec(info, dev, &param, oic, &noslot, rc); break; } @@ -1295,7 +1318,6 @@ wait: full: if (!preload) { - sf->sf_status = SS_SCANNING; l_wait_event(thread->t_ctl_waitq, !thread_is_running(thread) || !scrub->os_in_join, &lwi); @@ -2201,7 +2223,7 @@ again: if (!scrub->os_partial_scan || flags & SS_AUTO_PARTIAL) RETURN(-EALREADY); - osd_scrub_join(dev, flags); + osd_scrub_join(dev, flags, false); spin_lock(&scrub->os_lock); if (!thread_is_running(thread)) goto again; diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 860e082..14440d9 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -129,6 +129,10 @@ scrub_prep() { fi cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n || error "Failed to copy files to mds$n" + mkdir -p $DIR/$tdir/mds$n/d_$tfile || + error "mkdir failed on mds$n" + createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \ + /dev/null || error "create failed on mds$n" if [[ $nfiles -gt 0 ]]; then createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \ /dev/null || error "createmany failed on mds$n" @@ -495,7 +499,7 @@ test_4b() { done for n in $(seq $MDSCOUNT); do - ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) 
fail to ls" done sleep 3 @@ -624,6 +628,7 @@ test_5() { scrub_check_status 15 failed mount_client $MOUNT || error "(16) Fail to start client!" + full_scrub_ratio 0 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_val=3 fail_loc=0x190 -- 1.8.3.1